// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * md.c : Multiple Devices driver for Linux
 */
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"
70
/* pers_list is a list of registered personalities protected by pers_lock.
 * pers_lock also protects accesses to mddev->thread when the
 * reconfig_mutex cannot be held.
 */
76static LIST_HEAD(pers_list);
77static DEFINE_SPINLOCK(pers_lock);
78
79static struct kobj_type md_ktype;
80
81struct md_cluster_operations *md_cluster_ops;
82EXPORT_SYMBOL(md_cluster_ops);
83static struct module *md_cluster_mod;
84
85static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
86static struct workqueue_struct *md_wq;
87static struct workqueue_struct *md_misc_wq;
88static struct workqueue_struct *md_rdev_misc_wq;
89
90static int remove_and_add_spares(struct mddev *mddev,
91 struct md_rdev *this);
92static void mddev_detach(struct mddev *mddev);
93
/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
99#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/* Default safemode delay: 200 msec */
101#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
102
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
115static int sysctl_speed_limit_min = 1000;
116static int sysctl_speed_limit_max = 200000;
117static inline int speed_min(struct mddev *mddev)
118{
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
121}
122
123static inline int speed_max(struct mddev *mddev)
124{
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
127}
128
129static void rdev_uninit_serial(struct md_rdev *rdev)
130{
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
132 return;
133
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
136}
137
138static void rdevs_uninit_serial(struct mddev *mddev)
139{
140 struct md_rdev *rdev;
141
142 rdev_for_each(rdev, mddev)
143 rdev_uninit_serial(rdev);
144}
145
146static int rdev_init_serial(struct md_rdev *rdev)
147{
        /* serial_nums equals BARRIER_BUCKETS_NR */
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
150 struct serial_in_rdev *serial = NULL;
151
152 if (test_bit(CollisionCheck, &rdev->flags))
153 return 0;
154
155 serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
156 GFP_KERNEL);
157 if (!serial)
158 return -ENOMEM;
159
160 for (i = 0; i < serial_nums; i++) {
161 struct serial_in_rdev *serial_tmp = &serial[i];
162
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
166 }
167
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
170
171 return 0;
172}
173
174static int rdevs_init_serial(struct mddev *mddev)
175{
176 struct md_rdev *rdev;
177 int ret = 0;
178
179 rdev_for_each(rdev, mddev) {
180 ret = rdev_init_serial(rdev);
181 if (ret)
182 break;
183 }
184
        /* Free all resources if the pool was never created */
186 if (ret && !mddev->serial_info_pool)
187 rdevs_uninit_serial(mddev);
188
189 return ret;
190}
191
/*
 * rdev needs to enable serialized I/O if it meets the conditions:
 * 1. it is a multi-queue device flagged with writemostly.
 * 2. the write-behind mode is enabled.
 */
197static int rdev_need_serial(struct md_rdev *rdev)
198{
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
202}
203
/*
 * Init resource for rdev(s), then create serial_info_pool if:
 * 1. rdev is the first device which returns true from rdev_need_serial.
 * 2. rdev is NULL, meaning we want to enable serialization for all rdevs.
 */
209void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
210 bool is_suspend)
211{
212 int ret = 0;
213
214 if (rdev && !rdev_need_serial(rdev) &&
215 !test_bit(CollisionCheck, &rdev->flags))
216 return;
217
218 if (!is_suspend)
219 mddev_suspend(mddev);
220
221 if (!rdev)
222 ret = rdevs_init_serial(mddev);
223 else
224 ret = rdev_init_serial(rdev);
225 if (ret)
226 goto abort;
227
228 if (mddev->serial_info_pool == NULL) {
                /*
                 * already in memalloc noio context by
                 * mddev_suspend()
                 */
233 mddev->serial_info_pool =
234 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
235 sizeof(struct serial_info));
236 if (!mddev->serial_info_pool) {
237 rdevs_uninit_serial(mddev);
238 pr_err("can't alloc memory pool for serialization\n");
239 }
240 }
241
242abort:
243 if (!is_suspend)
244 mddev_resume(mddev);
245}
246
/*
 * Free resources from rdev(s), and destroy serial_info_pool under conditions:
 * 1. rdev is the last device flagged with CollisionCheck.
 * 2. when bitmap is destroyed while policy is not enabled.
 * 3. for disable policy, the pool is destroyed only when no rdev needs it.
 */
253void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
254 bool is_suspend)
255{
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
257 return;
258
259 if (mddev->serial_info_pool) {
260 struct md_rdev *temp;
261 int num = 0;
262
263 if (!is_suspend)
264 mddev_suspend(mddev);
265 rdev_for_each(temp, mddev) {
266 if (!rdev) {
267 if (!mddev->serialize_policy ||
268 !rdev_need_serial(temp))
269 rdev_uninit_serial(temp);
270 else
271 num++;
272 } else if (temp != rdev &&
273 test_bit(CollisionCheck, &temp->flags))
274 num++;
275 }
276
277 if (rdev)
278 rdev_uninit_serial(rdev);
279
280 if (num)
281 pr_info("The mempool could be used by other devices\n");
282 else {
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
285 }
286 if (!is_suspend)
287 mddev_resume(mddev);
288 }
289}
290
291static struct ctl_table_header *raid_table_header;
292
293static struct ctl_table raid_table[] = {
294 {
295 .procname = "speed_limit_min",
296 .data = &sysctl_speed_limit_min,
297 .maxlen = sizeof(int),
298 .mode = S_IRUGO|S_IWUSR,
299 .proc_handler = proc_dointvec,
300 },
301 {
302 .procname = "speed_limit_max",
303 .data = &sysctl_speed_limit_max,
304 .maxlen = sizeof(int),
305 .mode = S_IRUGO|S_IWUSR,
306 .proc_handler = proc_dointvec,
307 },
308 { }
309};
310
311static struct ctl_table raid_dir_table[] = {
312 {
313 .procname = "raid",
314 .maxlen = 0,
315 .mode = S_IRUGO|S_IXUGO,
316 .child = raid_table,
317 },
318 { }
319};
320
321static struct ctl_table raid_root_table[] = {
322 {
323 .procname = "dev",
324 .maxlen = 0,
325 .mode = 0555,
326 .child = raid_dir_table,
327 },
328 { }
329};
330
331static int start_readonly;
332
/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter.
 */
341static bool create_on_open = true;
342
343struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
344 struct mddev *mddev)
345{
346 if (!mddev || !bioset_initialized(&mddev->bio_set))
347 return bio_alloc(gfp_mask, nr_iovecs);
348
349 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
350}
351EXPORT_SYMBOL_GPL(bio_alloc_mddev);
352
353static struct bio *md_bio_alloc_sync(struct mddev *mddev)
354{
355 if (!mddev || !bioset_initialized(&mddev->sync_set))
356 return bio_alloc(GFP_NOIO, 1);
357
358 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
359}
360
/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
371static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
372static atomic_t md_event_count;
373void md_new_event(struct mddev *mddev)
374{
375 atomic_inc(&md_event_count);
376 wake_up(&md_event_waiters);
377}
378EXPORT_SYMBOL_GPL(md_new_event);
379
/*
 * Enables iterating over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
384static LIST_HEAD(all_mddevs);
385static DEFINE_SPINLOCK(all_mddevs_lock);
386
/*
 * iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding
 * a reference to the current mddev must mddev_put it.
 */
394#define for_each_mddev(_mddev,_tmp) \
395 \
396 for (({ spin_lock(&all_mddevs_lock); \
397 _tmp = all_mddevs.next; \
398 _mddev = NULL;}); \
399 ({ if (_tmp != &all_mddevs) \
400 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
401 spin_unlock(&all_mddevs_lock); \
402 if (_mddev) mddev_put(_mddev); \
403 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
404 _tmp != &all_mddevs;}); \
405 ({ spin_lock(&all_mddevs_lock); \
406 _tmp = _tmp->next;}) \
407 )
408
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device
 * is being suspended pending removal of conflicting devices.
 *
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
416static bool is_suspended(struct mddev *mddev, struct bio *bio)
417{
418 if (mddev->suspended)
419 return true;
420 if (bio_data_dir(bio) != WRITE)
421 return false;
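        /* Writes are only held off while they overlap the partial-suspend
         * window [suspend_lo, suspend_hi); everything else may proceed.
         */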
422 if (mddev->suspend_lo >= mddev->suspend_hi)
423 return false;
424 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
425 return false;
426 if (bio_end_sector(bio) < mddev->suspend_lo)
427 return false;
428 return true;
429}
430
431void md_handle_request(struct mddev *mddev, struct bio *bio)
432{
433check_suspended:
434 rcu_read_lock();
435 if (is_suspended(mddev, bio)) {
436 DEFINE_WAIT(__wait);
437 for (;;) {
438 prepare_to_wait(&mddev->sb_wait, &__wait,
439 TASK_UNINTERRUPTIBLE);
440 if (!is_suspended(mddev, bio))
441 break;
442 rcu_read_unlock();
443 schedule();
444 rcu_read_lock();
445 }
446 finish_wait(&mddev->sb_wait, &__wait);
447 }
448 atomic_inc(&mddev->active_io);
449 rcu_read_unlock();
450
451 if (!mddev->pers->make_request(mddev, bio)) {
452 atomic_dec(&mddev->active_io);
453 wake_up(&mddev->sb_wait);
454 goto check_suspended;
455 }
456
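        /* Drop our active_io reference; if the array is suspending, wake the
         * waiter in mddev_suspend() once the last in-flight I/O drains.
         */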
457 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
458 wake_up(&mddev->sb_wait);
459}
460EXPORT_SYMBOL(md_handle_request);
461
462struct md_io {
463 struct mddev *mddev;
464 bio_end_io_t *orig_bi_end_io;
465 void *orig_bi_private;
466 unsigned long start_time;
467 struct block_device *part;
468};
469
470static void md_end_io(struct bio *bio)
471{
472 struct md_io *md_io = bio->bi_private;
473 struct mddev *mddev = md_io->mddev;
474
475 part_end_io_acct(md_io->part, bio, md_io->start_time);
476
477 bio->bi_end_io = md_io->orig_bi_end_io;
478 bio->bi_private = md_io->orig_bi_private;
479
480 mempool_free(md_io, &mddev->md_io_pool);
481
482 if (bio->bi_end_io)
483 bio->bi_end_io(bio);
484}
485
486static blk_qc_t md_submit_bio(struct bio *bio)
487{
488 const int rw = bio_data_dir(bio);
489 struct mddev *mddev = bio->bi_disk->private_data;
490
491 if (mddev == NULL || mddev->pers == NULL) {
492 bio_io_error(bio);
493 return BLK_QC_T_NONE;
494 }
495
496 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
497 bio_io_error(bio);
498 return BLK_QC_T_NONE;
499 }
500
501 blk_queue_split(&bio);
502
503 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
504 if (bio_sectors(bio) != 0)
505 bio->bi_status = BLK_STS_IOERR;
506 bio_endio(bio);
507 return BLK_QC_T_NONE;
508 }
509
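        /* Wrap the completion callback (only once per bio) so md_end_io() can
         * finish the per-partition I/O accounting started below.
         */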
510 if (bio->bi_end_io != md_end_io) {
511 struct md_io *md_io;
512
513 md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
514 md_io->mddev = mddev;
515 md_io->orig_bi_end_io = bio->bi_end_io;
516 md_io->orig_bi_private = bio->bi_private;
517
518 bio->bi_end_io = md_end_io;
519 bio->bi_private = md_io;
520
521 md_io->start_time = part_start_io_acct(mddev->gendisk,
522 &md_io->part, bio);
523 }
524
        /* bio could be mergeable after passing to underlayer */
526 bio->bi_opf &= ~REQ_NOMERGE;
527
528 md_handle_request(mddev, bio);
529
530 return BLK_QC_T_NONE;
531}
532
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
539void mddev_suspend(struct mddev *mddev)
540{
541 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
542 lockdep_assert_held(&mddev->reconfig_mutex);
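        /* Suspend requests nest; only the outermost caller quiesces the array. */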
543 if (mddev->suspended++)
544 return;
545 synchronize_rcu();
546 wake_up(&mddev->sb_wait);
547 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
548 smp_mb__after_atomic();
549 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
550 mddev->pers->quiesce(mddev, 1);
551 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
552 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
553
554 del_timer_sync(&mddev->safemode_timer);
555
556 mddev->noio_flag = memalloc_noio_save();
557}
558EXPORT_SYMBOL_GPL(mddev_suspend);
559
560void mddev_resume(struct mddev *mddev)
561{
        /* entered the memalloc scope from mddev_suspend() */
563 memalloc_noio_restore(mddev->noio_flag);
564 lockdep_assert_held(&mddev->reconfig_mutex);
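        /* Only the outermost resume (suspend count reaching zero) restarts I/O. */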
565 if (--mddev->suspended)
566 return;
567 wake_up(&mddev->sb_wait);
568 mddev->pers->quiesce(mddev, 0);
569
570 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
571 md_wakeup_thread(mddev->thread);
572 md_wakeup_thread(mddev->sync_thread);
573}
574EXPORT_SYMBOL_GPL(mddev_resume);
575
/*
 * Generic flush handling for md
 */
580static void md_end_flush(struct bio *bio)
581{
582 struct md_rdev *rdev = bio->bi_private;
583 struct mddev *mddev = rdev->mddev;
584
585 rdev_dec_pending(rdev, mddev);
586
587 if (atomic_dec_and_test(&mddev->flush_pending)) {
                /* The pre-request flush has finished */
589 queue_work(md_wq, &mddev->flush_work);
590 }
591 bio_put(bio);
592}
593
594static void md_submit_flush_data(struct work_struct *ws);
595
596static void submit_flushes(struct work_struct *ws)
597{
598 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
599 struct md_rdev *rdev;
600
601 mddev->start_flush = ktime_get_boottime();
602 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
603 atomic_set(&mddev->flush_pending, 1);
604 rcu_read_lock();
605 rdev_for_each_rcu(rdev, mddev)
606 if (rdev->raid_disk >= 0 &&
607 !test_bit(Faulty, &rdev->flags)) {
                        /* Take two references, one is dropped
                         * when the request finishes, one after
                         * we reclaim rcu_read_lock
                         */
612 struct bio *bi;
613 atomic_inc(&rdev->nr_pending);
614 atomic_inc(&rdev->nr_pending);
615 rcu_read_unlock();
616 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
617 bi->bi_end_io = md_end_flush;
618 bi->bi_private = rdev;
619 bio_set_dev(bi, rdev->bdev);
620 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
621 atomic_inc(&mddev->flush_pending);
622 submit_bio(bi);
623 rcu_read_lock();
624 rdev_dec_pending(rdev, mddev);
625 }
626 rcu_read_unlock();
627 if (atomic_dec_and_test(&mddev->flush_pending))
628 queue_work(md_wq, &mddev->flush_work);
629}
630
631static void md_submit_flush_data(struct work_struct *ws)
632{
633 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
634 struct bio *bio = mddev->flush_bio;
635
        /*
         * must reset flush_bio before calling into md_handle_request to avoid a
         * deadlock, because other bios passed md_handle_request suspend check
         * could wait for this and below md_handle_request could wait for those
         * bios because of suspend check
         */
642 spin_lock_irq(&mddev->lock);
643 mddev->prev_flush_start = mddev->start_flush;
644 mddev->flush_bio = NULL;
645 spin_unlock_irq(&mddev->lock);
646 wake_up(&mddev->sb_wait);
647
648 if (bio->bi_iter.bi_size == 0) {
                /* an empty barrier - all done */
650 bio_endio(bio);
651 } else {
652 bio->bi_opf &= ~REQ_PREFLUSH;
653 md_handle_request(mddev, bio);
654 }
655}
656
/*
 * Manages consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or is
 * submitted.  Returns false when the caller must submit the data part of
 * the bio itself (with REQ_PREFLUSH cleared).
 */
663bool md_flush_request(struct mddev *mddev, struct bio *bio)
664{
665 ktime_t req_start = ktime_get_boottime();
666 spin_lock_irq(&mddev->lock);
        /* Flush requests wait until the ongoing flush completes,
         * hence coalescing all the pending flush requests.
         */
670 wait_event_lock_irq(mddev->sb_wait,
671 !mddev->flush_bio ||
672 ktime_before(req_start, mddev->prev_flush_start),
673 mddev->lock);
674
675 if (ktime_after(req_start, mddev->prev_flush_start)) {
676 WARN_ON(mddev->flush_bio);
677 mddev->flush_bio = bio;
678 bio = NULL;
679 }
680 spin_unlock_irq(&mddev->lock);
681
682 if (!bio) {
683 INIT_WORK(&mddev->flush_work, submit_flushes);
684 queue_work(md_wq, &mddev->flush_work);
685 } else {
                /* flush was performed for some other bio while we waited */
687 if (bio->bi_iter.bi_size == 0)
                        /* an empty barrier - all done */
689 bio_endio(bio);
690 else {
691 bio->bi_opf &= ~REQ_PREFLUSH;
692 return false;
693 }
694 }
695 return true;
696}
697EXPORT_SYMBOL(md_flush_request);
698
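/* Take a reference on an mddev; drop it with mddev_put(). */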
699static inline struct mddev *mddev_get(struct mddev *mddev)
700{
701 atomic_inc(&mddev->active);
702 return mddev;
703}
704
705static void mddev_delayed_delete(struct work_struct *ws);
706
707static void mddev_put(struct mddev *mddev)
708{
709 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
710 return;
711 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
712 mddev->ctime == 0 && !mddev->hold_active) {
                /* Array is not configured at all, and not held active,
                 * so destroy it */
715 list_del_init(&mddev->all_mddevs);

                /*
                 * Call queue_work inside the spinlock so that
                 * flush_workqueue() after mddev_find will succeed in waiting
                 * for the work to be done.
                 */
722 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
723 queue_work(md_misc_wq, &mddev->del_work);
724 }
725 spin_unlock(&all_mddevs_lock);
726}
727
728static void md_safemode_timeout(struct timer_list *t);
729
730void mddev_init(struct mddev *mddev)
731{
732 kobject_init(&mddev->kobj, &md_ktype);
733 mutex_init(&mddev->open_mutex);
734 mutex_init(&mddev->reconfig_mutex);
735 mutex_init(&mddev->bitmap_info.mutex);
736 INIT_LIST_HEAD(&mddev->disks);
737 INIT_LIST_HEAD(&mddev->all_mddevs);
738 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
739 atomic_set(&mddev->active, 1);
740 atomic_set(&mddev->openers, 0);
741 atomic_set(&mddev->active_io, 0);
742 spin_lock_init(&mddev->lock);
743 atomic_set(&mddev->flush_pending, 0);
744 init_waitqueue_head(&mddev->sb_wait);
745 init_waitqueue_head(&mddev->recovery_wait);
746 mddev->reshape_position = MaxSector;
747 mddev->reshape_backwards = 0;
748 mddev->last_sync_action = "none";
749 mddev->resync_min = 0;
750 mddev->resync_max = MaxSector;
751 mddev->level = LEVEL_NONE;
752}
753EXPORT_SYMBOL_GPL(mddev_init);
754
755static struct mddev *mddev_find(dev_t unit)
756{
757 struct mddev *mddev, *new = NULL;
758
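        /* For partitionable (mdp) devices the minor encodes the partition;
         * mask those bits off so we look up the whole-device unit.
         */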
759 if (unit && MAJOR(unit) != MD_MAJOR)
760 unit &= ~((1<<MdpMinorShift)-1);
761
762 retry:
763 spin_lock(&all_mddevs_lock);
764
765 if (unit) {
766 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
767 if (mddev->unit == unit) {
768 mddev_get(mddev);
769 spin_unlock(&all_mddevs_lock);
770 kfree(new);
771 return mddev;
772 }
773
774 if (new) {
775 list_add(&new->all_mddevs, &all_mddevs);
776 spin_unlock(&all_mddevs_lock);
777 new->hold_active = UNTIL_IOCTL;
778 return new;
779 }
780 } else if (new) {
                /* find an unused unit number */
782 static int next_minor = 512;
783 int start = next_minor;
784 int is_free = 0;
785 int dev = 0;
786 while (!is_free) {
787 dev = MKDEV(MD_MAJOR, next_minor);
788 next_minor++;
789 if (next_minor > MINORMASK)
790 next_minor = 0;
791 if (next_minor == start) {
                                /* Oh dear, all in use. */
793 spin_unlock(&all_mddevs_lock);
794 kfree(new);
795 return NULL;
796 }
797
798 is_free = 1;
799 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
800 if (mddev->unit == dev) {
801 is_free = 0;
802 break;
803 }
804 }
805 new->unit = dev;
806 new->md_minor = MINOR(dev);
807 new->hold_active = UNTIL_STOP;
808 list_add(&new->all_mddevs, &all_mddevs);
809 spin_unlock(&all_mddevs_lock);
810 return new;
811 }
812 spin_unlock(&all_mddevs_lock);
813
814 new = kzalloc(sizeof(*new), GFP_KERNEL);
815 if (!new)
816 return NULL;
817
818 new->unit = unit;
819 if (MAJOR(unit) == MD_MAJOR)
820 new->md_minor = MINOR(unit);
821 else
822 new->md_minor = MINOR(unit) >> MdpMinorShift;
823
824 mddev_init(new);
825
826 goto retry;
827}
828
829static struct attribute_group md_redundancy_group;
830
831void mddev_unlock(struct mddev *mddev)
832{
833 if (mddev->to_remove) {
                /* These cannot be removed under reconfig_mutex as
                 * an access to the files will try to take reconfig_mutex
                 * while holding the file unremovable, which leads to
                 * a deadlock.
                 * So hold sysfs_active while the remove is happening,
                 * and anything else which might set ->to_remove or
                 * otherwise change the sysfs namespace will fail with
                 * -EBUSY if sysfs_active is still set.
                 * We set sysfs_active under reconfig_mutex and elsewhere
                 * test it under the same mutex to ensure its correct value
                 * is seen.
                 */
846 struct attribute_group *to_remove = mddev->to_remove;
847 mddev->to_remove = NULL;
848 mddev->sysfs_active = 1;
849 mutex_unlock(&mddev->reconfig_mutex);
850
851 if (mddev->kobj.sd) {
852 if (to_remove != &md_redundancy_group)
853 sysfs_remove_group(&mddev->kobj, to_remove);
854 if (mddev->pers == NULL ||
855 mddev->pers->sync_request == NULL) {
856 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
857 if (mddev->sysfs_action)
858 sysfs_put(mddev->sysfs_action);
859 if (mddev->sysfs_completed)
860 sysfs_put(mddev->sysfs_completed);
861 if (mddev->sysfs_degraded)
862 sysfs_put(mddev->sysfs_degraded);
863 mddev->sysfs_action = NULL;
864 mddev->sysfs_completed = NULL;
865 mddev->sysfs_degraded = NULL;
866 }
867 }
868 mddev->sysfs_active = 0;
869 } else
870 mutex_unlock(&mddev->reconfig_mutex);
871
        /* As we've dropped the mutex we need a spinlock to
         * make sure the thread doesn't disappear
         */
875 spin_lock(&pers_lock);
876 md_wakeup_thread(mddev->thread);
877 wake_up(&mddev->sb_wait);
878 spin_unlock(&pers_lock);
879}
880EXPORT_SYMBOL_GPL(mddev_unlock);
881
882struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
883{
884 struct md_rdev *rdev;
885
886 rdev_for_each_rcu(rdev, mddev)
887 if (rdev->desc_nr == nr)
888 return rdev;
889
890 return NULL;
891}
892EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
893
894static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
895{
896 struct md_rdev *rdev;
897
898 rdev_for_each(rdev, mddev)
899 if (rdev->bdev->bd_dev == dev)
900 return rdev;
901
902 return NULL;
903}
904
905struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
906{
907 struct md_rdev *rdev;
908
909 rdev_for_each_rcu(rdev, mddev)
910 if (rdev->bdev->bd_dev == dev)
911 return rdev;
912
913 return NULL;
914}
915EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
916
917static struct md_personality *find_pers(int level, char *clevel)
918{
919 struct md_personality *pers;
920 list_for_each_entry(pers, &pers_list, list) {
921 if (level != LEVEL_NONE && pers->level == level)
922 return pers;
923 if (strcmp(pers->name, clevel)==0)
924 return pers;
925 }
926 return NULL;
927}
928
/* return the offset of the super block in 512byte sectors */
930static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
931{
932 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
933 return MD_NEW_SIZE_SECTORS(num_sectors);
934}
935
936static int alloc_disk_sb(struct md_rdev *rdev)
937{
938 rdev->sb_page = alloc_page(GFP_KERNEL);
939 if (!rdev->sb_page)
940 return -ENOMEM;
941 return 0;
942}
943
944void md_rdev_clear(struct md_rdev *rdev)
945{
946 if (rdev->sb_page) {
947 put_page(rdev->sb_page);
948 rdev->sb_loaded = 0;
949 rdev->sb_page = NULL;
950 rdev->sb_start = 0;
951 rdev->sectors = 0;
952 }
953 if (rdev->bb_page) {
954 put_page(rdev->bb_page);
955 rdev->bb_page = NULL;
956 }
957 badblocks_exit(&rdev->badblocks);
958}
959EXPORT_SYMBOL_GPL(md_rdev_clear);
960
961static void super_written(struct bio *bio)
962{
963 struct md_rdev *rdev = bio->bi_private;
964 struct mddev *mddev = rdev->mddev;
965
966 if (bio->bi_status) {
967 pr_err("md: %s gets error=%d\n", __func__,
968 blk_status_to_errno(bio->bi_status));
969 md_error(mddev, rdev);
970 if (!test_bit(Faulty, &rdev->flags)
971 && (bio->bi_opf & MD_FAILFAST)) {
972 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
973 set_bit(LastDev, &rdev->flags);
974 }
975 } else
976 clear_bit(LastDev, &rdev->flags);
977
978 if (atomic_dec_and_test(&mddev->pending_writes))
979 wake_up(&mddev->sb_wait);
980 rdev_dec_pending(rdev, mddev);
981 bio_put(bio);
982}
983
984void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
985 sector_t sector, int size, struct page *page)
{
        /* write first size bytes of page to sector of rdev
         * Increment mddev->pending_writes before returning
         * and decrement it on completion, waking up sb_wait
         * if zero is reached.
         * If an error occurred, call md_error
         */
993 struct bio *bio;
994 int ff = 0;
995
996 if (!page)
997 return;
998
999 if (test_bit(Faulty, &rdev->flags))
1000 return;
1001
1002 bio = md_bio_alloc_sync(mddev);
1003
1004 atomic_inc(&rdev->nr_pending);
1005
1006 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
1007 bio->bi_iter.bi_sector = sector;
1008 bio_add_page(bio, page, size, 0);
1009 bio->bi_private = rdev;
1010 bio->bi_end_io = super_written;
1011
1012 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
1013 test_bit(FailFast, &rdev->flags) &&
1014 !test_bit(LastDev, &rdev->flags))
1015 ff = MD_FAILFAST;
1016 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
1017
1018 atomic_inc(&mddev->pending_writes);
1019 submit_bio(bio);
1020}
1021
1022int md_super_wait(struct mddev *mddev)
1023{
        /* wait for all superblock writes that were scheduled to complete */
1025 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1026 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
1027 return -EAGAIN;
1028 return 0;
1029}
1030
1031int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
1032 struct page *page, int op, int op_flags, bool metadata_op)
1033{
1034 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
1035 int ret;
1036
1037 if (metadata_op && rdev->meta_bdev)
1038 bio_set_dev(bio, rdev->meta_bdev);
1039 else
1040 bio_set_dev(bio, rdev->bdev);
1041 bio_set_op_attrs(bio, op, op_flags);
1042 if (metadata_op)
1043 bio->bi_iter.bi_sector = sector + rdev->sb_start;
1044 else if (rdev->mddev->reshape_position != MaxSector &&
1045 (rdev->mddev->reshape_backwards ==
1046 (sector >= rdev->mddev->reshape_position)))
1047 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
1048 else
1049 bio->bi_iter.bi_sector = sector + rdev->data_offset;
1050 bio_add_page(bio, page, size, 0);
1051
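        /* Submit and wait; sync_page_io() returns 1 on success, 0 on error. */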
1052 submit_bio_wait(bio);
1053
1054 ret = !bio->bi_status;
1055 bio_put(bio);
1056 return ret;
1057}
1058EXPORT_SYMBOL_GPL(sync_page_io);
1059
1060static int read_disk_sb(struct md_rdev *rdev, int size)
1061{
1062 char b[BDEVNAME_SIZE];
1063
1064 if (rdev->sb_loaded)
1065 return 0;
1066
1067 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1068 goto fail;
1069 rdev->sb_loaded = 1;
1070 return 0;
1071
1072fail:
1073 pr_err("md: disabled device %s, could not read superblock.\n",
1074 bdevname(rdev->bdev,b));
1075 return -EINVAL;
1076}
1077
1078static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1079{
1080 return sb1->set_uuid0 == sb2->set_uuid0 &&
1081 sb1->set_uuid1 == sb2->set_uuid1 &&
1082 sb1->set_uuid2 == sb2->set_uuid2 &&
1083 sb1->set_uuid3 == sb2->set_uuid3;
1084}
1085
1086static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1087{
1088 int ret;
1089 mdp_super_t *tmp1, *tmp2;
1090
1091 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1092 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1093
1094 if (!tmp1 || !tmp2) {
1095 ret = 0;
1096 goto abort;
1097 }
1098
1099 *tmp1 = *sb1;
1100 *tmp2 = *sb2;
1101
        /*
         * nr_disks is not constant
         */
1105 tmp1->nr_disks = 0;
1106 tmp2->nr_disks = 0;
1107
1108 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1109abort:
1110 kfree(tmp1);
1111 kfree(tmp2);
1112 return ret;
1113}
1114
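/* Fold a 32-bit checksum down to 16 bits. */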
1115static u32 md_csum_fold(u32 csum)
1116{
1117 csum = (csum & 0xffff) + (csum >> 16);
1118 return (csum & 0xffff) + (csum >> 16);
1119}
1120
1121static unsigned int calc_sb_csum(mdp_super_t *sb)
1122{
1123 u64 newcsum = 0;
1124 u32 *sb32 = (u32*)sb;
1125 int i;
1126 unsigned int disk_csum, csum;
1127
1128 disk_csum = sb->sb_csum;
1129 sb->sb_csum = 0;
1130
1131 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1132 newcsum += sb32[i];
1133 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1134
1135#ifdef CONFIG_ALPHA
        /* This used to use csum_partial, which was wrong for several
         * reasons including that different results are returned on
         * different architectures.  It isn't critical that we get exactly
         * the same return value as before (we always csum_fold before
         * using it), but it should be the same for the same input.
         * Alpha keeps the old folding behaviour so superblocks already
         * on disk there still checksum correctly.
         */
1144 sb->sb_csum = md_csum_fold(disk_csum);
1145#else
1146 sb->sb_csum = disk_csum;
1147#endif
1148 return csum;
1149}
1150
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 *
 */
1181struct super_type {
1182 char *name;
1183 struct module *owner;
1184 int (*load_super)(struct md_rdev *rdev,
1185 struct md_rdev *refdev,
1186 int minor_version);
1187 int (*validate_super)(struct mddev *mddev,
1188 struct md_rdev *rdev);
1189 void (*sync_super)(struct mddev *mddev,
1190 struct md_rdev *rdev);
1191 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1192 sector_t num_sectors);
1193 int (*allow_new_offset)(struct md_rdev *rdev,
1194 unsigned long long new_offset);
1195};
1196
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 *
 */
1205int md_check_no_bitmap(struct mddev *mddev)
1206{
1207 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1208 return 0;
1209 pr_warn("%s: bitmaps are not supported for %s\n",
1210 mdname(mddev), mddev->pers->name);
1211 return 1;
1212}
1213EXPORT_SYMBOL(md_check_no_bitmap);
1214
/*
 * load_super for 0.90.0
 */
1218static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1219{
1220 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1221 mdp_super_t *sb;
1222 int ret;
1223 bool spare_disk = true;
1224
        /*
         * Calculate the position of the superblock (512byte sectors),
         * it's at the end of the disk.
         *
         * It also happens to be a multiple of 4Kb.
         */
1231 rdev->sb_start = calc_dev_sboffset(rdev);
1232
1233 ret = read_disk_sb(rdev, MD_SB_BYTES);
1234 if (ret)
1235 return ret;
1236
1237 ret = -EINVAL;
1238
1239 bdevname(rdev->bdev, b);
1240 sb = page_address(rdev->sb_page);
1241
1242 if (sb->md_magic != MD_SB_MAGIC) {
1243 pr_warn("md: invalid raid superblock magic on %s\n", b);
1244 goto abort;
1245 }
1246
1247 if (sb->major_version != 0 ||
1248 sb->minor_version < 90 ||
1249 sb->minor_version > 91) {
1250 pr_warn("Bad version number %d.%d on %s\n",
1251 sb->major_version, sb->minor_version, b);
1252 goto abort;
1253 }
1254
1255 if (sb->raid_disks <= 0)
1256 goto abort;
1257
1258 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1259 pr_warn("md: invalid superblock checksum on %s\n", b);
1260 goto abort;
1261 }
1262
1263 rdev->preferred_minor = sb->md_minor;
1264 rdev->data_offset = 0;
1265 rdev->new_data_offset = 0;
1266 rdev->sb_size = MD_SB_BYTES;
1267 rdev->badblocks.shift = -1;
1268
1269 if (sb->level == LEVEL_MULTIPATH)
1270 rdev->desc_nr = -1;
1271 else
1272 rdev->desc_nr = sb->this_disk.number;
1273
        /* not spare disk, or LEVEL_MULTIPATH */
1275 if (sb->level == LEVEL_MULTIPATH ||
1276 (rdev->desc_nr >= 0 &&
1277 rdev->desc_nr < MD_SB_DISKS &&
1278 sb->disks[rdev->desc_nr].state &
1279 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1280 spare_disk = false;
1281
1282 if (!refdev) {
1283 if (!spare_disk)
1284 ret = 1;
1285 else
1286 ret = 0;
1287 } else {
1288 __u64 ev1, ev2;
1289 mdp_super_t *refsb = page_address(refdev->sb_page);
1290 if (!md_uuid_equal(refsb, sb)) {
1291 pr_warn("md: %s has different UUID to %s\n",
1292 b, bdevname(refdev->bdev,b2));
1293 goto abort;
1294 }
1295 if (!md_sb_equal(refsb, sb)) {
1296 pr_warn("md: %s has same UUID but different superblock to %s\n",
1297 b, bdevname(refdev->bdev, b2));
1298 goto abort;
1299 }
1300 ev1 = md_event(sb);
1301 ev2 = md_event(refsb);
1302
1303 if (!spare_disk && ev1 > ev2)
1304 ret = 1;
1305 else
1306 ret = 0;
1307 }
1308 rdev->sectors = rdev->sb_start;
        /* Limit to 4TB as metadata cannot record more than that.
         * (not needed for Linear and RAID0 as metadata doesn't
         * record this size)
         */
1313 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1314 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1315
1316 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
                /* "this cannot possibly happen" ... */
1318 ret = -EINVAL;
1319
1320 abort:
1321 return ret;
1322}
1323
/*
 * validate_super for 0.90.0
 */
1327static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1328{
1329 mdp_disk_t *desc;
1330 mdp_super_t *sb = page_address(rdev->sb_page);
1331 __u64 ev1 = md_event(sb);
1332
1333 rdev->raid_disk = -1;
1334 clear_bit(Faulty, &rdev->flags);
1335 clear_bit(In_sync, &rdev->flags);
1336 clear_bit(Bitmap_sync, &rdev->flags);
1337 clear_bit(WriteMostly, &rdev->flags);
1338
1339 if (mddev->raid_disks == 0) {
1340 mddev->major_version = 0;
1341 mddev->minor_version = sb->minor_version;
1342 mddev->patch_version = sb->patch_version;
1343 mddev->external = 0;
1344 mddev->chunk_sectors = sb->chunk_size >> 9;
1345 mddev->ctime = sb->ctime;
1346 mddev->utime = sb->utime;
1347 mddev->level = sb->level;
1348 mddev->clevel[0] = 0;
1349 mddev->layout = sb->layout;
1350 mddev->raid_disks = sb->raid_disks;
1351 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1352 mddev->events = ev1;
1353 mddev->bitmap_info.offset = 0;
1354 mddev->bitmap_info.space = 0;
1355
1356 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1357 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1358 mddev->reshape_backwards = 0;
1359
1360 if (mddev->minor_version >= 91) {
1361 mddev->reshape_position = sb->reshape_position;
1362 mddev->delta_disks = sb->delta_disks;
1363 mddev->new_level = sb->new_level;
1364 mddev->new_layout = sb->new_layout;
1365 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1366 if (mddev->delta_disks < 0)
1367 mddev->reshape_backwards = 1;
1368 } else {
1369 mddev->reshape_position = MaxSector;
1370 mddev->delta_disks = 0;
1371 mddev->new_level = mddev->level;
1372 mddev->new_layout = mddev->layout;
1373 mddev->new_chunk_sectors = mddev->chunk_sectors;
1374 }
1375 if (mddev->level == 0)
1376 mddev->layout = -1;
1377
1378 if (sb->state & (1<<MD_SB_CLEAN))
1379 mddev->recovery_cp = MaxSector;
1380 else {
1381 if (sb->events_hi == sb->cp_events_hi &&
1382 sb->events_lo == sb->cp_events_lo) {
1383 mddev->recovery_cp = sb->recovery_cp;
1384 } else
1385 mddev->recovery_cp = 0;
1386 }
1387
1388 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1389 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1390 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1391 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1392
1393 mddev->max_disks = MD_SB_DISKS;
1394
1395 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1396 mddev->bitmap_info.file == NULL) {
1397 mddev->bitmap_info.offset =
1398 mddev->bitmap_info.default_offset;
1399 mddev->bitmap_info.space =
1400 mddev->bitmap_info.default_space;
1401 }
1402
1403 } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling, except
                 * for spares (which don't need an event count) */
1406 ++ev1;
1407 if (sb->disks[rdev->desc_nr].state & (
1408 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1409 if (ev1 < mddev->events)
1410 return -EINVAL;
1411 } else if (mddev->bitmap) {
                /* if adding to array with a bitmap, then we can accept an
                 * older device ... but not too old.
                 */
1415 if (ev1 < mddev->bitmap->events_cleared)
1416 return 0;
1417 if (ev1 < mddev->events)
1418 set_bit(Bitmap_sync, &rdev->flags);
1419 } else {
1420 if (ev1 < mddev->events)
                        /* just a hot-add of a new device, leave raid_disk at -1 */
1422 return 0;
1423 }
1424
1425 if (mddev->level != LEVEL_MULTIPATH) {
1426 desc = sb->disks + rdev->desc_nr;
1427
1428 if (desc->state & (1<<MD_DISK_FAULTY))
1429 set_bit(Faulty, &rdev->flags);
                else if (desc->state & (1<<MD_DISK_SYNC)) {
1432 set_bit(In_sync, &rdev->flags);
1433 rdev->raid_disk = desc->raid_disk;
1434 rdev->saved_raid_disk = desc->raid_disk;
1435 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
                        /* active but not in sync implies recovery up to
                         * reshape position.  We don't know exactly where
                         * that is, so set to zero for now */
1439 if (mddev->minor_version >= 91) {
1440 rdev->recovery_offset = 0;
1441 rdev->raid_disk = desc->raid_disk;
1442 }
1443 }
1444 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1445 set_bit(WriteMostly, &rdev->flags);
1446 if (desc->state & (1<<MD_DISK_FAILFAST))
1447 set_bit(FailFast, &rdev->flags);
1448 } else
1449 set_bit(In_sync, &rdev->flags);
1450 return 0;
1451}
1452
/*
 * sync_super for 0.90.0
 */
1456static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1457{
1458 mdp_super_t *sb;
1459 struct md_rdev *rdev2;
1460 int next_spare = mddev->raid_disks;
1461
        /* make rdev->sb match mddev data..
         *
         * 1/ zero out disks
         * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
         * 3/ any empty disks < next_spare become removed
         *
         * disks[0] gets initialised to REMOVED because
         * we cannot be sure from other fields if it has
         * been initialised or not.
         */
1472 int i;
1473 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1474
1475 rdev->sb_size = MD_SB_BYTES;
1476
1477 sb = page_address(rdev->sb_page);
1478
1479 memset(sb, 0, sizeof(*sb));
1480
1481 sb->md_magic = MD_SB_MAGIC;
1482 sb->major_version = mddev->major_version;
1483 sb->patch_version = mddev->patch_version;
1484 sb->gvalid_words = 0;
1485 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1486 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1487 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1488 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1489
1490 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1491 sb->level = mddev->level;
1492 sb->size = mddev->dev_sectors / 2;
1493 sb->raid_disks = mddev->raid_disks;
1494 sb->md_minor = mddev->md_minor;
1495 sb->not_persistent = 0;
1496 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1497 sb->state = 0;
1498 sb->events_hi = (mddev->events>>32);
1499 sb->events_lo = (u32)mddev->events;
1500
1501 if (mddev->reshape_position == MaxSector)
1502 sb->minor_version = 90;
1503 else {
1504 sb->minor_version = 91;
1505 sb->reshape_position = mddev->reshape_position;
1506 sb->new_level = mddev->new_level;
1507 sb->delta_disks = mddev->delta_disks;
1508 sb->new_layout = mddev->new_layout;
1509 sb->new_chunk = mddev->new_chunk_sectors << 9;
1510 }
1511 mddev->minor_version = sb->minor_version;
1512 if (mddev->in_sync)
1513 {
1514 sb->recovery_cp = mddev->recovery_cp;
1515 sb->cp_events_hi = (mddev->events>>32);
1516 sb->cp_events_lo = (u32)mddev->events;
1517 if (mddev->recovery_cp == MaxSector)
1518 sb->state = (1<< MD_SB_CLEAN);
1519 } else
1520 sb->recovery_cp = 0;
1521
1522 sb->layout = mddev->layout;
1523 sb->chunk_size = mddev->chunk_sectors << 9;
1524
1525 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1526 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1527
1528 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1529 rdev_for_each(rdev2, mddev) {
1530 mdp_disk_t *d;
1531 int desc_nr;
1532 int is_active = test_bit(In_sync, &rdev2->flags);
1533
1534 if (rdev2->raid_disk >= 0 &&
1535 sb->minor_version >= 91)
                        /* we have nowhere to store the recovery_offset,
                         * but if it is not below the reshape_position,
                         * we can piggy-back on that.
                         */
1540 is_active = 1;
1541 if (rdev2->raid_disk < 0 ||
1542 test_bit(Faulty, &rdev2->flags))
1543 is_active = 0;
1544 if (is_active)
1545 desc_nr = rdev2->raid_disk;
1546 else
1547 desc_nr = next_spare++;
1548 rdev2->desc_nr = desc_nr;
1549 d = &sb->disks[rdev2->desc_nr];
1550 nr_disks++;
1551 d->number = rdev2->desc_nr;
1552 d->major = MAJOR(rdev2->bdev->bd_dev);
1553 d->minor = MINOR(rdev2->bdev->bd_dev);
1554 if (is_active)
1555 d->raid_disk = rdev2->raid_disk;
1556 else
1557 d->raid_disk = rdev2->desc_nr;
1558 if (test_bit(Faulty, &rdev2->flags))
1559 d->state = (1<<MD_DISK_FAULTY);
1560 else if (is_active) {
1561 d->state = (1<<MD_DISK_ACTIVE);
1562 if (test_bit(In_sync, &rdev2->flags))
1563 d->state |= (1<<MD_DISK_SYNC);
1564 active++;
1565 working++;
1566 } else {
1567 d->state = 0;
1568 spare++;
1569 working++;
1570 }
1571 if (test_bit(WriteMostly, &rdev2->flags))
1572 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1573 if (test_bit(FailFast, &rdev2->flags))
1574 d->state |= (1<<MD_DISK_FAILFAST);
1575 }
        /* now set the "removed" and "faulty" bits on any missing devices */
1577 for (i=0 ; i < mddev->raid_disks ; i++) {
1578 mdp_disk_t *d = &sb->disks[i];
1579 if (d->state == 0 && d->number == 0) {
1580 d->number = i;
1581 d->raid_disk = i;
1582 d->state = (1<<MD_DISK_REMOVED);
1583 d->state |= (1<<MD_DISK_FAULTY);
1584 failed++;
1585 }
1586 }
1587 sb->nr_disks = nr_disks;
1588 sb->active_disks = active;
1589 sb->working_disks = working;
1590 sb->failed_disks = failed;
1591 sb->spare_disks = spare;
1592
1593 sb->this_disk = sb->disks[rdev->desc_nr];
1594 sb->sb_csum = calc_sb_csum(sb);
1595}
1596
/*
 * rdev_size_change for 0.90.0
 */
1600static unsigned long long
1601super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1602{
1603 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1604 return 0;
1605 if (rdev->mddev->bitmap_info.offset)
1606 return 0;
1607 rdev->sb_start = calc_dev_sboffset(rdev);
1608 if (!num_sectors || num_sectors > rdev->sb_start)
1609 num_sectors = rdev->sb_start;
        /* Limit to 4TB as metadata cannot record more than that.
         * 4TB == 2^32 KB, or 2*2^32 sectors.
         */
1613 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1614 num_sectors = (sector_t)(2ULL << 32) - 2;
1615 do {
1616 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1617 rdev->sb_page);
1618 } while (md_super_wait(rdev->mddev) < 0);
1619 return num_sectors;
1620}
1621
1622static int
1623super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1624{
        /* non-zero offset changes not possible with v0.90 */
1626 return new_offset == 0;
1627}
1628
/*
 * version 1 superblock
 */

1633static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1634{
1635 __le32 disk_csum;
1636 u32 csum;
1637 unsigned long long newcsum;
1638 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1639 __le32 *isuper = (__le32*)sb;
1640
1641 disk_csum = sb->sb_csum;
1642 sb->sb_csum = 0;
1643 newcsum = 0;
1644 for (; size >= 4; size -= 4)
1645 newcsum += le32_to_cpu(*isuper++);
1646
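        /* The checksummed area need not be a multiple of 4 bytes; fold in a
         * trailing 16-bit word if one is left over.
         */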
1647 if (size == 2)
1648 newcsum += le16_to_cpu(*(__le16*) isuper);
1649
1650 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1651 sb->sb_csum = disk_csum;
1652 return cpu_to_le32(csum);
1653}
1654
1655static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1656{
1657 struct mdp_superblock_1 *sb;
1658 int ret;
1659 sector_t sb_start;
1660 sector_t sectors;
1661 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1662 int bmask;
1663 bool spare_disk = true;
1664
        /*
         * Calculate the position of the superblock in 512byte sectors.
         * It is always aligned to a 4K boundary and
         * depending on minor_version, it can be:
         * 0: At least 8K, but less than 12K, from end of device
         * 1: At start of device
         * 2: 4K from start of device.
         */
1673 switch(minor_version) {
1674 case 0:
1675 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1676 sb_start -= 8*2;
1677 sb_start &= ~(sector_t)(4*2-1);
1678 break;
1679 case 1:
1680 sb_start = 0;
1681 break;
1682 case 2:
1683 sb_start = 8;
1684 break;
1685 default:
1686 return -EINVAL;
1687 }
1688 rdev->sb_start = sb_start;
1689
        /* superblock is rarely larger than 1K, but it can be larger,
         * and it is safe to read 4k, so we do that
         */
1693 ret = read_disk_sb(rdev, 4096);
1694 if (ret) return ret;
1695
1696 sb = page_address(rdev->sb_page);
1697
1698 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1699 sb->major_version != cpu_to_le32(1) ||
1700 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1701 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1702 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1703 return -EINVAL;
1704
1705 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1706 pr_warn("md: invalid superblock checksum on %s\n",
1707 bdevname(rdev->bdev,b));
1708 return -EINVAL;
1709 }
1710 if (le64_to_cpu(sb->data_size) < 10) {
1711 pr_warn("md: data_size too small on %s\n",
1712 bdevname(rdev->bdev,b));
1713 return -EINVAL;
1714 }
1715 if (sb->pad0 ||
1716 sb->pad3[0] ||
1717 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
                /* Some padding is non-zero, might be a new feature */
1719 return -EINVAL;
1720
1721 rdev->preferred_minor = 0xffff;
1722 rdev->data_offset = le64_to_cpu(sb->data_offset);
1723 rdev->new_data_offset = rdev->data_offset;
1724 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1725 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1726 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1727 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1728
1729 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1730 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1731 if (rdev->sb_size & bmask)
1732 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1733
1734 if (minor_version
1735 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1736 return -EINVAL;
1737 if (minor_version
1738 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1739 return -EINVAL;
1740
1741 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1742 rdev->desc_nr = -1;
1743 else
1744 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1745
1746 if (!rdev->bb_page) {
1747 rdev->bb_page = alloc_page(GFP_KERNEL);
1748 if (!rdev->bb_page)
1749 return -ENOMEM;
1750 }
1751 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1752 rdev->badblocks.count == 0) {
                /* need to load the bad block list.
                 * Currently we limit it to one page.
                 */
1756 s32 offset;
1757 sector_t bb_sector;
1758 __le64 *bbp;
1759 int i;
1760 int sectors = le16_to_cpu(sb->bblog_size);
1761 if (sectors > (PAGE_SIZE / 512))
1762 return -EINVAL;
1763 offset = le32_to_cpu(sb->bblog_offset);
1764 if (offset == 0)
1765 return -EINVAL;
1766 bb_sector = (long long)offset;
1767 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1768 rdev->bb_page, REQ_OP_READ, 0, true))
1769 return -EIO;
1770 bbp = (__le64 *)page_address(rdev->bb_page);
1771 rdev->badblocks.shift = sb->bblog_shift;
1772 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1773 u64 bb = le64_to_cpu(*bbp);
1774 int count = bb & (0x3ff);
1775 u64 sector = bb >> 10;
1776 sector <<= sb->bblog_shift;
1777 count <<= sb->bblog_shift;
1778 if (bb + 1 == 0)
1779 break;
1780 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1781 return -EINVAL;
1782 }
1783 } else if (sb->bblog_offset != 0)
1784 rdev->badblocks.shift = 0;
1785
1786 if ((le32_to_cpu(sb->feature_map) &
1787 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1788 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1789 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1790 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1791 }
1792
1793 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1794 sb->level != 0)
1795 return -EINVAL;
1796
        /* not spare disk, or LEVEL_MULTIPATH */
1798 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1799 (rdev->desc_nr >= 0 &&
1800 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1801 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1802 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1803 spare_disk = false;
1804
1805 if (!refdev) {
1806 if (!spare_disk)
1807 ret = 1;
1808 else
1809 ret = 0;
1810 } else {
1811 __u64 ev1, ev2;
1812 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1813
1814 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1815 sb->level != refsb->level ||
1816 sb->layout != refsb->layout ||
1817 sb->chunksize != refsb->chunksize) {
1818 pr_warn("md: %s has strangely different superblock to %s\n",
1819 bdevname(rdev->bdev,b),
1820 bdevname(refdev->bdev,b2));
1821 return -EINVAL;
1822 }
1823 ev1 = le64_to_cpu(sb->events);
1824 ev2 = le64_to_cpu(refsb->events);
1825
1826 if (!spare_disk && ev1 > ev2)
1827 ret = 1;
1828 else
1829 ret = 0;
1830 }
1831 if (minor_version) {
1832 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1833 sectors -= rdev->data_offset;
1834 } else
1835 sectors = rdev->sb_start;
1836 if (sectors < le64_to_cpu(sb->data_size))
1837 return -EINVAL;
1838 rdev->sectors = le64_to_cpu(sb->data_size);
1839 return ret;
1840}
1841
1842static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1843{
1844 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1845 __u64 ev1 = le64_to_cpu(sb->events);
1846
1847 rdev->raid_disk = -1;
1848 clear_bit(Faulty, &rdev->flags);
1849 clear_bit(In_sync, &rdev->flags);
1850 clear_bit(Bitmap_sync, &rdev->flags);
1851 clear_bit(WriteMostly, &rdev->flags);
1852
1853 if (mddev->raid_disks == 0) {
1854 mddev->major_version = 1;
1855 mddev->patch_version = 0;
1856 mddev->external = 0;
1857 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1858 mddev->ctime = le64_to_cpu(sb->ctime);
1859 mddev->utime = le64_to_cpu(sb->utime);
1860 mddev->level = le32_to_cpu(sb->level);
1861 mddev->clevel[0] = 0;
1862 mddev->layout = le32_to_cpu(sb->layout);
1863 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1864 mddev->dev_sectors = le64_to_cpu(sb->size);
1865 mddev->events = ev1;
1866 mddev->bitmap_info.offset = 0;
1867 mddev->bitmap_info.space = 0;
                /* Default location for bitmap is 1K after superblock
                 * using 3K - total of 4K
                 */
1871 mddev->bitmap_info.default_offset = 1024 >> 9;
1872 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1873 mddev->reshape_backwards = 0;
1874
1875 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1876 memcpy(mddev->uuid, sb->set_uuid, 16);
1877
1878 mddev->max_disks = (4096-256)/2;
1879
1880 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1881 mddev->bitmap_info.file == NULL) {
1882 mddev->bitmap_info.offset =
1883 (__s32)le32_to_cpu(sb->bitmap_offset);
                        /* Metadata doesn't record how much space is available.
                         * For 1.0, we assume we can use up to the superblock
                         * if before, else to 4K beyond superblock.
                         * For others, assume no change is possible.
                         */
1889 if (mddev->minor_version > 0)
1890 mddev->bitmap_info.space = 0;
1891 else if (mddev->bitmap_info.offset > 0)
1892 mddev->bitmap_info.space =
1893 8 - mddev->bitmap_info.offset;
1894 else
1895 mddev->bitmap_info.space =
1896 -mddev->bitmap_info.offset;
1897 }
1898
1899 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1900 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1901 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1902 mddev->new_level = le32_to_cpu(sb->new_level);
1903 mddev->new_layout = le32_to_cpu(sb->new_layout);
1904 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1905 if (mddev->delta_disks < 0 ||
1906 (mddev->delta_disks == 0 &&
1907 (le32_to_cpu(sb->feature_map)
1908 & MD_FEATURE_RESHAPE_BACKWARDS)))
1909 mddev->reshape_backwards = 1;
1910 } else {
1911 mddev->reshape_position = MaxSector;
1912 mddev->delta_disks = 0;
1913 mddev->new_level = mddev->level;
1914 mddev->new_layout = mddev->layout;
1915 mddev->new_chunk_sectors = mddev->chunk_sectors;
1916 }
1917
1918 if (mddev->level == 0 &&
1919 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1920 mddev->layout = -1;
1921
1922 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1923 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1924
1925 if (le32_to_cpu(sb->feature_map) &
1926 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1927 if (le32_to_cpu(sb->feature_map) &
1928 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1929 return -EINVAL;
1930 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1931 (le32_to_cpu(sb->feature_map) &
1932 MD_FEATURE_MULTIPLE_PPLS))
1933 return -EINVAL;
1934 set_bit(MD_HAS_PPL, &mddev->flags);
1935 }
1936 } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling, except for
                 * spares (which don't need an event count) */
1939 ++ev1;
1940 if (rdev->desc_nr >= 0 &&
1941 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1942 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1943 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1944 if (ev1 < mddev->events)
1945 return -EINVAL;
1946 } else if (mddev->bitmap) {
                /* If adding to array with a bitmap, then we can accept an
                 * older device, but not too old.
                 */
1950 if (ev1 < mddev->bitmap->events_cleared)
1951 return 0;
1952 if (ev1 < mddev->events)
1953 set_bit(Bitmap_sync, &rdev->flags);
1954 } else {
1955 if (ev1 < mddev->events)
                        /* just a hot-add of a new device, leave raid_disk at -1 */
1957 return 0;
1958 }
1959 if (mddev->level != LEVEL_MULTIPATH) {
1960 int role;
1961 if (rdev->desc_nr < 0 ||
1962 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1963 role = MD_DISK_ROLE_SPARE;
1964 rdev->desc_nr = -1;
1965 } else
1966 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1967 switch(role) {
1968 case MD_DISK_ROLE_SPARE:
1969 break;
1970 case MD_DISK_ROLE_FAULTY:
1971 set_bit(Faulty, &rdev->flags);
1972 break;
1973 case MD_DISK_ROLE_JOURNAL:
1974 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
                        /* journal device without journal feature */
1976 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1977 return -EINVAL;
1978 }
1979 set_bit(Journal, &rdev->flags);
1980 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1981 rdev->raid_disk = 0;
1982 break;
1983 default:
1984 rdev->saved_raid_disk = role;
1985 if ((le32_to_cpu(sb->feature_map) &
1986 MD_FEATURE_RECOVERY_OFFSET)) {
1987 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1988 if (!(le32_to_cpu(sb->feature_map) &
1989 MD_FEATURE_RECOVERY_BITMAP))
1990 rdev->saved_raid_disk = -1;
1991 } else {
                        /*
                         * If the array is FROZEN, then the device can't
                         * be in_sync with rest of array.
                         */
1996 if (!test_bit(MD_RECOVERY_FROZEN,
1997 &mddev->recovery))
1998 set_bit(In_sync, &rdev->flags);
1999 }
2000 rdev->raid_disk = role;
2001 break;
2002 }
2003 if (sb->devflags & WriteMostly1)
2004 set_bit(WriteMostly, &rdev->flags);
2005 if (sb->devflags & FailFast1)
2006 set_bit(FailFast, &rdev->flags);
2007 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
2008 set_bit(Replacement, &rdev->flags);
2009 } else
2010 set_bit(In_sync, &rdev->flags);
2011
2012 return 0;
2013}
2014
2015static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
2016{
2017 struct mdp_superblock_1 *sb;
2018 struct md_rdev *rdev2;
2019 int max_dev, i;
        /* make rdev->sb match mddev and rdev data. */

2022 sb = page_address(rdev->sb_page);
2023
2024 sb->feature_map = 0;
2025 sb->pad0 = 0;
2026 sb->recovery_offset = cpu_to_le64(0);
2027 memset(sb->pad3, 0, sizeof(sb->pad3));
2028
2029 sb->utime = cpu_to_le64((__u64)mddev->utime);
2030 sb->events = cpu_to_le64(mddev->events);
2031 if (mddev->in_sync)
2032 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2033 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2034 sb->resync_offset = cpu_to_le64(MaxSector);
2035 else
2036 sb->resync_offset = cpu_to_le64(0);
2037
2038 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2039
2040 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2041 sb->size = cpu_to_le64(mddev->dev_sectors);
2042 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2043 sb->level = cpu_to_le32(mddev->level);
2044 sb->layout = cpu_to_le32(mddev->layout);
2045 if (test_bit(FailFast, &rdev->flags))
2046 sb->devflags |= FailFast1;
2047 else
2048 sb->devflags &= ~FailFast1;
2049
2050 if (test_bit(WriteMostly, &rdev->flags))
2051 sb->devflags |= WriteMostly1;
2052 else
2053 sb->devflags &= ~WriteMostly1;
2054 sb->data_offset = cpu_to_le64(rdev->data_offset);
2055 sb->data_size = cpu_to_le64(rdev->sectors);
2056
2057 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2058 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2059 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2060 }
2061
2062 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2063 !test_bit(In_sync, &rdev->flags)) {
2064 sb->feature_map |=
2065 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
2066 sb->recovery_offset =
2067 cpu_to_le64(rdev->recovery_offset);
2068 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2069 sb->feature_map |=
2070 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
2071 }
2072
2073 if (test_bit(Journal, &rdev->flags))
2074 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2075 if (test_bit(Replacement, &rdev->flags))
2076 sb->feature_map |=
2077 cpu_to_le32(MD_FEATURE_REPLACEMENT);
2078
2079 if (mddev->reshape_position != MaxSector) {
2080 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2081 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2082 sb->new_layout = cpu_to_le32(mddev->new_layout);
2083 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2084 sb->new_level = cpu_to_le32(mddev->new_level);
2085 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2086 if (mddev->delta_disks == 0 &&
2087 mddev->reshape_backwards)
2088 sb->feature_map
2089 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
2090 if (rdev->new_data_offset != rdev->data_offset) {
2091 sb->feature_map
2092 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
2093 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2094 - rdev->data_offset));
2095 }
2096 }
2097
2098 if (mddev_is_clustered(mddev))
2099 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2100
2101 if (rdev->badblocks.count == 0)
                /* Nothing to do for bad blocks */ ;
2103 else if (sb->bblog_offset == 0)
                /* Cannot record bad blocks on this device */
2105 md_error(mddev, rdev);
2106 else {
2107 struct badblocks *bb = &rdev->badblocks;
2108 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2109 u64 *p = bb->page;
2110 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2111 if (bb->changed) {
2112 unsigned seq;
2113
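                        /* Copy the bad-block list under a seqlock and retry if
                         * it changed while we were copying it out.
                         */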
2114retry:
2115 seq = read_seqbegin(&bb->lock);
2116
2117 memset(bbp, 0xff, PAGE_SIZE);
2118
2119 for (i = 0 ; i < bb->count ; i++) {
2120 u64 internal_bb = p[i];
2121 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2122 | BB_LEN(internal_bb));
2123 bbp[i] = cpu_to_le64(store_bb);
2124 }
2125 bb->changed = 0;
2126 if (read_seqretry(&bb->lock, seq))
2127 goto retry;
2128
2129 bb->sector = (rdev->sb_start +
2130 (int)le32_to_cpu(sb->bblog_offset));
2131 bb->size = le16_to_cpu(sb->bblog_size);
2132 }
2133 }
2134
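        /* Recompute max_dev from the highest desc_nr currently in use. */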
2135 max_dev = 0;
2136 rdev_for_each(rdev2, mddev)
2137 if (rdev2->desc_nr+1 > max_dev)
2138 max_dev = rdev2->desc_nr+1;
2139
2140 if (max_dev > le32_to_cpu(sb->max_dev)) {
2141 int bmask;
2142 sb->max_dev = cpu_to_le32(max_dev);
2143 rdev->sb_size = max_dev * 2 + 256;
2144 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2145 if (rdev->sb_size & bmask)
2146 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2147 } else
2148 max_dev = le32_to_cpu(sb->max_dev);
2149
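        /* Mark every slot as spare first; real roles are filled in below. */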
2150 for (i=0; i<max_dev;i++)
2151 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2152
2153 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2154 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2155
2156 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2157 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2158 sb->feature_map |=
2159 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2160 else
2161 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2162 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2163 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2164 }
2165
2166 rdev_for_each(rdev2, mddev) {
2167 i = rdev2->desc_nr;
2168 if (test_bit(Faulty, &rdev2->flags))
2169 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2170 else if (test_bit(In_sync, &rdev2->flags))
2171 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2172 else if (test_bit(Journal, &rdev2->flags))
2173 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2174 else if (rdev2->raid_disk >= 0)
2175 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2176 else
2177 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2178 }
2179
2180 sb->sb_csum = calc_sb_1_csum(sb);
2181}
2182
2183static sector_t super_1_choose_bm_space(sector_t dev_size)
2184{
2185 sector_t bm_space;
2186
        /* if the device is bigger than 8Gig, save 64k for bitmap usage,
         * if bigger than 200Gig, save 128k
         */
2190 if (dev_size < 64*2)
2191 bm_space = 0;
2192 else if (dev_size - 64*2 >= 200*1024*1024*2)
2193 bm_space = 128*2;
2194 else if (dev_size - 4*2 > 8*1024*1024*2)
2195 bm_space = 64*2;
2196 else
2197 bm_space = 4*2;
2198 return bm_space;
2199}
2200
2201static unsigned long long
2202super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2203{
2204 struct mdp_superblock_1 *sb;
2205 sector_t max_sectors;
2206 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2207 return 0;
2208 if (rdev->data_offset != rdev->new_data_offset)
2209 return 0;
2210 if (rdev->sb_start < rdev->data_offset) {
                /* minor versions 1 and 2; superblock before data */
2212 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2213 max_sectors -= rdev->data_offset;
2214 if (!num_sectors || num_sectors > max_sectors)
2215 num_sectors = max_sectors;
2216 } else if (rdev->mddev->bitmap_info.offset) {
                /* minor version 0 with bitmap we can't move */
2218 return 0;
2219 } else {
                /* minor version 0; superblock after data */
2221 sector_t sb_start, bm_space;
2222 sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
2223
                /* 8K is for superblock */
2225 sb_start = dev_size - 8*2;
2226 sb_start &= ~(sector_t)(4*2 - 1);
2227
2228 bm_space = super_1_choose_bm_space(dev_size);
2229
                /* Space that can be used to store data needs to decrease
                 * superblock bitmap space and bad block space(4K)
                 */
2233 max_sectors = sb_start - bm_space - 4*2;
2234
2235 if (!num_sectors || num_sectors > max_sectors)
2236 num_sectors = max_sectors;
2237 }
2238 sb = page_address(rdev->sb_page);
2239 sb->data_size = cpu_to_le64(num_sectors);
2240 sb->super_offset = cpu_to_le64(rdev->sb_start);
2241 sb->sb_csum = calc_sb_1_csum(sb);
2242 do {
2243 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2244 rdev->sb_page);
2245 } while (md_super_wait(rdev->mddev) < 0);
2246 return num_sectors;
2247
2248}
2249
2250static int
2251super_1_allow_new_offset(struct md_rdev *rdev,
2252 unsigned long long new_offset)
2253{
        /* All necessary checks on new >= old have been done */
2255 struct bitmap *bitmap;
2256 if (new_offset >= rdev->data_offset)
2257 return 1;
2258
        /* with 1.0 metadata, there is no metadata to tread on
         * so we can always move back */
2261 if (rdev->mddev->minor_version == 0)
2262 return 1;
2263
        /* otherwise we must be sure not to step on
         * any metadata, so stay:
         * 36K beyond start of superblock
         * beyond end of badblocks
         * beyond write-intent bitmap
         */
2270 if (rdev->sb_start + (32+4)*2 > new_offset)
2271 return 0;
2272 bitmap = rdev->mddev->bitmap;
2273 if (bitmap && !rdev->mddev->bitmap_info.file &&
2274 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2275 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2276 return 0;
2277 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2278 return 0;
2279
2280 return 1;
2281}
2282
2283static struct super_type super_types[] = {
2284 [0] = {
2285 .name = "0.90.0",
2286 .owner = THIS_MODULE,
2287 .load_super = super_90_load,
2288 .validate_super = super_90_validate,
2289 .sync_super = super_90_sync,
2290 .rdev_size_change = super_90_rdev_size_change,
2291 .allow_new_offset = super_90_allow_new_offset,
2292 },
2293 [1] = {
2294 .name = "md-1",
2295 .owner = THIS_MODULE,
2296 .load_super = super_1_load,
2297 .validate_super = super_1_validate,
2298 .sync_super = super_1_sync,
2299 .rdev_size_change = super_1_rdev_size_change,
2300 .allow_new_offset = super_1_allow_new_offset,
2301 },
2302};
2303
2304static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2305{
2306 if (mddev->sync_super) {
2307 mddev->sync_super(mddev, rdev);
2308 return;
2309 }
2310
2311 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2312
2313 super_types[mddev->major_version].sync_super(mddev, rdev);
2314}
2315
2316static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2317{
2318 struct md_rdev *rdev, *rdev2;
2319
2320 rcu_read_lock();
2321 rdev_for_each_rcu(rdev, mddev1) {
2322 if (test_bit(Faulty, &rdev->flags) ||
2323 test_bit(Journal, &rdev->flags) ||
2324 rdev->raid_disk == -1)
2325 continue;
2326 rdev_for_each_rcu(rdev2, mddev2) {
2327 if (test_bit(Faulty, &rdev2->flags) ||
2328 test_bit(Journal, &rdev2->flags) ||
2329 rdev2->raid_disk == -1)
2330 continue;
2331 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2332 rcu_read_unlock();
2333 return 1;
2334 }
2335 }
2336 }
2337 rcu_read_unlock();
2338 return 0;
2339}
2340
2341static LIST_HEAD(pending_raid_disks);
2342
2343
2344
2345
2346
2347
2348
2349
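/*
 * Try to register a data integrity profile for the array: this is only
 * possible if every active member device exposes a matching profile.
 */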
2350int md_integrity_register(struct mddev *mddev)
2351{
2352 struct md_rdev *rdev, *reference = NULL;
2353
2354 if (list_empty(&mddev->disks))
2355 return 0;
2356 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2357 return 0;
2358 rdev_for_each(rdev, mddev) {
2359
2360 if (test_bit(Faulty, &rdev->flags))
2361 continue;
2362 if (rdev->raid_disk < 0)
2363 continue;
2364 if (!reference) {
2365
2366 reference = rdev;
2367 continue;
2368 }
2369
2370 if (blk_integrity_compare(reference->bdev->bd_disk,
2371 rdev->bdev->bd_disk) < 0)
2372 return -EINVAL;
2373 }
2374 if (!reference || !bdev_get_integrity(reference->bdev))
2375 return 0;
2376
2377
2378
2379
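/* all members match the reference profile, so register it for the md
 * device itself
 */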
2380 blk_integrity_register(mddev->gendisk,
2381 bdev_get_integrity(reference->bdev));
2382
2383 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2384 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2385 pr_err("md: failed to create integrity pool for %s\n",
2386 mdname(mddev));
2387 return -EINVAL;
2388 }
2389 return 0;
2390}
2391EXPORT_SYMBOL(md_integrity_register);
2392
2393
2394
2395
2396
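/*
 * Attempt to add an rdev, but only if its integrity profile is
 * consistent with the one already registered for the array.
 */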
2397int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2398{
2399 struct blk_integrity *bi_mddev;
2400 char name[BDEVNAME_SIZE];
2401
2402 if (!mddev->gendisk)
2403 return 0;
2404
2405 bi_mddev = blk_get_integrity(mddev->gendisk);
2406
2407 if (!bi_mddev)
2408 return 0;
2409
2410 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2411 pr_err("%s: incompatible integrity profile for %s\n",
2412 mdname(mddev), bdevname(rdev->bdev, name));
2413 return -ENXIO;
2414 }
2415
2416 return 0;
2417}
2418EXPORT_SYMBOL(md_integrity_add_rdev);
2419
2420static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2421{
2422 char b[BDEVNAME_SIZE];
2423 int err;
2424
2425
2426 if (find_rdev(mddev, rdev->bdev->bd_dev))
2427 return -EEXIST;
2428
2429 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2430 mddev->pers)
2431 return -EROFS;
2432
2433
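/* make sure the device is large enough for the array
 * (journal devices are exempt)
 */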
2434 if (!test_bit(Journal, &rdev->flags) &&
2435 rdev->sectors &&
2436 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2437 if (mddev->pers) {
2438
2439
2440
2441
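/* device is too small: a running array with redundancy (level > 0)
 * cannot shrink, so refuse it
 */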
2442 if (mddev->level > 0)
2443 return -ENOSPC;
2444 } else
2445 mddev->dev_sectors = rdev->sectors;
2446 }
2447
2448
2449
2450
2451
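/* Verify rdev->desc_nr is unique: if it is -1 pick a free number,
 * otherwise make sure the requested number is not already in use.
 */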
2452 rcu_read_lock();
2453 if (rdev->desc_nr < 0) {
2454 int choice = 0;
2455 if (mddev->pers)
2456 choice = mddev->raid_disks;
2457 while (md_find_rdev_nr_rcu(mddev, choice))
2458 choice++;
2459 rdev->desc_nr = choice;
2460 } else {
2461 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2462 rcu_read_unlock();
2463 return -EBUSY;
2464 }
2465 }
2466 rcu_read_unlock();
2467 if (!test_bit(Journal, &rdev->flags) &&
2468 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2469 pr_warn("md: %s: array is limited to %d devices\n",
2470 mdname(mddev), mddev->max_disks);
2471 return -EBUSY;
2472 }
2473 bdevname(rdev->bdev,b);
2474 strreplace(b, '/', '!');
2475
2476 rdev->mddev = mddev;
2477 pr_debug("md: bind<%s>\n", b);
2478
2479 if (mddev->raid_disks)
2480 mddev_create_serial_pool(mddev, rdev, false);
2481
2482 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2483 goto fail;
2484
2485
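/* failure to create this link is not fatal */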
2486 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2487 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2488 rdev->sysfs_unack_badblocks =
2489 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2490 rdev->sysfs_badblocks =
2491 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2492
2493 list_add_rcu(&rdev->same_set, &mddev->disks);
2494 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2495
2496
2497 mddev->recovery_disabled++;
2498
2499 return 0;
2500
2501 fail:
2502 pr_warn("md: failed to register dev-%s for %s\n",
2503 b, mdname(mddev));
2504 return err;
2505}
2506
2507static void rdev_delayed_delete(struct work_struct *ws)
2508{
2509 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2510 kobject_del(&rdev->kobj);
2511 kobject_put(&rdev->kobj);
2512}
2513
2514static void unbind_rdev_from_array(struct md_rdev *rdev)
2515{
2516 char b[BDEVNAME_SIZE];
2517
2518 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2519 list_del_rcu(&rdev->same_set);
2520 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2521 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2522 rdev->mddev = NULL;
2523 sysfs_remove_link(&rdev->kobj, "block");
2524 sysfs_put(rdev->sysfs_state);
2525 sysfs_put(rdev->sysfs_unack_badblocks);
2526 sysfs_put(rdev->sysfs_badblocks);
2527 rdev->sysfs_state = NULL;
2528 rdev->sysfs_unack_badblocks = NULL;
2529 rdev->sysfs_badblocks = NULL;
2530 rdev->badblocks.count = 0;
2531
2532
2533
2534
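/* readers may still be walking the disk list under RCU, so wait for a
 * grace period and then remove the kobject from a workqueue
 */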
2535 synchronize_rcu();
2536 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2537 kobject_get(&rdev->kobj);
2538 queue_work(md_rdev_misc_wq, &rdev->del_work);
2539}
2540
2541
2542
2543
2544
2545
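/*
 * Claim the block device exclusively so it cannot be reused elsewhere
 * (mounted, repartitioned, or bound to another array) while we hold it.
 */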
2546static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2547{
2548 int err = 0;
2549 struct block_device *bdev;
2550
2551 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2552 shared ? (struct md_rdev *)lock_rdev : rdev);
2553 if (IS_ERR(bdev)) {
2554 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2555 MAJOR(dev), MINOR(dev));
2556 return PTR_ERR(bdev);
2557 }
2558 rdev->bdev = bdev;
2559 return err;
2560}
2561
2562static void unlock_rdev(struct md_rdev *rdev)
2563{
2564 struct block_device *bdev = rdev->bdev;
2565 rdev->bdev = NULL;
2566 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2567}
2568
2569void md_autodetect_dev(dev_t dev);
2570
2571static void export_rdev(struct md_rdev *rdev)
2572{
2573 char b[BDEVNAME_SIZE];
2574
2575 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2576 md_rdev_clear(rdev);
2577#ifndef MODULE
2578 if (test_bit(AutoDetected, &rdev->flags))
2579 md_autodetect_dev(rdev->bdev->bd_dev);
2580#endif
2581 unlock_rdev(rdev);
2582 kobject_put(&rdev->kobj);
2583}
2584
2585void md_kick_rdev_from_array(struct md_rdev *rdev)
2586{
2587 unbind_rdev_from_array(rdev);
2588 export_rdev(rdev);
2589}
2590EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2591
2592static void export_array(struct mddev *mddev)
2593{
2594 struct md_rdev *rdev;
2595
2596 while (!list_empty(&mddev->disks)) {
2597 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2598 same_set);
2599 md_kick_rdev_from_array(rdev);
2600 }
2601 mddev->raid_disks = 0;
2602 mddev->major_version = 0;
2603}
2604
2605static bool set_in_sync(struct mddev *mddev)
2606{
2607 lockdep_assert_held(&mddev->lock);
2608 if (!mddev->in_sync) {
2609 mddev->sync_checkers++;
2610 spin_unlock(&mddev->lock);
2611 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2612 spin_lock(&mddev->lock);
2613 if (!mddev->in_sync &&
2614 percpu_ref_is_zero(&mddev->writes_pending)) {
2615 mddev->in_sync = 1;
2616
2617
2618
2619
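/* ensure the in_sync update is visible before sync_checkers is
 * decremented and the clean flag is raised
 */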
2620 smp_mb();
2621 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2622 sysfs_notify_dirent_safe(mddev->sysfs_state);
2623 }
2624 if (--mddev->sync_checkers == 0)
2625 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2626 }
2627 if (mddev->safemode == 1)
2628 mddev->safemode = 0;
2629 return mddev->in_sync;
2630}
2631
2632static void sync_sbs(struct mddev *mddev, int nospares)
2633{
2634
2635
2636
2637
2638
2639
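/* Refresh the in-memory superblock of every device.  When allowed,
 * skip spares whose event count already matches (or is one behind)
 * the array's, so they need not be rewritten.
 */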
2640 struct md_rdev *rdev;
2641 rdev_for_each(rdev, mddev) {
2642 if (rdev->sb_events == mddev->events ||
2643 (nospares &&
2644 rdev->raid_disk < 0 &&
2645 rdev->sb_events+1 == mddev->events)) {
2646
2647 rdev->sb_loaded = 2;
2648 } else {
2649 sync_super(mddev, rdev);
2650 rdev->sb_loaded = 1;
2651 }
2652 }
2653}
2654
2655static bool does_sb_need_changing(struct mddev *mddev)
2656{
2657 struct md_rdev *rdev;
2658 struct mdp_superblock_1 *sb;
2659 int role;
2660
2661
2662 rdev_for_each(rdev, mddev)
2663 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2664 break;
2665
2666
2667 if (!rdev)
2668 return false;
2669
2670 sb = page_address(rdev->sb_page);
2671
2672 rdev_for_each(rdev, mddev) {
2673 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2674
2675 if (role == 0xffff && rdev->raid_disk >=0 &&
2676 !test_bit(Faulty, &rdev->flags))
2677 return true;
2678
2679 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2680 return true;
2681 }
2682
2683
2684 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2685 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2686 (mddev->layout != le32_to_cpu(sb->layout)) ||
2687 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2688 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2689 return true;
2690
2691 return false;
2692}
2693
2694void md_update_sb(struct mddev *mddev, int force_change)
2695{
2696 struct md_rdev *rdev;
2697 int sync_req;
2698 int nospares = 0;
2699 int any_badblocks_changed = 0;
2700 int ret = -1;
2701
2702 if (mddev->ro) {
2703 if (force_change)
2704 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2705 return;
2706 }
2707
2708repeat:
2709 if (mddev_is_clustered(mddev)) {
2710 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2711 force_change = 1;
2712 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2713 nospares = 1;
2714 ret = md_cluster_ops->metadata_update_start(mddev);
2715
2716 if (!does_sb_need_changing(mddev)) {
2717 if (ret == 0)
2718 md_cluster_ops->metadata_update_cancel(mddev);
2719 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2720 BIT(MD_SB_CHANGE_DEVS) |
2721 BIT(MD_SB_CHANGE_CLEAN));
2722 return;
2723 }
2724 }
2725
2726
2727
2728
2729
2730
2731
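/* First make sure individual recovery_offsets are correct.
 * curr_resync_completed is only meaningful during a plain recovery,
 * not during a reshape.
 */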
2732 rdev_for_each(rdev, mddev) {
2733 if (rdev->raid_disk >= 0 &&
2734 mddev->delta_disks >= 0 &&
2735 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2736 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2737 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2738 !test_bit(Journal, &rdev->flags) &&
2739 !test_bit(In_sync, &rdev->flags) &&
2740 mddev->curr_resync_completed > rdev->recovery_offset)
2741 rdev->recovery_offset = mddev->curr_resync_completed;
2742
2743 }
2744 if (!mddev->persistent) {
2745 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2746 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2747 if (!mddev->external) {
2748 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2749 rdev_for_each(rdev, mddev) {
2750 if (rdev->badblocks.changed) {
2751 rdev->badblocks.changed = 0;
2752 ack_all_badblocks(&rdev->badblocks);
2753 md_error(mddev, rdev);
2754 }
2755 clear_bit(Blocked, &rdev->flags);
2756 clear_bit(BlockedBadBlocks, &rdev->flags);
2757 wake_up(&rdev->blocked_wait);
2758 }
2759 }
2760 wake_up(&mddev->sb_wait);
2761 return;
2762 }
2763
2764 spin_lock(&mddev->lock);
2765
2766 mddev->utime = ktime_get_real_seconds();
2767
2768 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2769 force_change = 1;
2770 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2771
2772
2773
2774
2775 nospares = 1;
2776 if (force_change)
2777 nospares = 0;
2778 if (mddev->degraded)
2779
2780
2781
2782
2783
2784
2785
2786
2787
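/* On a degraded array skipping spare superblocks is unsafe: a removed
 * device could still look up to date and be re-added without a resync,
 * so always write everything.
 */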
2788 nospares = 0;
2789
2790 sync_req = mddev->in_sync;
2791
2792
2793
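/* For a pure clean<->dirty transition on a fully synced array the
 * event count may be rolled back instead of incremented.
 */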
2794 if (nospares
2795 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2796 && mddev->can_decrease_events
2797 && mddev->events != 1) {
2798 mddev->events--;
2799 mddev->can_decrease_events = 0;
2800 } else {
2801
2802 mddev->events ++;
2803 mddev->can_decrease_events = nospares;
2804 }
2805
2806
2807
2808
2809
2810
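/* the 64-bit event counter is never expected to wrap */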
2811 WARN_ON(mddev->events == 0);
2812
2813 rdev_for_each(rdev, mddev) {
2814 if (rdev->badblocks.changed)
2815 any_badblocks_changed++;
2816 if (test_bit(Faulty, &rdev->flags))
2817 set_bit(FaultRecorded, &rdev->flags);
2818 }
2819
2820 sync_sbs(mddev, nospares);
2821 spin_unlock(&mddev->lock);
2822
2823 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2824 mdname(mddev), mddev->in_sync);
2825
2826 if (mddev->queue)
2827 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2828rewrite:
2829 md_bitmap_update_sb(mddev->bitmap);
2830 rdev_for_each(rdev, mddev) {
2831 char b[BDEVNAME_SIZE];
2832
2833 if (rdev->sb_loaded != 1)
2834 continue;
2835
2836 if (!test_bit(Faulty, &rdev->flags)) {
2837 md_super_write(mddev,rdev,
2838 rdev->sb_start, rdev->sb_size,
2839 rdev->sb_page);
2840 pr_debug("md: (write) %s's sb offset: %llu\n",
2841 bdevname(rdev->bdev, b),
2842 (unsigned long long)rdev->sb_start);
2843 rdev->sb_events = mddev->events;
2844 if (rdev->badblocks.size) {
2845 md_super_write(mddev, rdev,
2846 rdev->badblocks.sector,
2847 rdev->badblocks.size << 9,
2848 rdev->bb_page);
2849 rdev->badblocks.size = 0;
2850 }
2851
2852 } else
2853 pr_debug("md: %s (skipping faulty)\n",
2854 bdevname(rdev->bdev, b));
2855
2856 if (mddev->level == LEVEL_MULTIPATH)
2857
2858 break;
2859 }
2860 if (md_super_wait(mddev) < 0)
2861 goto rewrite;
2862
2863
2864 if (mddev_is_clustered(mddev) && ret == 0)
2865 md_cluster_ops->metadata_update_finish(mddev);
2866
2867 if (mddev->in_sync != sync_req ||
2868 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2869 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2870
2871 goto repeat;
2872 wake_up(&mddev->sb_wait);
2873 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2874 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2875
2876 rdev_for_each(rdev, mddev) {
2877 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2878 clear_bit(Blocked, &rdev->flags);
2879
2880 if (any_badblocks_changed)
2881 ack_all_badblocks(&rdev->badblocks);
2882 clear_bit(BlockedBadBlocks, &rdev->flags);
2883 wake_up(&rdev->blocked_wait);
2884 }
2885}
2886EXPORT_SYMBOL(md_update_sb);
2887
2888static int add_bound_rdev(struct md_rdev *rdev)
2889{
2890 struct mddev *mddev = rdev->mddev;
2891 int err = 0;
2892 bool add_journal = test_bit(Journal, &rdev->flags);
2893
2894 if (!mddev->pers->hot_remove_disk || add_journal) {
2895
2896
2897
2898
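/* Personalities without hot_remove_disk (and journal devices) expect
 * added disks to take effect immediately, so validate and hot-add the
 * device right away.
 */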
2899 super_types[mddev->major_version].
2900 validate_super(mddev, rdev);
2901 if (add_journal)
2902 mddev_suspend(mddev);
2903 err = mddev->pers->hot_add_disk(mddev, rdev);
2904 if (add_journal)
2905 mddev_resume(mddev);
2906 if (err) {
2907 md_kick_rdev_from_array(rdev);
2908 return err;
2909 }
2910 }
2911 sysfs_notify_dirent_safe(rdev->sysfs_state);
2912
2913 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2914 if (mddev->degraded)
2915 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2916 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2917 md_new_event(mddev);
2918 md_wakeup_thread(mddev->thread);
2919 return 0;
2920}
2921
2922
2923
2924
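/* Words written to sysfs files may or may not be newline terminated;
 * cmd_match() accepts either form.
 */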
2925static int cmd_match(const char *cmd, const char *str)
2926{
2927
2928
2929
2930
2931 while (*cmd && *str && *cmd == *str) {
2932 cmd++;
2933 str++;
2934 }
2935 if (*cmd == '\n')
2936 cmd++;
2937 if (*str || *cmd)
2938 return 0;
2939 return 1;
2940}
2941
2942struct rdev_sysfs_entry {
2943 struct attribute attr;
2944 ssize_t (*show)(struct md_rdev *, char *);
2945 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2946};
2947
2948static ssize_t
2949state_show(struct md_rdev *rdev, char *page)
2950{
2951 char *sep = ",";
2952 size_t len = 0;
2953 unsigned long flags = READ_ONCE(rdev->flags);
2954
2955 if (test_bit(Faulty, &flags) ||
2956 (!test_bit(ExternalBbl, &flags) &&
2957 rdev->badblocks.unacked_exist))
2958 len += sprintf(page+len, "faulty%s", sep);
2959 if (test_bit(In_sync, &flags))
2960 len += sprintf(page+len, "in_sync%s", sep);
2961 if (test_bit(Journal, &flags))
2962 len += sprintf(page+len, "journal%s", sep);
2963 if (test_bit(WriteMostly, &flags))
2964 len += sprintf(page+len, "write_mostly%s", sep);
2965 if (test_bit(Blocked, &flags) ||
2966 (rdev->badblocks.unacked_exist
2967 && !test_bit(Faulty, &flags)))
2968 len += sprintf(page+len, "blocked%s", sep);
2969 if (!test_bit(Faulty, &flags) &&
2970 !test_bit(Journal, &flags) &&
2971 !test_bit(In_sync, &flags))
2972 len += sprintf(page+len, "spare%s", sep);
2973 if (test_bit(WriteErrorSeen, &flags))
2974 len += sprintf(page+len, "write_error%s", sep);
2975 if (test_bit(WantReplacement, &flags))
2976 len += sprintf(page+len, "want_replacement%s", sep);
2977 if (test_bit(Replacement, &flags))
2978 len += sprintf(page+len, "replacement%s", sep);
2979 if (test_bit(ExternalBbl, &flags))
2980 len += sprintf(page+len, "external_bbl%s", sep);
2981 if (test_bit(FailFast, &flags))
2982 len += sprintf(page+len, "failfast%s", sep);
2983
2984 if (len)
2985 len -= strlen(sep);
2986
2987 return len+sprintf(page+len, "\n");
2988}
2989
2990static ssize_t
2991state_store(struct md_rdev *rdev, const char *buf, size_t len)
2992{
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
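/* Accepted values:
 *  faulty            - simulate an error on the device
 *  remove            - detach the device from the array
 *  writemostly/-writemostly   - set/clear WriteMostly
 *  blocked           - set the Blocked flag
 *  -blocked          - clear Blocked (possibly reporting an error first)
 *  insync            - mark a slotless device as in-sync
 *  -insync           - clear In_sync on a device that has a slot
 *  write_error/-write_error   - set/clear WriteErrorSeen
 *  failfast/-failfast         - set/clear FailFast
 *  want_replacement/-want_replacement, replacement/-replacement
 *  re-add            - re-add a previously failed device
 *  external_bbl/-external_bbl - toggle externally managed bad blocks
 */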
3007 int err = -EINVAL;
3008 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
3009 md_error(rdev->mddev, rdev);
3010 if (test_bit(Faulty, &rdev->flags))
3011 err = 0;
3012 else
3013 err = -EBUSY;
3014 } else if (cmd_match(buf, "remove")) {
3015 if (rdev->mddev->pers) {
3016 clear_bit(Blocked, &rdev->flags);
3017 remove_and_add_spares(rdev->mddev, rdev);
3018 }
3019 if (rdev->raid_disk >= 0)
3020 err = -EBUSY;
3021 else {
3022 struct mddev *mddev = rdev->mddev;
3023 err = 0;
3024 if (mddev_is_clustered(mddev))
3025 err = md_cluster_ops->remove_disk(mddev, rdev);
3026
3027 if (err == 0) {
3028 md_kick_rdev_from_array(rdev);
3029 if (mddev->pers) {
3030 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3031 md_wakeup_thread(mddev->thread);
3032 }
3033 md_new_event(mddev);
3034 }
3035 }
3036 } else if (cmd_match(buf, "writemostly")) {
3037 set_bit(WriteMostly, &rdev->flags);
3038 mddev_create_serial_pool(rdev->mddev, rdev, false);
3039 err = 0;
3040 } else if (cmd_match(buf, "-writemostly")) {
3041 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3042 clear_bit(WriteMostly, &rdev->flags);
3043 err = 0;
3044 } else if (cmd_match(buf, "blocked")) {
3045 set_bit(Blocked, &rdev->flags);
3046 err = 0;
3047 } else if (cmd_match(buf, "-blocked")) {
3048 if (!test_bit(Faulty, &rdev->flags) &&
3049 !test_bit(ExternalBbl, &rdev->flags) &&
3050 rdev->badblocks.unacked_exist) {
3051
3052
3053
3054 md_error(rdev->mddev, rdev);
3055 }
3056 clear_bit(Blocked, &rdev->flags);
3057 clear_bit(BlockedBadBlocks, &rdev->flags);
3058 wake_up(&rdev->blocked_wait);
3059 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3060 md_wakeup_thread(rdev->mddev->thread);
3061
3062 err = 0;
3063 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3064 set_bit(In_sync, &rdev->flags);
3065 err = 0;
3066 } else if (cmd_match(buf, "failfast")) {
3067 set_bit(FailFast, &rdev->flags);
3068 err = 0;
3069 } else if (cmd_match(buf, "-failfast")) {
3070 clear_bit(FailFast, &rdev->flags);
3071 err = 0;
3072 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3073 !test_bit(Journal, &rdev->flags)) {
3074 if (rdev->mddev->pers == NULL) {
3075 clear_bit(In_sync, &rdev->flags);
3076 rdev->saved_raid_disk = rdev->raid_disk;
3077 rdev->raid_disk = -1;
3078 err = 0;
3079 }
3080 } else if (cmd_match(buf, "write_error")) {
3081 set_bit(WriteErrorSeen, &rdev->flags);
3082 err = 0;
3083 } else if (cmd_match(buf, "-write_error")) {
3084 clear_bit(WriteErrorSeen, &rdev->flags);
3085 err = 0;
3086 } else if (cmd_match(buf, "want_replacement")) {
3087
3088
3089
3090
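/* Any active device that is not itself a replacement may be marked as
 * wanting a replacement; recovery may then be needed.
 */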
3091 if (rdev->raid_disk >= 0 &&
3092 !test_bit(Journal, &rdev->flags) &&
3093 !test_bit(Replacement, &rdev->flags))
3094 set_bit(WantReplacement, &rdev->flags);
3095 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3096 md_wakeup_thread(rdev->mddev->thread);
3097 err = 0;
3098 } else if (cmd_match(buf, "-want_replacement")) {
3099
3100
3101
3102 err = 0;
3103 clear_bit(WantReplacement, &rdev->flags);
3104 } else if (cmd_match(buf, "replacement")) {
3105
3106
3107
3108
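/* a device can only be marked as a replacement before the array is
 * started; afterwards replacements come from spares or 'slot' writes
 */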
3109 if (rdev->mddev->pers)
3110 err = -EBUSY;
3111 else {
3112 set_bit(Replacement, &rdev->flags);
3113 err = 0;
3114 }
3115 } else if (cmd_match(buf, "-replacement")) {
3116
3117 if (rdev->mddev->pers)
3118 err = -EBUSY;
3119 else {
3120 clear_bit(Replacement, &rdev->flags);
3121 err = 0;
3122 }
3123 } else if (cmd_match(buf, "re-add")) {
3124 if (!rdev->mddev->pers)
3125 err = -EINVAL;
3126 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3127 rdev->saved_raid_disk >= 0) {
3128
3129
3130
3131
3132
3133
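/* on a clustered array the remote bitmaps must be gathered before
 * Faulty is cleared and the device is re-bound
 */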
3134 if (!mddev_is_clustered(rdev->mddev) ||
3135 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3136 clear_bit(Faulty, &rdev->flags);
3137 err = add_bound_rdev(rdev);
3138 }
3139 } else
3140 err = -EBUSY;
3141 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3142 set_bit(ExternalBbl, &rdev->flags);
3143 rdev->badblocks.shift = 0;
3144 err = 0;
3145 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3146 clear_bit(ExternalBbl, &rdev->flags);
3147 err = 0;
3148 }
3149 if (!err)
3150 sysfs_notify_dirent_safe(rdev->sysfs_state);
3151 return err ? err : len;
3152}
3153static struct rdev_sysfs_entry rdev_state =
3154__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3155
3156static ssize_t
3157errors_show(struct md_rdev *rdev, char *page)
3158{
3159 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3160}
3161
3162static ssize_t
3163errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3164{
3165 unsigned int n;
3166 int rv;
3167
3168 rv = kstrtouint(buf, 10, &n);
3169 if (rv < 0)
3170 return rv;
3171 atomic_set(&rdev->corrected_errors, n);
3172 return len;
3173}
3174static struct rdev_sysfs_entry rdev_errors =
3175__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3176
3177static ssize_t
3178slot_show(struct md_rdev *rdev, char *page)
3179{
3180 if (test_bit(Journal, &rdev->flags))
3181 return sprintf(page, "journal\n");
3182 else if (rdev->raid_disk < 0)
3183 return sprintf(page, "none\n");
3184 else
3185 return sprintf(page, "%d\n", rdev->raid_disk);
3186}
3187
3188static ssize_t
3189slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3190{
3191 int slot;
3192 int err;
3193
3194 if (test_bit(Journal, &rdev->flags))
3195 return -EBUSY;
3196 if (strncmp(buf, "none", 4)==0)
3197 slot = -1;
3198 else {
3199 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3200 if (err < 0)
3201 return err;
3202 }
3203 if (rdev->mddev->pers && slot == -1) {
3204
3205
3206
3207
3208
3209
3210
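/* Writing "none" to 'slot' on an active array asks for the device to
 * be removed; only failed or spare devices can be removed this way.
 */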
3211 if (rdev->raid_disk == -1)
3212 return -EEXIST;
3213
3214 if (rdev->mddev->pers->hot_remove_disk == NULL)
3215 return -EINVAL;
3216 clear_bit(Blocked, &rdev->flags);
3217 remove_and_add_spares(rdev->mddev, rdev);
3218 if (rdev->raid_disk >= 0)
3219 return -EBUSY;
3220 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3221 md_wakeup_thread(rdev->mddev->thread);
3222 } else if (rdev->mddev->pers) {
3223
3224
3225
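/* activating a spare: assign the slot and hot-add it to the running
 * array
 */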
3226 int err;
3227
3228 if (rdev->raid_disk != -1)
3229 return -EBUSY;
3230
3231 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3232 return -EBUSY;
3233
3234 if (rdev->mddev->pers->hot_add_disk == NULL)
3235 return -EINVAL;
3236
3237 if (slot >= rdev->mddev->raid_disks &&
3238 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3239 return -ENOSPC;
3240
3241 rdev->raid_disk = slot;
3242 if (test_bit(In_sync, &rdev->flags))
3243 rdev->saved_raid_disk = slot;
3244 else
3245 rdev->saved_raid_disk = -1;
3246 clear_bit(In_sync, &rdev->flags);
3247 clear_bit(Bitmap_sync, &rdev->flags);
3248 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3249 if (err) {
3250 rdev->raid_disk = -1;
3251 return err;
3252 } else
3253 sysfs_notify_dirent_safe(rdev->sysfs_state);
3254 /* failure of sysfs_link_rdev() below is tolerated */;
3255 sysfs_link_rdev(rdev->mddev, rdev);
3256
3257 } else {
3258 if (slot >= rdev->mddev->raid_disks &&
3259 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3260 return -ENOSPC;
3261 rdev->raid_disk = slot;
3262
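/* array not started yet: just record the slot and assume the device
 * is working
 */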
3263 clear_bit(Faulty, &rdev->flags);
3264 clear_bit(WriteMostly, &rdev->flags);
3265 set_bit(In_sync, &rdev->flags);
3266 sysfs_notify_dirent_safe(rdev->sysfs_state);
3267 }
3268 return len;
3269}
3270
3271static struct rdev_sysfs_entry rdev_slot =
3272__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3273
3274static ssize_t
3275offset_show(struct md_rdev *rdev, char *page)
3276{
3277 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3278}
3279
3280static ssize_t
3281offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3282{
3283 unsigned long long offset;
3284 if (kstrtoull(buf, 10, &offset) < 0)
3285 return -EINVAL;
3286 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3287 return -EBUSY;
3288 if (rdev->sectors && rdev->mddev->external)
3289
3290
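/* once a size is known on an externally managed device the offset must
 * stay fixed so that overlap checks remain meaningful
 */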
3291 return -EBUSY;
3292 rdev->data_offset = offset;
3293 rdev->new_data_offset = offset;
3294 return len;
3295}
3296
3297static struct rdev_sysfs_entry rdev_offset =
3298__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3299
3300static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3301{
3302 return sprintf(page, "%llu\n",
3303 (unsigned long long)rdev->new_data_offset);
3304}
3305
3306static ssize_t new_offset_store(struct md_rdev *rdev,
3307 const char *buf, size_t len)
3308{
3309 unsigned long long new_offset;
3310 struct mddev *mddev = rdev->mddev;
3311
3312 if (kstrtoull(buf, 10, &new_offset) < 0)
3313 return -EINVAL;
3314
3315 if (mddev->sync_thread ||
3316 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3317 return -EBUSY;
3318 if (new_offset == rdev->data_offset)
3319
3320 ;
3321 else if (new_offset > rdev->data_offset) {
3322
3323 if (new_offset - rdev->data_offset
3324 + mddev->dev_sectors > rdev->sectors)
3325 return -E2BIG;
3326 }
3327
3328
3329
3330
3331
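/* shrinking the offset conflicts with a backwards reshape that is
 * already set up
 */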
3332 if (new_offset < rdev->data_offset &&
3333 mddev->reshape_backwards)
3334 return -EINVAL;
3335
3336
3337
3338
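/* growing the offset is only accepted once reshape_backwards has been
 * selected
 */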
3339 if (new_offset > rdev->data_offset &&
3340 !mddev->reshape_backwards)
3341 return -EINVAL;
3342
3343 if (mddev->pers && mddev->persistent &&
3344 !super_types[mddev->major_version]
3345 .allow_new_offset(rdev, new_offset))
3346 return -E2BIG;
3347 rdev->new_data_offset = new_offset;
3348 if (new_offset > rdev->data_offset)
3349 mddev->reshape_backwards = 1;
3350 else if (new_offset < rdev->data_offset)
3351 mddev->reshape_backwards = 0;
3352
3353 return len;
3354}
3355static struct rdev_sysfs_entry rdev_new_offset =
3356__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3357
3358static ssize_t
3359rdev_size_show(struct md_rdev *rdev, char *page)
3360{
3361 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3362}
3363
3364static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3365{
3366
3367 if (s1+l1 <= s2)
3368 return 0;
3369 if (s2+l2 <= s1)
3370 return 0;
3371 return 1;
3372}
3373
3374static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3375{
3376 unsigned long long blocks;
3377 sector_t new;
3378
3379 if (kstrtoull(buf, 10, &blocks) < 0)
3380 return -EINVAL;
3381
3382 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3383 return -EINVAL;
3384
3385 new = blocks * 2;
3386 if (new != blocks * 2)
3387 return -EINVAL;
3388
3389 *sectors = new;
3390 return 0;
3391}
3392
3393static ssize_t
3394rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3395{
3396 struct mddev *my_mddev = rdev->mddev;
3397 sector_t oldsectors = rdev->sectors;
3398 sector_t sectors;
3399
3400 if (test_bit(Journal, &rdev->flags))
3401 return -EBUSY;
3402 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3403 return -EINVAL;
3404 if (rdev->data_offset != rdev->new_data_offset)
3405 return -EINVAL;
3406 if (my_mddev->pers && rdev->raid_disk >= 0) {
3407 if (my_mddev->persistent) {
3408 sectors = super_types[my_mddev->major_version].
3409 rdev_size_change(rdev, sectors);
3410 if (!sectors)
3411 return -EBUSY;
3412 } else if (!sectors)
3413 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3414 rdev->data_offset;
3415 if (!my_mddev->pers->resize)
3416
3417 return -EINVAL;
3418 }
3419 if (sectors < my_mddev->dev_sectors)
3420 return -EINVAL;
3421
3422 rdev->sectors = sectors;
3423 if (sectors > oldsectors && my_mddev->external) {
3424
3425
3426
3427
3428
3429
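/* A growing device with externally managed metadata must not overlap
 * any other rdev sharing the same block device; walk all arrays under
 * RCU to check.  This is a best-effort safety net only.
 */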
3430 struct mddev *mddev;
3431 int overlap = 0;
3432 struct list_head *tmp;
3433
3434 rcu_read_lock();
3435 for_each_mddev(mddev, tmp) {
3436 struct md_rdev *rdev2;
3437
3438 rdev_for_each(rdev2, mddev)
3439 if (rdev->bdev == rdev2->bdev &&
3440 rdev != rdev2 &&
3441 overlaps(rdev->data_offset, rdev->sectors,
3442 rdev2->data_offset,
3443 rdev2->sectors)) {
3444 overlap = 1;
3445 break;
3446 }
3447 if (overlap) {
3448 mddev_put(mddev);
3449 break;
3450 }
3451 }
3452 rcu_read_unlock();
3453 if (overlap) {
3454
3455
3456
3457
3458
3459
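/* overlap found: put the old size back, which is known to be safe,
 * and report busy
 */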
3460 rdev->sectors = oldsectors;
3461 return -EBUSY;
3462 }
3463 }
3464 return len;
3465}
3466
3467static struct rdev_sysfs_entry rdev_size =
3468__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3469
3470static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3471{
3472 unsigned long long recovery_start = rdev->recovery_offset;
3473
3474 if (test_bit(In_sync, &rdev->flags) ||
3475 recovery_start == MaxSector)
3476 return sprintf(page, "none\n");
3477
3478 return sprintf(page, "%llu\n", recovery_start);
3479}
3480
3481static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3482{
3483 unsigned long long recovery_start;
3484
3485 if (cmd_match(buf, "none"))
3486 recovery_start = MaxSector;
3487 else if (kstrtoull(buf, 10, &recovery_start))
3488 return -EINVAL;
3489
3490 if (rdev->mddev->pers &&
3491 rdev->raid_disk >= 0)
3492 return -EBUSY;
3493
3494 rdev->recovery_offset = recovery_start;
3495 if (recovery_start == MaxSector)
3496 set_bit(In_sync, &rdev->flags);
3497 else
3498 clear_bit(In_sync, &rdev->flags);
3499 return len;
3500}
3501
3502static struct rdev_sysfs_entry rdev_recovery_start =
3503__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
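/*
 * sysfs access to the bad-block list:
 * 'bad_blocks' shows acknowledged bad ranges and accepts
 * "sector length" writes that add acknowledged entries;
 * 'unacknowledged_bad_blocks' shows and adds entries that have not
 * been acknowledged yet.
 */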
3516static ssize_t bb_show(struct md_rdev *rdev, char *page)
3517{
3518 return badblocks_show(&rdev->badblocks, page, 0);
3519}
3520static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3521{
3522 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3523
3524 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3525 wake_up(&rdev->blocked_wait);
3526 return rv;
3527}
3528static struct rdev_sysfs_entry rdev_bad_blocks =
3529__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3530
3531static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3532{
3533 return badblocks_show(&rdev->badblocks, page, 1);
3534}
3535static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3536{
3537 return badblocks_store(&rdev->badblocks, page, len, 1);
3538}
3539static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3540__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3541
3542static ssize_t
3543ppl_sector_show(struct md_rdev *rdev, char *page)
3544{
3545 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3546}
3547
3548static ssize_t
3549ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3550{
3551 unsigned long long sector;
3552
3553 if (kstrtoull(buf, 10, &sector) < 0)
3554 return -EINVAL;
3555 if (sector != (sector_t)sector)
3556 return -EINVAL;
3557
3558 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3559 rdev->raid_disk >= 0)
3560 return -EBUSY;
3561
3562 if (rdev->mddev->persistent) {
3563 if (rdev->mddev->major_version == 0)
3564 return -EINVAL;
3565 if ((sector > rdev->sb_start &&
3566 sector - rdev->sb_start > S16_MAX) ||
3567 (sector < rdev->sb_start &&
3568 rdev->sb_start - sector > -S16_MIN))
3569 return -EINVAL;
3570 rdev->ppl.offset = sector - rdev->sb_start;
3571 } else if (!rdev->mddev->external) {
3572 return -EBUSY;
3573 }
3574 rdev->ppl.sector = sector;
3575 return len;
3576}
3577
3578static struct rdev_sysfs_entry rdev_ppl_sector =
3579__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3580
3581static ssize_t
3582ppl_size_show(struct md_rdev *rdev, char *page)
3583{
3584 return sprintf(page, "%u\n", rdev->ppl.size);
3585}
3586
3587static ssize_t
3588ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3589{
3590 unsigned int size;
3591
3592 if (kstrtouint(buf, 10, &size) < 0)
3593 return -EINVAL;
3594
3595 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3596 rdev->raid_disk >= 0)
3597 return -EBUSY;
3598
3599 if (rdev->mddev->persistent) {
3600 if (rdev->mddev->major_version == 0)
3601 return -EINVAL;
3602 if (size > U16_MAX)
3603 return -EINVAL;
3604 } else if (!rdev->mddev->external) {
3605 return -EBUSY;
3606 }
3607 rdev->ppl.size = size;
3608 return len;
3609}
3610
3611static struct rdev_sysfs_entry rdev_ppl_size =
3612__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3613
3614static struct attribute *rdev_default_attrs[] = {
3615 &rdev_state.attr,
3616 &rdev_errors.attr,
3617 &rdev_slot.attr,
3618 &rdev_offset.attr,
3619 &rdev_new_offset.attr,
3620 &rdev_size.attr,
3621 &rdev_recovery_start.attr,
3622 &rdev_bad_blocks.attr,
3623 &rdev_unack_bad_blocks.attr,
3624 &rdev_ppl_sector.attr,
3625 &rdev_ppl_size.attr,
3626 NULL,
3627};
3628static ssize_t
3629rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3630{
3631 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3632 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3633
3634 if (!entry->show)
3635 return -EIO;
3636 if (!rdev->mddev)
3637 return -ENODEV;
3638 return entry->show(rdev, page);
3639}
3640
3641static ssize_t
3642rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3643 const char *page, size_t length)
3644{
3645 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3646 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3647 ssize_t rv;
3648 struct mddev *mddev = rdev->mddev;
3649
3650 if (!entry->store)
3651 return -EIO;
3652 if (!capable(CAP_SYS_ADMIN))
3653 return -EACCES;
3654 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3655 if (!rv) {
3656 if (rdev->mddev == NULL)
3657 rv = -ENODEV;
3658 else
3659 rv = entry->store(rdev, page, length);
3660 mddev_unlock(mddev);
3661 }
3662 return rv;
3663}
3664
3665static void rdev_free(struct kobject *ko)
3666{
3667 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3668 kfree(rdev);
3669}
3670static const struct sysfs_ops rdev_sysfs_ops = {
3671 .show = rdev_attr_show,
3672 .store = rdev_attr_store,
3673};
3674static struct kobj_type rdev_ktype = {
3675 .release = rdev_free,
3676 .sysfs_ops = &rdev_sysfs_ops,
3677 .default_attrs = rdev_default_attrs,
3678};
3679
3680int md_rdev_init(struct md_rdev *rdev)
3681{
3682 rdev->desc_nr = -1;
3683 rdev->saved_raid_disk = -1;
3684 rdev->raid_disk = -1;
3685 rdev->flags = 0;
3686 rdev->data_offset = 0;
3687 rdev->new_data_offset = 0;
3688 rdev->sb_events = 0;
3689 rdev->last_read_error = 0;
3690 rdev->sb_loaded = 0;
3691 rdev->bb_page = NULL;
3692 atomic_set(&rdev->nr_pending, 0);
3693 atomic_set(&rdev->read_errors, 0);
3694 atomic_set(&rdev->corrected_errors, 0);
3695
3696 INIT_LIST_HEAD(&rdev->same_set);
3697 init_waitqueue_head(&rdev->blocked_wait);
3698
3699
3700
3701
3702
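/* reserve the bad-block list up front, even for arrays that may never
 * record any
 */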
3703 return badblocks_init(&rdev->badblocks, 0);
3704}
3705EXPORT_SYMBOL_GPL(md_rdev_init);
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
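/*
 * Import a device.  If super_format >= 0 the superblock is read and
 * sanity-checked; a device with zero size or without a valid superblock
 * of the requested format is not imported.
 */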
3716static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3717{
3718 char b[BDEVNAME_SIZE];
3719 int err;
3720 struct md_rdev *rdev;
3721 sector_t size;
3722
3723 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3724 if (!rdev)
3725 return ERR_PTR(-ENOMEM);
3726
3727 err = md_rdev_init(rdev);
3728 if (err)
3729 goto abort_free;
3730 err = alloc_disk_sb(rdev);
3731 if (err)
3732 goto abort_free;
3733
3734 err = lock_rdev(rdev, newdev, super_format == -2);
3735 if (err)
3736 goto abort_free;
3737
3738 kobject_init(&rdev->kobj, &rdev_ktype);
3739
3740 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3741 if (!size) {
3742 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3743 bdevname(rdev->bdev,b));
3744 err = -EINVAL;
3745 goto abort_free;
3746 }
3747
3748 if (super_format >= 0) {
3749 err = super_types[super_format].
3750 load_super(rdev, NULL, super_minor);
3751 if (err == -EINVAL) {
3752 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3753 bdevname(rdev->bdev,b),
3754 super_format, super_minor);
3755 goto abort_free;
3756 }
3757 if (err < 0) {
3758 pr_warn("md: could not read %s's sb, not importing!\n",
3759 bdevname(rdev->bdev,b));
3760 goto abort_free;
3761 }
3762 }
3763
3764 return rdev;
3765
3766abort_free:
3767 if (rdev->bdev)
3768 unlock_rdev(rdev);
3769 md_rdev_clear(rdev);
3770 kfree(rdev);
3771 return ERR_PTR(err);
3772}
3773
3774
3775
3776
3777
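/*
 * Check a full set of devices for plausibility: pick the freshest
 * superblock, validate every other device against it, and kick any
 * device that does not belong.
 */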
3778static int analyze_sbs(struct mddev *mddev)
3779{
3780 int i;
3781 struct md_rdev *rdev, *freshest, *tmp;
3782 char b[BDEVNAME_SIZE];
3783
3784 freshest = NULL;
3785 rdev_for_each_safe(rdev, tmp, mddev)
3786 switch (super_types[mddev->major_version].
3787 load_super(rdev, freshest, mddev->minor_version)) {
3788 case 1:
3789 freshest = rdev;
3790 break;
3791 case 0:
3792 break;
3793 default:
3794 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3795 bdevname(rdev->bdev,b));
3796 md_kick_rdev_from_array(rdev);
3797 }
3798
3799
3800 if (!freshest) {
3801 pr_warn("md: cannot find a valid disk\n");
3802 return -EINVAL;
3803 }
3804
3805 super_types[mddev->major_version].
3806 validate_super(mddev, freshest);
3807
3808 i = 0;
3809 rdev_for_each_safe(rdev, tmp, mddev) {
3810 if (mddev->max_disks &&
3811 (rdev->desc_nr >= mddev->max_disks ||
3812 i > mddev->max_disks)) {
3813 pr_warn("md: %s: %s: only %d devices permitted\n",
3814 mdname(mddev), bdevname(rdev->bdev, b),
3815 mddev->max_disks);
3816 md_kick_rdev_from_array(rdev);
3817 continue;
3818 }
3819 if (rdev != freshest) {
3820 if (super_types[mddev->major_version].
3821 validate_super(mddev, rdev)) {
3822 pr_warn("md: kicking non-fresh %s from array!\n",
3823 bdevname(rdev->bdev,b));
3824 md_kick_rdev_from_array(rdev);
3825 continue;
3826 }
3827 }
3828 if (mddev->level == LEVEL_MULTIPATH) {
3829 rdev->desc_nr = i++;
3830 rdev->raid_disk = rdev->desc_nr;
3831 set_bit(In_sync, &rdev->flags);
3832 } else if (rdev->raid_disk >=
3833 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3834 !test_bit(Journal, &rdev->flags)) {
3835 rdev->raid_disk = -1;
3836 clear_bit(In_sync, &rdev->flags);
3837 }
3838 }
3839
3840 return 0;
3841}
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
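/*
 * Parse a decimal number with an optional fractional part and return
 * it scaled by 10^scale, using integer arithmetic only.  sysfs values
 * such as safe_mode_delay are entered in seconds but kept internally
 * in smaller units, hence the scaling.
 */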
3853int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3854{
3855 unsigned long result = 0;
3856 long decimals = -1;
3857 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3858 if (*cp == '.')
3859 decimals = 0;
3860 else if (decimals < scale) {
3861 unsigned int value;
3862 value = *cp - '0';
3863 result = result * 10 + value;
3864 if (decimals >= 0)
3865 decimals++;
3866 }
3867 cp++;
3868 }
3869 if (*cp == '\n')
3870 cp++;
3871 if (*cp)
3872 return -EINVAL;
3873 if (decimals < 0)
3874 decimals = 0;
3875 *res = result * int_pow(10, scale - decimals);
3876 return 0;
3877}
3878
3879static ssize_t
3880safe_delay_show(struct mddev *mddev, char *page)
3881{
3882 int msec = (mddev->safemode_delay*1000)/HZ;
3883 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3884}
3885static ssize_t
3886safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3887{
3888 unsigned long msec;
3889
3890 if (mddev_is_clustered(mddev)) {
3891 pr_warn("md: Safemode is disabled for clustered mode\n");
3892 return -EINVAL;
3893 }
3894
3895 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3896 return -EINVAL;
3897 if (msec == 0)
3898 mddev->safemode_delay = 0;
3899 else {
3900 unsigned long old_delay = mddev->safemode_delay;
3901 unsigned long new_delay = (msec*HZ)/1000;
3902
3903 if (new_delay == 0)
3904 new_delay = 1;
3905 mddev->safemode_delay = new_delay;
3906 if (new_delay < old_delay || old_delay == 0)
3907 mod_timer(&mddev->safemode_timer, jiffies+1);
3908 }
3909 return len;
3910}
3911static struct md_sysfs_entry md_safe_delay =
3912__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3913
3914static ssize_t
3915level_show(struct mddev *mddev, char *page)
3916{
3917 struct md_personality *p;
3918 int ret;
3919 spin_lock(&mddev->lock);
3920 p = mddev->pers;
3921 if (p)
3922 ret = sprintf(page, "%s\n", p->name);
3923 else if (mddev->clevel[0])
3924 ret = sprintf(page, "%s\n", mddev->clevel);
3925 else if (mddev->level != LEVEL_NONE)
3926 ret = sprintf(page, "%d\n", mddev->level);
3927 else
3928 ret = 0;
3929 spin_unlock(&mddev->lock);
3930 return ret;
3931}
3932
3933static ssize_t
3934level_store(struct mddev *mddev, const char *buf, size_t len)
3935{
3936 char clevel[16];
3937 ssize_t rv;
3938 size_t slen = len;
3939 struct md_personality *pers, *oldpers;
3940 long level;
3941 void *priv, *oldpriv;
3942 struct md_rdev *rdev;
3943
3944 if (slen == 0 || slen >= sizeof(clevel))
3945 return -EINVAL;
3946
3947 rv = mddev_lock(mddev);
3948 if (rv)
3949 return rv;
3950
3951 if (mddev->pers == NULL) {
3952 strncpy(mddev->clevel, buf, slen);
3953 if (mddev->clevel[slen-1] == '\n')
3954 slen--;
3955 mddev->clevel[slen] = 0;
3956 mddev->level = LEVEL_NONE;
3957 rv = len;
3958 goto out_unlock;
3959 }
3960 rv = -EROFS;
3961 if (mddev->ro)
3962 goto out_unlock;
3963
3964
3965
3966
3967
3968
3969
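/* A running array may only change personality when nothing else is
 * going on: no resync/recovery thread, no pending reshape and no
 * active sysfs users.
 */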
3970 rv = -EBUSY;
3971 if (mddev->sync_thread ||
3972 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3973 mddev->reshape_position != MaxSector ||
3974 mddev->sysfs_active)
3975 goto out_unlock;
3976
3977 rv = -EINVAL;
3978 if (!mddev->pers->quiesce) {
3979 pr_warn("md: %s: %s does not support online personality change\n",
3980 mdname(mddev), mddev->pers->name);
3981 goto out_unlock;
3982 }
3983
3984
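/* find the requested personality, loading its module if necessary */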
3985 strncpy(clevel, buf, slen);
3986 if (clevel[slen-1] == '\n')
3987 slen--;
3988 clevel[slen] = 0;
3989 if (kstrtol(clevel, 10, &level))
3990 level = LEVEL_NONE;
3991
3992 if (request_module("md-%s", clevel) != 0)
3993 request_module("md-level-%s", clevel);
3994 spin_lock(&pers_lock);
3995 pers = find_pers(level, clevel);
3996 if (!pers || !try_module_get(pers->owner)) {
3997 spin_unlock(&pers_lock);
3998 pr_warn("md: personality %s not loaded\n", clevel);
3999 rv = -EINVAL;
4000 goto out_unlock;
4001 }
4002 spin_unlock(&pers_lock);
4003
4004 if (pers == mddev->pers) {
4005
4006 module_put(pers->owner);
4007 rv = len;
4008 goto out_unlock;
4009 }
4010 if (!pers->takeover) {
4011 module_put(pers->owner);
4012 pr_warn("md: %s: %s does not support personality takeover\n",
4013 mdname(mddev), clevel);
4014 rv = -EINVAL;
4015 goto out_unlock;
4016 }
4017
4018 rdev_for_each(rdev, mddev)
4019 rdev->new_raid_disk = rdev->raid_disk;
4020
4021
4022
4023
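/* ->takeover() may have modified the new_* geometry fields even on
 * failure, so restore them if it did not succeed
 */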
4024 priv = pers->takeover(mddev);
4025 if (IS_ERR(priv)) {
4026 mddev->new_level = mddev->level;
4027 mddev->new_layout = mddev->layout;
4028 mddev->new_chunk_sectors = mddev->chunk_sectors;
4029 mddev->raid_disks -= mddev->delta_disks;
4030 mddev->delta_disks = 0;
4031 mddev->reshape_backwards = 0;
4032 module_put(pers->owner);
4033 pr_warn("md: %s: %s would not accept array\n",
4034 mdname(mddev), clevel);
4035 rv = PTR_ERR(priv);
4036 goto out_unlock;
4037 }
4038
4039
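/* the takeover succeeded; quiesce the array and switch it over to the
 * new personality
 */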
4040 mddev_suspend(mddev);
4041 mddev_detach(mddev);
4042
4043 spin_lock(&mddev->lock);
4044 oldpers = mddev->pers;
4045 oldpriv = mddev->private;
4046 mddev->pers = pers;
4047 mddev->private = priv;
4048 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4049 mddev->level = mddev->new_level;
4050 mddev->layout = mddev->new_layout;
4051 mddev->chunk_sectors = mddev->new_chunk_sectors;
4052 mddev->delta_disks = 0;
4053 mddev->reshape_backwards = 0;
4054 mddev->degraded = 0;
4055 spin_unlock(&mddev->lock);
4056
4057 if (oldpers->sync_request == NULL &&
4058 mddev->external) {
4059
4060
4061
4062
4063
4064
4065
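/* the old personality had no redundancy and the metadata is managed
 * externally: mark the array dirty and disable safemode
 */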
4066 mddev->in_sync = 0;
4067 mddev->safemode_delay = 0;
4068 mddev->safemode = 0;
4069 }
4070
4071 oldpers->free(mddev, oldpriv);
4072
4073 if (oldpers->sync_request == NULL &&
4074 pers->sync_request != NULL) {
4075
4076 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4077 pr_warn("md: cannot register extra attributes for %s\n",
4078 mdname(mddev));
4079 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4080 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4081 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4082 }
4083 if (oldpers->sync_request != NULL &&
4084 pers->sync_request == NULL) {
4085
4086 if (mddev->to_remove == NULL)
4087 mddev->to_remove = &md_redundancy_group;
4088 }
4089
4090 module_put(oldpers->owner);
4091
4092 rdev_for_each(rdev, mddev) {
4093 if (rdev->raid_disk < 0)
4094 continue;
4095 if (rdev->new_raid_disk >= mddev->raid_disks)
4096 rdev->new_raid_disk = -1;
4097 if (rdev->new_raid_disk == rdev->raid_disk)
4098 continue;
4099 sysfs_unlink_rdev(mddev, rdev);
4100 }
4101 rdev_for_each(rdev, mddev) {
4102 if (rdev->raid_disk < 0)
4103 continue;
4104 if (rdev->new_raid_disk == rdev->raid_disk)
4105 continue;
4106 rdev->raid_disk = rdev->new_raid_disk;
4107 if (rdev->raid_disk < 0)
4108 clear_bit(In_sync, &rdev->flags);
4109 else {
4110 if (sysfs_link_rdev(mddev, rdev))
4111 pr_warn("md: cannot register rd%d for %s after level change\n",
4112 rdev->raid_disk, mdname(mddev));
4113 }
4114 }
4115
4116 if (pers->sync_request == NULL) {
4117
4118
4119
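/* without redundancy there is nothing to resync, so the array is
 * always treated as in_sync and the safemode timer is not needed
 */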
4120 mddev->in_sync = 1;
4121 del_timer_sync(&mddev->safemode_timer);
4122 }
4123 blk_set_stacking_limits(&mddev->queue->limits);
4124 pers->run(mddev);
4125 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4126 mddev_resume(mddev);
4127 if (!mddev->thread)
4128 md_update_sb(mddev, 1);
4129 sysfs_notify_dirent_safe(mddev->sysfs_level);
4130 md_new_event(mddev);
4131 rv = len;
4132out_unlock:
4133 mddev_unlock(mddev);
4134 return rv;
4135}
4136
4137static struct md_sysfs_entry md_level =
4138__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4139
4140static ssize_t
4141layout_show(struct mddev *mddev, char *page)
4142{
4143
4144 if (mddev->reshape_position != MaxSector &&
4145 mddev->layout != mddev->new_layout)
4146 return sprintf(page, "%d (%d)\n",
4147 mddev->new_layout, mddev->layout);
4148 return sprintf(page, "%d\n", mddev->layout);
4149}
4150
4151static ssize_t
4152layout_store(struct mddev *mddev, const char *buf, size_t len)
4153{
4154 unsigned int n;
4155 int err;
4156
4157 err = kstrtouint(buf, 10, &n);
4158 if (err < 0)
4159 return err;
4160 err = mddev_lock(mddev);
4161 if (err)
4162 return err;
4163
4164 if (mddev->pers) {
4165 if (mddev->pers->check_reshape == NULL)
4166 err = -EBUSY;
4167 else if (mddev->ro)
4168 err = -EROFS;
4169 else {
4170 mddev->new_layout = n;
4171 err = mddev->pers->check_reshape(mddev);
4172 if (err)
4173 mddev->new_layout = mddev->layout;
4174 }
4175 } else {
4176 mddev->new_layout = n;
4177 if (mddev->reshape_position == MaxSector)
4178 mddev->layout = n;
4179 }
4180 mddev_unlock(mddev);
4181 return err ?: len;
4182}
4183static struct md_sysfs_entry md_layout =
4184__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4185
4186static ssize_t
4187raid_disks_show(struct mddev *mddev, char *page)
4188{
4189 if (mddev->raid_disks == 0)
4190 return 0;
4191 if (mddev->reshape_position != MaxSector &&
4192 mddev->delta_disks != 0)
4193 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4194 mddev->raid_disks - mddev->delta_disks);
4195 return sprintf(page, "%d\n", mddev->raid_disks);
4196}
4197
4198static int update_raid_disks(struct mddev *mddev, int raid_disks);
4199
4200static ssize_t
4201raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4202{
4203 unsigned int n;
4204 int err;
4205
4206 err = kstrtouint(buf, 10, &n);
4207 if (err < 0)
4208 return err;
4209
4210 err = mddev_lock(mddev);
4211 if (err)
4212 return err;
4213 if (mddev->pers)
4214 err = update_raid_disks(mddev, n);
4215 else if (mddev->reshape_position != MaxSector) {
4216 struct md_rdev *rdev;
4217 int olddisks = mddev->raid_disks - mddev->delta_disks;
4218
4219 err = -EINVAL;
4220 rdev_for_each(rdev, mddev) {
4221 if (olddisks < n &&
4222 rdev->data_offset < rdev->new_data_offset)
4223 goto out_unlock;
4224 if (olddisks > n &&
4225 rdev->data_offset > rdev->new_data_offset)
4226 goto out_unlock;
4227 }
4228 err = 0;
4229 mddev->delta_disks = n - olddisks;
4230 mddev->raid_disks = n;
4231 mddev->reshape_backwards = (mddev->delta_disks < 0);
4232 } else
4233 mddev->raid_disks = n;
4234out_unlock:
4235 mddev_unlock(mddev);
4236 return err ? err : len;
4237}
4238static struct md_sysfs_entry md_raid_disks =
4239__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4240
4241static ssize_t
4242uuid_show(struct mddev *mddev, char *page)
4243{
4244 return sprintf(page, "%pU\n", mddev->uuid);
4245}
4246static struct md_sysfs_entry md_uuid =
4247__ATTR(uuid, S_IRUGO, uuid_show, NULL);
4248
4249static ssize_t
4250chunk_size_show(struct mddev *mddev, char *page)
4251{
4252 if (mddev->reshape_position != MaxSector &&
4253 mddev->chunk_sectors != mddev->new_chunk_sectors)
4254 return sprintf(page, "%d (%d)\n",
4255 mddev->new_chunk_sectors << 9,
4256 mddev->chunk_sectors << 9);
4257 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4258}
4259
4260static ssize_t
4261chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4262{
4263 unsigned long n;
4264 int err;
4265
4266 err = kstrtoul(buf, 10, &n);
4267 if (err < 0)
4268 return err;
4269
4270 err = mddev_lock(mddev);
4271 if (err)
4272 return err;
4273 if (mddev->pers) {
4274 if (mddev->pers->check_reshape == NULL)
4275 err = -EBUSY;
4276 else if (mddev->ro)
4277 err = -EROFS;
4278 else {
4279 mddev->new_chunk_sectors = n >> 9;
4280 err = mddev->pers->check_reshape(mddev);
4281 if (err)
4282 mddev->new_chunk_sectors = mddev->chunk_sectors;
4283 }
4284 } else {
4285 mddev->new_chunk_sectors = n >> 9;
4286 if (mddev->reshape_position == MaxSector)
4287 mddev->chunk_sectors = n >> 9;
4288 }
4289 mddev_unlock(mddev);
4290 return err ?: len;
4291}
4292static struct md_sysfs_entry md_chunk_size =
4293__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4294
4295static ssize_t
4296resync_start_show(struct mddev *mddev, char *page)
4297{
4298 if (mddev->recovery_cp == MaxSector)
4299 return sprintf(page, "none\n");
4300 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4301}
4302
4303static ssize_t
4304resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4305{
4306 unsigned long long n;
4307 int err;
4308
4309 if (cmd_match(buf, "none"))
4310 n = MaxSector;
4311 else {
4312 err = kstrtoull(buf, 10, &n);
4313 if (err < 0)
4314 return err;
4315 if (n != (sector_t)n)
4316 return -EINVAL;
4317 }
4318
4319 err = mddev_lock(mddev);
4320 if (err)
4321 return err;
4322 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4323 err = -EBUSY;
4324
4325 if (!err) {
4326 mddev->recovery_cp = n;
4327 if (mddev->pers)
4328 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4329 }
4330 mddev_unlock(mddev);
4331 return err ?: len;
4332}
4333static struct md_sysfs_entry md_resync_start =
4334__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4335 resync_start_show, resync_start_store);
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
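/*
 * Array states exposed through the 'array_state' file:
 *  clear         - no devices, no size, no level; writing it stops and
 *                  clears an array
 *  inactive      - may have devices and settings but is not started;
 *                  writing it stops a running array without clearing it
 *  suspended     - listed for completeness, never reported or settable
 *                  here
 *  readonly      - started, no writes allowed
 *  read-auto     - like readonly, but switches to read-write when the
 *                  first write arrives
 *  clean         - started, no pending writes, superblock marked clean
 *  active        - started and possibly dirty
 *  write-pending - a write is waiting for the superblock to be marked
 *                  dirty first
 *  active-idle   - active, but safemode is set (no recent writes)
 *  broken        - reported instead of 'clean' when MD_BROKEN is set
 * suspended, write-pending, active-idle and broken cannot be written.
 */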
4378enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4379 write_pending, active_idle, broken, bad_word};
4380static char *array_states[] = {
4381 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4382 "write-pending", "active-idle", "broken", NULL };
4383
4384static int match_word(const char *word, char **list)
4385{
4386 int n;
4387 for (n=0; list[n]; n++)
4388 if (cmd_match(word, list[n]))
4389 break;
4390 return n;
4391}
4392
4393static ssize_t
4394array_state_show(struct mddev *mddev, char *page)
4395{
4396 enum array_state st = inactive;
4397
4398 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4399 switch(mddev->ro) {
4400 case 1:
4401 st = readonly;
4402 break;
4403 case 2:
4404 st = read_auto;
4405 break;
4406 case 0:
4407 spin_lock(&mddev->lock);
4408 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4409 st = write_pending;
4410 else if (mddev->in_sync)
4411 st = clean;
4412 else if (mddev->safemode)
4413 st = active_idle;
4414 else
4415 st = active;
4416 spin_unlock(&mddev->lock);
4417 }
4418
4419 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4420 st = broken;
4421 } else {
4422 if (list_empty(&mddev->disks) &&
4423 mddev->raid_disks == 0 &&
4424 mddev->dev_sectors == 0)
4425 st = clear;
4426 else
4427 st = inactive;
4428 }
4429 return sprintf(page, "%s\n", array_states[st]);
4430}
4431
4432static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4433static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4434static int restart_array(struct mddev *mddev);
4435
4436static ssize_t
4437array_state_store(struct mddev *mddev, const char *buf, size_t len)
4438{
4439 int err = 0;
4440 enum array_state st = match_word(buf, array_states);
4441
4442 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4443
4444
4445
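/* toggling a running array between clean and active only needs the
 * spinlock, not the reconfig mutex
 */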
4446 spin_lock(&mddev->lock);
4447 if (st == active) {
4448 restart_array(mddev);
4449 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4450 md_wakeup_thread(mddev->thread);
4451 wake_up(&mddev->sb_wait);
4452 } else {
4453 restart_array(mddev);
4454 if (!set_in_sync(mddev))
4455 err = -EBUSY;
4456 }
4457 if (!err)
4458 sysfs_notify_dirent_safe(mddev->sysfs_state);
4459 spin_unlock(&mddev->lock);
4460 return err ?: len;
4461 }
4462 err = mddev_lock(mddev);
4463 if (err)
4464 return err;
4465 err = -EINVAL;
4466 switch(st) {
4467 case bad_word:
4468 break;
4469 case clear:
4470
4471 err = do_md_stop(mddev, 0, NULL);
4472 break;
4473 case inactive:
4474
4475 if (mddev->pers)
4476 err = do_md_stop(mddev, 2, NULL);
4477 else
4478 err = 0;
4479 break;
4480 case suspended:
4481 break;
4482 case readonly:
4483 if (mddev->pers)
4484 err = md_set_readonly(mddev, NULL);
4485 else {
4486 mddev->ro = 1;
4487 set_disk_ro(mddev->gendisk, 1);
4488 err = do_md_run(mddev);
4489 }
4490 break;
4491 case read_auto:
4492 if (mddev->pers) {
4493 if (mddev->ro == 0)
4494 err = md_set_readonly(mddev, NULL);
4495 else if (mddev->ro == 1)
4496 err = restart_array(mddev);
4497 if (err == 0) {
4498 mddev->ro = 2;
4499 set_disk_ro(mddev->gendisk, 0);
4500 }
4501 } else {
4502 mddev->ro = 2;
4503 err = do_md_run(mddev);
4504 }
4505 break;
4506 case clean:
4507 if (mddev->pers) {
4508 err = restart_array(mddev);
4509 if (err)
4510 break;
4511 spin_lock(&mddev->lock);
4512 if (!set_in_sync(mddev))
4513 err = -EBUSY;
4514 spin_unlock(&mddev->lock);
4515 } else
4516 err = -EINVAL;
4517 break;
4518 case active:
4519 if (mddev->pers) {
4520 err = restart_array(mddev);
4521 if (err)
4522 break;
4523 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4524 wake_up(&mddev->sb_wait);
4525 err = 0;
4526 } else {
4527 mddev->ro = 0;
4528 set_disk_ro(mddev->gendisk, 0);
4529 err = do_md_run(mddev);
4530 }
4531 break;
4532 case write_pending:
4533 case active_idle:
4534 case broken:
4535
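/* these states are reported but cannot be set directly */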
4536 break;
4537 }
4538
4539 if (!err) {
4540 if (mddev->hold_active == UNTIL_IOCTL)
4541 mddev->hold_active = 0;
4542 sysfs_notify_dirent_safe(mddev->sysfs_state);
4543 }
4544 mddev_unlock(mddev);
4545 return err ?: len;
4546}
4547static struct md_sysfs_entry md_array_state =
4548__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4549
4550static ssize_t
4551max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4552 return sprintf(page, "%d\n",
4553 atomic_read(&mddev->max_corr_read_errors));
4554}
4555
4556static ssize_t
4557max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4558{
4559 unsigned int n;
4560 int rv;
4561
4562 rv = kstrtouint(buf, 10, &n);
4563 if (rv < 0)
4564 return rv;
4565 atomic_set(&mddev->max_corr_read_errors, n);
4566 return len;
4567}
4568
4569static struct md_sysfs_entry max_corr_read_errors =
4570__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4571 max_corrected_read_errors_store);
4572
4573static ssize_t
4574null_show(struct mddev *mddev, char *page)
4575{
4576 return -EINVAL;
4577}
4578
4579
4580static void flush_rdev_wq(struct mddev *mddev)
4581{
4582 struct md_rdev *rdev;
4583
4584 rcu_read_lock();
4585 rdev_for_each_rcu(rdev, mddev)
4586 if (work_pending(&rdev->del_work)) {
4587 flush_workqueue(md_rdev_misc_wq);
4588 break;
4589 }
4590 rcu_read_unlock();
4591}
4592
4593static ssize_t
4594new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4595{
4596	/* buf must be %d:%d\n? giving major and minor numbers */
4597	/* The new device is added to the array.
4598	 * If the array has a persistent superblock, we read the
4599	 * superblock to initialise info and check validity.
4600	 * Otherwise, only checking done is that in bind_rdev_to_array,
4601	 * which mainly checks size.
4602	 */
4603 char *e;
4604 int major = simple_strtoul(buf, &e, 10);
4605 int minor;
4606 dev_t dev;
4607 struct md_rdev *rdev;
4608 int err;
4609
4610 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4611 return -EINVAL;
4612 minor = simple_strtoul(e+1, &e, 10);
4613 if (*e && *e != '\n')
4614 return -EINVAL;
4615 dev = MKDEV(major, minor);
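	/* MKDEV packs major/minor into a dev_t; if unpacking does not give the
	 * same numbers back, the requested values were out of range. */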
4616 if (major != MAJOR(dev) ||
4617 minor != MINOR(dev))
4618 return -EOVERFLOW;
4619
4620 flush_rdev_wq(mddev);
4621 err = mddev_lock(mddev);
4622 if (err)
4623 return err;
4624 if (mddev->persistent) {
4625 rdev = md_import_device(dev, mddev->major_version,
4626 mddev->minor_version);
4627 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4628 struct md_rdev *rdev0
4629 = list_entry(mddev->disks.next,
4630 struct md_rdev, same_set);
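			/* check the new device's superblock is compatible with
			 * the first existing array member before binding it */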
4631 err = super_types[mddev->major_version]
4632 .load_super(rdev, rdev0, mddev->minor_version);
4633 if (err < 0)
4634 goto out;
4635 }
4636 } else if (mddev->external)
4637 rdev = md_import_device(dev, -2, -1);
4638 else
4639 rdev = md_import_device(dev, -1, -1);
4640
4641 if (IS_ERR(rdev)) {
4642 mddev_unlock(mddev);
4643 return PTR_ERR(rdev);
4644 }
4645 err = bind_rdev_to_array(rdev, mddev);
4646 out:
4647 if (err)
4648 export_rdev(rdev);
4649 mddev_unlock(mddev);
4650 if (!err)
4651 md_new_event(mddev);
4652 return err ? err : len;
4653}
4654
4655static struct md_sysfs_entry md_new_device =
4656__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4657
4658static ssize_t
4659bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4660{
4661 char *end;
4662 unsigned long chunk, end_chunk;
4663 int err;
4664
4665 err = mddev_lock(mddev);
4666 if (err)
4667 return err;
4668 if (!mddev->bitmap)
4669 goto out;
4670
4671 while (*buf) {
4672 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4673 if (buf == end) break;
4674 if (*end == '-') {
4675 buf = end + 1;
4676 end_chunk = simple_strtoul(buf, &end, 0);
4677 if (buf == end) break;
4678 }
4679 if (*end && !isspace(*end)) break;
4680 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4681 buf = skip_spaces(end);
4682 }
4683 md_bitmap_unplug(mddev->bitmap);
4684out:
4685 mddev_unlock(mddev);
4686 return len;
4687}
4688
4689static struct md_sysfs_entry md_bitmap =
4690__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4691
4692static ssize_t
4693size_show(struct mddev *mddev, char *page)
4694{
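	/* dev_sectors counts 512-byte sectors; report the component size in KiB */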
4695 return sprintf(page, "%llu\n",
4696 (unsigned long long)mddev->dev_sectors / 2);
4697}
4698
4699static int update_size(struct mddev *mddev, sector_t num_sectors);
4700
4701static ssize_t
4702size_store(struct mddev *mddev, const char *buf, size_t len)
4703{
4704	/* If array is inactive, we can reduce the component size, but
4705	 * not increase it (except from 0).
4706	 * If array is active, we can try an on-line resize
4707	 */
4708 sector_t sectors;
4709	int err = strict_blocks_to_sectors(buf, &sectors);
4710
4711 if (err < 0)
4712 return err;
4713 err = mddev_lock(mddev);
4714 if (err)
4715 return err;
4716 if (mddev->pers) {
4717 err = update_size(mddev, sectors);
4718 if (err == 0)
4719 md_update_sb(mddev, 1);
4720 } else {
4721 if (mddev->dev_sectors == 0 ||
4722 mddev->dev_sectors > sectors)
4723 mddev->dev_sectors = sectors;
4724 else
4725 err = -ENOSPC;
4726 }
4727 mddev_unlock(mddev);
4728 return err ? err : len;
4729}
4730
4731static struct md_sysfs_entry md_size =
4732__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4733
4734/* Metadata version.
4735 * This is one of
4736 *   'none' for arrays with no metadata (good luck...)
4737 *   'external' for arrays with externally managed metadata,
4738 * or N.M for internally known formats
4739 */
4740static ssize_t
4741metadata_show(struct mddev *mddev, char *page)
4742{
4743 if (mddev->persistent)
4744 return sprintf(page, "%d.%d\n",
4745 mddev->major_version, mddev->minor_version);
4746 else if (mddev->external)
4747 return sprintf(page, "external:%s\n", mddev->metadata_type);
4748 else
4749 return sprintf(page, "none\n");
4750}
4751
4752static ssize_t
4753metadata_store(struct mddev *mddev, const char *buf, size_t len)
4754{
4755 int major, minor;
4756 char *e;
4757 int err;
4758
4759	/* Changing the details of 'external' metadata is
4760	 * always permitted.  Otherwise there must be
4761	 * no devices attached to the array.
4762	 */
4763 err = mddev_lock(mddev);
4764 if (err)
4765 return err;
4766 err = -EBUSY;
4767 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4768 ;
4769 else if (!list_empty(&mddev->disks))
4770 goto out_unlock;
4771
4772 err = 0;
4773 if (cmd_match(buf, "none")) {
4774 mddev->persistent = 0;
4775 mddev->external = 0;
4776 mddev->major_version = 0;
4777 mddev->minor_version = 90;
4778 goto out_unlock;
4779 }
4780 if (strncmp(buf, "external:", 9) == 0) {
4781 size_t namelen = len-9;
4782 if (namelen >= sizeof(mddev->metadata_type))
4783 namelen = sizeof(mddev->metadata_type)-1;
4784 strncpy(mddev->metadata_type, buf+9, namelen);
4785 mddev->metadata_type[namelen] = 0;
4786 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4787 mddev->metadata_type[--namelen] = 0;
4788 mddev->persistent = 0;
4789 mddev->external = 1;
4790 mddev->major_version = 0;
4791 mddev->minor_version = 90;
4792 goto out_unlock;
4793 }
4794 major = simple_strtoul(buf, &e, 10);
4795 err = -EINVAL;
4796 if (e==buf || *e != '.')
4797 goto out_unlock;
4798 buf = e+1;
4799 minor = simple_strtoul(buf, &e, 10);
4800 if (e==buf || (*e && *e != '\n') )
4801 goto out_unlock;
4802 err = -ENOENT;
4803 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4804 goto out_unlock;
4805 mddev->major_version = major;
4806 mddev->minor_version = minor;
4807 mddev->persistent = 1;
4808 mddev->external = 0;
4809 err = 0;
4810out_unlock:
4811 mddev_unlock(mddev);
4812 return err ?: len;
4813}
4814
4815static struct md_sysfs_entry md_metadata =
4816__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4817
4818static ssize_t
4819action_show(struct mddev *mddev, char *page)
4820{
4821 char *type = "idle";
4822 unsigned long recovery = mddev->recovery;
4823 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4824 type = "frozen";
4825 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4826 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4827 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4828 type = "reshape";
4829 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4830 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4831 type = "resync";
4832 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4833 type = "check";
4834 else
4835 type = "repair";
4836 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4837 type = "recover";
4838 else if (mddev->reshape_position != MaxSector)
4839 type = "reshape";
4840 }
4841 return sprintf(page, "%s\n", type);
4842}
4843
4844static ssize_t
4845action_store(struct mddev *mddev, const char *page, size_t len)
4846{
4847 if (!mddev->pers || !mddev->pers->sync_request)
4848 return -EINVAL;
4849	/* "idle" and "frozen" both stop any running resync/recovery;
4850	 * "frozen" additionally prevents a new one from being started. */
4851 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4852 if (cmd_match(page, "frozen"))
4853 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4854 else
4855 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4856 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4857 mddev_lock(mddev) == 0) {
4858 if (work_pending(&mddev->del_work))
4859 flush_workqueue(md_misc_wq);
4860 if (mddev->sync_thread) {
4861 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4862 md_reap_sync_thread(mddev);
4863 }
4864 mddev_unlock(mddev);
4865 }
4866 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4867 return -EBUSY;
4868 else if (cmd_match(page, "resync"))
4869 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4870 else if (cmd_match(page, "recover")) {
4871 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4872 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4873 } else if (cmd_match(page, "reshape")) {
4874 int err;
4875 if (mddev->pers->start_reshape == NULL)
4876 return -EINVAL;
4877 err = mddev_lock(mddev);
4878 if (!err) {
4879 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4880 err = -EBUSY;
4881 else {
4882 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4883 err = mddev->pers->start_reshape(mddev);
4884 }
4885 mddev_unlock(mddev);
4886 }
4887 if (err)
4888 return err;
4889 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4890 } else {
4891 if (cmd_match(page, "check"))
4892 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4893 else if (!cmd_match(page, "repair"))
4894 return -EINVAL;
4895 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4896 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4897 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4898 }
4899 if (mddev->ro == 2) {
4900		/* A write to sync_action is enough to justify
4901		 * canceling read-auto mode
4902		 */
4903 mddev->ro = 0;
4904 md_wakeup_thread(mddev->sync_thread);
4905 }
4906 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4907 md_wakeup_thread(mddev->thread);
4908 sysfs_notify_dirent_safe(mddev->sysfs_action);
4909 return len;
4910}
4911
4912static struct md_sysfs_entry md_scan_mode =
4913__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4914
4915static ssize_t
4916last_sync_action_show(struct mddev *mddev, char *page)
4917{
4918 return sprintf(page, "%s\n", mddev->last_sync_action);
4919}
4920
4921static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4922
4923static ssize_t
4924mismatch_cnt_show(struct mddev *mddev, char *page)
4925{
4926 return sprintf(page, "%llu\n",
4927 (unsigned long long)
4928 atomic64_read(&mddev->resync_mismatches));
4929}
4930
4931static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4932
4933static ssize_t
4934sync_min_show(struct mddev *mddev, char *page)
4935{
4936 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4937 mddev->sync_speed_min ? "local": "system");
4938}
4939
4940static ssize_t
4941sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4942{
4943 unsigned int min;
4944 int rv;
4945
4946 if (strncmp(buf, "system", 6)==0) {
4947 min = 0;
4948 } else {
4949 rv = kstrtouint(buf, 10, &min);
4950 if (rv < 0)
4951 return rv;
4952 if (min == 0)
4953 return -EINVAL;
4954 }
4955 mddev->sync_speed_min = min;
4956 return len;
4957}
4958
4959static struct md_sysfs_entry md_sync_min =
4960__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4961
4962static ssize_t
4963sync_max_show(struct mddev *mddev, char *page)
4964{
4965 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4966 mddev->sync_speed_max ? "local": "system");
4967}
4968
4969static ssize_t
4970sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4971{
4972 unsigned int max;
4973 int rv;
4974
4975 if (strncmp(buf, "system", 6)==0) {
4976 max = 0;
4977 } else {
4978 rv = kstrtouint(buf, 10, &max);
4979 if (rv < 0)
4980 return rv;
4981 if (max == 0)
4982 return -EINVAL;
4983 }
4984 mddev->sync_speed_max = max;
4985 return len;
4986}
4987
4988static struct md_sysfs_entry md_sync_max =
4989__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4990
4991static ssize_t
4992degraded_show(struct mddev *mddev, char *page)
4993{
4994 return sprintf(page, "%d\n", mddev->degraded);
4995}
4996static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4997
4998static ssize_t
4999sync_force_parallel_show(struct mddev *mddev, char *page)
5000{
5001 return sprintf(page, "%d\n", mddev->parallel_resync);
5002}
5003
5004static ssize_t
5005sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
5006{
5007 long n;
5008
5009 if (kstrtol(buf, 10, &n))
5010 return -EINVAL;
5011
5012 if (n != 0 && n != 1)
5013 return -EINVAL;
5014
5015 mddev->parallel_resync = n;
5016
5017 if (mddev->sync_thread)
5018 wake_up(&resync_wait);
5019
5020 return len;
5021}
5022
5023
5024static struct md_sysfs_entry md_sync_force_parallel =
5025__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
5026 sync_force_parallel_show, sync_force_parallel_store);
5027
5028static ssize_t
5029sync_speed_show(struct mddev *mddev, char *page)
5030{
5031 unsigned long resync, dt, db;
5032 if (mddev->curr_resync == 0)
5033 return sprintf(page, "none\n");
5034 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5035 dt = (jiffies - mddev->resync_mark) / HZ;
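	/* dt is the elapsed time in seconds; guard against a zero interval so
	 * the division below is safe */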
5036 if (!dt) dt++;
5037 db = resync - mddev->resync_mark_cnt;
5038 return sprintf(page, "%lu\n", db/dt/2);
5039}
5040
5041static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
5042
5043static ssize_t
5044sync_completed_show(struct mddev *mddev, char *page)
5045{
5046 unsigned long long max_sectors, resync;
5047
5048 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5049 return sprintf(page, "none\n");
5050
5051 if (mddev->curr_resync == 1 ||
5052 mddev->curr_resync == 2)
5053 return sprintf(page, "delayed\n");
5054
5055 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5056 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5057 max_sectors = mddev->resync_max_sectors;
5058 else
5059 max_sectors = mddev->dev_sectors;
5060
5061 resync = mddev->curr_resync_completed;
5062 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5063}
5064
5065static struct md_sysfs_entry md_sync_completed =
5066 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5067
5068static ssize_t
5069min_sync_show(struct mddev *mddev, char *page)
5070{
5071 return sprintf(page, "%llu\n",
5072 (unsigned long long)mddev->resync_min);
5073}
5074static ssize_t
5075min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5076{
5077 unsigned long long min;
5078 int err;
5079
5080 if (kstrtoull(buf, 10, &min))
5081 return -EINVAL;
5082
5083 spin_lock(&mddev->lock);
5084 err = -EINVAL;
5085 if (min > mddev->resync_max)
5086 goto out_unlock;
5087
5088 err = -EBUSY;
5089 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5090 goto out_unlock;
5091
5092	/* Round down to multiple of 4K for safety */
5093 mddev->resync_min = round_down(min, 8);
5094 err = 0;
5095
5096out_unlock:
5097 spin_unlock(&mddev->lock);
5098 return err ?: len;
5099}
5100
5101static struct md_sysfs_entry md_min_sync =
5102__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5103
5104static ssize_t
5105max_sync_show(struct mddev *mddev, char *page)
5106{
5107 if (mddev->resync_max == MaxSector)
5108 return sprintf(page, "max\n");
5109 else
5110 return sprintf(page, "%llu\n",
5111 (unsigned long long)mddev->resync_max);
5112}
5113static ssize_t
5114max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5115{
5116 int err;
5117 spin_lock(&mddev->lock);
5118 if (strncmp(buf, "max", 3) == 0)
5119 mddev->resync_max = MaxSector;
5120 else {
5121 unsigned long long max;
5122 int chunk;
5123
5124 err = -EINVAL;
5125 if (kstrtoull(buf, 10, &max))
5126 goto out_unlock;
5127 if (max < mddev->resync_min)
5128 goto out_unlock;
5129
5130 err = -EBUSY;
5131 if (max < mddev->resync_max &&
5132 mddev->ro == 0 &&
5133 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5134 goto out_unlock;
5135
5136		/* Must be a multiple of chunk_size */
5137 chunk = mddev->chunk_sectors;
5138 if (chunk) {
5139 sector_t temp = max;
5140
5141 err = -EINVAL;
5142 if (sector_div(temp, chunk))
5143 goto out_unlock;
5144 }
5145 mddev->resync_max = max;
5146 }
5147 wake_up(&mddev->recovery_wait);
5148 err = 0;
5149out_unlock:
5150 spin_unlock(&mddev->lock);
5151 return err ?: len;
5152}
5153
5154static struct md_sysfs_entry md_max_sync =
5155__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
5156
5157static ssize_t
5158suspend_lo_show(struct mddev *mddev, char *page)
5159{
5160 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5161}
5162
5163static ssize_t
5164suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5165{
5166 unsigned long long new;
5167 int err;
5168
5169 err = kstrtoull(buf, 10, &new);
5170 if (err < 0)
5171 return err;
5172 if (new != (sector_t)new)
5173 return -EINVAL;
5174
5175 err = mddev_lock(mddev);
5176 if (err)
5177 return err;
5178 err = -EINVAL;
5179 if (mddev->pers == NULL ||
5180 mddev->pers->quiesce == NULL)
5181 goto unlock;
5182 mddev_suspend(mddev);
5183 mddev->suspend_lo = new;
5184 mddev_resume(mddev);
5185
5186 err = 0;
5187unlock:
5188 mddev_unlock(mddev);
5189 return err ?: len;
5190}
5191static struct md_sysfs_entry md_suspend_lo =
5192__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5193
5194static ssize_t
5195suspend_hi_show(struct mddev *mddev, char *page)
5196{
5197 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5198}
5199
5200static ssize_t
5201suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5202{
5203 unsigned long long new;
5204 int err;
5205
5206 err = kstrtoull(buf, 10, &new);
5207 if (err < 0)
5208 return err;
5209 if (new != (sector_t)new)
5210 return -EINVAL;
5211
5212 err = mddev_lock(mddev);
5213 if (err)
5214 return err;
5215 err = -EINVAL;
5216 if (mddev->pers == NULL)
5217 goto unlock;
5218
5219 mddev_suspend(mddev);
5220 mddev->suspend_hi = new;
5221 mddev_resume(mddev);
5222
5223 err = 0;
5224unlock:
5225 mddev_unlock(mddev);
5226 return err ?: len;
5227}
5228static struct md_sysfs_entry md_suspend_hi =
5229__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5230
5231static ssize_t
5232reshape_position_show(struct mddev *mddev, char *page)
5233{
5234 if (mddev->reshape_position != MaxSector)
5235 return sprintf(page, "%llu\n",
5236 (unsigned long long)mddev->reshape_position);
5237 strcpy(page, "none\n");
5238 return 5;
5239}
5240
5241static ssize_t
5242reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5243{
5244 struct md_rdev *rdev;
5245 unsigned long long new;
5246 int err;
5247
5248 err = kstrtoull(buf, 10, &new);
5249 if (err < 0)
5250 return err;
5251 if (new != (sector_t)new)
5252 return -EINVAL;
5253 err = mddev_lock(mddev);
5254 if (err)
5255 return err;
5256 err = -EBUSY;
5257 if (mddev->pers)
5258 goto unlock;
5259 mddev->reshape_position = new;
5260 mddev->delta_disks = 0;
5261 mddev->reshape_backwards = 0;
5262 mddev->new_level = mddev->level;
5263 mddev->new_layout = mddev->layout;
5264 mddev->new_chunk_sectors = mddev->chunk_sectors;
5265 rdev_for_each(rdev, mddev)
5266 rdev->new_data_offset = rdev->data_offset;
5267 err = 0;
5268unlock:
5269 mddev_unlock(mddev);
5270 return err ?: len;
5271}
5272
5273static struct md_sysfs_entry md_reshape_position =
5274__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5275 reshape_position_store);
5276
5277static ssize_t
5278reshape_direction_show(struct mddev *mddev, char *page)
5279{
5280 return sprintf(page, "%s\n",
5281 mddev->reshape_backwards ? "backwards" : "forwards");
5282}
5283
5284static ssize_t
5285reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5286{
5287 int backwards = 0;
5288 int err;
5289
5290 if (cmd_match(buf, "forwards"))
5291 backwards = 0;
5292 else if (cmd_match(buf, "backwards"))
5293 backwards = 1;
5294 else
5295 return -EINVAL;
5296 if (mddev->reshape_backwards == backwards)
5297 return len;
5298
5299 err = mddev_lock(mddev);
5300 if (err)
5301 return err;
5302
5303 if (mddev->delta_disks)
5304 err = -EBUSY;
5305 else if (mddev->persistent &&
5306 mddev->major_version == 0)
5307 err = -EINVAL;
5308 else
5309 mddev->reshape_backwards = backwards;
5310 mddev_unlock(mddev);
5311 return err ?: len;
5312}
5313
5314static struct md_sysfs_entry md_reshape_direction =
5315__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5316 reshape_direction_store);
5317
5318static ssize_t
5319array_size_show(struct mddev *mddev, char *page)
5320{
5321 if (mddev->external_size)
5322 return sprintf(page, "%llu\n",
5323 (unsigned long long)mddev->array_sectors/2);
5324 else
5325 return sprintf(page, "default\n");
5326}
5327
5328static ssize_t
5329array_size_store(struct mddev *mddev, const char *buf, size_t len)
5330{
5331 sector_t sectors;
5332 int err;
5333
5334 err = mddev_lock(mddev);
5335 if (err)
5336 return err;
5337
5338	/* cluster raid doesn't support changing array_sectors */
5339 if (mddev_is_clustered(mddev)) {
5340 mddev_unlock(mddev);
5341 return -EINVAL;
5342 }
5343
5344 if (strncmp(buf, "default", 7) == 0) {
5345 if (mddev->pers)
5346 sectors = mddev->pers->size(mddev, 0, 0);
5347 else
5348 sectors = mddev->array_sectors;
5349
5350 mddev->external_size = 0;
5351 } else {
5352		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5353 err = -EINVAL;
5354 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5355 err = -E2BIG;
5356 else
5357 mddev->external_size = 1;
5358 }
5359
5360 if (!err) {
5361 mddev->array_sectors = sectors;
5362 if (mddev->pers)
5363 set_capacity_and_notify(mddev->gendisk,
5364 mddev->array_sectors);
5365 }
5366 mddev_unlock(mddev);
5367 return err ?: len;
5368}
5369
5370static struct md_sysfs_entry md_array_size =
5371__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5372 array_size_store);
5373
5374static ssize_t
5375consistency_policy_show(struct mddev *mddev, char *page)
5376{
5377 int ret;
5378
5379 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5380 ret = sprintf(page, "journal\n");
5381 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5382 ret = sprintf(page, "ppl\n");
5383 } else if (mddev->bitmap) {
5384 ret = sprintf(page, "bitmap\n");
5385 } else if (mddev->pers) {
5386 if (mddev->pers->sync_request)
5387 ret = sprintf(page, "resync\n");
5388 else
5389 ret = sprintf(page, "none\n");
5390 } else {
5391 ret = sprintf(page, "unknown\n");
5392 }
5393
5394 return ret;
5395}
5396
5397static ssize_t
5398consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5399{
5400 int err = 0;
5401
5402 if (mddev->pers) {
5403 if (mddev->pers->change_consistency_policy)
5404 err = mddev->pers->change_consistency_policy(mddev, buf);
5405 else
5406 err = -EBUSY;
5407 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5408 set_bit(MD_HAS_PPL, &mddev->flags);
5409 } else {
5410 err = -EINVAL;
5411 }
5412
5413 return err ? err : len;
5414}
5415
5416static struct md_sysfs_entry md_consistency_policy =
5417__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5418 consistency_policy_store);
5419
5420static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5421{
5422 return sprintf(page, "%d\n", mddev->fail_last_dev);
5423}
5424
5425/*
5426 * Setting fail_last_dev to true allows the last device to be forcibly
5427 * removed from RAID1/RAID10.
5428 */
5429static ssize_t
5430fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5431{
5432 int ret;
5433 bool value;
5434
5435 ret = kstrtobool(buf, &value);
5436 if (ret)
5437 return ret;
5438
5439 if (value != mddev->fail_last_dev)
5440 mddev->fail_last_dev = value;
5441
5442 return len;
5443}
5444static struct md_sysfs_entry md_fail_last_dev =
5445__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5446 fail_last_dev_store);
5447
5448static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5449{
5450 if (mddev->pers == NULL || (mddev->pers->level != 1))
5451 return sprintf(page, "n/a\n");
5452 else
5453 return sprintf(page, "%d\n", mddev->serialize_policy);
5454}
5455
5456/*
5457 * Setting serialize_policy to true enforces that write IO is not reordered
5458 * for raid1.
5459 */
5460static ssize_t
5461serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5462{
5463 int err;
5464 bool value;
5465
5466 err = kstrtobool(buf, &value);
5467 if (err)
5468 return err;
5469
5470 if (value == mddev->serialize_policy)
5471 return len;
5472
5473 err = mddev_lock(mddev);
5474 if (err)
5475 return err;
5476 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5477 pr_err("md: serialize_policy is only effective for raid1\n");
5478 err = -EINVAL;
5479 goto unlock;
5480 }
5481
5482 mddev_suspend(mddev);
5483 if (value)
5484 mddev_create_serial_pool(mddev, NULL, true);
5485 else
5486 mddev_destroy_serial_pool(mddev, NULL, true);
5487 mddev->serialize_policy = value;
5488 mddev_resume(mddev);
5489unlock:
5490 mddev_unlock(mddev);
5491 return err ?: len;
5492}
5493
5494static struct md_sysfs_entry md_serialize_policy =
5495__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5496 serialize_policy_store);
5497
5498
5499static struct attribute *md_default_attrs[] = {
5500 &md_level.attr,
5501 &md_layout.attr,
5502 &md_raid_disks.attr,
5503 &md_uuid.attr,
5504 &md_chunk_size.attr,
5505 &md_size.attr,
5506 &md_resync_start.attr,
5507 &md_metadata.attr,
5508 &md_new_device.attr,
5509 &md_safe_delay.attr,
5510 &md_array_state.attr,
5511 &md_reshape_position.attr,
5512 &md_reshape_direction.attr,
5513 &md_array_size.attr,
5514 &max_corr_read_errors.attr,
5515 &md_consistency_policy.attr,
5516 &md_fail_last_dev.attr,
5517 &md_serialize_policy.attr,
5518 NULL,
5519};
5520
5521static struct attribute *md_redundancy_attrs[] = {
5522 &md_scan_mode.attr,
5523 &md_last_scan_mode.attr,
5524 &md_mismatches.attr,
5525 &md_sync_min.attr,
5526 &md_sync_max.attr,
5527 &md_sync_speed.attr,
5528 &md_sync_force_parallel.attr,
5529 &md_sync_completed.attr,
5530 &md_min_sync.attr,
5531 &md_max_sync.attr,
5532 &md_suspend_lo.attr,
5533 &md_suspend_hi.attr,
5534 &md_bitmap.attr,
5535 &md_degraded.attr,
5536 NULL,
5537};
5538static struct attribute_group md_redundancy_group = {
5539 .name = NULL,
5540 .attrs = md_redundancy_attrs,
5541};
5542
5543static ssize_t
5544md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5545{
5546 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5547 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5548 ssize_t rv;
5549
5550 if (!entry->show)
5551 return -EIO;
5552 spin_lock(&all_mddevs_lock);
5553 if (list_empty(&mddev->all_mddevs)) {
5554 spin_unlock(&all_mddevs_lock);
5555 return -EBUSY;
5556 }
5557 mddev_get(mddev);
5558 spin_unlock(&all_mddevs_lock);
5559
5560 rv = entry->show(mddev, page);
5561 mddev_put(mddev);
5562 return rv;
5563}
5564
5565static ssize_t
5566md_attr_store(struct kobject *kobj, struct attribute *attr,
5567 const char *page, size_t length)
5568{
5569 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5570 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5571 ssize_t rv;
5572
5573 if (!entry->store)
5574 return -EIO;
5575 if (!capable(CAP_SYS_ADMIN))
5576 return -EACCES;
5577 spin_lock(&all_mddevs_lock);
5578 if (list_empty(&mddev->all_mddevs)) {
5579 spin_unlock(&all_mddevs_lock);
5580 return -EBUSY;
5581 }
5582 mddev_get(mddev);
5583 spin_unlock(&all_mddevs_lock);
5584 rv = entry->store(mddev, page, length);
5585 mddev_put(mddev);
5586 return rv;
5587}
5588
5589static void md_free(struct kobject *ko)
5590{
5591 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5592
5593 if (mddev->sysfs_state)
5594 sysfs_put(mddev->sysfs_state);
5595 if (mddev->sysfs_level)
5596 sysfs_put(mddev->sysfs_level);
5597
5598 if (mddev->gendisk)
5599 del_gendisk(mddev->gendisk);
5600 if (mddev->queue)
5601 blk_cleanup_queue(mddev->queue);
5602 if (mddev->gendisk)
5603 put_disk(mddev->gendisk);
5604 percpu_ref_exit(&mddev->writes_pending);
5605
5606 bioset_exit(&mddev->bio_set);
5607 bioset_exit(&mddev->sync_set);
5608 mempool_exit(&mddev->md_io_pool);
5609 kfree(mddev);
5610}
5611
5612static const struct sysfs_ops md_sysfs_ops = {
5613 .show = md_attr_show,
5614 .store = md_attr_store,
5615};
5616static struct kobj_type md_ktype = {
5617 .release = md_free,
5618 .sysfs_ops = &md_sysfs_ops,
5619 .default_attrs = md_default_attrs,
5620};
5621
5622int mdp_major = 0;
5623
5624static void mddev_delayed_delete(struct work_struct *ws)
5625{
5626 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5627
5628 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5629 kobject_del(&mddev->kobj);
5630 kobject_put(&mddev->kobj);
5631}
5632
5633static void no_op(struct percpu_ref *r) {}
5634
5635int mddev_init_writes_pending(struct mddev *mddev)
5636{
5637 if (mddev->writes_pending.percpu_count_ptr)
5638 return 0;
5639 if (percpu_ref_init(&mddev->writes_pending, no_op,
5640 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5641 return -ENOMEM;
5642
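	/* percpu_ref_init() starts with one reference held; drop it here so the
	 * counter only reflects in-flight writes */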
5643 percpu_ref_put(&mddev->writes_pending);
5644 return 0;
5645}
5646EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5647
5648static int md_alloc(dev_t dev, char *name)
5649{
5650	/*
5651	 * If dev is zero, name is the name of a device to allocate with
5652	 * an arbitrary minor number.  It will be "md_???"
5653	 * If dev is non-zero it must be a device number with a MAJOR of
5654	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
5655	 * the device is being created by opening a node in /dev.
5656	 * If "name" is not NULL, the device is being created by
5657	 * writing to /sys/module/md_mod/parameters/new_array.
5658	 */
5659 static DEFINE_MUTEX(disks_mutex);
5660 struct mddev *mddev = mddev_find(dev);
5661 struct gendisk *disk;
5662 int partitioned;
5663 int shift;
5664 int unit;
5665 int error;
5666
5667 if (!mddev)
5668 return -ENODEV;
5669
5670 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5671 shift = partitioned ? MdpMinorShift : 0;
5672 unit = MINOR(mddev->unit) >> shift;
5673
5674	/* wait for any previous instance of this device to be
5675	 * completely removed (mddev_delayed_delete).
5676	 */
5677 flush_workqueue(md_misc_wq);
5678
5679 mutex_lock(&disks_mutex);
5680 error = -EEXIST;
5681 if (mddev->gendisk)
5682 goto abort;
5683
5684 if (name && !dev) {
5685		/* Need to ensure that 'name' is not a duplicate.
5686		 */
5687 struct mddev *mddev2;
5688 spin_lock(&all_mddevs_lock);
5689
5690 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5691 if (mddev2->gendisk &&
5692 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5693 spin_unlock(&all_mddevs_lock);
5694 goto abort;
5695 }
5696 spin_unlock(&all_mddevs_lock);
5697 }
5698 if (name && dev)
5699		/*
5700		 * Creating /dev/mdNNN via "new_array", so keep it active until stopped.
5701		 */
5702 mddev->hold_active = UNTIL_STOP;
5703
5704 error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
5705 sizeof(struct md_io));
5706 if (error)
5707 goto abort;
5708
5709 error = -ENOMEM;
5710 mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
5711 if (!mddev->queue)
5712 goto abort;
5713
5714 blk_set_stacking_limits(&mddev->queue->limits);
5715
5716 disk = alloc_disk(1 << shift);
5717 if (!disk) {
5718 blk_cleanup_queue(mddev->queue);
5719 mddev->queue = NULL;
5720 goto abort;
5721 }
5722 disk->major = MAJOR(mddev->unit);
5723 disk->first_minor = unit << shift;
5724 if (name)
5725 strcpy(disk->disk_name, name);
5726 else if (partitioned)
5727 sprintf(disk->disk_name, "md_d%d", unit);
5728 else
5729 sprintf(disk->disk_name, "md%d", unit);
5730 disk->fops = &md_fops;
5731 disk->private_data = mddev;
5732 disk->queue = mddev->queue;
5733 blk_queue_write_cache(mddev->queue, true, true);
5734
5735	/* Allow extended partitions.  This makes the
5736	 * 'mdp' device redundant, but we can't really remove it now.
5737	 */
5738 disk->flags |= GENHD_FL_EXT_DEVT;
5739 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5740 mddev->gendisk = disk;
5741	/* As soon as we call add_disk(), another thread could get
5742	 * through to md_open, so make sure it doesn't get too far
5743	 */
5744 mutex_lock(&mddev->open_mutex);
5745 add_disk(disk);
5746
5747 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5748 if (error) {
5749		/* This isn't possible, but as kobject_add is marked
5750		 * __must_check, we must do something with the result
5751		 */
5752 pr_debug("md: cannot register %s/md - name in use\n",
5753 disk->disk_name);
5754 error = 0;
5755 }
5756 if (mddev->kobj.sd &&
5757 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5758 pr_debug("pointless warning\n");
5759 mutex_unlock(&mddev->open_mutex);
5760 abort:
5761 mutex_unlock(&disks_mutex);
5762 if (!error && mddev->kobj.sd) {
5763 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5764 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5765 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5766 }
5767 mddev_put(mddev);
5768 return error;
5769}
5770
5771static void md_probe(dev_t dev)
5772{
5773 if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
5774 return;
5775 if (create_on_open)
5776 md_alloc(dev, NULL);
5777}
5778
5779static int add_named_array(const char *val, const struct kernel_param *kp)
5780{
5781	/*
5782	 * val must be "md_*" or "mdNNN".
5783	 * For "md_*" we allocate an array with a large free minor number, and
5784	 * set the name to val.  val must not already be an active name.
5785	 * For "mdNNN" we allocate an array with the minor number NNN
5786	 * which must not already be in use.
5787	 */
5788 int len = strlen(val);
5789 char buf[DISK_NAME_LEN];
5790 unsigned long devnum;
5791
5792 while (len && val[len-1] == '\n')
5793 len--;
5794 if (len >= DISK_NAME_LEN)
5795 return -E2BIG;
5796 strlcpy(buf, val, len+1);
5797 if (strncmp(buf, "md_", 3) == 0)
5798 return md_alloc(0, buf);
5799 if (strncmp(buf, "md", 2) == 0 &&
5800 isdigit(buf[2]) &&
5801 kstrtoul(buf+2, 10, &devnum) == 0 &&
5802 devnum <= MINORMASK)
5803 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5804
5805 return -EINVAL;
5806}
5807
5808static void md_safemode_timeout(struct timer_list *t)
5809{
5810 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5811
5812 mddev->safemode = 1;
5813 if (mddev->external)
5814 sysfs_notify_dirent_safe(mddev->sysfs_state);
5815
5816 md_wakeup_thread(mddev->thread);
5817}
5818
5819static int start_dirty_degraded;
5820
5821int md_run(struct mddev *mddev)
5822{
5823 int err;
5824 struct md_rdev *rdev;
5825 struct md_personality *pers;
5826
5827 if (list_empty(&mddev->disks))
5828		/* cannot run an array with no devices.. */
5829 return -EINVAL;
5830
5831 if (mddev->pers)
5832 return -EBUSY;
5833
5834 if (mddev->sysfs_active)
5835 return -EBUSY;
5836
5837	/*
5838	 * Analyze all RAID superblock(s)
5839	 */
5840 if (!mddev->raid_disks) {
5841 if (!mddev->persistent)
5842 return -EINVAL;
5843 err = analyze_sbs(mddev);
5844 if (err)
5845 return -EINVAL;
5846 }
5847
5848 if (mddev->level != LEVEL_NONE)
5849 request_module("md-level-%d", mddev->level);
5850 else if (mddev->clevel[0])
5851 request_module("md-%s", mddev->clevel);
5852
5853	/*
5854	 * Drop all container device buffers, from now on
5855	 * the only valid external interface is through the md
5856	 * device.
5857	 */
5858 mddev->has_superblocks = false;
5859 rdev_for_each(rdev, mddev) {
5860 if (test_bit(Faulty, &rdev->flags))
5861 continue;
5862 sync_blockdev(rdev->bdev);
5863 invalidate_bdev(rdev->bdev);
5864 if (mddev->ro != 1 &&
5865 (bdev_read_only(rdev->bdev) ||
5866 bdev_read_only(rdev->meta_bdev))) {
5867 mddev->ro = 1;
5868 if (mddev->gendisk)
5869 set_disk_ro(mddev->gendisk, 1);
5870 }
5871
5872 if (rdev->sb_page)
5873 mddev->has_superblocks = true;
5874
5875		/* perform some consistency tests on the device.
5876		 * We don't want the data to overlap the metadata,
5877		 * Internal Bitmap issues have been handled elsewhere.
5878		 */
5879 if (rdev->meta_bdev) {
5880 ;
5881 } else if (rdev->data_offset < rdev->sb_start) {
5882 if (mddev->dev_sectors &&
5883 rdev->data_offset + mddev->dev_sectors
5884 > rdev->sb_start) {
5885 pr_warn("md: %s: data overlaps metadata\n",
5886 mdname(mddev));
5887 return -EINVAL;
5888 }
5889 } else {
5890 if (rdev->sb_start + rdev->sb_size/512
5891 > rdev->data_offset) {
5892 pr_warn("md: %s: metadata overlaps data\n",
5893 mdname(mddev));
5894 return -EINVAL;
5895 }
5896 }
5897 sysfs_notify_dirent_safe(rdev->sysfs_state);
5898 }
5899
5900 if (!bioset_initialized(&mddev->bio_set)) {
5901 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5902 if (err)
5903 return err;
5904 }
5905 if (!bioset_initialized(&mddev->sync_set)) {
5906 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5907 if (err)
5908 return err;
5909 }
5910
5911 spin_lock(&pers_lock);
5912 pers = find_pers(mddev->level, mddev->clevel);
5913 if (!pers || !try_module_get(pers->owner)) {
5914 spin_unlock(&pers_lock);
5915 if (mddev->level != LEVEL_NONE)
5916 pr_warn("md: personality for level %d is not loaded!\n",
5917 mddev->level);
5918 else
5919 pr_warn("md: personality for level %s is not loaded!\n",
5920 mddev->clevel);
5921 err = -EINVAL;
5922 goto abort;
5923 }
5924 spin_unlock(&pers_lock);
5925 if (mddev->level != pers->level) {
5926 mddev->level = pers->level;
5927 mddev->new_level = pers->level;
5928 }
5929 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5930
5931 if (mddev->reshape_position != MaxSector &&
5932 pers->start_reshape == NULL) {
5933		/* This personality cannot handle reshaping... */
5934 module_put(pers->owner);
5935 err = -EINVAL;
5936 goto abort;
5937 }
5938
5939 if (pers->sync_request) {
5940		/* Warn if this is a potentially silly
5941		 * configuration.
5942		 */
5943 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5944 struct md_rdev *rdev2;
5945 int warned = 0;
5946
5947 rdev_for_each(rdev, mddev)
5948 rdev_for_each(rdev2, mddev) {
5949 if (rdev < rdev2 &&
5950 rdev->bdev->bd_disk ==
5951 rdev2->bdev->bd_disk) {
5952 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5953 mdname(mddev),
5954 bdevname(rdev->bdev,b),
5955 bdevname(rdev2->bdev,b2));
5956 warned = 1;
5957 }
5958 }
5959
5960 if (warned)
5961 pr_warn("True protection against single-disk failure might be compromised.\n");
5962 }
5963
5964 mddev->recovery = 0;
5965
5966 mddev->resync_max_sectors = mddev->dev_sectors;
5967
5968 mddev->ok_start_degraded = start_dirty_degraded;
5969
5970 if (start_readonly && mddev->ro == 0)
5971 mddev->ro = 2;
5972
5973 err = pers->run(mddev);
5974 if (err)
5975 pr_warn("md: pers->run() failed ...\n");
5976 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5977 WARN_ONCE(!mddev->external_size,
5978 "%s: default size too small, but 'external_size' not in effect?\n",
5979 __func__);
5980 pr_warn("md: invalid array_size %llu > default size %llu\n",
5981 (unsigned long long)mddev->array_sectors / 2,
5982 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5983 err = -EINVAL;
5984 }
5985 if (err == 0 && pers->sync_request &&
5986 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5987 struct bitmap *bitmap;
5988
5989 bitmap = md_bitmap_create(mddev, -1);
5990 if (IS_ERR(bitmap)) {
5991 err = PTR_ERR(bitmap);
5992 pr_warn("%s: failed to create bitmap (%d)\n",
5993 mdname(mddev), err);
5994 } else
5995 mddev->bitmap = bitmap;
5996
5997 }
5998 if (err)
5999 goto bitmap_abort;
6000
6001 if (mddev->bitmap_info.max_write_behind > 0) {
6002 bool create_pool = false;
6003
6004 rdev_for_each(rdev, mddev) {
6005 if (test_bit(WriteMostly, &rdev->flags) &&
6006 rdev_init_serial(rdev))
6007 create_pool = true;
6008 }
6009 if (create_pool && mddev->serial_info_pool == NULL) {
6010 mddev->serial_info_pool =
6011 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
6012 sizeof(struct serial_info));
6013 if (!mddev->serial_info_pool) {
6014 err = -ENOMEM;
6015 goto bitmap_abort;
6016 }
6017 }
6018 }
6019
6020 if (mddev->queue) {
6021 bool nonrot = true;
6022
6023 rdev_for_each(rdev, mddev) {
6024 if (rdev->raid_disk >= 0 &&
6025 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
6026 nonrot = false;
6027 break;
6028 }
6029 }
6030 if (mddev->degraded)
6031 nonrot = false;
6032 if (nonrot)
6033 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6034 else
6035 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6036 }
6037 if (pers->sync_request) {
6038 if (mddev->kobj.sd &&
6039 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6040 pr_warn("md: cannot register extra attributes for %s\n",
6041 mdname(mddev));
6042 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6043 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6044 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6045 } else if (mddev->ro == 2)
6046 mddev->ro = 0;
6047
6048 atomic_set(&mddev->max_corr_read_errors,
6049 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
6050 mddev->safemode = 0;
6051 if (mddev_is_clustered(mddev))
6052 mddev->safemode_delay = 0;
6053 else
6054 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6055 mddev->in_sync = 1;
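	/* order the in_sync update above before ->pers is published below */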
6056 smp_wmb();
6057 spin_lock(&mddev->lock);
6058 mddev->pers = pers;
6059 spin_unlock(&mddev->lock);
6060 rdev_for_each(rdev, mddev)
6061 if (rdev->raid_disk >= 0)
6062 sysfs_link_rdev(mddev, rdev);
6063
6064 if (mddev->degraded && !mddev->ro)
6065		/* This ensures that recovering status is reported immediately
6066		 * via sysfs - until a lack of spares is confirmed.
6067		 */
6068 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6069 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6070
6071 if (mddev->sb_flags)
6072 md_update_sb(mddev, 0);
6073
6074 md_new_event(mddev);
6075 return 0;
6076
6077bitmap_abort:
6078 mddev_detach(mddev);
6079 if (mddev->private)
6080 pers->free(mddev, mddev->private);
6081 mddev->private = NULL;
6082 module_put(pers->owner);
6083 md_bitmap_destroy(mddev);
6084abort:
6085 bioset_exit(&mddev->bio_set);
6086 bioset_exit(&mddev->sync_set);
6087 return err;
6088}
6089EXPORT_SYMBOL_GPL(md_run);
6090
6091int do_md_run(struct mddev *mddev)
6092{
6093 int err;
6094
6095 set_bit(MD_NOT_READY, &mddev->flags);
6096 err = md_run(mddev);
6097 if (err)
6098 goto out;
6099 err = md_bitmap_load(mddev);
6100 if (err) {
6101 md_bitmap_destroy(mddev);
6102 goto out;
6103 }
6104
6105 if (mddev_is_clustered(mddev))
6106 md_allow_write(mddev);
6107
6108	/* run start up tasks that require md_thread */
6109 md_start(mddev);
6110
6111 md_wakeup_thread(mddev->thread);
6112 md_wakeup_thread(mddev->sync_thread);
6113
6114 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6115 clear_bit(MD_NOT_READY, &mddev->flags);
6116 mddev->changed = 1;
6117 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6118 sysfs_notify_dirent_safe(mddev->sysfs_state);
6119 sysfs_notify_dirent_safe(mddev->sysfs_action);
6120 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6121out:
6122 clear_bit(MD_NOT_READY, &mddev->flags);
6123 return err;
6124}
6125
6126int md_start(struct mddev *mddev)
6127{
6128 int ret = 0;
6129
6130 if (mddev->pers->start) {
6131 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6132 md_wakeup_thread(mddev->thread);
6133 ret = mddev->pers->start(mddev);
6134 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6135 md_wakeup_thread(mddev->sync_thread);
6136 }
6137 return ret;
6138}
6139EXPORT_SYMBOL_GPL(md_start);
6140
6141static int restart_array(struct mddev *mddev)
6142{
6143 struct gendisk *disk = mddev->gendisk;
6144 struct md_rdev *rdev;
6145 bool has_journal = false;
6146 bool has_readonly = false;
6147
6148	/* Complain if it has no devices */
6149 if (list_empty(&mddev->disks))
6150 return -ENXIO;
6151 if (!mddev->pers)
6152 return -EINVAL;
6153 if (!mddev->ro)
6154 return -EBUSY;
6155
6156 rcu_read_lock();
6157 rdev_for_each_rcu(rdev, mddev) {
6158 if (test_bit(Journal, &rdev->flags) &&
6159 !test_bit(Faulty, &rdev->flags))
6160 has_journal = true;
6161 if (bdev_read_only(rdev->bdev))
6162 has_readonly = true;
6163 }
6164 rcu_read_unlock();
6165 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6166		/* Don't restart rw with journal missing/faulty */
6167 return -EINVAL;
6168 if (has_readonly)
6169 return -EROFS;
6170
6171 mddev->safemode = 0;
6172 mddev->ro = 0;
6173 set_disk_ro(disk, 0);
6174 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6175
6176 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6177 md_wakeup_thread(mddev->thread);
6178 md_wakeup_thread(mddev->sync_thread);
6179 sysfs_notify_dirent_safe(mddev->sysfs_state);
6180 return 0;
6181}
6182
6183static void md_clean(struct mddev *mddev)
6184{
6185 mddev->array_sectors = 0;
6186 mddev->external_size = 0;
6187 mddev->dev_sectors = 0;
6188 mddev->raid_disks = 0;
6189 mddev->recovery_cp = 0;
6190 mddev->resync_min = 0;
6191 mddev->resync_max = MaxSector;
6192 mddev->reshape_position = MaxSector;
6193 mddev->external = 0;
6194 mddev->persistent = 0;
6195 mddev->level = LEVEL_NONE;
6196 mddev->clevel[0] = 0;
6197 mddev->flags = 0;
6198 mddev->sb_flags = 0;
6199 mddev->ro = 0;
6200 mddev->metadata_type[0] = 0;
6201 mddev->chunk_sectors = 0;
6202 mddev->ctime = mddev->utime = 0;
6203 mddev->layout = 0;
6204 mddev->max_disks = 0;
6205 mddev->events = 0;
6206 mddev->can_decrease_events = 0;
6207 mddev->delta_disks = 0;
6208 mddev->reshape_backwards = 0;
6209 mddev->new_level = LEVEL_NONE;
6210 mddev->new_layout = 0;
6211 mddev->new_chunk_sectors = 0;
6212 mddev->curr_resync = 0;
6213 atomic64_set(&mddev->resync_mismatches, 0);
6214 mddev->suspend_lo = mddev->suspend_hi = 0;
6215 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6216 mddev->recovery = 0;
6217 mddev->in_sync = 0;
6218 mddev->changed = 0;
6219 mddev->degraded = 0;
6220 mddev->safemode = 0;
6221 mddev->private = NULL;
6222 mddev->cluster_info = NULL;
6223 mddev->bitmap_info.offset = 0;
6224 mddev->bitmap_info.default_offset = 0;
6225 mddev->bitmap_info.default_space = 0;
6226 mddev->bitmap_info.chunksize = 0;
6227 mddev->bitmap_info.daemon_sleep = 0;
6228 mddev->bitmap_info.max_write_behind = 0;
6229 mddev->bitmap_info.nodes = 0;
6230}
6231
6232static void __md_stop_writes(struct mddev *mddev)
6233{
6234 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6235 if (work_pending(&mddev->del_work))
6236 flush_workqueue(md_misc_wq);
6237 if (mddev->sync_thread) {
6238 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6239 md_reap_sync_thread(mddev);
6240 }
6241
6242 del_timer_sync(&mddev->safemode_timer);
6243
6244 if (mddev->pers && mddev->pers->quiesce) {
6245 mddev->pers->quiesce(mddev, 1);
6246 mddev->pers->quiesce(mddev, 0);
6247 }
6248 md_bitmap_flush(mddev);
6249
6250 if (mddev->ro == 0 &&
6251 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6252 mddev->sb_flags)) {
6253		/* mark array as shutdown cleanly */
6254 if (!mddev_is_clustered(mddev))
6255 mddev->in_sync = 1;
6256 md_update_sb(mddev, 1);
6257 }
6258
6259 mddev->serialize_policy = 0;
6260 mddev_destroy_serial_pool(mddev, NULL, true);
6261}
6262
6263void md_stop_writes(struct mddev *mddev)
6264{
6265 mddev_lock_nointr(mddev);
6266 __md_stop_writes(mddev);
6267 mddev_unlock(mddev);
6268}
6269EXPORT_SYMBOL_GPL(md_stop_writes);
6270
6271static void mddev_detach(struct mddev *mddev)
6272{
6273 md_bitmap_wait_behind_writes(mddev);
6274 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6275 mddev->pers->quiesce(mddev, 1);
6276 mddev->pers->quiesce(mddev, 0);
6277 }
6278 md_unregister_thread(&mddev->thread);
6279 if (mddev->queue)
6280 blk_sync_queue(mddev->queue);
6281}
6282
6283static void __md_stop(struct mddev *mddev)
6284{
6285 struct md_personality *pers = mddev->pers;
6286 md_bitmap_destroy(mddev);
6287 mddev_detach(mddev);
6288
6289 if (mddev->event_work.func)
6290 flush_workqueue(md_misc_wq);
6291 spin_lock(&mddev->lock);
6292 mddev->pers = NULL;
6293 spin_unlock(&mddev->lock);
6294 pers->free(mddev, mddev->private);
6295 mddev->private = NULL;
6296 if (pers->sync_request && mddev->to_remove == NULL)
6297 mddev->to_remove = &md_redundancy_group;
6298 module_put(pers->owner);
6299 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6300}
6301
6302void md_stop(struct mddev *mddev)
6303{
6304	/* stop the array and free any attached data structures.
6305	 * This is called from dm-raid
6306	 */
6307 __md_stop(mddev);
6308 bioset_exit(&mddev->bio_set);
6309 bioset_exit(&mddev->sync_set);
6310}
6311
6312EXPORT_SYMBOL_GPL(md_stop);
6313
6314static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6315{
6316 int err = 0;
6317 int did_freeze = 0;
6318
6319 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6320 did_freeze = 1;
6321 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6322 md_wakeup_thread(mddev->thread);
6323 }
6324 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6325 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6326 if (mddev->sync_thread)
6327		/* Thread might be blocked waiting for metadata update
6328		 * which will now never happen */
6329 wake_up_process(mddev->sync_thread->tsk);
6330
6331 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6332 return -EBUSY;
6333 mddev_unlock(mddev);
6334 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6335 &mddev->recovery));
6336 wait_event(mddev->sb_wait,
6337 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6338 mddev_lock_nointr(mddev);
6339
6340 mutex_lock(&mddev->open_mutex);
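	/* the caller may itself hold one open reference via 'bdev', so any more
	 * openers than that means the array is still in use elsewhere */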
6341 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6342 mddev->sync_thread ||
6343 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6344 pr_warn("md: %s still in use.\n",mdname(mddev));
6345 if (did_freeze) {
6346 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6347 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6348 md_wakeup_thread(mddev->thread);
6349 }
6350 err = -EBUSY;
6351 goto out;
6352 }
6353 if (mddev->pers) {
6354 __md_stop_writes(mddev);
6355
6356 err = -ENXIO;
6357 if (mddev->ro==1)
6358 goto out;
6359 mddev->ro = 1;
6360 set_disk_ro(mddev->gendisk, 1);
6361 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6362 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6363 md_wakeup_thread(mddev->thread);
6364 sysfs_notify_dirent_safe(mddev->sysfs_state);
6365 err = 0;
6366 }
6367out:
6368 mutex_unlock(&mddev->open_mutex);
6369 return err;
6370}
6371
6372/* mode:
6373 *   0 - completely stop and dis-assemble array
6374 *   2 - stop but do not disassemble array
6375 */
6376static int do_md_stop(struct mddev *mddev, int mode,
6377 struct block_device *bdev)
6378{
6379 struct gendisk *disk = mddev->gendisk;
6380 struct md_rdev *rdev;
6381 int did_freeze = 0;
6382
6383 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6384 did_freeze = 1;
6385 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6386 md_wakeup_thread(mddev->thread);
6387 }
6388 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6389 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6390 if (mddev->sync_thread)
6391		/* Thread might be blocked waiting for metadata update
6392		 * which will now never happen */
6393 wake_up_process(mddev->sync_thread->tsk);
6394
6395 mddev_unlock(mddev);
6396 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6397 !test_bit(MD_RECOVERY_RUNNING,
6398 &mddev->recovery)));
6399 mddev_lock_nointr(mddev);
6400
6401 mutex_lock(&mddev->open_mutex);
6402 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6403 mddev->sysfs_active ||
6404 mddev->sync_thread ||
6405 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6406 pr_warn("md: %s still in use.\n",mdname(mddev));
6407 mutex_unlock(&mddev->open_mutex);
6408 if (did_freeze) {
6409 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6410 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6411 md_wakeup_thread(mddev->thread);
6412 }
6413 return -EBUSY;
6414 }
6415 if (mddev->pers) {
6416 if (mddev->ro)
6417 set_disk_ro(disk, 0);
6418
6419 __md_stop_writes(mddev);
6420 __md_stop(mddev);
6421
6422		/* tell userspace to handle 'inactive' */
6423 sysfs_notify_dirent_safe(mddev->sysfs_state);
6424
6425 rdev_for_each(rdev, mddev)
6426 if (rdev->raid_disk >= 0)
6427 sysfs_unlink_rdev(mddev, rdev);
6428
6429 set_capacity_and_notify(disk, 0);
6430 mutex_unlock(&mddev->open_mutex);
6431 mddev->changed = 1;
6432
6433 if (mddev->ro)
6434 mddev->ro = 0;
6435 } else
6436 mutex_unlock(&mddev->open_mutex);
6437
6438
6439	/* Free resources if this is the final stop */
6440 if (mode == 0) {
6441 pr_info("md: %s stopped.\n", mdname(mddev));
6442
6443 if (mddev->bitmap_info.file) {
6444 struct file *f = mddev->bitmap_info.file;
6445 spin_lock(&mddev->lock);
6446 mddev->bitmap_info.file = NULL;
6447 spin_unlock(&mddev->lock);
6448 fput(f);
6449 }
6450 mddev->bitmap_info.offset = 0;
6451
6452 export_array(mddev);
6453
6454 md_clean(mddev);
6455 if (mddev->hold_active == UNTIL_STOP)
6456 mddev->hold_active = 0;
6457 }
6458 md_new_event(mddev);
6459 sysfs_notify_dirent_safe(mddev->sysfs_state);
6460 return 0;
6461}
6462
6463#ifndef MODULE
6464static void autorun_array(struct mddev *mddev)
6465{
6466 struct md_rdev *rdev;
6467 int err;
6468
6469 if (list_empty(&mddev->disks))
6470 return;
6471
6472 pr_info("md: running: ");
6473
6474 rdev_for_each(rdev, mddev) {
6475 char b[BDEVNAME_SIZE];
6476 pr_cont("<%s>", bdevname(rdev->bdev,b));
6477 }
6478 pr_cont("\n");
6479
6480 err = do_md_run(mddev);
6481 if (err) {
6482 pr_warn("md: do_md_run() returned %d\n", err);
6483 do_md_stop(mddev, 0, NULL);
6484 }
6485}
6486
6487/*
6488 * lets try to run arrays based on all disks that have arrived
6489 * until now. (those are in pending_raid_disks list)
6490 *
6491 * the method: pick the first pending disk, collect all disks with
6492 * the same UUID, remove all from the pending list and put them into
6493 * the 'same_array' list. Then order this list based on superblock
6494 * update time (freshest comes first), kick out 'old' disks and
6495 * compare superblocks. If everything's fine then run it.
6496 *
6497 * If "unit" is allocated, then bump its reference count
6498 */
6499static void autorun_devices(int part)
6500{
6501 struct md_rdev *rdev0, *rdev, *tmp;
6502 struct mddev *mddev;
6503 char b[BDEVNAME_SIZE];
6504
6505 pr_info("md: autorun ...\n");
6506 while (!list_empty(&pending_raid_disks)) {
6507 int unit;
6508 dev_t dev;
6509 LIST_HEAD(candidates);
6510 rdev0 = list_entry(pending_raid_disks.next,
6511 struct md_rdev, same_set);
6512
6513 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6514 INIT_LIST_HEAD(&candidates);
6515 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6516 if (super_90_load(rdev, rdev0, 0) >= 0) {
6517 pr_debug("md: adding %s ...\n",
6518 bdevname(rdev->bdev,b));
6519 list_move(&rdev->same_set, &candidates);
6520 }
6521
6522		/*
6523		 * now we have a set of devices, with all of them having
6524		 * mostly sane superblocks. It's time to allocate the mddev.
6525		 */
6526 if (part) {
6527 dev = MKDEV(mdp_major,
6528 rdev0->preferred_minor << MdpMinorShift);
6529 unit = MINOR(dev) >> MdpMinorShift;
6530 } else {
6531 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6532 unit = MINOR(dev);
6533 }
6534 if (rdev0->preferred_minor != unit) {
6535 pr_warn("md: unit number in %s is bad: %d\n",
6536 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6537 break;
6538 }
6539
6540 md_probe(dev);
6541 mddev = mddev_find(dev);
6542 if (!mddev || !mddev->gendisk) {
6543 if (mddev)
6544 mddev_put(mddev);
6545 break;
6546 }
6547 if (mddev_lock(mddev))
6548 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6549 else if (mddev->raid_disks || mddev->major_version
6550 || !list_empty(&mddev->disks)) {
6551 pr_warn("md: %s already running, cannot run %s\n",
6552 mdname(mddev), bdevname(rdev0->bdev,b));
6553 mddev_unlock(mddev);
6554 } else {
6555 pr_debug("md: created %s\n", mdname(mddev));
6556 mddev->persistent = 1;
6557 rdev_for_each_list(rdev, tmp, &candidates) {
6558 list_del_init(&rdev->same_set);
6559 if (bind_rdev_to_array(rdev, mddev))
6560 export_rdev(rdev);
6561 }
6562 autorun_array(mddev);
6563 mddev_unlock(mddev);
6564 }
6565
6566		/* on success, candidates will be empty, on error
6567		 * it won't... */
6568 rdev_for_each_list(rdev, tmp, &candidates) {
6569 list_del_init(&rdev->same_set);
6570 export_rdev(rdev);
6571 }
6572 mddev_put(mddev);
6573 }
6574 pr_info("md: ... autorun DONE.\n");
6575}
6576#endif
6577
6578static int get_version(void __user *arg)
6579{
6580 mdu_version_t ver;
6581
6582 ver.major = MD_MAJOR_VERSION;
6583 ver.minor = MD_MINOR_VERSION;
6584 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6585
6586 if (copy_to_user(arg, &ver, sizeof(ver)))
6587 return -EFAULT;
6588
6589 return 0;
6590}
6591
6592static int get_array_info(struct mddev *mddev, void __user *arg)
6593{
6594 mdu_array_info_t info;
6595 int nr,working,insync,failed,spare;
6596 struct md_rdev *rdev;
6597
6598 nr = working = insync = failed = spare = 0;
6599 rcu_read_lock();
6600 rdev_for_each_rcu(rdev, mddev) {
6601 nr++;
6602 if (test_bit(Faulty, &rdev->flags))
6603 failed++;
6604 else {
6605 working++;
6606 if (test_bit(In_sync, &rdev->flags))
6607 insync++;
6608 else if (test_bit(Journal, &rdev->flags))
6609				/* TODO: add journal count to md_u.h */
6610 ;
6611 else
6612 spare++;
6613 }
6614 }
6615 rcu_read_unlock();
6616
6617 info.major_version = mddev->major_version;
6618 info.minor_version = mddev->minor_version;
6619 info.patch_version = MD_PATCHLEVEL_VERSION;
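	/* the ioctl structure only carries 32-bit timestamps, so clamp ctime/utime */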
6620 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6621 info.level = mddev->level;
6622 info.size = mddev->dev_sectors / 2;
6623 if (info.size != mddev->dev_sectors / 2)
6624 info.size = -1;
6625 info.nr_disks = nr;
6626 info.raid_disks = mddev->raid_disks;
6627 info.md_minor = mddev->md_minor;
6628 info.not_persistent= !mddev->persistent;
6629
6630 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6631 info.state = 0;
6632 if (mddev->in_sync)
6633 info.state = (1<<MD_SB_CLEAN);
6634 if (mddev->bitmap && mddev->bitmap_info.offset)
6635 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6636 if (mddev_is_clustered(mddev))
6637 info.state |= (1<<MD_SB_CLUSTERED);
6638 info.active_disks = insync;
6639 info.working_disks = working;
6640 info.failed_disks = failed;
6641 info.spare_disks = spare;
6642
6643 info.layout = mddev->layout;
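	/* chunk_sectors counts 512-byte sectors; the ioctl reports the chunk size in bytes */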
6644 info.chunk_size = mddev->chunk_sectors << 9;
6645
6646 if (copy_to_user(arg, &info, sizeof(info)))
6647 return -EFAULT;
6648
6649 return 0;
6650}
6651
6652static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6653{
6654 mdu_bitmap_file_t *file = NULL;
6655 char *ptr;
6656 int err;
6657
6658 file = kzalloc(sizeof(*file), GFP_NOIO);
6659 if (!file)
6660 return -ENOMEM;
6661
6662 err = 0;
6663 spin_lock(&mddev->lock);
6664
6665 if (mddev->bitmap_info.file) {
6666 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6667 sizeof(file->pathname));
6668 if (IS_ERR(ptr))
6669 err = PTR_ERR(ptr);
6670 else
6671 memmove(file->pathname, ptr,
6672 sizeof(file->pathname)-(ptr-file->pathname));
6673 }
6674 spin_unlock(&mddev->lock);
6675
6676 if (err == 0 &&
6677 copy_to_user(arg, file, sizeof(*file)))
6678 err = -EFAULT;
6679
6680 kfree(file);
6681 return err;
6682}
6683
6684static int get_disk_info(struct mddev *mddev, void __user * arg)
6685{
6686 mdu_disk_info_t info;
6687 struct md_rdev *rdev;
6688
6689 if (copy_from_user(&info, arg, sizeof(info)))
6690 return -EFAULT;
6691
6692 rcu_read_lock();
6693 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6694 if (rdev) {
6695 info.major = MAJOR(rdev->bdev->bd_dev);
6696 info.minor = MINOR(rdev->bdev->bd_dev);
6697 info.raid_disk = rdev->raid_disk;
6698 info.state = 0;
6699 if (test_bit(Faulty, &rdev->flags))
6700 info.state |= (1<<MD_DISK_FAULTY);
6701 else if (test_bit(In_sync, &rdev->flags)) {
6702 info.state |= (1<<MD_DISK_ACTIVE);
6703 info.state |= (1<<MD_DISK_SYNC);
6704 }
6705 if (test_bit(Journal, &rdev->flags))
6706 info.state |= (1<<MD_DISK_JOURNAL);
6707 if (test_bit(WriteMostly, &rdev->flags))
6708 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6709 if (test_bit(FailFast, &rdev->flags))
6710 info.state |= (1<<MD_DISK_FAILFAST);
6711 } else {
6712 info.major = info.minor = 0;
6713 info.raid_disk = -1;
6714 info.state = (1<<MD_DISK_REMOVED);
6715 }
6716 rcu_read_unlock();
6717
6718 if (copy_to_user(arg, &info, sizeof(info)))
6719 return -EFAULT;
6720
6721 return 0;
6722}
6723
6724int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
6725{
6726 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6727 struct md_rdev *rdev;
6728 dev_t dev = MKDEV(info->major,info->minor);
6729
6730 if (mddev_is_clustered(mddev) &&
6731 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6732 pr_warn("%s: Cannot add to clustered mddev.\n",
6733 mdname(mddev));
6734 return -EINVAL;
6735 }
6736
6737 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6738 return -EOVERFLOW;
6739
6740 if (!mddev->raid_disks) {
6741 int err;
6742
6743 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6744 if (IS_ERR(rdev)) {
6745 pr_warn("md: md_import_device returned %ld\n",
6746 PTR_ERR(rdev));
6747 return PTR_ERR(rdev);
6748 }
6749 if (!list_empty(&mddev->disks)) {
6750 struct md_rdev *rdev0
6751 = list_entry(mddev->disks.next,
6752 struct md_rdev, same_set);
6753 err = super_types[mddev->major_version]
6754 .load_super(rdev, rdev0, mddev->minor_version);
6755 if (err < 0) {
6756 pr_warn("md: %s has different UUID to %s\n",
6757 bdevname(rdev->bdev,b),
6758 bdevname(rdev0->bdev,b2));
6759 export_rdev(rdev);
6760 return -EINVAL;
6761 }
6762 }
6763 err = bind_rdev_to_array(rdev, mddev);
6764 if (err)
6765 export_rdev(rdev);
6766 return err;
6767 }
6768
/*
 * The array is already running, so this is a hot-add.  For persistent
 * metadata the device's superblock is validated against the array; for
 * non-persistent arrays the requested role is taken from the ioctl.
 */
6774 if (mddev->pers) {
6775 int err;
6776 if (!mddev->pers->hot_add_disk) {
6777 pr_warn("%s: personality does not support diskops!\n",
6778 mdname(mddev));
6779 return -EINVAL;
6780 }
6781 if (mddev->persistent)
6782 rdev = md_import_device(dev, mddev->major_version,
6783 mddev->minor_version);
6784 else
6785 rdev = md_import_device(dev, -1, -1);
6786 if (IS_ERR(rdev)) {
6787 pr_warn("md: md_import_device returned %ld\n",
6788 PTR_ERR(rdev));
6789 return PTR_ERR(rdev);
6790 }
6791
6792 if (!mddev->persistent) {
6793 if (info->state & (1<<MD_DISK_SYNC) &&
6794 info->raid_disk < mddev->raid_disks) {
6795 rdev->raid_disk = info->raid_disk;
6796 set_bit(In_sync, &rdev->flags);
6797 clear_bit(Bitmap_sync, &rdev->flags);
6798 } else
6799 rdev->raid_disk = -1;
6800 rdev->saved_raid_disk = rdev->raid_disk;
6801 } else
6802 super_types[mddev->major_version].
6803 validate_super(mddev, rdev);
6804 if ((info->state & (1<<MD_DISK_SYNC)) &&
6805 rdev->raid_disk != info->raid_disk) {
/*
 * The request asked for an in-sync device in a specific slot, but
 * superblock validation did not place it there, so refuse the hot-add.
 */
6809 export_rdev(rdev);
6810 return -EINVAL;
6811 }
6812
6813 clear_bit(In_sync, &rdev->flags);
6814 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6815 set_bit(WriteMostly, &rdev->flags);
6816 else
6817 clear_bit(WriteMostly, &rdev->flags);
6818 if (info->state & (1<<MD_DISK_FAILFAST))
6819 set_bit(FailFast, &rdev->flags);
6820 else
6821 clear_bit(FailFast, &rdev->flags);
6822
6823 if (info->state & (1<<MD_DISK_JOURNAL)) {
6824 struct md_rdev *rdev2;
6825 bool has_journal = false;
6826
6827
6828 rdev_for_each(rdev2, mddev) {
6829 if (test_bit(Journal, &rdev2->flags)) {
6830 has_journal = true;
6831 break;
6832 }
6833 }
6834 if (has_journal || mddev->bitmap) {
6835 export_rdev(rdev);
6836 return -EBUSY;
6837 }
6838 set_bit(Journal, &rdev->flags);
6839 }
6840
6841
6842
6843 if (mddev_is_clustered(mddev)) {
6844 if (info->state & (1 << MD_DISK_CANDIDATE))
6845 set_bit(Candidate, &rdev->flags);
6846 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6847
6848 err = md_cluster_ops->add_new_disk(mddev, rdev);
6849 if (err) {
6850 export_rdev(rdev);
6851 return err;
6852 }
6853 }
6854 }
6855
6856 rdev->raid_disk = -1;
6857 err = bind_rdev_to_array(rdev, mddev);
6858
6859 if (err)
6860 export_rdev(rdev);
6861
6862 if (mddev_is_clustered(mddev)) {
6863 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6864 if (!err) {
6865 err = md_cluster_ops->new_disk_ack(mddev,
6866 err == 0);
6867 if (err)
6868 md_kick_rdev_from_array(rdev);
6869 }
6870 } else {
6871 if (err)
6872 md_cluster_ops->add_new_disk_cancel(mddev);
6873 else
6874 err = add_bound_rdev(rdev);
6875 }
6876
6877 } else if (!err)
6878 err = add_bound_rdev(rdev);
6879
6880 return err;
6881 }
6882
/*
 * Otherwise the array has raid_disks set but is not running: it is being
 * built device by device, which is only supported for version-0.90
 * (major_version == 0) superblocks.
 */
6886 if (mddev->major_version != 0) {
6887 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6888 return -EINVAL;
6889 }
6890
6891 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6892 int err;
6893 rdev = md_import_device(dev, -1, 0);
6894 if (IS_ERR(rdev)) {
6895 pr_warn("md: error, md_import_device() returned %ld\n",
6896 PTR_ERR(rdev));
6897 return PTR_ERR(rdev);
6898 }
6899 rdev->desc_nr = info->number;
6900 if (info->raid_disk < mddev->raid_disks)
6901 rdev->raid_disk = info->raid_disk;
6902 else
6903 rdev->raid_disk = -1;
6904
6905 if (rdev->raid_disk < mddev->raid_disks)
6906 if (info->state & (1<<MD_DISK_SYNC))
6907 set_bit(In_sync, &rdev->flags);
6908
6909 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6910 set_bit(WriteMostly, &rdev->flags);
6911 if (info->state & (1<<MD_DISK_FAILFAST))
6912 set_bit(FailFast, &rdev->flags);
6913
6914 if (!mddev->persistent) {
6915 pr_debug("md: nonpersistent superblock ...\n");
6916 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6917 } else
6918 rdev->sb_start = calc_dev_sboffset(rdev);
6919 rdev->sectors = rdev->sb_start;
6920
6921 err = bind_rdev_to_array(rdev, mddev);
6922 if (err) {
6923 export_rdev(rdev);
6924 return err;
6925 }
6926 }
6927
6928 return 0;
6929}
6930
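/*
 * HOT_REMOVE_DISK: detach a member device.  The device must either hold no
 * slot already, or be removable via remove_and_add_spares(); an active disk
 * is refused with -EBUSY.
 */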
6931static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6932{
6933 char b[BDEVNAME_SIZE];
6934 struct md_rdev *rdev;
6935
6936 if (!mddev->pers)
6937 return -ENODEV;
6938
6939 rdev = find_rdev(mddev, dev);
6940 if (!rdev)
6941 return -ENXIO;
6942
6943 if (rdev->raid_disk < 0)
6944 goto kick_rdev;
6945
6946 clear_bit(Blocked, &rdev->flags);
6947 remove_and_add_spares(mddev, rdev);
6948
6949 if (rdev->raid_disk >= 0)
6950 goto busy;
6951
6952kick_rdev:
6953 if (mddev_is_clustered(mddev)) {
6954 if (md_cluster_ops->remove_disk(mddev, rdev))
6955 goto busy;
6956 }
6957
6958 md_kick_rdev_from_array(rdev);
6959 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6960 if (mddev->thread)
6961 md_wakeup_thread(mddev->thread);
6962 else
6963 md_update_sb(mddev, 1);
6964 md_new_event(mddev);
6965
6966 return 0;
6967busy:
6968 pr_debug("md: cannot remove active disk %s from %s ...\n",
6969 bdevname(rdev->bdev,b), mdname(mddev));
6970 return -EBUSY;
6971}
6972
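/*
 * HOT_ADD_DISK: add a device to a running version-0.90 array as a spare.
 * The personality must support hot_add_disk; recovery is then kicked so the
 * new spare can be pulled in if the array is degraded.
 */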
6973static int hot_add_disk(struct mddev *mddev, dev_t dev)
6974{
6975 char b[BDEVNAME_SIZE];
6976 int err;
6977 struct md_rdev *rdev;
6978
6979 if (!mddev->pers)
6980 return -ENODEV;
6981
6982 if (mddev->major_version != 0) {
6983 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6984 mdname(mddev));
6985 return -EINVAL;
6986 }
6987 if (!mddev->pers->hot_add_disk) {
6988 pr_warn("%s: personality does not support diskops!\n",
6989 mdname(mddev));
6990 return -EINVAL;
6991 }
6992
6993 rdev = md_import_device(dev, -1, 0);
6994 if (IS_ERR(rdev)) {
6995 pr_warn("md: error, md_import_device() returned %ld\n",
6996 PTR_ERR(rdev));
6997 return -EINVAL;
6998 }
6999
7000 if (mddev->persistent)
7001 rdev->sb_start = calc_dev_sboffset(rdev);
7002 else
7003 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
7004
7005 rdev->sectors = rdev->sb_start;
7006
7007 if (test_bit(Faulty, &rdev->flags)) {
pr_warn("md: cannot hot-add faulty %s disk to %s!\n",
7009 bdevname(rdev->bdev,b), mdname(mddev));
7010 err = -EINVAL;
7011 goto abort_export;
7012 }
7013
7014 clear_bit(In_sync, &rdev->flags);
7015 rdev->desc_nr = -1;
7016 rdev->saved_raid_disk = -1;
7017 err = bind_rdev_to_array(rdev, mddev);
7018 if (err)
7019 goto abort_export;
7020
7021
7022
7023
7024
7025
7026 rdev->raid_disk = -1;
7027
7028 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7029 if (!mddev->thread)
7030 md_update_sb(mddev, 1);
7031
/*
 * Kick the recovery thread so the new spare can be used right away if
 * the array is degraded.
 */
7035 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7036 md_wakeup_thread(mddev->thread);
7037 md_new_event(mddev);
7038 return 0;
7039
7040abort_export:
7041 export_rdev(rdev);
7042 return err;
7043}
7044
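/*
 * SET_BITMAP_FILE: attach (fd >= 0) or remove (fd < 0) a file-backed write
 * intent bitmap.  The file must be a regular file opened for write and not
 * otherwise in use; on a running array the bitmap is created or destroyed
 * around a suspend/resume cycle.
 */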
7045static int set_bitmap_file(struct mddev *mddev, int fd)
7046{
7047 int err = 0;
7048
7049 if (mddev->pers) {
7050 if (!mddev->pers->quiesce || !mddev->thread)
7051 return -EBUSY;
7052 if (mddev->recovery || mddev->sync_thread)
7053 return -EBUSY;
7054
7055 }
7056
7057 if (fd >= 0) {
7058 struct inode *inode;
7059 struct file *f;
7060
7061 if (mddev->bitmap || mddev->bitmap_info.file)
7062 return -EEXIST;
7063 f = fget(fd);
7064
7065 if (f == NULL) {
7066 pr_warn("%s: error: failed to get bitmap file\n",
7067 mdname(mddev));
7068 return -EBADF;
7069 }
7070
7071 inode = f->f_mapping->host;
7072 if (!S_ISREG(inode->i_mode)) {
7073 pr_warn("%s: error: bitmap file must be a regular file\n",
7074 mdname(mddev));
7075 err = -EBADF;
7076 } else if (!(f->f_mode & FMODE_WRITE)) {
pr_warn("%s: error: bitmap file must be opened for write\n",
7078 mdname(mddev));
7079 err = -EBADF;
7080 } else if (atomic_read(&inode->i_writecount) != 1) {
7081 pr_warn("%s: error: bitmap file is already in use\n",
7082 mdname(mddev));
7083 err = -EBUSY;
7084 }
7085 if (err) {
7086 fput(f);
7087 return err;
7088 }
7089 mddev->bitmap_info.file = f;
7090 mddev->bitmap_info.offset = 0;
7091 } else if (mddev->bitmap == NULL)
7092 return -ENOENT;
7093 err = 0;
7094 if (mddev->pers) {
7095 if (fd >= 0) {
7096 struct bitmap *bitmap;
7097
7098 bitmap = md_bitmap_create(mddev, -1);
7099 mddev_suspend(mddev);
7100 if (!IS_ERR(bitmap)) {
7101 mddev->bitmap = bitmap;
7102 err = md_bitmap_load(mddev);
7103 } else
7104 err = PTR_ERR(bitmap);
7105 if (err) {
7106 md_bitmap_destroy(mddev);
7107 fd = -1;
7108 }
7109 mddev_resume(mddev);
7110 } else if (fd < 0) {
7111 mddev_suspend(mddev);
7112 md_bitmap_destroy(mddev);
7113 mddev_resume(mddev);
7114 }
7115 }
7116 if (fd < 0) {
7117 struct file *f = mddev->bitmap_info.file;
7118 if (f) {
7119 spin_lock(&mddev->lock);
7120 mddev->bitmap_info.file = NULL;
7121 spin_unlock(&mddev->lock);
7122 fput(f);
7123 }
7124 }
7125
7126 return err;
7127}
7128
/*
 * md_set_array_info() is used in two different ways:
 *  - When assembling an existing array, info->raid_disks is zero and only
 *    the superblock version fields are recorded; they determine which
 *    style of superblock to look for on the member devices.
 *  - When creating a new array, raid_disks is non-zero and, together with
 *    level, size, layout, chunk_size and not_persistent, describes the
 *    array's shape.  Such arrays always use version-0.90 metadata.
 */
7142int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
7143{
7144 if (info->raid_disks == 0) {
7145
7146 if (info->major_version < 0 ||
7147 info->major_version >= ARRAY_SIZE(super_types) ||
7148 super_types[info->major_version].name == NULL) {
7149
7150 pr_warn("md: superblock version %d not known\n",
7151 info->major_version);
7152 return -EINVAL;
7153 }
7154 mddev->major_version = info->major_version;
7155 mddev->minor_version = info->minor_version;
7156 mddev->patch_version = info->patch_version;
7157 mddev->persistent = !info->not_persistent;
7158
7159
7160
7161 mddev->ctime = ktime_get_real_seconds();
7162 return 0;
7163 }
7164 mddev->major_version = MD_MAJOR_VERSION;
7165 mddev->minor_version = MD_MINOR_VERSION;
7166 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7167 mddev->ctime = ktime_get_real_seconds();
7168
7169 mddev->level = info->level;
7170 mddev->clevel[0] = 0;
7171 mddev->dev_sectors = 2 * (sector_t)info->size;
7172 mddev->raid_disks = info->raid_disks;
7173
7174
7175
7176 if (info->state & (1<<MD_SB_CLEAN))
7177 mddev->recovery_cp = MaxSector;
7178 else
7179 mddev->recovery_cp = 0;
7180 mddev->persistent = ! info->not_persistent;
7181 mddev->external = 0;
7182
7183 mddev->layout = info->layout;
7184 if (mddev->level == 0)
/* the layout passed in cannot be trusted for RAID0: mark it unknown */
7186 mddev->layout = -1;
7187 mddev->chunk_sectors = info->chunk_size >> 9;
7188
7189 if (mddev->persistent) {
7190 mddev->max_disks = MD_SB_DISKS;
7191 mddev->flags = 0;
7192 mddev->sb_flags = 0;
7193 }
7194 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7195
7196 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7197 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7198 mddev->bitmap_info.offset = 0;
7199
7200 mddev->reshape_position = MaxSector;
7201
/*
 * Generate a fresh 128-bit UUID for the new array.
 */
7205 get_random_bytes(mddev->uuid, 16);
7206
7207 mddev->new_level = mddev->level;
7208 mddev->new_chunk_sectors = mddev->chunk_sectors;
7209 mddev->new_layout = mddev->layout;
7210 mddev->delta_disks = 0;
7211 mddev->reshape_backwards = 0;
7212
7213 return 0;
7214}
7215
7216void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7217{
7218 lockdep_assert_held(&mddev->reconfig_mutex);
7219
7220 if (mddev->external_size)
7221 return;
7222
7223 mddev->array_sectors = array_sectors;
7224}
7225EXPORT_SYMBOL(md_set_array_sectors);
7226
7227static int update_size(struct mddev *mddev, sector_t num_sectors)
7228{
7229 struct md_rdev *rdev;
7230 int rv;
7231 int fit = (num_sectors == 0);
7232 sector_t old_dev_sectors = mddev->dev_sectors;
7233
7234 if (mddev->pers->resize == NULL)
7235 return -EINVAL;
7236
/*
 * "num_sectors" is the amount of each member device to use.  It can only
 * be changed while no resync or reshape is running and the array is
 * writable.  A value of zero means "use the largest size that fits on
 * every member device".
 */
7245 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7246 mddev->sync_thread)
7247 return -EBUSY;
7248 if (mddev->ro)
7249 return -EROFS;
7250
7251 rdev_for_each(rdev, mddev) {
7252 sector_t avail = rdev->sectors;
7253
7254 if (fit && (num_sectors == 0 || num_sectors > avail))
7255 num_sectors = avail;
7256 if (avail < num_sectors)
7257 return -ENOSPC;
7258 }
7259 rv = mddev->pers->resize(mddev, num_sectors);
7260 if (!rv) {
7261 if (mddev_is_clustered(mddev))
7262 md_cluster_ops->update_size(mddev, old_dev_sectors);
7263 else if (mddev->queue) {
7264 set_capacity_and_notify(mddev->gendisk,
7265 mddev->array_sectors);
7266 }
7267 }
7268 return rv;
7269}
7270
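/*
 * Change the number of raid disks of a running array by preparing a
 * reshape: record delta_disks and its direction, then let the
 * personality's check_reshape() validate and act on it.
 */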
7271static int update_raid_disks(struct mddev *mddev, int raid_disks)
7272{
7273 int rv;
7274 struct md_rdev *rdev;
7275
7276 if (mddev->pers->check_reshape == NULL)
7277 return -EINVAL;
7278 if (mddev->ro)
7279 return -EROFS;
7280 if (raid_disks <= 0 ||
7281 (mddev->max_disks && raid_disks >= mddev->max_disks))
7282 return -EINVAL;
7283 if (mddev->sync_thread ||
7284 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7285 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7286 mddev->reshape_position != MaxSector)
7287 return -EBUSY;
7288
7289 rdev_for_each(rdev, mddev) {
7290 if (mddev->raid_disks < raid_disks &&
7291 rdev->data_offset < rdev->new_data_offset)
7292 return -EINVAL;
7293 if (mddev->raid_disks > raid_disks &&
7294 rdev->data_offset > rdev->new_data_offset)
7295 return -EINVAL;
7296 }
7297
7298 mddev->delta_disks = raid_disks - mddev->raid_disks;
7299 if (mddev->delta_disks < 0)
7300 mddev->reshape_backwards = 1;
7301 else if (mddev->delta_disks > 0)
7302 mddev->reshape_backwards = 0;
7303
7304 rv = mddev->pers->check_reshape(mddev);
7305 if (rv < 0) {
7306 mddev->delta_disks = 0;
7307 mddev->reshape_backwards = 0;
7308 }
7309 return rv;
7310}
7311
/*
 * update_array_info() changes the configuration of an active array.
 * The version, ctime, level, persistence and chunk size must all match
 * the current values.  Of size, raid_disks, layout and bitmap-present,
 * exactly one may differ from the current configuration, and that single
 * change is then applied.
 */
7320static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7321{
7322 int rv = 0;
7323 int cnt = 0;
7324 int state = 0;
7325
7326
7327 if (mddev->bitmap && mddev->bitmap_info.offset)
7328 state |= (1 << MD_SB_BITMAP_PRESENT);
7329
7330 if (mddev->major_version != info->major_version ||
7331 mddev->minor_version != info->minor_version ||
7332
7333 mddev->ctime != info->ctime ||
7334 mddev->level != info->level ||
7335
7336 mddev->persistent != !info->not_persistent ||
7337 mddev->chunk_sectors != info->chunk_size >> 9 ||
/* ignore the low state bits; MD_SB_BITMAP_PRESENT is allowed to change */
7339 ((state^info->state) & 0xfffffe00)
7340 )
7341 return -EINVAL;
7342
7343 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7344 cnt++;
7345 if (mddev->raid_disks != info->raid_disks)
7346 cnt++;
7347 if (mddev->layout != info->layout)
7348 cnt++;
7349 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7350 cnt++;
7351 if (cnt == 0)
7352 return 0;
7353 if (cnt > 1)
7354 return -EINVAL;
7355
7356 if (mddev->layout != info->layout) {
/*
 * A layout change is handled entirely by the personality's
 * check_reshape(); there is nothing to do at the md level.
 */
7361 if (mddev->pers->check_reshape == NULL)
7362 return -EINVAL;
7363 else {
7364 mddev->new_layout = info->layout;
7365 rv = mddev->pers->check_reshape(mddev);
7366 if (rv)
7367 mddev->new_layout = mddev->layout;
7368 return rv;
7369 }
7370 }
7371 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7372 rv = update_size(mddev, (sector_t)info->size * 2);
7373
7374 if (mddev->raid_disks != info->raid_disks)
7375 rv = update_raid_disks(mddev, info->raid_disks);
7376
7377 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7378 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7379 rv = -EINVAL;
7380 goto err;
7381 }
7382 if (mddev->recovery || mddev->sync_thread) {
7383 rv = -EBUSY;
7384 goto err;
7385 }
7386 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7387 struct bitmap *bitmap;
7388
7389 if (mddev->bitmap) {
7390 rv = -EEXIST;
7391 goto err;
7392 }
7393 if (mddev->bitmap_info.default_offset == 0) {
7394 rv = -EINVAL;
7395 goto err;
7396 }
7397 mddev->bitmap_info.offset =
7398 mddev->bitmap_info.default_offset;
7399 mddev->bitmap_info.space =
7400 mddev->bitmap_info.default_space;
7401 bitmap = md_bitmap_create(mddev, -1);
7402 mddev_suspend(mddev);
7403 if (!IS_ERR(bitmap)) {
7404 mddev->bitmap = bitmap;
7405 rv = md_bitmap_load(mddev);
7406 } else
7407 rv = PTR_ERR(bitmap);
7408 if (rv)
7409 md_bitmap_destroy(mddev);
7410 mddev_resume(mddev);
7411 } else {
/* remove the bitmap */
7413 if (!mddev->bitmap) {
7414 rv = -ENOENT;
7415 goto err;
7416 }
7417 if (mddev->bitmap->storage.file) {
7418 rv = -EINVAL;
7419 goto err;
7420 }
7421 if (mddev->bitmap_info.nodes) {
7422
7423 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7424 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7425 rv = -EPERM;
7426 md_cluster_ops->unlock_all_bitmaps(mddev);
7427 goto err;
7428 }
7429
7430 mddev->bitmap_info.nodes = 0;
7431 md_cluster_ops->leave(mddev);
7432 module_put(md_cluster_mod);
7433 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7434 }
7435 mddev_suspend(mddev);
7436 md_bitmap_destroy(mddev);
7437 mddev_resume(mddev);
7438 mddev->bitmap_info.offset = 0;
7439 }
7440 }
7441 md_update_sb(mddev, 1);
7442 return rv;
7443err:
7444 return rv;
7445}
7446
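/*
 * SET_DISK_FAULTY: ask the personality to fail the given member device.
 * If the device did not end up Faulty (the personality may refuse),
 * report -EBUSY.
 */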
7447static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7448{
7449 struct md_rdev *rdev;
7450 int err = 0;
7451
7452 if (mddev->pers == NULL)
7453 return -ENODEV;
7454
7455 rcu_read_lock();
7456 rdev = md_find_rdev_rcu(mddev, dev);
7457 if (!rdev)
7458 err = -ENODEV;
7459 else {
7460 md_error(mddev, rdev);
7461 if (!test_bit(Faulty, &rdev->flags))
7462 err = -EBUSY;
7463 }
7464 rcu_read_unlock();
7465 return err;
7466}
7467
/*
 * There is no meaningful CHS geometry for an md array, so fake one:
 * 2 heads and 4 sectors per track, with the cylinder count derived from
 * the array size.  This only exists to keep partitioning tools happy.
 */
7474static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7475{
7476 struct mddev *mddev = bdev->bd_disk->private_data;
7477
7478 geo->heads = 2;
7479 geo->sectors = 4;
7480 geo->cylinders = mddev->array_sectors / 8;
7481 return 0;
7482}
7483
7484static inline bool md_ioctl_valid(unsigned int cmd)
7485{
7486 switch (cmd) {
7487 case ADD_NEW_DISK:
7488 case GET_ARRAY_INFO:
7489 case GET_BITMAP_FILE:
7490 case GET_DISK_INFO:
7491 case HOT_ADD_DISK:
7492 case HOT_REMOVE_DISK:
7493 case RAID_VERSION:
7494 case RESTART_ARRAY_RW:
7495 case RUN_ARRAY:
7496 case SET_ARRAY_INFO:
7497 case SET_BITMAP_FILE:
7498 case SET_DISK_FAULTY:
7499 case STOP_ARRAY:
7500 case STOP_ARRAY_RO:
7501 case CLUSTERED_DISK_NACK:
7502 return true;
7503 default:
7504 return false;
7505 }
7506}
7507
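/*
 * Main ioctl entry point.  Commands are first checked against the whitelist
 * above and for CAP_SYS_ADMIN, then dispatched in stages: driver-wide
 * queries, lockless per-array queries, and finally commands that require
 * the reconfig mutex (and, for STOP_ARRAY*, exclusion of other openers).
 */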
7508static int md_ioctl(struct block_device *bdev, fmode_t mode,
7509 unsigned int cmd, unsigned long arg)
7510{
7511 int err = 0;
7512 void __user *argp = (void __user *)arg;
7513 struct mddev *mddev = NULL;
7514 bool did_set_md_closing = false;
7515
7516 if (!md_ioctl_valid(cmd))
7517 return -ENOTTY;
7518
7519 switch (cmd) {
7520 case RAID_VERSION:
7521 case GET_ARRAY_INFO:
7522 case GET_DISK_INFO:
7523 break;
7524 default:
7525 if (!capable(CAP_SYS_ADMIN))
7526 return -EACCES;
7527 }
7528
/*
 * Commands dealing with the RAID driver but not any particular array:
 */
7533 switch (cmd) {
7534 case RAID_VERSION:
7535 err = get_version(argp);
7536 goto out;
7537 default:;
7538 }
7539
7540
7541
7542
7543
7544 mddev = bdev->bd_disk->private_data;
7545
7546 if (!mddev) {
7547 BUG();
7548 goto out;
7549 }
7550
7551
7552 switch (cmd) {
7553 case GET_ARRAY_INFO:
7554 if (!mddev->raid_disks && !mddev->external)
7555 err = -ENODEV;
7556 else
7557 err = get_array_info(mddev, argp);
7558 goto out;
7559
7560 case GET_DISK_INFO:
7561 if (!mddev->raid_disks && !mddev->external)
7562 err = -ENODEV;
7563 else
7564 err = get_disk_info(mddev, argp);
7565 goto out;
7566
7567 case SET_DISK_FAULTY:
7568 err = set_disk_faulty(mddev, new_decode_dev(arg));
7569 goto out;
7570
7571 case GET_BITMAP_FILE:
7572 err = get_bitmap_file(mddev, argp);
7573 goto out;
7574
7575 }
7576
7577 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7578 flush_rdev_wq(mddev);
7579
7580 if (cmd == HOT_REMOVE_DISK)
/* allow a requested recovery pass to run first, with a 5 second cap */
7582 wait_event_interruptible_timeout(mddev->sb_wait,
7583 !test_bit(MD_RECOVERY_NEEDED,
7584 &mddev->recovery),
7585 msecs_to_jiffies(5000));
7586 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/*
 * Stopping the array: make sure nobody else has it open and flush
 * the buffer cache before proceeding.
 */
7590 mutex_lock(&mddev->open_mutex);
7591 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7592 mutex_unlock(&mddev->open_mutex);
7593 err = -EBUSY;
7594 goto out;
7595 }
7596 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7597 mutex_unlock(&mddev->open_mutex);
7598 err = -EBUSY;
7599 goto out;
7600 }
7601 did_set_md_closing = true;
7602 mutex_unlock(&mddev->open_mutex);
7603 sync_blockdev(bdev);
7604 }
7605 err = mddev_lock(mddev);
7606 if (err) {
7607 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7608 err, cmd);
7609 goto out;
7610 }
7611
7612 if (cmd == SET_ARRAY_INFO) {
7613 mdu_array_info_t info;
7614 if (!arg)
7615 memset(&info, 0, sizeof(info));
7616 else if (copy_from_user(&info, argp, sizeof(info))) {
7617 err = -EFAULT;
7618 goto unlock;
7619 }
7620 if (mddev->pers) {
7621 err = update_array_info(mddev, &info);
7622 if (err) {
7623 pr_warn("md: couldn't update array info. %d\n", err);
7624 goto unlock;
7625 }
7626 goto unlock;
7627 }
7628 if (!list_empty(&mddev->disks)) {
7629 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7630 err = -EBUSY;
7631 goto unlock;
7632 }
7633 if (mddev->raid_disks) {
7634 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7635 err = -EBUSY;
7636 goto unlock;
7637 }
7638 err = md_set_array_info(mddev, &info);
7639 if (err) {
7640 pr_warn("md: couldn't set array info. %d\n", err);
7641 goto unlock;
7642 }
7643 goto unlock;
7644 }
7645
/*
 * If the array is not yet initialised, only ADD_NEW_DISK, STOP_ARRAY,
 * RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE are permitted.
 */
7651 if ((!mddev->raid_disks && !mddev->external)
7652 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7653 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7654 && cmd != GET_BITMAP_FILE) {
7655 err = -ENODEV;
7656 goto unlock;
7657 }
7658
/*
 * Commands even a read-only array can execute:
 */
7662 switch (cmd) {
7663 case RESTART_ARRAY_RW:
7664 err = restart_array(mddev);
7665 goto unlock;
7666
7667 case STOP_ARRAY:
7668 err = do_md_stop(mddev, 0, bdev);
7669 goto unlock;
7670
7671 case STOP_ARRAY_RO:
7672 err = md_set_readonly(mddev, bdev);
7673 goto unlock;
7674
7675 case HOT_REMOVE_DISK:
7676 err = hot_remove_disk(mddev, new_decode_dev(arg));
7677 goto unlock;
7678
7679 case ADD_NEW_DISK:
/*
 * On a running array ADD_NEW_DISK is handled here only when re-adding
 * a device that is already in sync (MD_DISK_SYNC); anything else falls
 * through to the read-write path below.
 */
7684 if (mddev->pers) {
7685 mdu_disk_info_t info;
7686 if (copy_from_user(&info, argp, sizeof(info)))
7687 err = -EFAULT;
7688 else if (!(info.state & (1<<MD_DISK_SYNC)))
7689
7690 break;
7691 else
7692 err = md_add_new_disk(mddev, &info);
7693 goto unlock;
7694 }
7695 break;
7696 }
7697
/*
 * The remaining commands need a writable array.  If it is currently
 * auto-read-only (ro == 2), switch it to read-write first.
 */
7702 if (mddev->ro && mddev->pers) {
7703 if (mddev->ro == 2) {
7704 mddev->ro = 0;
7705 sysfs_notify_dirent_safe(mddev->sysfs_state);
7706 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7707
7708
7709
7710
7711 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7712 mddev_unlock(mddev);
7713 wait_event(mddev->sb_wait,
7714 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7715 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7716 mddev_lock_nointr(mddev);
7717 }
7718 } else {
7719 err = -EROFS;
7720 goto unlock;
7721 }
7722 }
7723
7724 switch (cmd) {
7725 case ADD_NEW_DISK:
7726 {
7727 mdu_disk_info_t info;
7728 if (copy_from_user(&info, argp, sizeof(info)))
7729 err = -EFAULT;
7730 else
7731 err = md_add_new_disk(mddev, &info);
7732 goto unlock;
7733 }
7734
7735 case CLUSTERED_DISK_NACK:
7736 if (mddev_is_clustered(mddev))
7737 md_cluster_ops->new_disk_ack(mddev, false);
7738 else
7739 err = -EINVAL;
7740 goto unlock;
7741
7742 case HOT_ADD_DISK:
7743 err = hot_add_disk(mddev, new_decode_dev(arg));
7744 goto unlock;
7745
7746 case RUN_ARRAY:
7747 err = do_md_run(mddev);
7748 goto unlock;
7749
7750 case SET_BITMAP_FILE:
7751 err = set_bitmap_file(mddev, (int)arg);
7752 goto unlock;
7753
7754 default:
7755 err = -EINVAL;
7756 goto unlock;
7757 }
7758
7759unlock:
7760 if (mddev->hold_active == UNTIL_IOCTL &&
7761 err != -EINVAL)
7762 mddev->hold_active = 0;
7763 mddev_unlock(mddev);
7764out:
if (did_set_md_closing)
7766 clear_bit(MD_CLOSING, &mddev->flags);
7767 return err;
7768}
7769#ifdef CONFIG_COMPAT
7770static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7771 unsigned int cmd, unsigned long arg)
7772{
7773 switch (cmd) {
7774 case HOT_REMOVE_DISK:
7775 case HOT_ADD_DISK:
7776 case SET_DISK_FAULTY:
7777 case SET_BITMAP_FILE:
/* these take a plain integer argument; no pointer conversion needed */
7779 break;
7780 default:
7781 arg = (unsigned long)compat_ptr(arg);
7782 break;
7783 }
7784
7785 return md_ioctl(bdev, mode, cmd, arg);
7786}
7787#endif
7788
7789static int md_set_read_only(struct block_device *bdev, bool ro)
7790{
7791 struct mddev *mddev = bdev->bd_disk->private_data;
7792 int err;
7793
7794 err = mddev_lock(mddev);
7795 if (err)
7796 return err;
7797
7798 if (!mddev->raid_disks && !mddev->external) {
7799 err = -ENODEV;
7800 goto out_unlock;
7801 }
7802
/*
 * Clearing read-only here moves a running array to auto-read-only
 * (ro == 2); the first write will then switch it fully to read-write.
 */
7807 if (!ro && mddev->ro == 1 && mddev->pers) {
7808 err = restart_array(mddev);
7809 if (err)
7810 goto out_unlock;
7811 mddev->ro = 2;
7812 }
7813
7814out_unlock:
7815 mddev_unlock(mddev);
7816 return err;
7817}
7818
7819static int md_open(struct block_device *bdev, fmode_t mode)
7820{
/*
 * Succeed only if we can find the mddev and it is not in the middle of
 * being torn down.
 */
7825 struct mddev *mddev = mddev_find(bdev->bd_dev);
7826 int err;
7827
7828 if (!mddev)
7829 return -ENODEV;
7830
7831 if (mddev->gendisk != bdev->bd_disk) {
7832
7833
7834
7835 mddev_put(mddev);
7836
7837 if (work_pending(&mddev->del_work))
7838 flush_workqueue(md_misc_wq);
7839
7840 return -ERESTARTSYS;
7841 }
7842 BUG_ON(mddev != bdev->bd_disk->private_data);
7843
7844 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7845 goto out;
7846
7847 if (test_bit(MD_CLOSING, &mddev->flags)) {
7848 mutex_unlock(&mddev->open_mutex);
7849 err = -ENODEV;
7850 goto out;
7851 }
7852
7853 err = 0;
7854 atomic_inc(&mddev->openers);
7855 mutex_unlock(&mddev->open_mutex);
7856
7857 bdev_check_media_change(bdev);
7858 out:
7859 if (err)
7860 mddev_put(mddev);
7861 return err;
7862}
7863
7864static void md_release(struct gendisk *disk, fmode_t mode)
7865{
7866 struct mddev *mddev = disk->private_data;
7867
7868 BUG_ON(!mddev);
7869 atomic_dec(&mddev->openers);
7870 mddev_put(mddev);
7871}
7872
7873static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
7874{
7875 struct mddev *mddev = disk->private_data;
7876 unsigned int ret = 0;
7877
7878 if (mddev->changed)
7879 ret = DISK_EVENT_MEDIA_CHANGE;
7880 mddev->changed = 0;
7881 return ret;
7882}
7883
7884const struct block_device_operations md_fops =
7885{
7886 .owner = THIS_MODULE,
7887 .submit_bio = md_submit_bio,
7888 .open = md_open,
7889 .release = md_release,
7890 .ioctl = md_ioctl,
7891#ifdef CONFIG_COMPAT
7892 .compat_ioctl = md_compat_ioctl,
7893#endif
7894 .getgeo = md_getgeo,
7895 .check_events = md_check_events,
7896 .set_read_only = md_set_read_only,
7897};
7898
7899static int md_thread(void *arg)
7900{
7901 struct md_thread *thread = arg;
7902
/*
 * md helper threads carry out the work of a personality or of the core md
 * code (superblock updates, request handling, recovery management).  Each
 * thread sleeps until md_wakeup_thread() sets THREAD_WAKEUP, its timeout
 * expires, or it is asked to park or stop, and then calls thread->run().
 */
7915 allow_signal(SIGKILL);
7916 while (!kthread_should_stop()) {
7917
7918
7919
7920
7921
7922
7923 if (signal_pending(current))
7924 flush_signals(current);
7925
7926 wait_event_interruptible_timeout
7927 (thread->wqueue,
7928 test_bit(THREAD_WAKEUP, &thread->flags)
7929 || kthread_should_stop() || kthread_should_park(),
7930 thread->timeout);
7931
7932 clear_bit(THREAD_WAKEUP, &thread->flags);
7933 if (kthread_should_park())
7934 kthread_parkme();
7935 if (!kthread_should_stop())
7936 thread->run(thread);
7937 }
7938
7939 return 0;
7940}
7941
7942void md_wakeup_thread(struct md_thread *thread)
7943{
7944 if (thread) {
7945 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7946 set_bit(THREAD_WAKEUP, &thread->flags);
7947 wake_up(&thread->wqueue);
7948 }
7949}
7950EXPORT_SYMBOL(md_wakeup_thread);
7951
7952struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7953 struct mddev *mddev, const char *name)
7954{
7955 struct md_thread *thread;
7956
7957 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7958 if (!thread)
7959 return NULL;
7960
7961 init_waitqueue_head(&thread->wqueue);
7962
7963 thread->run = run;
7964 thread->mddev = mddev;
7965 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7966 thread->tsk = kthread_run(md_thread, thread,
7967 "%s_%s",
7968 mdname(thread->mddev),
7969 name);
7970 if (IS_ERR(thread->tsk)) {
7971 kfree(thread);
7972 return NULL;
7973 }
7974 return thread;
7975}
7976EXPORT_SYMBOL(md_register_thread);
7977
7978void md_unregister_thread(struct md_thread **threadp)
7979{
7980 struct md_thread *thread = *threadp;
7981 if (!thread)
7982 return;
7983 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
/*
 * Take pers_lock while clearing *threadp so that a concurrent
 * md_wakeup_thread() from mddev_unlock() never sees a stale pointer.
 */
7987 spin_lock(&pers_lock);
7988 *threadp = NULL;
7989 spin_unlock(&pers_lock);
7990
7991 kthread_stop(thread->tsk);
7992 kfree(thread);
7993}
7994EXPORT_SYMBOL(md_unregister_thread);
7995
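/*
 * md_error() is called by personalities and by SET_DISK_FAULTY when a
 * member device has failed.  It lets the personality's error_handler mark
 * the device, notifies sysfs and interrupts any running resync so that
 * recovery can be rescheduled.
 */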
7996void md_error(struct mddev *mddev, struct md_rdev *rdev)
7997{
7998 if (!rdev || test_bit(Faulty, &rdev->flags))
7999 return;
8000
8001 if (!mddev->pers || !mddev->pers->error_handler)
8002 return;
8003 mddev->pers->error_handler(mddev,rdev);
8004 if (mddev->degraded)
8005 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8006 sysfs_notify_dirent_safe(rdev->sysfs_state);
8007 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8008 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8009 md_wakeup_thread(mddev->thread);
8010 if (mddev->event_work.func)
8011 queue_work(md_misc_wq, &mddev->event_work);
8012 md_new_event(mddev);
8013}
8014EXPORT_SYMBOL(md_error);
8015
8016
8017
8018static void status_unused(struct seq_file *seq)
8019{
8020 int i = 0;
8021 struct md_rdev *rdev;
8022
8023 seq_printf(seq, "unused devices: ");
8024
8025 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
8026 char b[BDEVNAME_SIZE];
8027 i++;
8028 seq_printf(seq, "%s ",
8029 bdevname(rdev->bdev,b));
8030 }
8031 if (!i)
8032 seq_printf(seq, "<none>");
8033
8034 seq_printf(seq, "\n");
8035}
8036
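/*
 * Emit the resync/recovery progress line of /proc/mdstat: a progress bar,
 * percentage, estimated finish time and current speed.  Returns 1 if a
 * status line was printed, 0 if there is nothing in progress to report.
 */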
8037static int status_resync(struct seq_file *seq, struct mddev *mddev)
8038{
8039 sector_t max_sectors, resync, res;
8040 unsigned long dt, db = 0;
8041 sector_t rt, curr_mark_cnt, resync_mark_cnt;
8042 int scale, recovery_active;
8043 unsigned int per_milli;
8044
8045 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8046 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8047 max_sectors = mddev->resync_max_sectors;
8048 else
8049 max_sectors = mddev->dev_sectors;
8050
8051 resync = mddev->curr_resync;
8052 if (resync <= 3) {
8053 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8054
8055 resync = max_sectors;
8056 } else if (resync > max_sectors)
8057 resync = max_sectors;
8058 else
8059 resync -= atomic_read(&mddev->recovery_active);
8060
8061 if (resync == 0) {
8062 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8063 struct md_rdev *rdev;
8064
8065 rdev_for_each(rdev, mddev)
8066 if (rdev->raid_disk >= 0 &&
8067 !test_bit(Faulty, &rdev->flags) &&
8068 rdev->recovery_offset != MaxSector &&
8069 rdev->recovery_offset) {
8070 seq_printf(seq, "\trecover=REMOTE");
8071 return 1;
8072 }
8073 if (mddev->reshape_position != MaxSector)
8074 seq_printf(seq, "\treshape=REMOTE");
8075 else
8076 seq_printf(seq, "\tresync=REMOTE");
8077 return 1;
8078 }
8079 if (mddev->recovery_cp < MaxSector) {
8080 seq_printf(seq, "\tresync=PENDING");
8081 return 1;
8082 }
8083 return 0;
8084 }
8085 if (resync < 3) {
8086 seq_printf(seq, "\tresync=DELAYED");
8087 return 1;
8088 }
8089
8090 WARN_ON(max_sectors == 0);
8091
/*
 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t and
 * (max_sectors>>scale)+1 fits in the u32 divisor that sector_div()
 * requires; start at 10 and raise it for very large 64-bit arrays.
 */
8096 scale = 10;
8097 if (sizeof(sector_t) > sizeof(unsigned long)) {
8098 while ( max_sectors/2 > (1ULL<<(scale+32)))
8099 scale++;
8100 }
8101 res = (resync>>scale)*1000;
8102 sector_div(res, (u32)((max_sectors>>scale)+1));
8103
8104 per_milli = res;
8105 {
8106 int i, x = per_milli/50, y = 20-x;
8107 seq_printf(seq, "[");
8108 for (i = 0; i < x; i++)
8109 seq_printf(seq, "=");
8110 seq_printf(seq, ">");
8111 for (i = 0; i < y; i++)
8112 seq_printf(seq, ".");
8113 seq_printf(seq, "] ");
8114 }
8115 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8116 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8117 "reshape" :
8118 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8119 "check" :
8120 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8121 "resync" : "recovery"))),
8122 per_milli/10, per_milli % 10,
8123 (unsigned long long) resync/2,
8124 (unsigned long long) max_sectors/2);
8125
/*
 * Estimate the time remaining:
 *   dt - seconds since the last resync mark
 *   db - sectors completed since that mark, excluding the sectors that
 *        are still outstanding in recovery_active
 *   rt - remaining time, i.e. (max_sectors - resync) * dt / db
 *
 * The divisor db is pre-scaled by 32 (and the final result shifted back
 * down by 5) so that precision is not lost when db is small towards the
 * end of the resync.
 */
8143 dt = ((jiffies - mddev->resync_mark) / HZ);
8144 if (!dt) dt++;
8145
8146 curr_mark_cnt = mddev->curr_mark_cnt;
8147 recovery_active = atomic_read(&mddev->recovery_active);
8148 resync_mark_cnt = mddev->resync_mark_cnt;
8149
8150 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8151 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8152
8153 rt = max_sectors - resync;
8154 rt = div64_u64(rt, db/32+1);
8155 rt *= dt;
8156 rt >>= 5;
8157
8158 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8159 ((unsigned long)rt % 60)/6);
8160
8161 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8162 return 1;
8163}
8164
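/*
 * seq_file iterator for /proc/mdstat.  The sentinel values (void *)1 and
 * (void *)2 stand for the "Personalities" header and the "unused devices"
 * tail; everything in between is an mddev taken from all_mddevs with a
 * reference held across the iteration step.
 */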
8165static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8166{
8167 struct list_head *tmp;
8168 loff_t l = *pos;
8169 struct mddev *mddev;
8170
8171 if (l >= 0x10000)
8172 return NULL;
8173 if (!l--)
8174
8175 return (void*)1;
8176
8177 spin_lock(&all_mddevs_lock);
8178 list_for_each(tmp,&all_mddevs)
8179 if (!l--) {
8180 mddev = list_entry(tmp, struct mddev, all_mddevs);
8181 mddev_get(mddev);
8182 spin_unlock(&all_mddevs_lock);
8183 return mddev;
8184 }
8185 spin_unlock(&all_mddevs_lock);
8186 if (!l--)
8187 return (void*)2;
8188 return NULL;
8189}
8190
8191static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8192{
8193 struct list_head *tmp;
8194 struct mddev *next_mddev, *mddev = v;
8195
8196 ++*pos;
8197 if (v == (void*)2)
8198 return NULL;
8199
8200 spin_lock(&all_mddevs_lock);
8201 if (v == (void*)1)
8202 tmp = all_mddevs.next;
8203 else
8204 tmp = mddev->all_mddevs.next;
8205 if (tmp != &all_mddevs)
8206 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8207 else {
8208 next_mddev = (void*)2;
8209 *pos = 0x10000;
8210 }
8211 spin_unlock(&all_mddevs_lock);
8212
8213 if (v != (void*)1)
8214 mddev_put(mddev);
8215 return next_mddev;
8216
8217}
8218
8219static void md_seq_stop(struct seq_file *seq, void *v)
8220{
8221 struct mddev *mddev = v;
8222
8223 if (mddev && v != (void*)1 && v != (void*)2)
8224 mddev_put(mddev);
8225}
8226
8227static int md_seq_show(struct seq_file *seq, void *v)
8228{
8229 struct mddev *mddev = v;
8230 sector_t sectors;
8231 struct md_rdev *rdev;
8232
8233 if (v == (void*)1) {
8234 struct md_personality *pers;
8235 seq_printf(seq, "Personalities : ");
8236 spin_lock(&pers_lock);
8237 list_for_each_entry(pers, &pers_list, list)
8238 seq_printf(seq, "[%s] ", pers->name);
8239
8240 spin_unlock(&pers_lock);
8241 seq_printf(seq, "\n");
8242 seq->poll_event = atomic_read(&md_event_count);
8243 return 0;
8244 }
8245 if (v == (void*)2) {
8246 status_unused(seq);
8247 return 0;
8248 }
8249
8250 spin_lock(&mddev->lock);
8251 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8252 seq_printf(seq, "%s : %sactive", mdname(mddev),
8253 mddev->pers ? "" : "in");
8254 if (mddev->pers) {
8255 if (mddev->ro==1)
8256 seq_printf(seq, " (read-only)");
8257 if (mddev->ro==2)
8258 seq_printf(seq, " (auto-read-only)");
8259 seq_printf(seq, " %s", mddev->pers->name);
8260 }
8261
8262 sectors = 0;
8263 rcu_read_lock();
8264 rdev_for_each_rcu(rdev, mddev) {
8265 char b[BDEVNAME_SIZE];
8266 seq_printf(seq, " %s[%d]",
8267 bdevname(rdev->bdev,b), rdev->desc_nr);
8268 if (test_bit(WriteMostly, &rdev->flags))
8269 seq_printf(seq, "(W)");
8270 if (test_bit(Journal, &rdev->flags))
8271 seq_printf(seq, "(J)");
8272 if (test_bit(Faulty, &rdev->flags)) {
8273 seq_printf(seq, "(F)");
8274 continue;
8275 }
8276 if (rdev->raid_disk < 0)
8277 seq_printf(seq, "(S)");
8278 if (test_bit(Replacement, &rdev->flags))
8279 seq_printf(seq, "(R)");
8280 sectors += rdev->sectors;
8281 }
8282 rcu_read_unlock();
8283
8284 if (!list_empty(&mddev->disks)) {
8285 if (mddev->pers)
8286 seq_printf(seq, "\n %llu blocks",
8287 (unsigned long long)
8288 mddev->array_sectors / 2);
8289 else
8290 seq_printf(seq, "\n %llu blocks",
8291 (unsigned long long)sectors / 2);
8292 }
8293 if (mddev->persistent) {
8294 if (mddev->major_version != 0 ||
8295 mddev->minor_version != 90) {
8296 seq_printf(seq," super %d.%d",
8297 mddev->major_version,
8298 mddev->minor_version);
8299 }
8300 } else if (mddev->external)
8301 seq_printf(seq, " super external:%s",
8302 mddev->metadata_type);
8303 else
8304 seq_printf(seq, " super non-persistent");
8305
8306 if (mddev->pers) {
8307 mddev->pers->status(seq, mddev);
8308 seq_printf(seq, "\n ");
8309 if (mddev->pers->sync_request) {
8310 if (status_resync(seq, mddev))
8311 seq_printf(seq, "\n ");
8312 }
8313 } else
8314 seq_printf(seq, "\n ");
8315
8316 md_bitmap_status(seq, mddev->bitmap);
8317
8318 seq_printf(seq, "\n");
8319 }
8320 spin_unlock(&mddev->lock);
8321
8322 return 0;
8323}
8324
8325static const struct seq_operations md_seq_ops = {
8326 .start = md_seq_start,
8327 .next = md_seq_next,
8328 .stop = md_seq_stop,
8329 .show = md_seq_show,
8330};
8331
8332static int md_seq_open(struct inode *inode, struct file *file)
8333{
8334 struct seq_file *seq;
8335 int error;
8336
8337 error = seq_open(file, &md_seq_ops);
8338 if (error)
8339 return error;
8340
8341 seq = file->private_data;
8342 seq->poll_event = atomic_read(&md_event_count);
8343 return error;
8344}
8345
8346static int md_unloading;
8347static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8348{
8349 struct seq_file *seq = filp->private_data;
8350 __poll_t mask;
8351
8352 if (md_unloading)
8353 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8354 poll_wait(filp, &md_event_waiters, wait);
8355
8356
8357 mask = EPOLLIN | EPOLLRDNORM;
8358
8359 if (seq->poll_event != atomic_read(&md_event_count))
8360 mask |= EPOLLERR | EPOLLPRI;
8361 return mask;
8362}
8363
8364static const struct proc_ops mdstat_proc_ops = {
8365 .proc_open = md_seq_open,
8366 .proc_read = seq_read,
8367 .proc_lseek = seq_lseek,
8368 .proc_release = seq_release,
8369 .proc_poll = mdstat_poll,
8370};
8371
8372int register_md_personality(struct md_personality *p)
8373{
8374 pr_debug("md: %s personality registered for level %d\n",
8375 p->name, p->level);
8376 spin_lock(&pers_lock);
8377 list_add_tail(&p->list, &pers_list);
8378 spin_unlock(&pers_lock);
8379 return 0;
8380}
8381EXPORT_SYMBOL(register_md_personality);
8382
8383int unregister_md_personality(struct md_personality *p)
8384{
8385 pr_debug("md: %s personality unregistered\n", p->name);
8386 spin_lock(&pers_lock);
8387 list_del_init(&p->list);
8388 spin_unlock(&pers_lock);
8389 return 0;
8390}
8391EXPORT_SYMBOL(unregister_md_personality);
8392
8393int register_md_cluster_operations(struct md_cluster_operations *ops,
8394 struct module *module)
8395{
8396 int ret = 0;
8397 spin_lock(&pers_lock);
8398 if (md_cluster_ops != NULL)
8399 ret = -EALREADY;
8400 else {
8401 md_cluster_ops = ops;
8402 md_cluster_mod = module;
8403 }
8404 spin_unlock(&pers_lock);
8405 return ret;
8406}
8407EXPORT_SYMBOL(register_md_cluster_operations);
8408
8409int unregister_md_cluster_operations(void)
8410{
8411 spin_lock(&pers_lock);
8412 md_cluster_ops = NULL;
8413 spin_unlock(&pers_lock);
8414 return 0;
8415}
8416EXPORT_SYMBOL(unregister_md_cluster_operations);
8417
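/*
 * Join a clustered array: load md-cluster if necessary, take a reference
 * on the module and call its join() handler.  On success the safemode
 * delay is cleared, as clustered arrays do not use it.
 */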
8418int md_setup_cluster(struct mddev *mddev, int nodes)
8419{
8420 int ret;
8421 if (!md_cluster_ops)
8422 request_module("md-cluster");
8423 spin_lock(&pers_lock);
8424
8425 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
pr_warn("can't find md-cluster module or get its reference.\n");
8427 spin_unlock(&pers_lock);
8428 return -ENOENT;
8429 }
8430 spin_unlock(&pers_lock);
8431
8432 ret = md_cluster_ops->join(mddev, nodes);
8433 if (!ret)
8434 mddev->safemode_delay = 0;
8435 return ret;
8436}
8437
8438void md_cluster_stop(struct mddev *mddev)
8439{
8440 if (!md_cluster_ops)
8441 return;
8442 md_cluster_ops->leave(mddev);
8443 module_put(md_cluster_mod);
8444}
8445
8446static int is_mddev_idle(struct mddev *mddev, int init)
8447{
8448 struct md_rdev *rdev;
8449 int idle;
8450 int curr_events;
8451
8452 idle = 1;
8453 rcu_read_lock();
8454 rdev_for_each_rcu(rdev, mddev) {
8455 struct gendisk *disk = rdev->bdev->bd_disk;
8456 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8457 atomic_read(&disk->sync_io);
/*
 * sync_io is incremented when a resync request is issued, while the disk
 * I/O statistics only grow on completion, so resync activity tends to
 * make curr_events smaller rather than larger.  Ordinary I/O increases
 * the disk statistics without touching sync_io, so curr_events eventually
 * overtakes last_events; once it is more than 64 sectors ahead the array
 * is treated as non-idle and the resync throttles back.  A burst of
 * outstanding resync I/O at the moment last_events is sampled can cause
 * one spurious "non-idle" decision, but that corrects itself on the next
 * sample.
 */
8480 if (init || curr_events - rdev->last_events > 64) {
8481 rdev->last_events = curr_events;
8482 idle = 0;
8483 }
8484 }
8485 rcu_read_unlock();
8486 return idle;
8487}
8488
8489void md_done_sync(struct mddev *mddev, int blocks, int ok)
8490{
8491
8492 atomic_sub(blocks, &mddev->recovery_active);
8493 wake_up(&mddev->recovery_wait);
8494 if (!ok) {
8495 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8496 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8497 md_wakeup_thread(mddev->thread);
8498
8499 }
8500}
8501EXPORT_SYMBOL(md_done_sync);
8502
/*
 * md_write_start(mddev, bi)
 * If any array metadata needs updating before the write can proceed (for
 * example marking the superblock 'active'), schedule the update and wait
 * for it.  A return value of false means the write was not recorded
 * because the array is suspended, and must not proceed.
 */
8510bool md_write_start(struct mddev *mddev, struct bio *bi)
8511{
8512 int did_change = 0;
8513
8514 if (bio_data_dir(bi) != WRITE)
8515 return true;
8516
8517 BUG_ON(mddev->ro == 1);
8518 if (mddev->ro == 2) {
/* need to switch to read/write */
8520 mddev->ro = 0;
8521 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8522 md_wakeup_thread(mddev->thread);
8523 md_wakeup_thread(mddev->sync_thread);
8524 did_change = 1;
8525 }
8526 rcu_read_lock();
8527 percpu_ref_get(&mddev->writes_pending);
8528 smp_mb();
8529 if (mddev->safemode == 1)
8530 mddev->safemode = 0;
8531
8532 if (mddev->in_sync || mddev->sync_checkers) {
8533 spin_lock(&mddev->lock);
8534 if (mddev->in_sync) {
8535 mddev->in_sync = 0;
8536 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8537 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8538 md_wakeup_thread(mddev->thread);
8539 did_change = 1;
8540 }
8541 spin_unlock(&mddev->lock);
8542 }
8543 rcu_read_unlock();
8544 if (did_change)
8545 sysfs_notify_dirent_safe(mddev->sysfs_state);
8546 if (!mddev->has_superblocks)
8547 return true;
8548 wait_event(mddev->sb_wait,
8549 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8550 mddev->suspended);
8551 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8552 percpu_ref_put(&mddev->writes_pending);
8553 return false;
8554 }
8555 return true;
8556}
8557EXPORT_SYMBOL(md_write_start);
8558
/*
 * md_write_inc() may only be used once md_write_start() has already
 * succeeded for the current request.  It takes another reference on
 * writes_pending, which is useful when one request is split into several
 * parts: each part then needs its own matching md_write_end().  Unlike
 * md_write_start(), it is safe to call in atomic context.
 */
8567void md_write_inc(struct mddev *mddev, struct bio *bi)
8568{
8569 if (bio_data_dir(bi) != WRITE)
8570 return;
8571 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8572 percpu_ref_get(&mddev->writes_pending);
8573}
8574EXPORT_SYMBOL(md_write_inc);
8575
8576void md_write_end(struct mddev *mddev)
8577{
8578 percpu_ref_put(&mddev->writes_pending);
8579
8580 if (mddev->safemode == 2)
8581 md_wakeup_thread(mddev->thread);
8582 else if (mddev->safemode_delay)
8583
8584
8585
8586 mod_timer(&mddev->safemode_timer,
8587 roundup(jiffies, mddev->safemode_delay) +
8588 mddev->safemode_delay);
8589}
8590
8591EXPORT_SYMBOL(md_write_end);
8592
/*
 * md_allow_write(mddev)
 * Marks the array 'active' so that writes can proceed without first
 * blocking on a metadata update.  Call this, with the mddev locked,
 * before doing GFP_KERNEL allocations that might otherwise wait for
 * writeback to this same array.
 */
8599void md_allow_write(struct mddev *mddev)
8600{
8601 if (!mddev->pers)
8602 return;
8603 if (mddev->ro)
8604 return;
8605 if (!mddev->pers->sync_request)
8606 return;
8607
8608 spin_lock(&mddev->lock);
8609 if (mddev->in_sync) {
8610 mddev->in_sync = 0;
8611 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8612 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8613 if (mddev->safemode_delay &&
8614 mddev->safemode == 0)
8615 mddev->safemode = 1;
8616 spin_unlock(&mddev->lock);
8617 md_update_sb(mddev, 0);
8618 sysfs_notify_dirent_safe(mddev->sysfs_state);
8619
8620 wait_event(mddev->sb_wait,
8621 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8622 } else
8623 spin_unlock(&mddev->lock);
8624}
8625EXPORT_SYMBOL_GPL(md_allow_write);
8626
8627#define SYNC_MARKS 10
8628#define SYNC_MARK_STEP (3*HZ)
8629#define UPDATE_FREQUENCY (5*60*HZ)
8630void md_do_sync(struct md_thread *thread)
8631{
8632 struct mddev *mddev = thread->mddev;
8633 struct mddev *mddev2;
8634 unsigned int currspeed = 0, window;
8635 sector_t max_sectors,j, io_sectors, recovery_done;
8636 unsigned long mark[SYNC_MARKS];
8637 unsigned long update_time;
8638 sector_t mark_cnt[SYNC_MARKS];
8639 int last_mark,m;
8640 struct list_head *tmp;
8641 sector_t last_check;
8642 int skipped = 0;
8643 struct md_rdev *rdev;
8644 char *desc, *action = NULL;
8645 struct blk_plug plug;
8646 int ret;
8647
8648
8649 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8650 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8651 return;
8652 if (mddev->ro) {
8653 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8654 return;
8655 }
8656
8657 if (mddev_is_clustered(mddev)) {
8658 ret = md_cluster_ops->resync_start(mddev);
8659 if (ret)
8660 goto skip;
8661
8662 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8663 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8664 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8665 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8666 && ((unsigned long long)mddev->curr_resync_completed
8667 < (unsigned long long)mddev->resync_max_sectors))
8668 goto skip;
8669 }
8670
8671 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8672 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8673 desc = "data-check";
8674 action = "check";
8675 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8676 desc = "requested-resync";
8677 action = "repair";
8678 } else
8679 desc = "resync";
8680 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8681 desc = "reshape";
8682 else
8683 desc = "recovery";
8684
8685 mddev->last_sync_action = action ?: desc;
/*
 * curr_resync is overloaded while we sort out conflicts with other arrays
 * that share physical devices:
 *   0     - not engaged in resync at all
 *   2     - checking that no conflicting resync is running
 *   1     - like 2, but we have yielded to let the other array go first
 *   other - actively resyncing; the value is the current sector
 *
 * Before starting we set curr_resync to 2 and check every conflicting
 * array.  If one has curr_resync equal to or higher than ours we wait on
 * resync_wait, dropping our own value to 1 if we decide to yield (the tie
 * is broken arbitrarily by comparing mddev pointers), and then restart
 * the check from the beginning.
 */
8703 do {
8704 int mddev2_minor = -1;
8705 mddev->curr_resync = 2;
8706
8707 try_again:
8708 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8709 goto skip;
8710 for_each_mddev(mddev2, tmp) {
8711 if (mddev2 == mddev)
8712 continue;
8713 if (!mddev->parallel_resync
8714 && mddev2->curr_resync
8715 && match_mddev_units(mddev, mddev2)) {
8716 DEFINE_WAIT(wq);
8717 if (mddev < mddev2 && mddev->curr_resync == 2) {
8718
8719 mddev->curr_resync = 1;
8720 wake_up(&resync_wait);
8721 }
8722 if (mddev > mddev2 && mddev->curr_resync == 1)
8723
8724
8725
8726 continue;
8727
8728
8729
8730
8731 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8732 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8733 mddev2->curr_resync >= mddev->curr_resync) {
8734 if (mddev2_minor != mddev2->md_minor) {
8735 mddev2_minor = mddev2->md_minor;
8736 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8737 desc, mdname(mddev),
8738 mdname(mddev2));
8739 }
8740 mddev_put(mddev2);
8741 if (signal_pending(current))
8742 flush_signals(current);
8743 schedule();
8744 finish_wait(&resync_wait, &wq);
8745 goto try_again;
8746 }
8747 finish_wait(&resync_wait, &wq);
8748 }
8749 }
8750 } while (mddev->curr_resync < 2);
8751
8752 j = 0;
8753 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8754
8755
8756
8757 max_sectors = mddev->resync_max_sectors;
8758 atomic64_set(&mddev->resync_mismatches, 0);
8759
8760 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8761 j = mddev->resync_min;
8762 else if (!mddev->bitmap)
8763 j = mddev->recovery_cp;
8764
8765 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8766 max_sectors = mddev->resync_max_sectors;
/*
 * If the node that started a clustered reshape has aborted, continue
 * from the recorded reshape_position rather than restarting from the
 * beginning.
 */
8772 if (mddev_is_clustered(mddev) &&
8773 mddev->reshape_position != MaxSector)
8774 j = mddev->reshape_position;
8775 } else {
8776
8777 max_sectors = mddev->dev_sectors;
8778 j = MaxSector;
8779 rcu_read_lock();
8780 rdev_for_each_rcu(rdev, mddev)
8781 if (rdev->raid_disk >= 0 &&
8782 !test_bit(Journal, &rdev->flags) &&
8783 !test_bit(Faulty, &rdev->flags) &&
8784 !test_bit(In_sync, &rdev->flags) &&
8785 rdev->recovery_offset < j)
8786 j = rdev->recovery_offset;
8787 rcu_read_unlock();
/*
 * With a bitmap, all writes that started before the spare was added must
 * complete before recovery begins; otherwise a late bitmap update could
 * set a bit in a region that recovery has already inspected and skipped.
 * A quiesce/unquiesce cycle drains those writes.
 */
8797 if (mddev->bitmap) {
8798 mddev->pers->quiesce(mddev, 1);
8799 mddev->pers->quiesce(mddev, 0);
8800 }
8801 }
8802
8803 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8804 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8805 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8806 speed_max(mddev), desc);
8807
8808 is_mddev_idle(mddev, 1);
8809
8810 io_sectors = 0;
8811 for (m = 0; m < SYNC_MARKS; m++) {
8812 mark[m] = jiffies;
8813 mark_cnt[m] = io_sectors;
8814 }
8815 last_mark = 0;
8816 mddev->resync_mark = mark[last_mark];
8817 mddev->resync_mark_cnt = mark_cnt[last_mark];
8818
8819
8820
8821
8822 window = 32 * (PAGE_SIZE / 512);
8823 pr_debug("md: using %dk window, over a total of %lluk.\n",
8824 window/2, (unsigned long long)max_sectors/2);
8825
8826 atomic_set(&mddev->recovery_active, 0);
8827 last_check = 0;
8828
8829 if (j>2) {
8830 pr_debug("md: resuming %s of %s from checkpoint.\n",
8831 desc, mdname(mddev));
8832 mddev->curr_resync = j;
8833 } else
8834 mddev->curr_resync = 3;
8835 mddev->curr_resync_completed = j;
8836 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8837 md_new_event(mddev);
8838 update_time = jiffies;
8839
8840 blk_start_plug(&plug);
8841 while (j < max_sectors) {
8842 sector_t sectors;
8843
8844 skipped = 0;
8845
8846 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8847 ((mddev->curr_resync > mddev->curr_resync_completed &&
8848 (mddev->curr_resync - mddev->curr_resync_completed)
8849 > (max_sectors >> 4)) ||
8850 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8851 (j - mddev->curr_resync_completed)*2
8852 >= mddev->resync_max - mddev->curr_resync_completed ||
8853 mddev->curr_resync_completed > mddev->resync_max
8854 )) {
8855
8856 wait_event(mddev->recovery_wait,
8857 atomic_read(&mddev->recovery_active) == 0);
8858 mddev->curr_resync_completed = j;
8859 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8860 j > mddev->recovery_cp)
8861 mddev->recovery_cp = j;
8862 update_time = jiffies;
8863 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8864 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8865 }
8866
8867 while (j >= mddev->resync_max &&
8868 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/*
 * resync_max is controlled from user space and may stay below j
 * indefinitely, so wait interruptibly until it is raised or the resync
 * is interrupted.
 */
8873 flush_signals(current);
8874 wait_event_interruptible(mddev->recovery_wait,
8875 mddev->resync_max > j
8876 || test_bit(MD_RECOVERY_INTR,
8877 &mddev->recovery));
8878 }
8879
8880 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8881 break;
8882
8883 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8884 if (sectors == 0) {
8885 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8886 break;
8887 }
8888
8889 if (!skipped) {
8890 io_sectors += sectors;
8891 atomic_add(sectors, &mddev->recovery_active);
8892 }
8893
8894 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8895 break;
8896
8897 j += sectors;
8898 if (j > max_sectors)
8899
8900 j = max_sectors;
8901 if (j > 2)
8902 mddev->curr_resync = j;
8903 mddev->curr_mark_cnt = io_sectors;
8904 if (last_check == 0)
8905
8906
8907
8908 md_new_event(mddev);
8909
8910 if (last_check + window > io_sectors || j == max_sectors)
8911 continue;
8912
8913 last_check = io_sectors;
8914 repeat:
8915 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8916
8917 int next = (last_mark+1) % SYNC_MARKS;
8918
8919 mddev->resync_mark = mark[next];
8920 mddev->resync_mark_cnt = mark_cnt[next];
8921 mark[next] = jiffies;
8922 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8923 last_mark = next;
8924 }
8925
8926 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8927 break;
8928
/*
 * Throttle: while we are above the hard speed limit, sleep and
 * re-measure; if we are above the soft limit and the devices are busy
 * with other I/O, wait for our outstanding resync I/O to drain.  Only
 * I/O load matters here, not CPU load.
 */
8937 cond_resched();
8938
8939 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8940 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8941 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8942
8943 if (currspeed > speed_min(mddev)) {
8944 if (currspeed > speed_max(mddev)) {
8945 msleep(500);
8946 goto repeat;
8947 }
8948 if (!is_mddev_idle(mddev, 0)) {
8949
8950
8951
8952
8953 wait_event(mddev->recovery_wait,
8954 !atomic_read(&mddev->recovery_active));
8955 }
8956 }
8957 }
8958 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8959 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8960 ? "interrupted" : "done");
8961
8962
8963
8964 blk_finish_plug(&plug);
8965 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8966
8967 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8968 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8969 mddev->curr_resync > 3) {
8970 mddev->curr_resync_completed = mddev->curr_resync;
8971 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8972 }
8973 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8974
8975 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8976 mddev->curr_resync > 3) {
8977 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8978 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8979 if (mddev->curr_resync >= mddev->recovery_cp) {
8980 pr_debug("md: checkpointing %s of %s.\n",
8981 desc, mdname(mddev));
8982 if (test_bit(MD_RECOVERY_ERROR,
8983 &mddev->recovery))
8984 mddev->recovery_cp =
8985 mddev->curr_resync_completed;
8986 else
8987 mddev->recovery_cp =
8988 mddev->curr_resync;
8989 }
8990 } else
8991 mddev->recovery_cp = MaxSector;
8992 } else {
8993 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8994 mddev->curr_resync = MaxSector;
8995 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8996 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8997 rcu_read_lock();
8998 rdev_for_each_rcu(rdev, mddev)
8999 if (rdev->raid_disk >= 0 &&
9000 mddev->delta_disks >= 0 &&
9001 !test_bit(Journal, &rdev->flags) &&
9002 !test_bit(Faulty, &rdev->flags) &&
9003 !test_bit(In_sync, &rdev->flags) &&
9004 rdev->recovery_offset < mddev->curr_resync)
9005 rdev->recovery_offset = mddev->curr_resync;
9006 rcu_read_unlock();
9007 }
9008 }
9009 }
9010 skip:
/*
 * Also set CHANGE_PENDING so that other cluster nodes get told about any
 * further update that may be needed; this is harmless for ordinary
 * arrays.
 */
9014 set_mask_bits(&mddev->sb_flags, 0,
9015 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
9016
9017 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9018 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9019 mddev->delta_disks > 0 &&
9020 mddev->pers->finish_reshape &&
9021 mddev->pers->size &&
9022 mddev->queue) {
9023 mddev_lock_nointr(mddev);
9024 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9025 mddev_unlock(mddev);
9026 if (!mddev_is_clustered(mddev))
9027 set_capacity_and_notify(mddev->gendisk,
9028 mddev->array_sectors);
9029 }
9030
9031 spin_lock(&mddev->lock);
9032 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9033
9034 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9035 mddev->resync_min = 0;
9036 mddev->resync_max = MaxSector;
9037 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9038 mddev->resync_min = mddev->curr_resync_completed;
9039 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9040 mddev->curr_resync = 0;
9041 spin_unlock(&mddev->lock);
9042
9043 wake_up(&resync_wait);
9044 md_wakeup_thread(mddev->thread);
9045 return;
9046}
9047EXPORT_SYMBOL_GPL(md_do_sync);
9048
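/*
 * remove_and_add_spares() does the device housekeeping for recovery: first
 * detach failed or no-longer-needed devices that have no pending I/O
 * (using a single RCU grace period for all of them), then offer unused
 * devices to the personality as spares.  Returns the number of spares
 * available, so the caller can decide whether a recovery pass is worth
 * starting.
 */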
9049static int remove_and_add_spares(struct mddev *mddev,
9050 struct md_rdev *this)
9051{
9052 struct md_rdev *rdev;
9053 int spares = 0;
9054 int removed = 0;
9055 bool remove_some = false;
9056
9057 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9058
9059 return 0;
9060
9061 rdev_for_each(rdev, mddev) {
9062 if ((this == NULL || rdev == this) &&
9063 rdev->raid_disk >= 0 &&
9064 !test_bit(Blocked, &rdev->flags) &&
9065 test_bit(Faulty, &rdev->flags) &&
9066 atomic_read(&rdev->nr_pending)==0) {
/*
 * Candidate for removal: flag it now and issue a single
 * synchronize_rcu() below, so that RCU readers are finished with the
 * device before it is actually detached.
 */
9072 remove_some = true;
9073 set_bit(RemoveSynchronized, &rdev->flags);
9074 }
9075 }
9076
9077 if (remove_some)
9078 synchronize_rcu();
9079 rdev_for_each(rdev, mddev) {
9080 if ((this == NULL || rdev == this) &&
9081 rdev->raid_disk >= 0 &&
9082 !test_bit(Blocked, &rdev->flags) &&
9083 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9084 (!test_bit(In_sync, &rdev->flags) &&
9085 !test_bit(Journal, &rdev->flags))) &&
9086 atomic_read(&rdev->nr_pending)==0)) {
9087 if (mddev->pers->hot_remove_disk(
9088 mddev, rdev) == 0) {
9089 sysfs_unlink_rdev(mddev, rdev);
9090 rdev->saved_raid_disk = rdev->raid_disk;
9091 rdev->raid_disk = -1;
9092 removed++;
9093 }
9094 }
9095 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9096 clear_bit(RemoveSynchronized, &rdev->flags);
9097 }
9098
9099 if (removed && mddev->kobj.sd)
9100 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9101
9102 if (this && removed)
9103 goto no_add;
9104
9105 rdev_for_each(rdev, mddev) {
9106 if (this && this != rdev)
9107 continue;
9108 if (test_bit(Candidate, &rdev->flags))
9109 continue;
9110 if (rdev->raid_disk >= 0 &&
9111 !test_bit(In_sync, &rdev->flags) &&
9112 !test_bit(Journal, &rdev->flags) &&
9113 !test_bit(Faulty, &rdev->flags))
9114 spares++;
9115 if (rdev->raid_disk >= 0)
9116 continue;
9117 if (test_bit(Faulty, &rdev->flags))
9118 continue;
9119 if (!test_bit(Journal, &rdev->flags)) {
9120 if (mddev->ro &&
9121 ! (rdev->saved_raid_disk >= 0 &&
9122 !test_bit(Bitmap_sync, &rdev->flags)))
9123 continue;
9124
9125 rdev->recovery_offset = 0;
9126 }
9127 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9128
9129 sysfs_link_rdev(mddev, rdev);
9130 if (!test_bit(Journal, &rdev->flags))
9131 spares++;
9132 md_new_event(mddev);
9133 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9134 }
9135 }
9136no_add:
9137 if (removed)
9138 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9139 return spares;
9140}
9141
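/*
 * Deferred work (queued on mddev->del_work): register the md_do_sync()
 * thread for this array.  If the thread cannot be started, drop the
 * recovery state bits that were set and notify userspace via the sysfs
 * "sync_action" attribute.
 */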
static void md_start_sync(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, del_work);

	mddev->sync_thread = md_register_thread(md_do_sync,
						mddev,
						"resync");
	if (!mddev->sync_thread) {
		pr_warn("%s: could not start resync thread...\n",
			mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		wake_up(&resync_wait);
		if (test_and_clear_bit(MD_RECOVERY_RECOVER,
				       &mddev->recovery))
			if (mddev->sysfs_action)
				sysfs_notify_dirent_safe(mddev->sysfs_action);
	} else
		md_wakeup_thread(mddev->sync_thread);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
}

/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread, which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there are any faulty devices, remove them.
 *  5/ If array is degraded, try to add spare devices.
 *  6/ If array has spares or is not in-sync, start a resync thread.
 */
void md_check_recovery(struct mddev *mddev)
{
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
		smp_mb__after_atomic();
		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
			md_update_sb(mddev, 0);
		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
		wake_up(&mddev->sb_wait);
	}

	if (mddev->suspended)
		return;

	if (mddev->bitmap)
		md_bitmap_daemon_work(mddev);

	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
			pr_debug("md: %s in immediate safe mode\n",
				 mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return;
	if ( ! (
		(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->external == 0 && mddev->safemode == 1) ||
		(mddev->safemode == 2
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		))
		return;

	if (mddev_trylock(mddev)) {
		int spares = 0;
		bool try_set_sync = mddev->safemode != 0;

		if (!mddev->external && mddev->safemode == 1)
			mddev->safemode = 0;

		if (mddev->ro) {
			struct md_rdev *rdev;
			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to
				 * read/write.  Leave 'external' alone as it
				 * may be updated by the metadata handler.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - not add spares even if array is degraded
			 *   (as we don't write to failed devices)
			 * - don't wait for MD_RECOVERY_NEEDED even if
			 *   the array is degraded.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			goto unlock;
		}

		if (mddev_is_clustered(mddev)) {
			struct md_rdev *rdev;
			/* kick the device if another node issued a
			 * remove disk.
			 */
			rdev_for_each(rdev, mddev) {
				if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
				    rdev->raid_disk < 0)
					md_kick_rdev_from_array(rdev);
			}
		}

		if (try_set_sync && !mddev->external && !mddev->in_sync) {
			spin_lock(&mddev->lock);
			set_in_sync(mddev);
			spin_unlock(&mddev->lock);
		}

		if (mddev->sb_flags)
			md_update_sb(mddev, 0);

		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			md_reap_sync_thread(mddev);
			goto unlock;
		}
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
		mddev->curr_resync_completed = 0;
		spin_lock(&mddev->lock);
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		spin_unlock(&mddev->lock);
		/* Clear some bits that don't mean anything, but
		 * might be left set
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto not_running;
		/* No recovery is running.
		 * Remove any failed drives, then add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */
		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto not_running;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done */
			goto not_running;

		if (mddev->pers->sync_request) {
			if (spares) {
				/* We are adding a device or devices to an array
				 * which has a bitmap stored on all devices.
				 * So make sure all bitmap pages get written.
				 */
				md_bitmap_write_all(mddev->bitmap);
			}
			INIT_WORK(&mddev->del_work, md_start_sync);
			queue_work(md_misc_wq, &mddev->del_work);
			goto unlock;
		}
	not_running:
		if (!mddev->sync_thread) {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			wake_up(&resync_wait);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}
	unlock:
		wake_up(&mddev->sb_wait);
		mddev_unlock(mddev);
	}
}
EXPORT_SYMBOL(md_check_recovery);

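/*
 * Called once a resync/recovery/reshape thread has finished: unregister the
 * thread, activate spares on success, finish any reshape, write out the
 * superblocks and clear the MD_RECOVERY_* state bits.
 */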
void md_reap_sync_thread(struct mddev *mddev)
{
	struct md_rdev *rdev;
	sector_t old_dev_sectors = mddev->dev_sectors;
	bool is_reshaped = false;

	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    mddev->degraded != mddev->raid_disks) {
		/* success: activate any spares */
		if (mddev->pers->spare_active(mddev)) {
			sysfs_notify_dirent_safe(mddev->sysfs_degraded);
			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		}
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    mddev->pers->finish_reshape) {
		mddev->pers->finish_reshape(mddev);
		if (mddev_is_clustered(mddev))
			is_reshaped = true;
	}

	/* If the array is no longer degraded, any saved_raid_disk
	 * information must be scrapped.
	 */
	if (!mddev->degraded)
		rdev_for_each(rdev, mddev)
			rdev->saved_raid_disk = -1;

	md_update_sb(mddev, 1);
	/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update size across cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped
				      && !test_bit(MD_CLOSING, &mddev->flags))
		md_cluster_ops->update_size(mddev, old_dev_sectors);
	wake_up(&resync_wait);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
	if (mddev->event_work.func)
		queue_work(md_misc_wq, &mddev->event_work);
}
EXPORT_SYMBOL(md_reap_sync_thread);

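/*
 * Wait (up to five seconds) for an rdev to leave the Blocked /
 * BlockedBadBlocks state, then drop the pending reference taken by
 * the caller.
 */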
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags) &&
			   !test_bit(BlockedBadBlocks, &rdev->flags),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);

void md_finish_reshape(struct mddev *mddev)
{
	/* called by personality module when reshape completes. */
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		if (rdev->data_offset > rdev->new_data_offset)
			rdev->sectors += rdev->data_offset - rdev->new_data_offset;
		else
			rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
		rdev->data_offset = rdev->new_data_offset;
	}
}
EXPORT_SYMBOL(md_finish_reshape);

/* Bad block management */

/* Returns 1 on success, 0 on failure */
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		       int is_new)
{
	struct mddev *mddev = rdev->mddev;
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
	if (rv == 0) {
		/* Make sure they get written out promptly */
		if (test_bit(ExternalBbl, &rdev->flags))
			sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(rdev->mddev->thread);
		return 1;
	} else
		return 0;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);

int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			 int is_new)
{
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_clear(&rdev->badblocks, s, sectors);
	if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
		sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
	return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);

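/*
 * Reboot notifier: stop writes and switch every running array to safe mode
 * so that superblocks are consistent across the reboot.
 */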
static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	struct mddev *mddev;
	int need_delay = 0;

	for_each_mddev(mddev, tmp) {
		if (mddev_trylock(mddev)) {
			if (mddev->pers)
				__md_stop_writes(mddev);
			if (mddev->persistent)
				mddev->safemode = 2;
			mddev_unlock(mddev);
		}
		need_delay = 1;
	}
	/*
	 * Certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
	if (need_delay)
		mdelay(1000*1);

	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
}

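/*
 * Module init: create the md workqueues, register the "md" and "mdp" block
 * majors, the reboot notifier and the sysctl table, then set up /proc/mdstat.
 */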
static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
	if (!md_rdev_misc_wq)
		goto err_rdev_misc_wq;

	ret = __register_blkdev(MD_MAJOR, "md", md_probe);
	if (ret < 0)
		goto err_md;

	ret = __register_blkdev(0, "mdp", md_probe);
	if (ret < 0)
		goto err_mdp;
	mdp_major = ret;

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_rdev_misc_wq);
err_rdev_misc_wq:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

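/*
 * For clustered MD, compare a freshly re-read superblock with our current
 * view of the array and apply any changes made by another node: resizes,
 * role changes (activated spares or newly failed devices) and reshape
 * progress.
 */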
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If size is changed in another node then we need to
	 * do resize as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each(rdev2, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the roles changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
				pr_info("md: Removing Candidate device %s because add failed\n",
					bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			} else
				clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/*
			 * The device got activated, unless a reshape is
			 * in progress.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* wakeup mddev->thread here, so array could
				 * perform resync with the new activated disk */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* device faulty
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
		ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
		if (ret)
			pr_warn("md: updating array disks failed. %d\n", ret);
	}

	/*
	 * Since mddev->delta_disks has already been updated in
	 * update_raid_disks, it is time to check reshape.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * reshape is happening in the remote node, we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = le64_to_cpu(sb->reshape_position);
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* reshape was just finished in another node. */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event count to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

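/*
 * Re-read the superblock of one rdev from disk, restoring the old in-memory
 * copy if the read fails.
 */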
static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
			__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);
	/* Pick up any recovery_offset recorded by the other node */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify_dirent_safe(mddev->sysfs_degraded);

	put_page(swapout);
	return 0;
}

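/*
 * Called when another cluster node has updated the metadata: re-read the
 * superblock of the rdev identified by "nr", apply the changes, then
 * refresh every other non-faulty rdev.
 */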
void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev = NULL, *iter;
	int err;

	/* Find the rdev with desc_nr == nr; keep rdev NULL if none matches */
	rdev_for_each_rcu(iter, mddev) {
		if (iter->desc_nr == nr) {
			rdev = iter;
			break;
		}
	}

	if (!rdev) {
		pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdev's to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

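/*
 * Import every device queued by md_autodetect_dev() and then try to
 * assemble and run the arrays they describe.
 */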
void md_autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}

#endif /* !MODULE */

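/*
 * Module exit: unregister block majors, notifier and sysctl table, wake up
 * any waiters on /proc/mdstat, export all arrays and tear down the
 * workqueues.
 */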
static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_rdev_misc_wq);
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);