#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
static struct workqueue_struct *md_rdev_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
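
/*
 * Default soft limits for resync/recovery speed, in KB/sec per device.
 * They can be overridden globally through the dev.raid.speed_limit_{min,max}
 * sysctls declared below, or per array (mddev->sync_speed_{min,max}).
 */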
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static void rdev_uninit_serial(struct md_rdev *rdev)
{
	if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
		return;

	kvfree(rdev->serial);
	rdev->serial = NULL;
}

static void rdevs_uninit_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		rdev_uninit_serial(rdev);
}

static int rdev_init_serial(struct md_rdev *rdev)
{
	int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
	struct serial_in_rdev *serial = NULL;

	if (test_bit(CollisionCheck, &rdev->flags))
		return 0;

	serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
			  GFP_KERNEL);
	if (!serial)
		return -ENOMEM;

	for (i = 0; i < serial_nums; i++) {
		struct serial_in_rdev *serial_tmp = &serial[i];

		spin_lock_init(&serial_tmp->serial_lock);
		serial_tmp->serial_rb = RB_ROOT_CACHED;
		init_waitqueue_head(&serial_tmp->serial_io_wait);
	}

	rdev->serial = serial;
	set_bit(CollisionCheck, &rdev->flags);

	return 0;
}

static int rdevs_init_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;
	int ret = 0;

	rdev_for_each(rdev, mddev) {
		ret = rdev_init_serial(rdev);
		if (ret)
			break;
	}

	if (ret && !mddev->serial_info_pool)
		rdevs_uninit_serial(mddev);

	return ret;
}

static int rdev_need_serial(struct md_rdev *rdev)
{
	return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
		rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
		test_bit(WriteMostly, &rdev->flags));
}
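
/*
 * Set up write-serialisation support for @rdev, or for every rdev in the
 * array when @rdev is NULL.  The array is temporarily suspended unless the
 * caller has already done so (@is_suspend), and a mempool of
 * struct serial_info is created on first use.
 */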
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			      bool is_suspend)
{
	int ret = 0;

	if (rdev && !rdev_need_serial(rdev) &&
	    !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (!is_suspend)
		mddev_suspend(mddev);

	if (!rdev)
		ret = rdevs_init_serial(mddev);
	else
		ret = rdev_init_serial(rdev);
	if (ret)
		goto abort;

	if (mddev->serial_info_pool == NULL) {
		mddev->serial_info_pool =
			mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
		if (!mddev->serial_info_pool) {
			rdevs_uninit_serial(mddev);
			pr_err("can't alloc memory pool for serialization\n");
		}
	}

abort:
	if (!is_suspend)
		mddev_resume(mddev);
}
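
/*
 * Tear down write-serialisation state for @rdev, or for rdevs that no
 * longer need it when @rdev is NULL.  The serial_info mempool is only
 * destroyed once no remaining rdev still uses it.
 */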
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			       bool is_suspend)
{
	if (rdev && !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (mddev->serial_info_pool) {
		struct md_rdev *temp;
		int num = 0;

		if (!is_suspend)
			mddev_suspend(mddev);
		rdev_for_each(temp, mddev) {
			if (!rdev) {
				if (!mddev->serialize_policy ||
				    !rdev_need_serial(temp))
					rdev_uninit_serial(temp);
				else
					num++;
			} else if (temp != rdev &&
				   test_bit(CollisionCheck, &temp->flags))
				num++;
		}

		if (rdev)
			rdev_uninit_serial(rdev);

		if (num)
			pr_info("The mempool could be used by other devices\n");
		else {
			mempool_destroy(mddev->serial_info_pool);
			mddev->serial_info_pool = NULL;
		}
		if (!is_suspend)
			mddev_resume(mddev);
	}
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static int start_readonly;

static bool create_on_open = true;

static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);
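
/*
 * All active md arrays are kept on the all_mddevs list, protected by
 * all_mddevs_lock.  for_each_mddev() below iterates that list while taking
 * and dropping the lock around each step and holding a temporary reference
 * on the current mddev, so the loop body may sleep or even remove arrays.
 */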
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)

static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

struct md_io {
	struct mddev *mddev;
	bio_end_io_t *orig_bi_end_io;
	void *orig_bi_private;
	struct block_device *orig_bi_bdev;
	unsigned long start_time;
};

static void md_end_io(struct bio *bio)
{
	struct md_io *md_io = bio->bi_private;
	struct mddev *mddev = md_io->mddev;

	bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);

	bio->bi_end_io = md_io->orig_bi_end_io;
	bio->bi_private = md_io->orig_bi_private;

	mempool_free(md_io, &mddev->md_io_pool);

	if (bio->bi_end_io)
		bio->bi_end_io(bio);
}

static blk_qc_t md_submit_bio(struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	blk_queue_split(&bio);

	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	if (bio->bi_end_io != md_end_io) {
		struct md_io *md_io;

		md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
		md_io->mddev = mddev;
		md_io->orig_bi_end_io = bio->bi_end_io;
		md_io->orig_bi_private = bio->bi_private;
		md_io->orig_bi_bdev = bio->bi_bdev;

		bio->bi_end_io = md_end_io;
		bio->bi_private = md_io;

		md_io->start_time = bio_start_io_acct(bio);
	}

	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	return BLK_QC_T_NONE;
}
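
/*
 * Suspend an array: block new I/O (is_suspended() makes md_handle_request()
 * wait), drain all in-flight requests, and ask the personality to quiesce.
 * Calls nest; only the first call does the work.  The caller must hold
 * reconfig_mutex and must not be the array's own management thread.
 */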
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);

	mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	memalloc_noio_restore(mddev->noio_flag);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);
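
/*
 * Generic flush handling for md: md_flush_request() queues submit_flushes(),
 * which sends an empty REQ_PREFLUSH bio to every active rdev; once they all
 * complete, md_submit_flush_data() finishes the original bio (an empty
 * barrier) or resubmits its data part with the PREFLUSH flag cleared.
 */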
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);
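
/*
 * Send an empty flush to every active device.  nr_pending is bumped twice
 * for each rdev: once to pair with the rdev_dec_pending() done below after
 * submit_bio(), and once to pair with the one in md_end_flush(), so the
 * rdev cannot disappear while its flush bio is in flight.
 */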
static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	spin_lock_irq(&mddev->lock);
	mddev->prev_flush_start = mddev->start_flush;
	mddev->flush_bio = NULL;
	spin_unlock_irq(&mddev->lock);
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0) {
		bio_endio(bio);
	} else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}
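
/*
 * Handle a bio carrying REQ_PREFLUSH.  Returns true if md has taken over
 * the bio (a flush was started, or an already-finished flush made it
 * redundant and it was completed here); returns false if the caller should
 * go on to process the bio's data, with the PREFLUSH flag already cleared.
 */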
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t req_start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);

	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_before(req_start, mddev->prev_flush_start),
			    mddev->lock);

	if (ktime_after(req_start, mddev->prev_flush_start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		if (bio->bi_iter.bi_size == 0)
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			return false;
		}
	}
	return true;
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		list_del_init(&mddev->all_mddevs);

		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);
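
/*
 * Find the mddev for a given unit number, creating and registering a new
 * one if necessary.  A unit of 0 means "allocate an unused unit", in which
 * case minors from 512 upwards are scanned for a free slot.
 */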
static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;
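
/*
 * Drop reconfig_mutex.  If a sysfs attribute group was scheduled for
 * removal (mddev->to_remove), remove it here, after the mutex has been
 * released, using sysfs_active to keep concurrent reconfiguration away;
 * removing it while still holding the mutex could deadlock against
 * attribute handlers that take the mutex themselves.
 */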
void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				if (mddev->sysfs_completed)
					sysfs_put(mddev->sysfs_completed);
				if (mddev->sysfs_degraded)
					sysfs_put(mddev->sysfs_degraded);
				mddev->sysfs_action = NULL;
				mddev->sysfs_completed = NULL;
				mddev->sysfs_degraded = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel)==0)
			return pers;
	}
	return NULL;
}

static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: %s gets error=%d\n", __func__,
		       blk_status_to_errno(bio->bi_status));
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}
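
/*
 * Asynchronously write @size bytes of @page at @sector of @rdev (metadata
 * writes).  mddev->pending_writes is incremented here and dropped by
 * super_written() on completion; md_super_wait() waits for it to drain.
 */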
void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}
1012
1013int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
1014 struct page *page, int op, int op_flags, bool metadata_op)
1015{
1016 struct bio bio;
1017 struct bio_vec bvec;
1018
1019 bio_init(&bio, &bvec, 1);
1020
1021 if (metadata_op && rdev->meta_bdev)
1022 bio_set_dev(&bio, rdev->meta_bdev);
1023 else
1024 bio_set_dev(&bio, rdev->bdev);
1025 bio.bi_opf = op | op_flags;
1026 if (metadata_op)
1027 bio.bi_iter.bi_sector = sector + rdev->sb_start;
1028 else if (rdev->mddev->reshape_position != MaxSector &&
1029 (rdev->mddev->reshape_backwards ==
1030 (sector >= rdev->mddev->reshape_position)))
1031 bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
1032 else
1033 bio.bi_iter.bi_sector = sector + rdev->data_offset;
1034 bio_add_page(&bio, page, size, 0);
1035
1036 submit_bio_wait(&bio);
1037
1038 return !bio.bi_status;
1039}
1040EXPORT_SYMBOL_GPL(sync_page_io);
1041
1042static int read_disk_sb(struct md_rdev *rdev, int size)
1043{
1044 char b[BDEVNAME_SIZE];
1045
1046 if (rdev->sb_loaded)
1047 return 0;
1048
1049 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1050 goto fail;
1051 rdev->sb_loaded = 1;
1052 return 0;
1053
1054fail:
1055 pr_err("md: disabled device %s, could not read superblock.\n",
1056 bdevname(rdev->bdev,b));
1057 return -EINVAL;
1058}
1059
1060static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1061{
1062 return sb1->set_uuid0 == sb2->set_uuid0 &&
1063 sb1->set_uuid1 == sb2->set_uuid1 &&
1064 sb1->set_uuid2 == sb2->set_uuid2 &&
1065 sb1->set_uuid3 == sb2->set_uuid3;
1066}
1067
1068static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1069{
1070 int ret;
1071 mdp_super_t *tmp1, *tmp2;
1072
1073 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1074 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1075
1076 if (!tmp1 || !tmp2) {
1077 ret = 0;
1078 goto abort;
1079 }
1080
1081 *tmp1 = *sb1;
1082 *tmp2 = *sb2;
1083
1084
1085
1086
1087 tmp1->nr_disks = 0;
1088 tmp2->nr_disks = 0;
1089
1090 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1091abort:
1092 kfree(tmp1);
1093 kfree(tmp2);
1094 return ret;
1095}
1096
1097static u32 md_csum_fold(u32 csum)
1098{
1099 csum = (csum & 0xffff) + (csum >> 16);
1100 return (csum & 0xffff) + (csum >> 16);
1101}
1102
1103static unsigned int calc_sb_csum(mdp_super_t *sb)
1104{
1105 u64 newcsum = 0;
1106 u32 *sb32 = (u32*)sb;
1107 int i;
1108 unsigned int disk_csum, csum;
1109
1110 disk_csum = sb->sb_csum;
1111 sb->sb_csum = 0;
1112
1113 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1114 newcsum += sb32[i];
1115 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1116
1117#ifdef CONFIG_ALPHA
1118
1119
1120
1121
1122
1123
1124
1125
1126 sb->sb_csum = md_csum_fold(disk_csum);
1127#else
1128 sb->sb_csum = disk_csum;
1129#endif
1130 return csum;
1131}
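
/*
 * struct super_type collects the per-format handlers for the on-disk
 * metadata: loading and validating a superblock from an rdev, writing the
 * in-core state back out (sync_super), resizing a member device, and
 * checking whether a new data offset is acceptable.
 */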
1163struct super_type {
1164 char *name;
1165 struct module *owner;
1166 int (*load_super)(struct md_rdev *rdev,
1167 struct md_rdev *refdev,
1168 int minor_version);
1169 int (*validate_super)(struct mddev *mddev,
1170 struct md_rdev *rdev);
1171 void (*sync_super)(struct mddev *mddev,
1172 struct md_rdev *rdev);
1173 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1174 sector_t num_sectors);
1175 int (*allow_new_offset)(struct md_rdev *rdev,
1176 unsigned long long new_offset);
1177};
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187int md_check_no_bitmap(struct mddev *mddev)
1188{
1189 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1190 return 0;
1191 pr_warn("%s: bitmaps are not supported for %s\n",
1192 mdname(mddev), mddev->pers->name);
1193 return 1;
1194}
1195EXPORT_SYMBOL(md_check_no_bitmap);
1196
1197
1198
1199
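
/*
 * Load and check a 0.90.0 (legacy) superblock from @rdev, comparing it
 * against @refdev if one is given.  Returns a negative errno if the
 * superblock is unusable, 1 if this device looks newer than the reference
 * (or is a usable non-spare when no reference is given), and 0 otherwise.
 */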
1200static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1201{
1202 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1203 mdp_super_t *sb;
1204 int ret;
1205 bool spare_disk = true;
1206
1207
1208
1209
1210
1211
1212
1213 rdev->sb_start = calc_dev_sboffset(rdev);
1214
1215 ret = read_disk_sb(rdev, MD_SB_BYTES);
1216 if (ret)
1217 return ret;
1218
1219 ret = -EINVAL;
1220
1221 bdevname(rdev->bdev, b);
1222 sb = page_address(rdev->sb_page);
1223
1224 if (sb->md_magic != MD_SB_MAGIC) {
1225 pr_warn("md: invalid raid superblock magic on %s\n", b);
1226 goto abort;
1227 }
1228
1229 if (sb->major_version != 0 ||
1230 sb->minor_version < 90 ||
1231 sb->minor_version > 91) {
1232 pr_warn("Bad version number %d.%d on %s\n",
1233 sb->major_version, sb->minor_version, b);
1234 goto abort;
1235 }
1236
1237 if (sb->raid_disks <= 0)
1238 goto abort;
1239
1240 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1241 pr_warn("md: invalid superblock checksum on %s\n", b);
1242 goto abort;
1243 }
1244
1245 rdev->preferred_minor = sb->md_minor;
1246 rdev->data_offset = 0;
1247 rdev->new_data_offset = 0;
1248 rdev->sb_size = MD_SB_BYTES;
1249 rdev->badblocks.shift = -1;
1250
1251 if (sb->level == LEVEL_MULTIPATH)
1252 rdev->desc_nr = -1;
1253 else
1254 rdev->desc_nr = sb->this_disk.number;
1255
1256
1257 if (sb->level == LEVEL_MULTIPATH ||
1258 (rdev->desc_nr >= 0 &&
1259 rdev->desc_nr < MD_SB_DISKS &&
1260 sb->disks[rdev->desc_nr].state &
1261 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1262 spare_disk = false;
1263
1264 if (!refdev) {
1265 if (!spare_disk)
1266 ret = 1;
1267 else
1268 ret = 0;
1269 } else {
1270 __u64 ev1, ev2;
1271 mdp_super_t *refsb = page_address(refdev->sb_page);
1272 if (!md_uuid_equal(refsb, sb)) {
1273 pr_warn("md: %s has different UUID to %s\n",
1274 b, bdevname(refdev->bdev,b2));
1275 goto abort;
1276 }
1277 if (!md_sb_equal(refsb, sb)) {
1278 pr_warn("md: %s has same UUID but different superblock to %s\n",
1279 b, bdevname(refdev->bdev, b2));
1280 goto abort;
1281 }
1282 ev1 = md_event(sb);
1283 ev2 = md_event(refsb);
1284
1285 if (!spare_disk && ev1 > ev2)
1286 ret = 1;
1287 else
1288 ret = 0;
1289 }
1290 rdev->sectors = rdev->sb_start;
1291
1292
1293
1294
1295 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1296 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1297
1298 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1299
1300 ret = -EINVAL;
1301
1302 abort:
1303 return ret;
1304}
1305
1306
1307
1308
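
/*
 * Fill in mddev (or just this rdev's role) from an already-loaded 0.90.0
 * superblock.  When the array has no raid_disks yet the array-wide fields
 * are taken from this superblock; otherwise only the rdev's state
 * (In_sync, Faulty, WriteMostly, ...) and raid_disk slot are set.
 */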
1309static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1310{
1311 mdp_disk_t *desc;
1312 mdp_super_t *sb = page_address(rdev->sb_page);
1313 __u64 ev1 = md_event(sb);
1314
1315 rdev->raid_disk = -1;
1316 clear_bit(Faulty, &rdev->flags);
1317 clear_bit(In_sync, &rdev->flags);
1318 clear_bit(Bitmap_sync, &rdev->flags);
1319 clear_bit(WriteMostly, &rdev->flags);
1320
1321 if (mddev->raid_disks == 0) {
1322 mddev->major_version = 0;
1323 mddev->minor_version = sb->minor_version;
1324 mddev->patch_version = sb->patch_version;
1325 mddev->external = 0;
1326 mddev->chunk_sectors = sb->chunk_size >> 9;
1327 mddev->ctime = sb->ctime;
1328 mddev->utime = sb->utime;
1329 mddev->level = sb->level;
1330 mddev->clevel[0] = 0;
1331 mddev->layout = sb->layout;
1332 mddev->raid_disks = sb->raid_disks;
1333 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1334 mddev->events = ev1;
1335 mddev->bitmap_info.offset = 0;
1336 mddev->bitmap_info.space = 0;
1337
1338 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1339 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1340 mddev->reshape_backwards = 0;
1341
1342 if (mddev->minor_version >= 91) {
1343 mddev->reshape_position = sb->reshape_position;
1344 mddev->delta_disks = sb->delta_disks;
1345 mddev->new_level = sb->new_level;
1346 mddev->new_layout = sb->new_layout;
1347 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1348 if (mddev->delta_disks < 0)
1349 mddev->reshape_backwards = 1;
1350 } else {
1351 mddev->reshape_position = MaxSector;
1352 mddev->delta_disks = 0;
1353 mddev->new_level = mddev->level;
1354 mddev->new_layout = mddev->layout;
1355 mddev->new_chunk_sectors = mddev->chunk_sectors;
1356 }
1357 if (mddev->level == 0)
1358 mddev->layout = -1;
1359
1360 if (sb->state & (1<<MD_SB_CLEAN))
1361 mddev->recovery_cp = MaxSector;
1362 else {
1363 if (sb->events_hi == sb->cp_events_hi &&
1364 sb->events_lo == sb->cp_events_lo) {
1365 mddev->recovery_cp = sb->recovery_cp;
1366 } else
1367 mddev->recovery_cp = 0;
1368 }
1369
1370 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1371 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1372 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1373 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1374
1375 mddev->max_disks = MD_SB_DISKS;
1376
1377 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1378 mddev->bitmap_info.file == NULL) {
1379 mddev->bitmap_info.offset =
1380 mddev->bitmap_info.default_offset;
1381 mddev->bitmap_info.space =
1382 mddev->bitmap_info.default_space;
1383 }
1384
1385 } else if (mddev->pers == NULL) {
1386
1387
1388 ++ev1;
1389 if (sb->disks[rdev->desc_nr].state & (
1390 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1391 if (ev1 < mddev->events)
1392 return -EINVAL;
1393 } else if (mddev->bitmap) {
1394
1395
1396
1397 if (ev1 < mddev->bitmap->events_cleared)
1398 return 0;
1399 if (ev1 < mddev->events)
1400 set_bit(Bitmap_sync, &rdev->flags);
1401 } else {
1402 if (ev1 < mddev->events)
1403
1404 return 0;
1405 }
1406
1407 if (mddev->level != LEVEL_MULTIPATH) {
1408 desc = sb->disks + rdev->desc_nr;
1409
1410 if (desc->state & (1<<MD_DISK_FAULTY))
1411 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1414 set_bit(In_sync, &rdev->flags);
1415 rdev->raid_disk = desc->raid_disk;
1416 rdev->saved_raid_disk = desc->raid_disk;
1417 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1418
1419
1420
1421 if (mddev->minor_version >= 91) {
1422 rdev->recovery_offset = 0;
1423 rdev->raid_disk = desc->raid_disk;
1424 }
1425 }
1426 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1427 set_bit(WriteMostly, &rdev->flags);
1428 if (desc->state & (1<<MD_DISK_FAILFAST))
1429 set_bit(FailFast, &rdev->flags);
1430 } else
1431 set_bit(In_sync, &rdev->flags);
1432 return 0;
1433}
1434
1435
1436
1437
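
/*
 * Regenerate the 0.90.0 superblock in rdev->sb_page from current mddev
 * state, including per-disk role/state entries and the nr/active/working/
 * failed/spare disk counters.
 */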
1438static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1439{
1440 mdp_super_t *sb;
1441 struct md_rdev *rdev2;
1442 int next_spare = mddev->raid_disks;
1443
1454 int i;
1455 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1456
1457 rdev->sb_size = MD_SB_BYTES;
1458
1459 sb = page_address(rdev->sb_page);
1460
1461 memset(sb, 0, sizeof(*sb));
1462
1463 sb->md_magic = MD_SB_MAGIC;
1464 sb->major_version = mddev->major_version;
1465 sb->patch_version = mddev->patch_version;
1466 sb->gvalid_words = 0;
1467 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1468 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1469 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1470 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1471
1472 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1473 sb->level = mddev->level;
1474 sb->size = mddev->dev_sectors / 2;
1475 sb->raid_disks = mddev->raid_disks;
1476 sb->md_minor = mddev->md_minor;
1477 sb->not_persistent = 0;
1478 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1479 sb->state = 0;
1480 sb->events_hi = (mddev->events>>32);
1481 sb->events_lo = (u32)mddev->events;
1482
1483 if (mddev->reshape_position == MaxSector)
1484 sb->minor_version = 90;
1485 else {
1486 sb->minor_version = 91;
1487 sb->reshape_position = mddev->reshape_position;
1488 sb->new_level = mddev->new_level;
1489 sb->delta_disks = mddev->delta_disks;
1490 sb->new_layout = mddev->new_layout;
1491 sb->new_chunk = mddev->new_chunk_sectors << 9;
1492 }
1493 mddev->minor_version = sb->minor_version;
1494 if (mddev->in_sync)
1495 {
1496 sb->recovery_cp = mddev->recovery_cp;
1497 sb->cp_events_hi = (mddev->events>>32);
1498 sb->cp_events_lo = (u32)mddev->events;
1499 if (mddev->recovery_cp == MaxSector)
1500 sb->state = (1<< MD_SB_CLEAN);
1501 } else
1502 sb->recovery_cp = 0;
1503
1504 sb->layout = mddev->layout;
1505 sb->chunk_size = mddev->chunk_sectors << 9;
1506
1507 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1508 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1509
1510 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1511 rdev_for_each(rdev2, mddev) {
1512 mdp_disk_t *d;
1513 int desc_nr;
1514 int is_active = test_bit(In_sync, &rdev2->flags);
1515
1516 if (rdev2->raid_disk >= 0 &&
1517 sb->minor_version >= 91)
1518
1519
1520
1521
1522 is_active = 1;
1523 if (rdev2->raid_disk < 0 ||
1524 test_bit(Faulty, &rdev2->flags))
1525 is_active = 0;
1526 if (is_active)
1527 desc_nr = rdev2->raid_disk;
1528 else
1529 desc_nr = next_spare++;
1530 rdev2->desc_nr = desc_nr;
1531 d = &sb->disks[rdev2->desc_nr];
1532 nr_disks++;
1533 d->number = rdev2->desc_nr;
1534 d->major = MAJOR(rdev2->bdev->bd_dev);
1535 d->minor = MINOR(rdev2->bdev->bd_dev);
1536 if (is_active)
1537 d->raid_disk = rdev2->raid_disk;
1538 else
1539 d->raid_disk = rdev2->desc_nr;
1540 if (test_bit(Faulty, &rdev2->flags))
1541 d->state = (1<<MD_DISK_FAULTY);
1542 else if (is_active) {
1543 d->state = (1<<MD_DISK_ACTIVE);
1544 if (test_bit(In_sync, &rdev2->flags))
1545 d->state |= (1<<MD_DISK_SYNC);
1546 active++;
1547 working++;
1548 } else {
1549 d->state = 0;
1550 spare++;
1551 working++;
1552 }
1553 if (test_bit(WriteMostly, &rdev2->flags))
1554 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1555 if (test_bit(FailFast, &rdev2->flags))
1556 d->state |= (1<<MD_DISK_FAILFAST);
1557 }
1558
1559 for (i=0 ; i < mddev->raid_disks ; i++) {
1560 mdp_disk_t *d = &sb->disks[i];
1561 if (d->state == 0 && d->number == 0) {
1562 d->number = i;
1563 d->raid_disk = i;
1564 d->state = (1<<MD_DISK_REMOVED);
1565 d->state |= (1<<MD_DISK_FAULTY);
1566 failed++;
1567 }
1568 }
1569 sb->nr_disks = nr_disks;
1570 sb->active_disks = active;
1571 sb->working_disks = working;
1572 sb->failed_disks = failed;
1573 sb->spare_disks = spare;
1574
1575 sb->this_disk = sb->disks[rdev->desc_nr];
1576 sb->sb_csum = calc_sb_csum(sb);
1577}
1578
1579
1580
1581
1582static unsigned long long
1583super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1584{
1585 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1586 return 0;
1587 if (rdev->mddev->bitmap_info.offset)
1588 return 0;
1589 rdev->sb_start = calc_dev_sboffset(rdev);
1590 if (!num_sectors || num_sectors > rdev->sb_start)
1591 num_sectors = rdev->sb_start;
1592
1593
1594
1595 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1596 num_sectors = (sector_t)(2ULL << 32) - 2;
1597 do {
1598 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1599 rdev->sb_page);
1600 } while (md_super_wait(rdev->mddev) < 0);
1601 return num_sectors;
1602}
1603
1604static int
1605super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1606{
1607
1608 return new_offset == 0;
1609}
1610
1611
1612
1613
1614
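
/*
 * Checksum a version-1 superblock: sum of the superblock (including the
 * per-device roles array) as little-endian 32-bit words, excluding the
 * checksum field itself, folded into 32 bits.
 */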
1615static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1616{
1617 __le32 disk_csum;
1618 u32 csum;
1619 unsigned long long newcsum;
1620 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1621 __le32 *isuper = (__le32*)sb;
1622
1623 disk_csum = sb->sb_csum;
1624 sb->sb_csum = 0;
1625 newcsum = 0;
1626 for (; size >= 4; size -= 4)
1627 newcsum += le32_to_cpu(*isuper++);
1628
1629 if (size == 2)
1630 newcsum += le16_to_cpu(*(__le16*) isuper);
1631
1632 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1633 sb->sb_csum = disk_csum;
1634 return cpu_to_le32(csum);
1635}
1636
1637static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1638{
1639 struct mdp_superblock_1 *sb;
1640 int ret;
1641 sector_t sb_start;
1642 sector_t sectors;
1643 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1644 int bmask;
1645 bool spare_disk = true;
1646
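	/*
	 * Superblock location depends on the minor version: v0 keeps it near
	 * the end of the device (4K aligned, at least 8K from the end),
	 * v1 at the very start, and v2 4K from the start of the device.
	 */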
1655 switch(minor_version) {
1656 case 0:
1657 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1658 sb_start -= 8*2;
1659 sb_start &= ~(sector_t)(4*2-1);
1660 break;
1661 case 1:
1662 sb_start = 0;
1663 break;
1664 case 2:
1665 sb_start = 8;
1666 break;
1667 default:
1668 return -EINVAL;
1669 }
1670 rdev->sb_start = sb_start;
1671
1672
1673
1674
1675 ret = read_disk_sb(rdev, 4096);
1676 if (ret) return ret;
1677
1678 sb = page_address(rdev->sb_page);
1679
1680 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1681 sb->major_version != cpu_to_le32(1) ||
1682 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1683 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1684 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1685 return -EINVAL;
1686
1687 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1688 pr_warn("md: invalid superblock checksum on %s\n",
1689 bdevname(rdev->bdev,b));
1690 return -EINVAL;
1691 }
1692 if (le64_to_cpu(sb->data_size) < 10) {
1693 pr_warn("md: data_size too small on %s\n",
1694 bdevname(rdev->bdev,b));
1695 return -EINVAL;
1696 }
1697 if (sb->pad0 ||
1698 sb->pad3[0] ||
1699 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1700
1701 return -EINVAL;
1702
1703 rdev->preferred_minor = 0xffff;
1704 rdev->data_offset = le64_to_cpu(sb->data_offset);
1705 rdev->new_data_offset = rdev->data_offset;
1706 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1707 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1708 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1709 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1710
1711 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1712 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1713 if (rdev->sb_size & bmask)
1714 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1715
1716 if (minor_version
1717 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1718 return -EINVAL;
1719 if (minor_version
1720 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1721 return -EINVAL;
1722
1723 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1724 rdev->desc_nr = -1;
1725 else
1726 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1727
1728 if (!rdev->bb_page) {
1729 rdev->bb_page = alloc_page(GFP_KERNEL);
1730 if (!rdev->bb_page)
1731 return -ENOMEM;
1732 }
1733 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1734 rdev->badblocks.count == 0) {
1735
1736
1737
1738 s32 offset;
1739 sector_t bb_sector;
1740 __le64 *bbp;
1741 int i;
1742 int sectors = le16_to_cpu(sb->bblog_size);
1743 if (sectors > (PAGE_SIZE / 512))
1744 return -EINVAL;
1745 offset = le32_to_cpu(sb->bblog_offset);
1746 if (offset == 0)
1747 return -EINVAL;
1748 bb_sector = (long long)offset;
1749 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1750 rdev->bb_page, REQ_OP_READ, 0, true))
1751 return -EIO;
1752 bbp = (__le64 *)page_address(rdev->bb_page);
1753 rdev->badblocks.shift = sb->bblog_shift;
1754 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1755 u64 bb = le64_to_cpu(*bbp);
1756 int count = bb & (0x3ff);
1757 u64 sector = bb >> 10;
1758 sector <<= sb->bblog_shift;
1759 count <<= sb->bblog_shift;
1760 if (bb + 1 == 0)
1761 break;
1762 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1763 return -EINVAL;
1764 }
1765 } else if (sb->bblog_offset != 0)
1766 rdev->badblocks.shift = 0;
1767
1768 if ((le32_to_cpu(sb->feature_map) &
1769 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1770 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1771 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1772 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1773 }
1774
1775 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1776 sb->level != 0)
1777 return -EINVAL;
1778
1779
1780 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1781 (rdev->desc_nr >= 0 &&
1782 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1783 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1784 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1785 spare_disk = false;
1786
1787 if (!refdev) {
1788 if (!spare_disk)
1789 ret = 1;
1790 else
1791 ret = 0;
1792 } else {
1793 __u64 ev1, ev2;
1794 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1795
1796 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1797 sb->level != refsb->level ||
1798 sb->layout != refsb->layout ||
1799 sb->chunksize != refsb->chunksize) {
1800 pr_warn("md: %s has strangely different superblock to %s\n",
1801 bdevname(rdev->bdev,b),
1802 bdevname(refdev->bdev,b2));
1803 return -EINVAL;
1804 }
1805 ev1 = le64_to_cpu(sb->events);
1806 ev2 = le64_to_cpu(refsb->events);
1807
1808 if (!spare_disk && ev1 > ev2)
1809 ret = 1;
1810 else
1811 ret = 0;
1812 }
1813 if (minor_version) {
1814 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1815 sectors -= rdev->data_offset;
1816 } else
1817 sectors = rdev->sb_start;
1818 if (sectors < le64_to_cpu(sb->data_size))
1819 return -EINVAL;
1820 rdev->sectors = le64_to_cpu(sb->data_size);
1821 return ret;
1822}
1823
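
/*
 * Validate a version-1 superblock against the array, mirroring
 * super_90_validate(): initialise the array-wide fields when assembling,
 * otherwise derive this rdev's role (spare, faulty, journal or data disk)
 * from the dev_roles table and the feature map.
 */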
1824static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1825{
1826 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1827 __u64 ev1 = le64_to_cpu(sb->events);
1828
1829 rdev->raid_disk = -1;
1830 clear_bit(Faulty, &rdev->flags);
1831 clear_bit(In_sync, &rdev->flags);
1832 clear_bit(Bitmap_sync, &rdev->flags);
1833 clear_bit(WriteMostly, &rdev->flags);
1834
1835 if (mddev->raid_disks == 0) {
1836 mddev->major_version = 1;
1837 mddev->patch_version = 0;
1838 mddev->external = 0;
1839 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1840 mddev->ctime = le64_to_cpu(sb->ctime);
1841 mddev->utime = le64_to_cpu(sb->utime);
1842 mddev->level = le32_to_cpu(sb->level);
1843 mddev->clevel[0] = 0;
1844 mddev->layout = le32_to_cpu(sb->layout);
1845 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1846 mddev->dev_sectors = le64_to_cpu(sb->size);
1847 mddev->events = ev1;
1848 mddev->bitmap_info.offset = 0;
1849 mddev->bitmap_info.space = 0;
1850
1851
1852
1853 mddev->bitmap_info.default_offset = 1024 >> 9;
1854 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1855 mddev->reshape_backwards = 0;
1856
1857 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1858 memcpy(mddev->uuid, sb->set_uuid, 16);
1859
1860 mddev->max_disks = (4096-256)/2;
1861
1862 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1863 mddev->bitmap_info.file == NULL) {
1864 mddev->bitmap_info.offset =
1865 (__s32)le32_to_cpu(sb->bitmap_offset);
1866
1867
1868
1869
1870
1871 if (mddev->minor_version > 0)
1872 mddev->bitmap_info.space = 0;
1873 else if (mddev->bitmap_info.offset > 0)
1874 mddev->bitmap_info.space =
1875 8 - mddev->bitmap_info.offset;
1876 else
1877 mddev->bitmap_info.space =
1878 -mddev->bitmap_info.offset;
1879 }
1880
1881 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1882 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1883 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1884 mddev->new_level = le32_to_cpu(sb->new_level);
1885 mddev->new_layout = le32_to_cpu(sb->new_layout);
1886 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1887 if (mddev->delta_disks < 0 ||
1888 (mddev->delta_disks == 0 &&
1889 (le32_to_cpu(sb->feature_map)
1890 & MD_FEATURE_RESHAPE_BACKWARDS)))
1891 mddev->reshape_backwards = 1;
1892 } else {
1893 mddev->reshape_position = MaxSector;
1894 mddev->delta_disks = 0;
1895 mddev->new_level = mddev->level;
1896 mddev->new_layout = mddev->layout;
1897 mddev->new_chunk_sectors = mddev->chunk_sectors;
1898 }
1899
1900 if (mddev->level == 0 &&
1901 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1902 mddev->layout = -1;
1903
1904 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1905 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1906
1907 if (le32_to_cpu(sb->feature_map) &
1908 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1909 if (le32_to_cpu(sb->feature_map) &
1910 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1911 return -EINVAL;
1912 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1913 (le32_to_cpu(sb->feature_map) &
1914 MD_FEATURE_MULTIPLE_PPLS))
1915 return -EINVAL;
1916 set_bit(MD_HAS_PPL, &mddev->flags);
1917 }
1918 } else if (mddev->pers == NULL) {
1919
1920
1921 ++ev1;
1922 if (rdev->desc_nr >= 0 &&
1923 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1924 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1925 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1926 if (ev1 < mddev->events)
1927 return -EINVAL;
1928 } else if (mddev->bitmap) {
1929
1930
1931
1932 if (ev1 < mddev->bitmap->events_cleared)
1933 return 0;
1934 if (ev1 < mddev->events)
1935 set_bit(Bitmap_sync, &rdev->flags);
1936 } else {
1937 if (ev1 < mddev->events)
1938
1939 return 0;
1940 }
1941 if (mddev->level != LEVEL_MULTIPATH) {
1942 int role;
1943 if (rdev->desc_nr < 0 ||
1944 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1945 role = MD_DISK_ROLE_SPARE;
1946 rdev->desc_nr = -1;
1947 } else
1948 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1949 switch(role) {
1950 case MD_DISK_ROLE_SPARE:
1951 break;
1952 case MD_DISK_ROLE_FAULTY:
1953 set_bit(Faulty, &rdev->flags);
1954 break;
1955 case MD_DISK_ROLE_JOURNAL:
1956 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1957
1958 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1959 return -EINVAL;
1960 }
1961 set_bit(Journal, &rdev->flags);
1962 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1963 rdev->raid_disk = 0;
1964 break;
1965 default:
1966 rdev->saved_raid_disk = role;
1967 if ((le32_to_cpu(sb->feature_map) &
1968 MD_FEATURE_RECOVERY_OFFSET)) {
1969 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1970 if (!(le32_to_cpu(sb->feature_map) &
1971 MD_FEATURE_RECOVERY_BITMAP))
1972 rdev->saved_raid_disk = -1;
1973 } else {
1974
1975
1976
1977
1978 if (!test_bit(MD_RECOVERY_FROZEN,
1979 &mddev->recovery))
1980 set_bit(In_sync, &rdev->flags);
1981 }
1982 rdev->raid_disk = role;
1983 break;
1984 }
1985 if (sb->devflags & WriteMostly1)
1986 set_bit(WriteMostly, &rdev->flags);
1987 if (sb->devflags & FailFast1)
1988 set_bit(FailFast, &rdev->flags);
1989 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1990 set_bit(Replacement, &rdev->flags);
1991 } else
1992 set_bit(In_sync, &rdev->flags);
1993
1994 return 0;
1995}
1996
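
/*
 * Regenerate the version-1 superblock in rdev->sb_page from mddev state,
 * including feature bits, reshape information, the bad-block log and the
 * dev_roles table, then recompute the checksum.
 */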
1997static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1998{
1999 struct mdp_superblock_1 *sb;
2000 struct md_rdev *rdev2;
2001 int max_dev, i;
2002
2003
2004 sb = page_address(rdev->sb_page);
2005
2006 sb->feature_map = 0;
2007 sb->pad0 = 0;
2008 sb->recovery_offset = cpu_to_le64(0);
2009 memset(sb->pad3, 0, sizeof(sb->pad3));
2010
2011 sb->utime = cpu_to_le64((__u64)mddev->utime);
2012 sb->events = cpu_to_le64(mddev->events);
2013 if (mddev->in_sync)
2014 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2015 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2016 sb->resync_offset = cpu_to_le64(MaxSector);
2017 else
2018 sb->resync_offset = cpu_to_le64(0);
2019
2020 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2021
2022 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2023 sb->size = cpu_to_le64(mddev->dev_sectors);
2024 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2025 sb->level = cpu_to_le32(mddev->level);
2026 sb->layout = cpu_to_le32(mddev->layout);
2027 if (test_bit(FailFast, &rdev->flags))
2028 sb->devflags |= FailFast1;
2029 else
2030 sb->devflags &= ~FailFast1;
2031
2032 if (test_bit(WriteMostly, &rdev->flags))
2033 sb->devflags |= WriteMostly1;
2034 else
2035 sb->devflags &= ~WriteMostly1;
2036 sb->data_offset = cpu_to_le64(rdev->data_offset);
2037 sb->data_size = cpu_to_le64(rdev->sectors);
2038
2039 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2040 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2041 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2042 }
2043
2044 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2045 !test_bit(In_sync, &rdev->flags)) {
2046 sb->feature_map |=
2047 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
2048 sb->recovery_offset =
2049 cpu_to_le64(rdev->recovery_offset);
2050 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2051 sb->feature_map |=
2052 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
2053 }
2054
2055 if (test_bit(Journal, &rdev->flags))
2056 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2057 if (test_bit(Replacement, &rdev->flags))
2058 sb->feature_map |=
2059 cpu_to_le32(MD_FEATURE_REPLACEMENT);
2060
2061 if (mddev->reshape_position != MaxSector) {
2062 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2063 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2064 sb->new_layout = cpu_to_le32(mddev->new_layout);
2065 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2066 sb->new_level = cpu_to_le32(mddev->new_level);
2067 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2068 if (mddev->delta_disks == 0 &&
2069 mddev->reshape_backwards)
2070 sb->feature_map
2071 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
2072 if (rdev->new_data_offset != rdev->data_offset) {
2073 sb->feature_map
2074 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
2075 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2076 - rdev->data_offset));
2077 }
2078 }
2079
2080 if (mddev_is_clustered(mddev))
2081 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2082
2083 if (rdev->badblocks.count == 0)
2084 ;
2085 else if (sb->bblog_offset == 0)
2086
2087 md_error(mddev, rdev);
2088 else {
2089 struct badblocks *bb = &rdev->badblocks;
2090 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2091 u64 *p = bb->page;
2092 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2093 if (bb->changed) {
2094 unsigned seq;
2095
2096retry:
2097 seq = read_seqbegin(&bb->lock);
2098
2099 memset(bbp, 0xff, PAGE_SIZE);
2100
2101 for (i = 0 ; i < bb->count ; i++) {
2102 u64 internal_bb = p[i];
2103 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2104 | BB_LEN(internal_bb));
2105 bbp[i] = cpu_to_le64(store_bb);
2106 }
2107 bb->changed = 0;
2108 if (read_seqretry(&bb->lock, seq))
2109 goto retry;
2110
2111 bb->sector = (rdev->sb_start +
2112 (int)le32_to_cpu(sb->bblog_offset));
2113 bb->size = le16_to_cpu(sb->bblog_size);
2114 }
2115 }
2116
2117 max_dev = 0;
2118 rdev_for_each(rdev2, mddev)
2119 if (rdev2->desc_nr+1 > max_dev)
2120 max_dev = rdev2->desc_nr+1;
2121
2122 if (max_dev > le32_to_cpu(sb->max_dev)) {
2123 int bmask;
2124 sb->max_dev = cpu_to_le32(max_dev);
2125 rdev->sb_size = max_dev * 2 + 256;
2126 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2127 if (rdev->sb_size & bmask)
2128 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2129 } else
2130 max_dev = le32_to_cpu(sb->max_dev);
2131
2132 for (i=0; i<max_dev;i++)
2133 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2134
2135 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2136 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2137
2138 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2139 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2140 sb->feature_map |=
2141 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2142 else
2143 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2144 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2145 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2146 }
2147
2148 rdev_for_each(rdev2, mddev) {
2149 i = rdev2->desc_nr;
2150 if (test_bit(Faulty, &rdev2->flags))
2151 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2152 else if (test_bit(In_sync, &rdev2->flags))
2153 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2154 else if (test_bit(Journal, &rdev2->flags))
2155 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2156 else if (rdev2->raid_disk >= 0)
2157 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2158 else
2159 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2160 }
2161
2162 sb->sb_csum = calc_sb_1_csum(sb);
2163}
2164
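
/*
 * Decide how many sectors to leave free for a bitmap between the end of
 * the data and a superblock stored at the end of the device, scaled with
 * the device size.
 */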
2165static sector_t super_1_choose_bm_space(sector_t dev_size)
2166{
2167 sector_t bm_space;
2168
2169
2170
2171
2172 if (dev_size < 64*2)
2173 bm_space = 0;
2174 else if (dev_size - 64*2 >= 200*1024*1024*2)
2175 bm_space = 128*2;
2176 else if (dev_size - 4*2 > 8*1024*1024*2)
2177 bm_space = 64*2;
2178 else
2179 bm_space = 4*2;
2180 return bm_space;
2181}
2182
2183static unsigned long long
2184super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2185{
2186 struct mdp_superblock_1 *sb;
2187 sector_t max_sectors;
2188 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2189 return 0;
2190 if (rdev->data_offset != rdev->new_data_offset)
2191 return 0;
2192 if (rdev->sb_start < rdev->data_offset) {
2193
2194 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2195 max_sectors -= rdev->data_offset;
2196 if (!num_sectors || num_sectors > max_sectors)
2197 num_sectors = max_sectors;
2198 } else if (rdev->mddev->bitmap_info.offset) {
2199
2200 return 0;
2201 } else {
2202
2203 sector_t sb_start, bm_space;
2204 sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
2205
2206
2207 sb_start = dev_size - 8*2;
2208 sb_start &= ~(sector_t)(4*2 - 1);
2209
2210 bm_space = super_1_choose_bm_space(dev_size);
2211
2212
2213
2214
2215 max_sectors = sb_start - bm_space - 4*2;
2216
2217 if (!num_sectors || num_sectors > max_sectors)
2218 num_sectors = max_sectors;
2219 }
2220 sb = page_address(rdev->sb_page);
2221 sb->data_size = cpu_to_le64(num_sectors);
2222 sb->super_offset = cpu_to_le64(rdev->sb_start);
2223 sb->sb_csum = calc_sb_1_csum(sb);
2224 do {
2225 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2226 rdev->sb_page);
2227 } while (md_super_wait(rdev->mddev) < 0);
2228 return num_sectors;
2229
2230}
2231
2232static int
2233super_1_allow_new_offset(struct md_rdev *rdev,
2234 unsigned long long new_offset)
2235{
2236
2237 struct bitmap *bitmap;
2238 if (new_offset >= rdev->data_offset)
2239 return 1;
2240
2241
2242
2243 if (rdev->mddev->minor_version == 0)
2244 return 1;
2245
2246
2247
2248
2249
2250
2251
2252 if (rdev->sb_start + (32+4)*2 > new_offset)
2253 return 0;
2254 bitmap = rdev->mddev->bitmap;
2255 if (bitmap && !rdev->mddev->bitmap_info.file &&
2256 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2257 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2258 return 0;
2259 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2260 return 0;
2261
2262 return 1;
2263}
2264
2265static struct super_type super_types[] = {
2266 [0] = {
2267 .name = "0.90.0",
2268 .owner = THIS_MODULE,
2269 .load_super = super_90_load,
2270 .validate_super = super_90_validate,
2271 .sync_super = super_90_sync,
2272 .rdev_size_change = super_90_rdev_size_change,
2273 .allow_new_offset = super_90_allow_new_offset,
2274 },
2275 [1] = {
2276 .name = "md-1",
2277 .owner = THIS_MODULE,
2278 .load_super = super_1_load,
2279 .validate_super = super_1_validate,
2280 .sync_super = super_1_sync,
2281 .rdev_size_change = super_1_rdev_size_change,
2282 .allow_new_offset = super_1_allow_new_offset,
2283 },
2284};
2285
2286static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2287{
2288 if (mddev->sync_super) {
2289 mddev->sync_super(mddev, rdev);
2290 return;
2291 }
2292
2293 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2294
2295 super_types[mddev->major_version].sync_super(mddev, rdev);
2296}
2297
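/*
 * Return 1 if the two arrays share a physical disk among their active
 * (non-faulty, non-journal) members, 0 otherwise.
 */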
2298static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2299{
2300 struct md_rdev *rdev, *rdev2;
2301
2302 rcu_read_lock();
2303 rdev_for_each_rcu(rdev, mddev1) {
2304 if (test_bit(Faulty, &rdev->flags) ||
2305 test_bit(Journal, &rdev->flags) ||
2306 rdev->raid_disk == -1)
2307 continue;
2308 rdev_for_each_rcu(rdev2, mddev2) {
2309 if (test_bit(Faulty, &rdev2->flags) ||
2310 test_bit(Journal, &rdev2->flags) ||
2311 rdev2->raid_disk == -1)
2312 continue;
2313 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2314 rcu_read_unlock();
2315 return 1;
2316 }
2317 }
2318 }
2319 rcu_read_unlock();
2320 return 0;
2321}
2322
2323static LIST_HEAD(pending_raid_disks);
2324
2325 /*
2326  * Try to register a data integrity profile for the array.
2327  *
2328  * This is a no-op unless every in-use, non-faulty member exposes an
2329  * integrity profile; mismatched profiles make registration fail.
2330  * On success the common profile is attached to the md gendisk and an
2331  * integrity bio pool is allocated. */
2332int md_integrity_register(struct mddev *mddev)
2333{
2334 struct md_rdev *rdev, *reference = NULL;
2335
2336 if (list_empty(&mddev->disks))
2337 return 0;
2338 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2339 return 0;
2340 rdev_for_each(rdev, mddev) {
2341
2342 if (test_bit(Faulty, &rdev->flags))
2343 continue;
2344 if (rdev->raid_disk < 0)
2345 continue;
2346 if (!reference) {
2347
2348 reference = rdev;
2349 continue;
2350 }
2351
2352 if (blk_integrity_compare(reference->bdev->bd_disk,
2353 rdev->bdev->bd_disk) < 0)
2354 return -EINVAL;
2355 }
2356 if (!reference || !bdev_get_integrity(reference->bdev))
2357 return 0;
2358 /*
2359  * All component devices are integrity capable and their profiles
2360  * match, so register the common profile for the md device.
2361  */
2362 blk_integrity_register(mddev->gendisk,
2363 bdev_get_integrity(reference->bdev));
2364
2365 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2366 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2367 pr_err("md: failed to create integrity pool for %s\n",
2368 mdname(mddev));
2369 return -EINVAL;
2370 }
2371 return 0;
2372}
2373EXPORT_SYMBOL(md_integrity_register);
2374
2375 /*
2376  * Attempt to add an rdev, but only if it is consistent with the
2377  * integrity profile already registered for the array.
2378  */
2379int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2380{
2381 struct blk_integrity *bi_mddev;
2382 char name[BDEVNAME_SIZE];
2383
2384 if (!mddev->gendisk)
2385 return 0;
2386
2387 bi_mddev = blk_get_integrity(mddev->gendisk);
2388
2389 if (!bi_mddev)
2390 return 0;
2391
2392 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2393 pr_err("%s: incompatible integrity profile for %s\n",
2394 mdname(mddev), bdevname(rdev->bdev, name));
2395 return -ENXIO;
2396 }
2397
2398 return 0;
2399}
2400EXPORT_SYMBOL(md_integrity_add_rdev);
2401
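/* True if the data device, or its external metadata device, is read-only. */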
2402static bool rdev_read_only(struct md_rdev *rdev)
2403{
2404 return bdev_read_only(rdev->bdev) ||
2405 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2406}
2407
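/*
 * Attach an rdev to an array: check for duplicates, validate its size
 * and desc_nr, create the sysfs entries and link it into mddev->disks
 * under RCU.
 */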
2408static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2409{
2410 char b[BDEVNAME_SIZE];
2411 int err;
2412
2413
2414 if (find_rdev(mddev, rdev->bdev->bd_dev))
2415 return -EEXIST;
2416
2417 if (rdev_read_only(rdev) && mddev->pers)
2418 return -EROFS;
2419
2420
2421 if (!test_bit(Journal, &rdev->flags) &&
2422 rdev->sectors &&
2423 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2424 if (mddev->pers) {
2425 /* Cannot change the array size once it is running, so fail.
2426  * If mddev->level <= 0 (e.g. linear) we do not care about
2427  * devices being the same size, so allow the bind anyway.
2428  */
2429 if (mddev->level > 0)
2430 return -ENOSPC;
2431 } else
2432 mddev->dev_sectors = rdev->sectors;
2433 }
2434
2435 /* Verify that rdev->desc_nr is unique.
2436  * If it is -1, assign a free number; otherwise check that the
2437  * number is not already in use.
2438  */
2439 rcu_read_lock();
2440 if (rdev->desc_nr < 0) {
2441 int choice = 0;
2442 if (mddev->pers)
2443 choice = mddev->raid_disks;
2444 while (md_find_rdev_nr_rcu(mddev, choice))
2445 choice++;
2446 rdev->desc_nr = choice;
2447 } else {
2448 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2449 rcu_read_unlock();
2450 return -EBUSY;
2451 }
2452 }
2453 rcu_read_unlock();
2454 if (!test_bit(Journal, &rdev->flags) &&
2455 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2456 pr_warn("md: %s: array is limited to %d devices\n",
2457 mdname(mddev), mddev->max_disks);
2458 return -EBUSY;
2459 }
2460 bdevname(rdev->bdev,b);
2461 strreplace(b, '/', '!');
2462
2463 rdev->mddev = mddev;
2464 pr_debug("md: bind<%s>\n", b);
2465
2466 if (mddev->raid_disks)
2467 mddev_create_serial_pool(mddev, rdev, false);
2468
2469 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2470 goto fail;
2471
2472
2473 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2474 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2475 rdev->sysfs_unack_badblocks =
2476 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2477 rdev->sysfs_badblocks =
2478 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2479
2480 list_add_rcu(&rdev->same_set, &mddev->disks);
2481 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2482
2483
2484 mddev->recovery_disabled++;
2485
2486 return 0;
2487
2488 fail:
2489 pr_warn("md: failed to register dev-%s for %s\n",
2490 b, mdname(mddev));
2491 return err;
2492}
2493
2494static void rdev_delayed_delete(struct work_struct *ws)
2495{
2496 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2497 kobject_del(&rdev->kobj);
2498 kobject_put(&rdev->kobj);
2499}
2500
2501static void unbind_rdev_from_array(struct md_rdev *rdev)
2502{
2503 char b[BDEVNAME_SIZE];
2504
2505 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2506 list_del_rcu(&rdev->same_set);
2507 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2508 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2509 rdev->mddev = NULL;
2510 sysfs_remove_link(&rdev->kobj, "block");
2511 sysfs_put(rdev->sysfs_state);
2512 sysfs_put(rdev->sysfs_unack_badblocks);
2513 sysfs_put(rdev->sysfs_badblocks);
2514 rdev->sysfs_state = NULL;
2515 rdev->sysfs_unack_badblocks = NULL;
2516 rdev->sysfs_badblocks = NULL;
2517 rdev->badblocks.count = 0;
2518
2519
2520
2521
2522 synchronize_rcu();
2523 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2524 kobject_get(&rdev->kobj);
2525 queue_work(md_rdev_misc_wq, &rdev->del_work);
2526}
2527
2528 /*
2529  * Lock an rdev's block device: claim it exclusively so it cannot be
2530  * mounted, repartitioned or otherwise reused while it belongs to an
2531  * array (a shared claim uses the address of lock_rdev itself as the
2532  * holder cookie). */
2533static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2534{
2535 int err = 0;
2536 struct block_device *bdev;
2537
2538 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2539 shared ? (struct md_rdev *)lock_rdev : rdev);
2540 if (IS_ERR(bdev)) {
2541 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2542 MAJOR(dev), MINOR(dev));
2543 return PTR_ERR(bdev);
2544 }
2545 rdev->bdev = bdev;
2546 return err;
2547}
2548
2549static void unlock_rdev(struct md_rdev *rdev)
2550{
2551 struct block_device *bdev = rdev->bdev;
2552 rdev->bdev = NULL;
2553 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2554}
2555
2556void md_autodetect_dev(dev_t dev);
2557
2558static void export_rdev(struct md_rdev *rdev)
2559{
2560 char b[BDEVNAME_SIZE];
2561
2562 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2563 md_rdev_clear(rdev);
2564#ifndef MODULE
2565 if (test_bit(AutoDetected, &rdev->flags))
2566 md_autodetect_dev(rdev->bdev->bd_dev);
2567#endif
2568 unlock_rdev(rdev);
2569 kobject_put(&rdev->kobj);
2570}
2571
2572void md_kick_rdev_from_array(struct md_rdev *rdev)
2573{
2574 unbind_rdev_from_array(rdev);
2575 export_rdev(rdev);
2576}
2577EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2578
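/* Detach and release every remaining member device, leaving the array empty. */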
2579static void export_array(struct mddev *mddev)
2580{
2581 struct md_rdev *rdev;
2582
2583 while (!list_empty(&mddev->disks)) {
2584 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2585 same_set);
2586 md_kick_rdev_from_array(rdev);
2587 }
2588 mddev->raid_disks = 0;
2589 mddev->major_version = 0;
2590}
2591
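/*
 * Try to mark the array clean once all in-flight writes have drained.
 * Called with mddev->lock held; the lock is dropped and retaken while
 * waiting for writes_pending to reach zero.  Returns the resulting
 * in_sync state.
 */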
2592static bool set_in_sync(struct mddev *mddev)
2593{
2594 lockdep_assert_held(&mddev->lock);
2595 if (!mddev->in_sync) {
2596 mddev->sync_checkers++;
2597 spin_unlock(&mddev->lock);
2598 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2599 spin_lock(&mddev->lock);
2600 if (!mddev->in_sync &&
2601 percpu_ref_is_zero(&mddev->writes_pending)) {
2602 mddev->in_sync = 1;
2603 /*
2604  * Ensure ->in_sync is visible before we clear
2605  * ->sync_checkers.
2606  */
2607 smp_mb();
2608 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2609 sysfs_notify_dirent_safe(mddev->sysfs_state);
2610 }
2611 if (--mddev->sync_checkers == 0)
2612 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2613 }
2614 if (mddev->safemode == 1)
2615 mddev->safemode = 0;
2616 return mddev->in_sync;
2617}
2618
2619static void sync_sbs(struct mddev *mddev, int nospares)
2620{
2621 /* Update each superblock (in-memory image), but
2622  * if we are allowed to, skip spares which already
2623  * have the right event counter, or have one earlier
2624  * (which would mean they aren't being marked as dirty
2625  * with the rest of the array).
2626  */
2627 struct md_rdev *rdev;
2628 rdev_for_each(rdev, mddev) {
2629 if (rdev->sb_events == mddev->events ||
2630 (nospares &&
2631 rdev->raid_disk < 0 &&
2632 rdev->sb_events+1 == mddev->events)) {
2633
2634 rdev->sb_loaded = 2;
2635 } else {
2636 sync_super(mddev, rdev);
2637 rdev->sb_loaded = 1;
2638 }
2639 }
2640}
2641
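/*
 * For clustered arrays: compare the current state against the on-disk
 * superblock to decide whether a metadata update is actually required.
 */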
2642static bool does_sb_need_changing(struct mddev *mddev)
2643{
2644 struct md_rdev *rdev;
2645 struct mdp_superblock_1 *sb;
2646 int role;
2647
2648
2649 rdev_for_each(rdev, mddev)
2650 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2651 break;
2652
2653
2654 if (!rdev)
2655 return false;
2656
2657 sb = page_address(rdev->sb_page);
2658
2659 rdev_for_each(rdev, mddev) {
2660 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2661
2662 if (role == 0xffff && rdev->raid_disk >=0 &&
2663 !test_bit(Faulty, &rdev->flags))
2664 return true;
2665
2666 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2667 return true;
2668 }
2669
2670
2671 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2672 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2673 (mddev->layout != le32_to_cpu(sb->layout)) ||
2674 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2675 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2676 return true;
2677
2678 return false;
2679}
2680
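/*
 * Write out the superblock (and bitmap superblock) of every member
 * device whose copy is stale, bumping the event count as needed and
 * retrying until the writes succeed.
 */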
2681void md_update_sb(struct mddev *mddev, int force_change)
2682{
2683 struct md_rdev *rdev;
2684 int sync_req;
2685 int nospares = 0;
2686 int any_badblocks_changed = 0;
2687 int ret = -1;
2688
2689 if (mddev->ro) {
2690 if (force_change)
2691 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2692 return;
2693 }
2694
2695repeat:
2696 if (mddev_is_clustered(mddev)) {
2697 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2698 force_change = 1;
2699 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2700 nospares = 1;
2701 ret = md_cluster_ops->metadata_update_start(mddev);
2702
2703 if (!does_sb_need_changing(mddev)) {
2704 if (ret == 0)
2705 md_cluster_ops->metadata_update_cancel(mddev);
2706 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2707 BIT(MD_SB_CHANGE_DEVS) |
2708 BIT(MD_SB_CHANGE_CLEAN));
2709 return;
2710 }
2711 }
2712
2713 /*
2714  * First make sure individual recovery_offsets are correct.
2715  * curr_resync_completed can only be used during recovery;
2716  * during reshape/resync it might refer to array addresses
2717  * rather than device addresses.
2718  */
2719 rdev_for_each(rdev, mddev) {
2720 if (rdev->raid_disk >= 0 &&
2721 mddev->delta_disks >= 0 &&
2722 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2723 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2724 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2725 !test_bit(Journal, &rdev->flags) &&
2726 !test_bit(In_sync, &rdev->flags) &&
2727 mddev->curr_resync_completed > rdev->recovery_offset)
2728 rdev->recovery_offset = mddev->curr_resync_completed;
2729
2730 }
2731 if (!mddev->persistent) {
2732 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2733 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2734 if (!mddev->external) {
2735 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2736 rdev_for_each(rdev, mddev) {
2737 if (rdev->badblocks.changed) {
2738 rdev->badblocks.changed = 0;
2739 ack_all_badblocks(&rdev->badblocks);
2740 md_error(mddev, rdev);
2741 }
2742 clear_bit(Blocked, &rdev->flags);
2743 clear_bit(BlockedBadBlocks, &rdev->flags);
2744 wake_up(&rdev->blocked_wait);
2745 }
2746 }
2747 wake_up(&mddev->sb_wait);
2748 return;
2749 }
2750
2751 spin_lock(&mddev->lock);
2752
2753 mddev->utime = ktime_get_real_seconds();
2754
2755 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2756 force_change = 1;
2757 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2758 /* just a clean<->dirty transition, possibly leave spares alone,
2759  * though if events isn't the right even/odd, we will have to do
2760  * spares after all
2761  */
2762 nospares = 1;
2763 if (force_change)
2764 nospares = 0;
2765 if (mddev->degraded)
2766 /* If the array is degraded, then skipping spares is both
2767  * dangerous and fairly pointless.
2768  * Dangerous because a device that was removed from the array
2769  * might have an event count that still looks up-to-date,
2770  * so it could be re-added without a resync.
2771  * Pointless because if there are any spares to skip,
2772  * then a recovery will happen and soon that array
2773  * won't need spares anyway, and they will stay faulty.
2774  * So never skip spares while the array is degraded. */
2775 nospares = 0;
2776
2777 sync_req = mddev->in_sync;
2778
2779
2780
2781 if (nospares
2782 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2783 && mddev->can_decrease_events
2784 && mddev->events != 1) {
2785 mddev->events--;
2786 mddev->can_decrease_events = 0;
2787 } else {
2788
2789 mddev->events ++;
2790 mddev->can_decrease_events = nospares;
2791 }
2792
2793 /*
2794  * This 64-bit counter should never wrap.
2795  * Either we are in around ~1 trillion A.C., assuming
2796  * 1 reboot per second, or we have a bug.
2797  */
2798 WARN_ON(mddev->events == 0);
2799
2800 rdev_for_each(rdev, mddev) {
2801 if (rdev->badblocks.changed)
2802 any_badblocks_changed++;
2803 if (test_bit(Faulty, &rdev->flags))
2804 set_bit(FaultRecorded, &rdev->flags);
2805 }
2806
2807 sync_sbs(mddev, nospares);
2808 spin_unlock(&mddev->lock);
2809
2810 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2811 mdname(mddev), mddev->in_sync);
2812
2813 if (mddev->queue)
2814 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2815rewrite:
2816 md_bitmap_update_sb(mddev->bitmap);
2817 rdev_for_each(rdev, mddev) {
2818 char b[BDEVNAME_SIZE];
2819
2820 if (rdev->sb_loaded != 1)
2821 continue;
2822
2823 if (!test_bit(Faulty, &rdev->flags)) {
2824 md_super_write(mddev,rdev,
2825 rdev->sb_start, rdev->sb_size,
2826 rdev->sb_page);
2827 pr_debug("md: (write) %s's sb offset: %llu\n",
2828 bdevname(rdev->bdev, b),
2829 (unsigned long long)rdev->sb_start);
2830 rdev->sb_events = mddev->events;
2831 if (rdev->badblocks.size) {
2832 md_super_write(mddev, rdev,
2833 rdev->badblocks.sector,
2834 rdev->badblocks.size << 9,
2835 rdev->bb_page);
2836 rdev->badblocks.size = 0;
2837 }
2838
2839 } else
2840 pr_debug("md: %s (skipping faulty)\n",
2841 bdevname(rdev->bdev, b));
2842
2843 if (mddev->level == LEVEL_MULTIPATH)
2844
2845 break;
2846 }
2847 if (md_super_wait(mddev) < 0)
2848 goto rewrite;
2849
2850
2851 if (mddev_is_clustered(mddev) && ret == 0)
2852 md_cluster_ops->metadata_update_finish(mddev);
2853
2854 if (mddev->in_sync != sync_req ||
2855 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2856 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2857
2858 goto repeat;
2859 wake_up(&mddev->sb_wait);
2860 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2861 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2862
2863 rdev_for_each(rdev, mddev) {
2864 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2865 clear_bit(Blocked, &rdev->flags);
2866
2867 if (any_badblocks_changed)
2868 ack_all_badblocks(&rdev->badblocks);
2869 clear_bit(BlockedBadBlocks, &rdev->flags);
2870 wake_up(&rdev->blocked_wait);
2871 }
2872}
2873EXPORT_SYMBOL(md_update_sb);
2874
2875static int add_bound_rdev(struct md_rdev *rdev)
2876{
2877 struct mddev *mddev = rdev->mddev;
2878 int err = 0;
2879 bool add_journal = test_bit(Journal, &rdev->flags);
2880
2881 if (!mddev->pers->hot_remove_disk || add_journal) {
2882 /* If the personality has no hot_remove_disk method, or this is a
2883  * journal device, the new disk must be validated against the
2884  * superblock and activated immediately via hot_add_disk.
2885  */
2886 super_types[mddev->major_version].
2887 validate_super(mddev, rdev);
2888 if (add_journal)
2889 mddev_suspend(mddev);
2890 err = mddev->pers->hot_add_disk(mddev, rdev);
2891 if (add_journal)
2892 mddev_resume(mddev);
2893 if (err) {
2894 md_kick_rdev_from_array(rdev);
2895 return err;
2896 }
2897 }
2898 sysfs_notify_dirent_safe(rdev->sysfs_state);
2899
2900 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2901 if (mddev->degraded)
2902 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2903 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2904 md_new_event(mddev);
2905 md_wakeup_thread(mddev->thread);
2906 return 0;
2907}
2908
2909
2910
2911
2912static int cmd_match(const char *cmd, const char *str)
2913{
2914 /* See if cmd, written into a sysfs file, matches
2915  * str.  They must either be the same, or cmd can
2916  * have a trailing newline.
2917  */
2918 while (*cmd && *str && *cmd == *str) {
2919 cmd++;
2920 str++;
2921 }
2922 if (*cmd == '\n')
2923 cmd++;
2924 if (*str || *cmd)
2925 return 0;
2926 return 1;
2927}
2928
2929struct rdev_sysfs_entry {
2930 struct attribute attr;
2931 ssize_t (*show)(struct md_rdev *, char *);
2932 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2933};
2934
2935static ssize_t
2936state_show(struct md_rdev *rdev, char *page)
2937{
2938 char *sep = ",";
2939 size_t len = 0;
2940 unsigned long flags = READ_ONCE(rdev->flags);
2941
2942 if (test_bit(Faulty, &flags) ||
2943 (!test_bit(ExternalBbl, &flags) &&
2944 rdev->badblocks.unacked_exist))
2945 len += sprintf(page+len, "faulty%s", sep);
2946 if (test_bit(In_sync, &flags))
2947 len += sprintf(page+len, "in_sync%s", sep);
2948 if (test_bit(Journal, &flags))
2949 len += sprintf(page+len, "journal%s", sep);
2950 if (test_bit(WriteMostly, &flags))
2951 len += sprintf(page+len, "write_mostly%s", sep);
2952 if (test_bit(Blocked, &flags) ||
2953 (rdev->badblocks.unacked_exist
2954 && !test_bit(Faulty, &flags)))
2955 len += sprintf(page+len, "blocked%s", sep);
2956 if (!test_bit(Faulty, &flags) &&
2957 !test_bit(Journal, &flags) &&
2958 !test_bit(In_sync, &flags))
2959 len += sprintf(page+len, "spare%s", sep);
2960 if (test_bit(WriteErrorSeen, &flags))
2961 len += sprintf(page+len, "write_error%s", sep);
2962 if (test_bit(WantReplacement, &flags))
2963 len += sprintf(page+len, "want_replacement%s", sep);
2964 if (test_bit(Replacement, &flags))
2965 len += sprintf(page+len, "replacement%s", sep);
2966 if (test_bit(ExternalBbl, &flags))
2967 len += sprintf(page+len, "external_bbl%s", sep);
2968 if (test_bit(FailFast, &flags))
2969 len += sprintf(page+len, "failfast%s", sep);
2970
2971 if (len)
2972 len -= strlen(sep);
2973
2974 return len+sprintf(page+len, "\n");
2975}
2976
2977static ssize_t
2978state_store(struct md_rdev *rdev, const char *buf, size_t len)
2979{
2980 /* can be set either during assembly or while active:
2981  *  faulty   - simulates an error on the device
2982  *  remove   - disconnects the device
2983  *  writemostly / -writemostly - set/clear WriteMostly
2984  *  blocked  - sets the Blocked flag
2985  *  -blocked - clears Blocked (may fail the device if unacknowledged
2986  *             bad blocks exist and there is no external handler)
2987  *  insync   - mark as in_sync (only if the device has no slot)
2988  *  -insync  - clear In_sync for a slotted device on a stopped array,
2989  *             so that it gets rebuilt based on the bitmap
2990  *  write_error / -write_error - set/clear WriteErrorSeen
2991  *  {,-}failfast - set/clear FailFast
2992  *  want_replacement, -want_replacement, replacement, -replacement,
2993  *  re-add, external_bbl, -external_bbl - see the handlers below */
2994 int err = -EINVAL;
2995 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2996 md_error(rdev->mddev, rdev);
2997 if (test_bit(Faulty, &rdev->flags))
2998 err = 0;
2999 else
3000 err = -EBUSY;
3001 } else if (cmd_match(buf, "remove")) {
3002 if (rdev->mddev->pers) {
3003 clear_bit(Blocked, &rdev->flags);
3004 remove_and_add_spares(rdev->mddev, rdev);
3005 }
3006 if (rdev->raid_disk >= 0)
3007 err = -EBUSY;
3008 else {
3009 struct mddev *mddev = rdev->mddev;
3010 err = 0;
3011 if (mddev_is_clustered(mddev))
3012 err = md_cluster_ops->remove_disk(mddev, rdev);
3013
3014 if (err == 0) {
3015 md_kick_rdev_from_array(rdev);
3016 if (mddev->pers) {
3017 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3018 md_wakeup_thread(mddev->thread);
3019 }
3020 md_new_event(mddev);
3021 }
3022 }
3023 } else if (cmd_match(buf, "writemostly")) {
3024 set_bit(WriteMostly, &rdev->flags);
3025 mddev_create_serial_pool(rdev->mddev, rdev, false);
3026 err = 0;
3027 } else if (cmd_match(buf, "-writemostly")) {
3028 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3029 clear_bit(WriteMostly, &rdev->flags);
3030 err = 0;
3031 } else if (cmd_match(buf, "blocked")) {
3032 set_bit(Blocked, &rdev->flags);
3033 err = 0;
3034 } else if (cmd_match(buf, "-blocked")) {
3035 if (!test_bit(Faulty, &rdev->flags) &&
3036 !test_bit(ExternalBbl, &rdev->flags) &&
3037 rdev->badblocks.unacked_exist) {
3038 /* The metadata handler doesn't understand bad blocks,
3039  * so we need to fail the device instead.
3040  */
3041 md_error(rdev->mddev, rdev);
3042 }
3043 clear_bit(Blocked, &rdev->flags);
3044 clear_bit(BlockedBadBlocks, &rdev->flags);
3045 wake_up(&rdev->blocked_wait);
3046 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3047 md_wakeup_thread(rdev->mddev->thread);
3048
3049 err = 0;
3050 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3051 set_bit(In_sync, &rdev->flags);
3052 err = 0;
3053 } else if (cmd_match(buf, "failfast")) {
3054 set_bit(FailFast, &rdev->flags);
3055 err = 0;
3056 } else if (cmd_match(buf, "-failfast")) {
3057 clear_bit(FailFast, &rdev->flags);
3058 err = 0;
3059 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3060 !test_bit(Journal, &rdev->flags)) {
3061 if (rdev->mddev->pers == NULL) {
3062 clear_bit(In_sync, &rdev->flags);
3063 rdev->saved_raid_disk = rdev->raid_disk;
3064 rdev->raid_disk = -1;
3065 err = 0;
3066 }
3067 } else if (cmd_match(buf, "write_error")) {
3068 set_bit(WriteErrorSeen, &rdev->flags);
3069 err = 0;
3070 } else if (cmd_match(buf, "-write_error")) {
3071 clear_bit(WriteErrorSeen, &rdev->flags);
3072 err = 0;
3073 } else if (cmd_match(buf, "want_replacement")) {
3074 /* Any non-spare device that is not a replacement can
3075  * become want_replacement at any time, but we then need to
3076  * check if recovery is needed.
3077  */
3078 if (rdev->raid_disk >= 0 &&
3079 !test_bit(Journal, &rdev->flags) &&
3080 !test_bit(Replacement, &rdev->flags))
3081 set_bit(WantReplacement, &rdev->flags);
3082 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3083 md_wakeup_thread(rdev->mddev->thread);
3084 err = 0;
3085 } else if (cmd_match(buf, "-want_replacement")) {
3086
3087
3088
3089 err = 0;
3090 clear_bit(WantReplacement, &rdev->flags);
3091 } else if (cmd_match(buf, "replacement")) {
3092 /* Can only set a device as a replacement when the array has
3093  * not yet been started.  Once running, replacement is
3094  * automatic from spares, or by assigning 'slot'.
3095  */
3096 if (rdev->mddev->pers)
3097 err = -EBUSY;
3098 else {
3099 set_bit(Replacement, &rdev->flags);
3100 err = 0;
3101 }
3102 } else if (cmd_match(buf, "-replacement")) {
3103
3104 if (rdev->mddev->pers)
3105 err = -EBUSY;
3106 else {
3107 clear_bit(Replacement, &rdev->flags);
3108 err = 0;
3109 }
3110 } else if (cmd_match(buf, "re-add")) {
3111 if (!rdev->mddev->pers)
3112 err = -EINVAL;
3113 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3114 rdev->saved_raid_disk >= 0) {
3115 /* clear_bit is performed _after_ all the devices
3116  * have their local Faulty bit cleared.  If any writes
3117  * happen in the meantime in the local node, they
3118  * will land in the local bitmap, which will be
3119  * synced by this node eventually.
3120  */
3121 if (!mddev_is_clustered(rdev->mddev) ||
3122 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3123 clear_bit(Faulty, &rdev->flags);
3124 err = add_bound_rdev(rdev);
3125 }
3126 } else
3127 err = -EBUSY;
3128 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3129 set_bit(ExternalBbl, &rdev->flags);
3130 rdev->badblocks.shift = 0;
3131 err = 0;
3132 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3133 clear_bit(ExternalBbl, &rdev->flags);
3134 err = 0;
3135 }
3136 if (!err)
3137 sysfs_notify_dirent_safe(rdev->sysfs_state);
3138 return err ? err : len;
3139}
3140static struct rdev_sysfs_entry rdev_state =
3141__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3142
3143static ssize_t
3144errors_show(struct md_rdev *rdev, char *page)
3145{
3146 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3147}
3148
3149static ssize_t
3150errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3151{
3152 unsigned int n;
3153 int rv;
3154
3155 rv = kstrtouint(buf, 10, &n);
3156 if (rv < 0)
3157 return rv;
3158 atomic_set(&rdev->corrected_errors, n);
3159 return len;
3160}
3161static struct rdev_sysfs_entry rdev_errors =
3162__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3163
3164static ssize_t
3165slot_show(struct md_rdev *rdev, char *page)
3166{
3167 if (test_bit(Journal, &rdev->flags))
3168 return sprintf(page, "journal\n");
3169 else if (rdev->raid_disk < 0)
3170 return sprintf(page, "none\n");
3171 else
3172 return sprintf(page, "%d\n", rdev->raid_disk);
3173}
3174
3175static ssize_t
3176slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3177{
3178 int slot;
3179 int err;
3180
3181 if (test_bit(Journal, &rdev->flags))
3182 return -EBUSY;
3183 if (strncmp(buf, "none", 4)==0)
3184 slot = -1;
3185 else {
3186 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3187 if (err < 0)
3188 return err;
3189 }
3190 if (rdev->mddev->pers && slot == -1) {
3191 /* Setting 'slot' on an active array requires also
3192  * updating the 'rd%d' link, and communicating
3193  * with the personality with ->hot_*_disk.
3194  * For now we only support removing
3195  * failed/spare devices.  This normally happens automatically,
3196  * but not when the metadata is externally managed.
3197  */
3198 if (rdev->raid_disk == -1)
3199 return -EEXIST;
3200
3201 if (rdev->mddev->pers->hot_remove_disk == NULL)
3202 return -EINVAL;
3203 clear_bit(Blocked, &rdev->flags);
3204 remove_and_add_spares(rdev->mddev, rdev);
3205 if (rdev->raid_disk >= 0)
3206 return -EBUSY;
3207 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3208 md_wakeup_thread(rdev->mddev->thread);
3209 } else if (rdev->mddev->pers) {
3210 /* Activating a spare .. or possibly reactivating
3211  * it if we ever get bitmaps working here.
3212  */
3213 int err;
3214
3215 if (rdev->raid_disk != -1)
3216 return -EBUSY;
3217
3218 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3219 return -EBUSY;
3220
3221 if (rdev->mddev->pers->hot_add_disk == NULL)
3222 return -EINVAL;
3223
3224 if (slot >= rdev->mddev->raid_disks &&
3225 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3226 return -ENOSPC;
3227
3228 rdev->raid_disk = slot;
3229 if (test_bit(In_sync, &rdev->flags))
3230 rdev->saved_raid_disk = slot;
3231 else
3232 rdev->saved_raid_disk = -1;
3233 clear_bit(In_sync, &rdev->flags);
3234 clear_bit(Bitmap_sync, &rdev->flags);
3235 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3236 if (err) {
3237 rdev->raid_disk = -1;
3238 return err;
3239 } else
3240 sysfs_notify_dirent_safe(rdev->sysfs_state);
3241 /* failure of sysfs_link_rdev() below is OK */;
3242 sysfs_link_rdev(rdev->mddev, rdev);
3243 /* don't wake up anyone, leave that to userspace */
3244 } else {
3245 if (slot >= rdev->mddev->raid_disks &&
3246 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3247 return -ENOSPC;
3248 rdev->raid_disk = slot;
3249
3250 clear_bit(Faulty, &rdev->flags);
3251 clear_bit(WriteMostly, &rdev->flags);
3252 set_bit(In_sync, &rdev->flags);
3253 sysfs_notify_dirent_safe(rdev->sysfs_state);
3254 }
3255 return len;
3256}
3257
3258static struct rdev_sysfs_entry rdev_slot =
3259__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3260
3261static ssize_t
3262offset_show(struct md_rdev *rdev, char *page)
3263{
3264 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3265}
3266
3267static ssize_t
3268offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3269{
3270 unsigned long long offset;
3271 if (kstrtoull(buf, 10, &offset) < 0)
3272 return -EINVAL;
3273 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3274 return -EBUSY;
3275 if (rdev->sectors && rdev->mddev->external)
3276 /* Must set offset before size, so overlap checks
3277  * can be sane */
3278 return -EBUSY;
3279 rdev->data_offset = offset;
3280 rdev->new_data_offset = offset;
3281 return len;
3282}
3283
3284static struct rdev_sysfs_entry rdev_offset =
3285__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3286
3287static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3288{
3289 return sprintf(page, "%llu\n",
3290 (unsigned long long)rdev->new_data_offset);
3291}
3292
3293static ssize_t new_offset_store(struct md_rdev *rdev,
3294 const char *buf, size_t len)
3295{
3296 unsigned long long new_offset;
3297 struct mddev *mddev = rdev->mddev;
3298
3299 if (kstrtoull(buf, 10, &new_offset) < 0)
3300 return -EINVAL;
3301
3302 if (mddev->sync_thread ||
3303 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3304 return -EBUSY;
3305 if (new_offset == rdev->data_offset)
3306
3307 ;
3308 else if (new_offset > rdev->data_offset) {
3309
3310 if (new_offset - rdev->data_offset
3311 + mddev->dev_sectors > rdev->sectors)
3312 return -E2BIG;
3313 }
3314 /* The metadata handler worries about other space details. */
3315
3316 /* Decreasing the offset is inconsistent with a backwards
3317  * reshape.
3318  */
3319 if (new_offset < rdev->data_offset &&
3320 mddev->reshape_backwards)
3321 return -EINVAL;
3322
3323 /* Increasing the offset is inconsistent with a forwards
3324  * reshape; it requires mddev->reshape_backwards to be set.
3325  */
3326 if (new_offset > rdev->data_offset &&
3327 !mddev->reshape_backwards)
3328 return -EINVAL;
3329
3330 if (mddev->pers && mddev->persistent &&
3331 !super_types[mddev->major_version]
3332 .allow_new_offset(rdev, new_offset))
3333 return -E2BIG;
3334 rdev->new_data_offset = new_offset;
3335 if (new_offset > rdev->data_offset)
3336 mddev->reshape_backwards = 1;
3337 else if (new_offset < rdev->data_offset)
3338 mddev->reshape_backwards = 0;
3339
3340 return len;
3341}
3342static struct rdev_sysfs_entry rdev_new_offset =
3343__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3344
3345static ssize_t
3346rdev_size_show(struct md_rdev *rdev, char *page)
3347{
3348 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3349}
3350
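/* Do the sector ranges [s1, s1+l1) and [s2, s2+l2) intersect? */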
3351static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3352{
3353
3354 if (s1+l1 <= s2)
3355 return 0;
3356 if (s2+l2 <= s1)
3357 return 0;
3358 return 1;
3359}
3360
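/*
 * Parse a size given in 1K blocks and convert it to 512-byte sectors,
 * rejecting values that would overflow sector_t.
 */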
3361static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3362{
3363 unsigned long long blocks;
3364 sector_t new;
3365
3366 if (kstrtoull(buf, 10, &blocks) < 0)
3367 return -EINVAL;
3368
3369 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3370 return -EINVAL;
3371
3372 new = blocks * 2;
3373 if (new != blocks * 2)
3374 return -EINVAL;
3375
3376 *sectors = new;
3377 return 0;
3378}
3379
3380static ssize_t
3381rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3382{
3383 struct mddev *my_mddev = rdev->mddev;
3384 sector_t oldsectors = rdev->sectors;
3385 sector_t sectors;
3386
3387 if (test_bit(Journal, &rdev->flags))
3388 return -EBUSY;
3389 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3390 return -EINVAL;
3391 if (rdev->data_offset != rdev->new_data_offset)
3392 return -EINVAL;
3393 if (my_mddev->pers && rdev->raid_disk >= 0) {
3394 if (my_mddev->persistent) {
3395 sectors = super_types[my_mddev->major_version].
3396 rdev_size_change(rdev, sectors);
3397 if (!sectors)
3398 return -EBUSY;
3399 } else if (!sectors)
3400 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3401 rdev->data_offset;
3402 if (!my_mddev->pers->resize)
3403
3404 return -EINVAL;
3405 }
3406 if (sectors < my_mddev->dev_sectors)
3407 return -EINVAL;
3408
3409 rdev->sectors = sectors;
3410 if (sectors > oldsectors && my_mddev->external) {
3411 /* Need to check that all other rdevs with the same
3412  * ->bdev do not overlap.  'rcu' is sufficient to walk
3413  * the rdev lists safely.
3414  * This check does not provide a hard guarantee, it
3415  * just helps avoid dangerous mistakes.
3416  */
3417 struct mddev *mddev;
3418 int overlap = 0;
3419 struct list_head *tmp;
3420
3421 rcu_read_lock();
3422 for_each_mddev(mddev, tmp) {
3423 struct md_rdev *rdev2;
3424
3425 rdev_for_each(rdev2, mddev)
3426 if (rdev->bdev == rdev2->bdev &&
3427 rdev != rdev2 &&
3428 overlaps(rdev->data_offset, rdev->sectors,
3429 rdev2->data_offset,
3430 rdev2->sectors)) {
3431 overlap = 1;
3432 break;
3433 }
3434 if (overlap) {
3435 mddev_put(mddev);
3436 break;
3437 }
3438 }
3439 rcu_read_unlock();
3440 if (overlap) {
3441 /* Someone else could have slipped in a size
3442  * change here, but doing so is just silly.
3443  * We put oldsectors back because we *know* it is
3444  * safe, and trust userspace not to race with
3445  * itself.
3446  */
3447 rdev->sectors = oldsectors;
3448 return -EBUSY;
3449 }
3450 }
3451 return len;
3452}
3453
3454static struct rdev_sysfs_entry rdev_size =
3455__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3456
3457static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3458{
3459 unsigned long long recovery_start = rdev->recovery_offset;
3460
3461 if (test_bit(In_sync, &rdev->flags) ||
3462 recovery_start == MaxSector)
3463 return sprintf(page, "none\n");
3464
3465 return sprintf(page, "%llu\n", recovery_start);
3466}
3467
3468static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3469{
3470 unsigned long long recovery_start;
3471
3472 if (cmd_match(buf, "none"))
3473 recovery_start = MaxSector;
3474 else if (kstrtoull(buf, 10, &recovery_start))
3475 return -EINVAL;
3476
3477 if (rdev->mddev->pers &&
3478 rdev->raid_disk >= 0)
3479 return -EBUSY;
3480
3481 rdev->recovery_offset = recovery_start;
3482 if (recovery_start == MaxSector)
3483 set_bit(In_sync, &rdev->flags);
3484 else
3485 clear_bit(In_sync, &rdev->flags);
3486 return len;
3487}
3488
3489static struct rdev_sysfs_entry rdev_recovery_start =
3490__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3491
3492 /* sysfs access to the bad-blocks list.
3493  * We present two files.
3494  * 'bad_blocks' lists sector numbers and lengths of ranges that
3495  *    are recorded as bad.  The list is truncated to fit within
3496  *    the one-page limit of sysfs.
3497  *    Writing "sector length" to this file adds an acknowledged
3498  *    bad block range.
3499  * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
3500  *    been acknowledged.  Writing to this file adds bad blocks
3501  *    without acknowledging them.  This is largely for testing.
3502  */
3503static ssize_t bb_show(struct md_rdev *rdev, char *page)
3504{
3505 return badblocks_show(&rdev->badblocks, page, 0);
3506}
3507static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3508{
3509 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3510
3511 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3512 wake_up(&rdev->blocked_wait);
3513 return rv;
3514}
3515static struct rdev_sysfs_entry rdev_bad_blocks =
3516__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3517
3518static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3519{
3520 return badblocks_show(&rdev->badblocks, page, 1);
3521}
3522static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3523{
3524 return badblocks_store(&rdev->badblocks, page, len, 1);
3525}
3526static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3527__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3528
3529static ssize_t
3530ppl_sector_show(struct md_rdev *rdev, char *page)
3531{
3532 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3533}
3534
3535static ssize_t
3536ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3537{
3538 unsigned long long sector;
3539
3540 if (kstrtoull(buf, 10, &sector) < 0)
3541 return -EINVAL;
3542 if (sector != (sector_t)sector)
3543 return -EINVAL;
3544
3545 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3546 rdev->raid_disk >= 0)
3547 return -EBUSY;
3548
3549 if (rdev->mddev->persistent) {
3550 if (rdev->mddev->major_version == 0)
3551 return -EINVAL;
3552 if ((sector > rdev->sb_start &&
3553 sector - rdev->sb_start > S16_MAX) ||
3554 (sector < rdev->sb_start &&
3555 rdev->sb_start - sector > -S16_MIN))
3556 return -EINVAL;
3557 rdev->ppl.offset = sector - rdev->sb_start;
3558 } else if (!rdev->mddev->external) {
3559 return -EBUSY;
3560 }
3561 rdev->ppl.sector = sector;
3562 return len;
3563}
3564
3565static struct rdev_sysfs_entry rdev_ppl_sector =
3566__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3567
3568static ssize_t
3569ppl_size_show(struct md_rdev *rdev, char *page)
3570{
3571 return sprintf(page, "%u\n", rdev->ppl.size);
3572}
3573
3574static ssize_t
3575ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3576{
3577 unsigned int size;
3578
3579 if (kstrtouint(buf, 10, &size) < 0)
3580 return -EINVAL;
3581
3582 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3583 rdev->raid_disk >= 0)
3584 return -EBUSY;
3585
3586 if (rdev->mddev->persistent) {
3587 if (rdev->mddev->major_version == 0)
3588 return -EINVAL;
3589 if (size > U16_MAX)
3590 return -EINVAL;
3591 } else if (!rdev->mddev->external) {
3592 return -EBUSY;
3593 }
3594 rdev->ppl.size = size;
3595 return len;
3596}
3597
3598static struct rdev_sysfs_entry rdev_ppl_size =
3599__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3600
3601static struct attribute *rdev_default_attrs[] = {
3602 &rdev_state.attr,
3603 &rdev_errors.attr,
3604 &rdev_slot.attr,
3605 &rdev_offset.attr,
3606 &rdev_new_offset.attr,
3607 &rdev_size.attr,
3608 &rdev_recovery_start.attr,
3609 &rdev_bad_blocks.attr,
3610 &rdev_unack_bad_blocks.attr,
3611 &rdev_ppl_sector.attr,
3612 &rdev_ppl_size.attr,
3613 NULL,
3614};
3615static ssize_t
3616rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3617{
3618 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3619 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3620
3621 if (!entry->show)
3622 return -EIO;
3623 if (!rdev->mddev)
3624 return -ENODEV;
3625 return entry->show(rdev, page);
3626}
3627
3628static ssize_t
3629rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3630 const char *page, size_t length)
3631{
3632 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3633 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3634 ssize_t rv;
3635 struct mddev *mddev = rdev->mddev;
3636
3637 if (!entry->store)
3638 return -EIO;
3639 if (!capable(CAP_SYS_ADMIN))
3640 return -EACCES;
3641 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3642 if (!rv) {
3643 if (rdev->mddev == NULL)
3644 rv = -ENODEV;
3645 else
3646 rv = entry->store(rdev, page, length);
3647 mddev_unlock(mddev);
3648 }
3649 return rv;
3650}
3651
3652static void rdev_free(struct kobject *ko)
3653{
3654 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3655 kfree(rdev);
3656}
3657static const struct sysfs_ops rdev_sysfs_ops = {
3658 .show = rdev_attr_show,
3659 .store = rdev_attr_store,
3660};
3661static struct kobj_type rdev_ktype = {
3662 .release = rdev_free,
3663 .sysfs_ops = &rdev_sysfs_ops,
3664 .default_attrs = rdev_default_attrs,
3665};
3666
3667int md_rdev_init(struct md_rdev *rdev)
3668{
3669 rdev->desc_nr = -1;
3670 rdev->saved_raid_disk = -1;
3671 rdev->raid_disk = -1;
3672 rdev->flags = 0;
3673 rdev->data_offset = 0;
3674 rdev->new_data_offset = 0;
3675 rdev->sb_events = 0;
3676 rdev->last_read_error = 0;
3677 rdev->sb_loaded = 0;
3678 rdev->bb_page = NULL;
3679 atomic_set(&rdev->nr_pending, 0);
3680 atomic_set(&rdev->read_errors, 0);
3681 atomic_set(&rdev->corrected_errors, 0);
3682
3683 INIT_LIST_HEAD(&rdev->same_set);
3684 init_waitqueue_head(&rdev->blocked_wait);
3685
3686 /*
3687  * Reserve space for the bad-block list.  This is done even on
3688  * arrays that will never use it.
3689  */
3690 return badblocks_init(&rdev->badblocks, 0);
3691}
3692EXPORT_SYMBOL_GPL(md_rdev_init);
3693
3694 /*
3695  * Import a device.  If 'super_format' >= 0, sanity check the
3696  * superblock for that metadata version.
3697  *
3698  * The import fails if:
3699  *   - the device is nonexistent (zero size), or
3700  *   - the device has no valid superblock.
3701  * On failure an ERR_PTR() is returned and nothing is kept.
3702  */
3703static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3704{
3705 char b[BDEVNAME_SIZE];
3706 int err;
3707 struct md_rdev *rdev;
3708 sector_t size;
3709
3710 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3711 if (!rdev)
3712 return ERR_PTR(-ENOMEM);
3713
3714 err = md_rdev_init(rdev);
3715 if (err)
3716 goto abort_free;
3717 err = alloc_disk_sb(rdev);
3718 if (err)
3719 goto abort_free;
3720
3721 err = lock_rdev(rdev, newdev, super_format == -2);
3722 if (err)
3723 goto abort_free;
3724
3725 kobject_init(&rdev->kobj, &rdev_ktype);
3726
3727 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3728 if (!size) {
3729 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3730 bdevname(rdev->bdev,b));
3731 err = -EINVAL;
3732 goto abort_free;
3733 }
3734
3735 if (super_format >= 0) {
3736 err = super_types[super_format].
3737 load_super(rdev, NULL, super_minor);
3738 if (err == -EINVAL) {
3739 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3740 bdevname(rdev->bdev,b),
3741 super_format, super_minor);
3742 goto abort_free;
3743 }
3744 if (err < 0) {
3745 pr_warn("md: could not read %s's sb, not importing!\n",
3746 bdevname(rdev->bdev,b));
3747 goto abort_free;
3748 }
3749 }
3750
3751 return rdev;
3752
3753abort_free:
3754 if (rdev->bdev)
3755 unlock_rdev(rdev);
3756 md_rdev_clear(rdev);
3757 kfree(rdev);
3758 return ERR_PTR(err);
3759}
3760
3761 /*
3762  * Check a full RAID array for plausibility: load every member's
3763  * superblock, pick the freshest one, and kick out stale devices.
3764  */
3765static int analyze_sbs(struct mddev *mddev)
3766{
3767 int i;
3768 struct md_rdev *rdev, *freshest, *tmp;
3769 char b[BDEVNAME_SIZE];
3770
3771 freshest = NULL;
3772 rdev_for_each_safe(rdev, tmp, mddev)
3773 switch (super_types[mddev->major_version].
3774 load_super(rdev, freshest, mddev->minor_version)) {
3775 case 1:
3776 freshest = rdev;
3777 break;
3778 case 0:
3779 break;
3780 default:
3781 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3782 bdevname(rdev->bdev,b));
3783 md_kick_rdev_from_array(rdev);
3784 }
3785
3786
3787 if (!freshest) {
3788 pr_warn("md: cannot find a valid disk\n");
3789 return -EINVAL;
3790 }
3791
3792 super_types[mddev->major_version].
3793 validate_super(mddev, freshest);
3794
3795 i = 0;
3796 rdev_for_each_safe(rdev, tmp, mddev) {
3797 if (mddev->max_disks &&
3798 (rdev->desc_nr >= mddev->max_disks ||
3799 i > mddev->max_disks)) {
3800 pr_warn("md: %s: %s: only %d devices permitted\n",
3801 mdname(mddev), bdevname(rdev->bdev, b),
3802 mddev->max_disks);
3803 md_kick_rdev_from_array(rdev);
3804 continue;
3805 }
3806 if (rdev != freshest) {
3807 if (super_types[mddev->major_version].
3808 validate_super(mddev, rdev)) {
3809 pr_warn("md: kicking non-fresh %s from array!\n",
3810 bdevname(rdev->bdev,b));
3811 md_kick_rdev_from_array(rdev);
3812 continue;
3813 }
3814 }
3815 if (mddev->level == LEVEL_MULTIPATH) {
3816 rdev->desc_nr = i++;
3817 rdev->raid_disk = rdev->desc_nr;
3818 set_bit(In_sync, &rdev->flags);
3819 } else if (rdev->raid_disk >=
3820 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3821 !test_bit(Journal, &rdev->flags)) {
3822 rdev->raid_disk = -1;
3823 clear_bit(In_sync, &rdev->flags);
3824 }
3825 }
3826
3827 return 0;
3828}
3829
3830 /* Read a fixed-point number.
3831  * Numbers in sysfs attributes should be in "standard" units where
3832  * possible, so time should be in seconds.
3833  * However we internally use a much smaller unit such as
3834  * milliseconds or jiffies.
3835  * This function takes a decimal number with a possible fractional
3836  * component, and produces an integer which is the result of
3837  * multiplying that number by 10^'scale'.
3838  * All without any floating-point arithmetic.
3839  */
3840int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3841{
3842 unsigned long result = 0;
3843 long decimals = -1;
3844 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3845 if (*cp == '.')
3846 decimals = 0;
3847 else if (decimals < scale) {
3848 unsigned int value;
3849 value = *cp - '0';
3850 result = result * 10 + value;
3851 if (decimals >= 0)
3852 decimals++;
3853 }
3854 cp++;
3855 }
3856 if (*cp == '\n')
3857 cp++;
3858 if (*cp)
3859 return -EINVAL;
3860 if (decimals < 0)
3861 decimals = 0;
3862 *res = result * int_pow(10, scale - decimals);
3863 return 0;
3864}
3865
3866static ssize_t
3867safe_delay_show(struct mddev *mddev, char *page)
3868{
3869 int msec = (mddev->safemode_delay*1000)/HZ;
3870 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3871}
3872static ssize_t
3873safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3874{
3875 unsigned long msec;
3876
3877 if (mddev_is_clustered(mddev)) {
3878 pr_warn("md: Safemode is disabled for clustered mode\n");
3879 return -EINVAL;
3880 }
3881
3882 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3883 return -EINVAL;
3884 if (msec == 0)
3885 mddev->safemode_delay = 0;
3886 else {
3887 unsigned long old_delay = mddev->safemode_delay;
3888 unsigned long new_delay = (msec*HZ)/1000;
3889
3890 if (new_delay == 0)
3891 new_delay = 1;
3892 mddev->safemode_delay = new_delay;
3893 if (new_delay < old_delay || old_delay == 0)
3894 mod_timer(&mddev->safemode_timer, jiffies+1);
3895 }
3896 return len;
3897}
3898static struct md_sysfs_entry md_safe_delay =
3899__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3900
3901static ssize_t
3902level_show(struct mddev *mddev, char *page)
3903{
3904 struct md_personality *p;
3905 int ret;
3906 spin_lock(&mddev->lock);
3907 p = mddev->pers;
3908 if (p)
3909 ret = sprintf(page, "%s\n", p->name);
3910 else if (mddev->clevel[0])
3911 ret = sprintf(page, "%s\n", mddev->clevel);
3912 else if (mddev->level != LEVEL_NONE)
3913 ret = sprintf(page, "%d\n", mddev->level);
3914 else
3915 ret = 0;
3916 spin_unlock(&mddev->lock);
3917 return ret;
3918}
3919
3920static ssize_t
3921level_store(struct mddev *mddev, const char *buf, size_t len)
3922{
3923 char clevel[16];
3924 ssize_t rv;
3925 size_t slen = len;
3926 struct md_personality *pers, *oldpers;
3927 long level;
3928 void *priv, *oldpriv;
3929 struct md_rdev *rdev;
3930
3931 if (slen == 0 || slen >= sizeof(clevel))
3932 return -EINVAL;
3933
3934 rv = mddev_lock(mddev);
3935 if (rv)
3936 return rv;
3937
3938 if (mddev->pers == NULL) {
3939 strncpy(mddev->clevel, buf, slen);
3940 if (mddev->clevel[slen-1] == '\n')
3941 slen--;
3942 mddev->clevel[slen] = 0;
3943 mddev->level = LEVEL_NONE;
3944 rv = len;
3945 goto out_unlock;
3946 }
3947 rv = -EROFS;
3948 if (mddev->ro)
3949 goto out_unlock;
3950
3951 /* Request to change the personality.  Need to ensure:
3952  *  - the array is not engaged in resync/recovery/reshape,
3953  *  - nothing else is reconfiguring it via sysfs,
3954  *  - the old personality can be suspended, and
3955  *  - the new personality is willing to take the array over.
3956  */
3957 rv = -EBUSY;
3958 if (mddev->sync_thread ||
3959 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3960 mddev->reshape_position != MaxSector ||
3961 mddev->sysfs_active)
3962 goto out_unlock;
3963
3964 rv = -EINVAL;
3965 if (!mddev->pers->quiesce) {
3966 pr_warn("md: %s: %s does not support online personality change\n",
3967 mdname(mddev), mddev->pers->name);
3968 goto out_unlock;
3969 }
3970
3971
3972 strncpy(clevel, buf, slen);
3973 if (clevel[slen-1] == '\n')
3974 slen--;
3975 clevel[slen] = 0;
3976 if (kstrtol(clevel, 10, &level))
3977 level = LEVEL_NONE;
3978
3979 if (request_module("md-%s", clevel) != 0)
3980 request_module("md-level-%s", clevel);
3981 spin_lock(&pers_lock);
3982 pers = find_pers(level, clevel);
3983 if (!pers || !try_module_get(pers->owner)) {
3984 spin_unlock(&pers_lock);
3985 pr_warn("md: personality %s not loaded\n", clevel);
3986 rv = -EINVAL;
3987 goto out_unlock;
3988 }
3989 spin_unlock(&pers_lock);
3990
3991 if (pers == mddev->pers) {
3992
3993 module_put(pers->owner);
3994 rv = len;
3995 goto out_unlock;
3996 }
3997 if (!pers->takeover) {
3998 module_put(pers->owner);
3999 pr_warn("md: %s: %s does not support personality takeover\n",
4000 mdname(mddev), clevel);
4001 rv = -EINVAL;
4002 goto out_unlock;
4003 }
4004
4005 rdev_for_each(rdev, mddev)
4006 rdev->new_raid_disk = rdev->raid_disk;
4007
4008 /* ->takeover must set new_* and/or delta_disks
4009  * if it succeeds, and may set them when it fails.
4010  */
4011 priv = pers->takeover(mddev);
4012 if (IS_ERR(priv)) {
4013 mddev->new_level = mddev->level;
4014 mddev->new_layout = mddev->layout;
4015 mddev->new_chunk_sectors = mddev->chunk_sectors;
4016 mddev->raid_disks -= mddev->delta_disks;
4017 mddev->delta_disks = 0;
4018 mddev->reshape_backwards = 0;
4019 module_put(pers->owner);
4020 pr_warn("md: %s: %s would not accept array\n",
4021 mdname(mddev), clevel);
4022 rv = PTR_ERR(priv);
4023 goto out_unlock;
4024 }
4025
4026
4027 mddev_suspend(mddev);
4028 mddev_detach(mddev);
4029
4030 spin_lock(&mddev->lock);
4031 oldpers = mddev->pers;
4032 oldpriv = mddev->private;
4033 mddev->pers = pers;
4034 mddev->private = priv;
4035 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4036 mddev->level = mddev->new_level;
4037 mddev->layout = mddev->new_layout;
4038 mddev->chunk_sectors = mddev->new_chunk_sectors;
4039 mddev->delta_disks = 0;
4040 mddev->reshape_backwards = 0;
4041 mddev->degraded = 0;
4042 spin_unlock(&mddev->lock);
4043
4044 if (oldpers->sync_request == NULL &&
4045 mddev->external) {
4046 /* We are converting from a no-redundancy array
4047  * to a redundancy array and metadata is managed
4048  * externally, so we need to be sure that writes
4049  * won't block due to a need to update the metadata
4050  * outside the kernel.
4051  */
4052
4053 mddev->in_sync = 0;
4054 mddev->safemode_delay = 0;
4055 mddev->safemode = 0;
4056 }
4057
4058 oldpers->free(mddev, oldpriv);
4059
4060 if (oldpers->sync_request == NULL &&
4061 pers->sync_request != NULL) {
4062
4063 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4064 pr_warn("md: cannot register extra attributes for %s\n",
4065 mdname(mddev));
4066 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4067 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4068 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4069 }
4070 if (oldpers->sync_request != NULL &&
4071 pers->sync_request == NULL) {
4072
4073 if (mddev->to_remove == NULL)
4074 mddev->to_remove = &md_redundancy_group;
4075 }
4076
4077 module_put(oldpers->owner);
4078
4079 rdev_for_each(rdev, mddev) {
4080 if (rdev->raid_disk < 0)
4081 continue;
4082 if (rdev->new_raid_disk >= mddev->raid_disks)
4083 rdev->new_raid_disk = -1;
4084 if (rdev->new_raid_disk == rdev->raid_disk)
4085 continue;
4086 sysfs_unlink_rdev(mddev, rdev);
4087 }
4088 rdev_for_each(rdev, mddev) {
4089 if (rdev->raid_disk < 0)
4090 continue;
4091 if (rdev->new_raid_disk == rdev->raid_disk)
4092 continue;
4093 rdev->raid_disk = rdev->new_raid_disk;
4094 if (rdev->raid_disk < 0)
4095 clear_bit(In_sync, &rdev->flags);
4096 else {
4097 if (sysfs_link_rdev(mddev, rdev))
4098 pr_warn("md: cannot register rd%d for %s after level change\n",
4099 rdev->raid_disk, mdname(mddev));
4100 }
4101 }
4102
4103 if (pers->sync_request == NULL) {
4104 /* this is now an array without redundancy, so
4105  * it must always be in_sync
4106  */
4107 mddev->in_sync = 1;
4108 del_timer_sync(&mddev->safemode_timer);
4109 }
4110 blk_set_stacking_limits(&mddev->queue->limits);
4111 pers->run(mddev);
4112 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4113 mddev_resume(mddev);
4114 if (!mddev->thread)
4115 md_update_sb(mddev, 1);
4116 sysfs_notify_dirent_safe(mddev->sysfs_level);
4117 md_new_event(mddev);
4118 rv = len;
4119out_unlock:
4120 mddev_unlock(mddev);
4121 return rv;
4122}
4123
4124static struct md_sysfs_entry md_level =
4125__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4126
4127static ssize_t
4128layout_show(struct mddev *mddev, char *page)
4129{
4130
4131 if (mddev->reshape_position != MaxSector &&
4132 mddev->layout != mddev->new_layout)
4133 return sprintf(page, "%d (%d)\n",
4134 mddev->new_layout, mddev->layout);
4135 return sprintf(page, "%d\n", mddev->layout);
4136}
4137
4138static ssize_t
4139layout_store(struct mddev *mddev, const char *buf, size_t len)
4140{
4141 unsigned int n;
4142 int err;
4143
4144 err = kstrtouint(buf, 10, &n);
4145 if (err < 0)
4146 return err;
4147 err = mddev_lock(mddev);
4148 if (err)
4149 return err;
4150
4151 if (mddev->pers) {
4152 if (mddev->pers->check_reshape == NULL)
4153 err = -EBUSY;
4154 else if (mddev->ro)
4155 err = -EROFS;
4156 else {
4157 mddev->new_layout = n;
4158 err = mddev->pers->check_reshape(mddev);
4159 if (err)
4160 mddev->new_layout = mddev->layout;
4161 }
4162 } else {
4163 mddev->new_layout = n;
4164 if (mddev->reshape_position == MaxSector)
4165 mddev->layout = n;
4166 }
4167 mddev_unlock(mddev);
4168 return err ?: len;
4169}
4170static struct md_sysfs_entry md_layout =
4171__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4172
4173static ssize_t
4174raid_disks_show(struct mddev *mddev, char *page)
4175{
4176 if (mddev->raid_disks == 0)
4177 return 0;
4178 if (mddev->reshape_position != MaxSector &&
4179 mddev->delta_disks != 0)
4180 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4181 mddev->raid_disks - mddev->delta_disks);
4182 return sprintf(page, "%d\n", mddev->raid_disks);
4183}
4184
4185static int update_raid_disks(struct mddev *mddev, int raid_disks);
4186
4187static ssize_t
4188raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4189{
4190 unsigned int n;
4191 int err;
4192
4193 err = kstrtouint(buf, 10, &n);
4194 if (err < 0)
4195 return err;
4196
4197 err = mddev_lock(mddev);
4198 if (err)
4199 return err;
4200 if (mddev->pers)
4201 err = update_raid_disks(mddev, n);
4202 else if (mddev->reshape_position != MaxSector) {
4203 struct md_rdev *rdev;
4204 int olddisks = mddev->raid_disks - mddev->delta_disks;
4205
4206 err = -EINVAL;
4207 rdev_for_each(rdev, mddev) {
4208 if (olddisks < n &&
4209 rdev->data_offset < rdev->new_data_offset)
4210 goto out_unlock;
4211 if (olddisks > n &&
4212 rdev->data_offset > rdev->new_data_offset)
4213 goto out_unlock;
4214 }
4215 err = 0;
4216 mddev->delta_disks = n - olddisks;
4217 mddev->raid_disks = n;
4218 mddev->reshape_backwards = (mddev->delta_disks < 0);
4219 } else
4220 mddev->raid_disks = n;
4221out_unlock:
4222 mddev_unlock(mddev);
4223 return err ? err : len;
4224}
4225static struct md_sysfs_entry md_raid_disks =
4226__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4227
4228static ssize_t
4229uuid_show(struct mddev *mddev, char *page)
4230{
4231 return sprintf(page, "%pU\n", mddev->uuid);
4232}
4233static struct md_sysfs_entry md_uuid =
4234__ATTR(uuid, S_IRUGO, uuid_show, NULL);
4235
4236static ssize_t
4237chunk_size_show(struct mddev *mddev, char *page)
4238{
4239 if (mddev->reshape_position != MaxSector &&
4240 mddev->chunk_sectors != mddev->new_chunk_sectors)
4241 return sprintf(page, "%d (%d)\n",
4242 mddev->new_chunk_sectors << 9,
4243 mddev->chunk_sectors << 9);
4244 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4245}
4246
4247static ssize_t
4248chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4249{
4250 unsigned long n;
4251 int err;
4252
4253 err = kstrtoul(buf, 10, &n);
4254 if (err < 0)
4255 return err;
4256
4257 err = mddev_lock(mddev);
4258 if (err)
4259 return err;
4260 if (mddev->pers) {
4261 if (mddev->pers->check_reshape == NULL)
4262 err = -EBUSY;
4263 else if (mddev->ro)
4264 err = -EROFS;
4265 else {
4266 mddev->new_chunk_sectors = n >> 9;
4267 err = mddev->pers->check_reshape(mddev);
4268 if (err)
4269 mddev->new_chunk_sectors = mddev->chunk_sectors;
4270 }
4271 } else {
4272 mddev->new_chunk_sectors = n >> 9;
4273 if (mddev->reshape_position == MaxSector)
4274 mddev->chunk_sectors = n >> 9;
4275 }
4276 mddev_unlock(mddev);
4277 return err ?: len;
4278}
4279static struct md_sysfs_entry md_chunk_size =
4280__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4281
4282static ssize_t
4283resync_start_show(struct mddev *mddev, char *page)
4284{
4285 if (mddev->recovery_cp == MaxSector)
4286 return sprintf(page, "none\n");
4287 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4288}
4289
4290static ssize_t
4291resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4292{
4293 unsigned long long n;
4294 int err;
4295
4296 if (cmd_match(buf, "none"))
4297 n = MaxSector;
4298 else {
4299 err = kstrtoull(buf, 10, &n);
4300 if (err < 0)
4301 return err;
4302 if (n != (sector_t)n)
4303 return -EINVAL;
4304 }
4305
4306 err = mddev_lock(mddev);
4307 if (err)
4308 return err;
4309 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4310 err = -EBUSY;
4311
4312 if (!err) {
4313 mddev->recovery_cp = n;
4314 if (mddev->pers)
4315 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4316 }
4317 mddev_unlock(mddev);
4318 return err ?: len;
4319}
4320static struct md_sysfs_entry md_resync_start =
4321__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4322 resync_start_show, resync_start_store);
4323
4324 /*
4325  * The array state can be:
4326  *
4327  * clear
4328  *     No devices, no size, no level
4329  *     Equivalent to STOP_ARRAY ioctl
4330  * inactive
4331  *     May have some settings, but the array is not active;
4332  *     all IO results in error.
4333  *     When written, doesn't tear down the array, just stops it.
4334  * suspended (not supported yet)
4335  *     All IO requests will block.  The array can be reconfigured.
4336  *     Writing this, if accepted, will block until the array is quiescent.
4337  * readonly
4338  *     no resync can happen, no superblocks get written,
4339  *     write requests fail
4340  * read-auto
4341  *     like readonly, but behaves like 'clean' on a write request.
4342  *
4343  * clean - no pending writes, but otherwise active.
4344  *     When written to an inactive array, starts without resync.
4345  *     If a write request arrives then
4346  *       if metadata is known, mark 'dirty' and switch to 'active';
4347  *       if not known, block and switch to write-pending.
4348  *     If written to an active array that has pending writes, it fails.
4349  * active
4350  *     fully active: IO and resync can be happening.
4351  *     When written to an inactive array, starts with resync.
4352  *
4353  * write-pending
4354  *     clean, but writes are blocked waiting for 'active' to be written.
4355  *
4356  * active-idle
4357  *     like active, but no writes have been seen for a while (safemode).
4358  *
4359  * broken
4360  *     Array is failed.  Useful because mounted RAID0/LINEAR arrays
4361  *     aren't stopped when a member disappears, so this state at least
4362  *     alerts the user that something is wrong.
4363  */
4364
4365enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4366 write_pending, active_idle, broken, bad_word};
4367static char *array_states[] = {
4368 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4369 "write-pending", "active-idle", "broken", NULL };
4370
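/*
 * Return the index of the first entry in 'list' that matches 'word',
 * or the length of the list if nothing matches.
 */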
4371static int match_word(const char *word, char **list)
4372{
4373 int n;
4374 for (n=0; list[n]; n++)
4375 if (cmd_match(word, list[n]))
4376 break;
4377 return n;
4378}
4379
4380static ssize_t
4381array_state_show(struct mddev *mddev, char *page)
4382{
4383 enum array_state st = inactive;
4384
4385 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4386 switch(mddev->ro) {
4387 case 1:
4388 st = readonly;
4389 break;
4390 case 2:
4391 st = read_auto;
4392 break;
4393 case 0:
4394 spin_lock(&mddev->lock);
4395 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4396 st = write_pending;
4397 else if (mddev->in_sync)
4398 st = clean;
4399 else if (mddev->safemode)
4400 st = active_idle;
4401 else
4402 st = active;
4403 spin_unlock(&mddev->lock);
4404 }
4405
4406 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4407 st = broken;
4408 } else {
4409 if (list_empty(&mddev->disks) &&
4410 mddev->raid_disks == 0 &&
4411 mddev->dev_sectors == 0)
4412 st = clear;
4413 else
4414 st = inactive;
4415 }
4416 return sprintf(page, "%s\n", array_states[st]);
4417}
4418
4419static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4420static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4421static int restart_array(struct mddev *mddev);
4422
4423static ssize_t
4424array_state_store(struct mddev *mddev, const char *buf, size_t len)
4425{
4426 int err = 0;
4427 enum array_state st = match_word(buf, array_states);
4428
4429 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4430 /* don't take reconfig_mutex when toggling between
4431  * clean and active
4432  */
4433 spin_lock(&mddev->lock);
4434 if (st == active) {
4435 restart_array(mddev);
4436 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4437 md_wakeup_thread(mddev->thread);
4438 wake_up(&mddev->sb_wait);
4439 } else {
4440 restart_array(mddev);
4441 if (!set_in_sync(mddev))
4442 err = -EBUSY;
4443 }
4444 if (!err)
4445 sysfs_notify_dirent_safe(mddev->sysfs_state);
4446 spin_unlock(&mddev->lock);
4447 return err ?: len;
4448 }
4449 err = mddev_lock(mddev);
4450 if (err)
4451 return err;
4452 err = -EINVAL;
4453 switch(st) {
4454 case bad_word:
4455 break;
4456 case clear:
4457
4458 err = do_md_stop(mddev, 0, NULL);
4459 break;
4460 case inactive:
4461
4462 if (mddev->pers)
4463 err = do_md_stop(mddev, 2, NULL);
4464 else
4465 err = 0;
4466 break;
4467 case suspended:
4468 break;
4469 case readonly:
4470 if (mddev->pers)
4471 err = md_set_readonly(mddev, NULL);
4472 else {
4473 mddev->ro = 1;
4474 set_disk_ro(mddev->gendisk, 1);
4475 err = do_md_run(mddev);
4476 }
4477 break;
4478 case read_auto:
4479 if (mddev->pers) {
4480 if (mddev->ro == 0)
4481 err = md_set_readonly(mddev, NULL);
4482 else if (mddev->ro == 1)
4483 err = restart_array(mddev);
4484 if (err == 0) {
4485 mddev->ro = 2;
4486 set_disk_ro(mddev->gendisk, 0);
4487 }
4488 } else {
4489 mddev->ro = 2;
4490 err = do_md_run(mddev);
4491 }
4492 break;
4493 case clean:
4494 if (mddev->pers) {
4495 err = restart_array(mddev);
4496 if (err)
4497 break;
4498 spin_lock(&mddev->lock);
4499 if (!set_in_sync(mddev))
4500 err = -EBUSY;
4501 spin_unlock(&mddev->lock);
4502 } else
4503 err = -EINVAL;
4504 break;
4505 case active:
4506 if (mddev->pers) {
4507 err = restart_array(mddev);
4508 if (err)
4509 break;
4510 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4511 wake_up(&mddev->sb_wait);
4512 err = 0;
4513 } else {
4514 mddev->ro = 0;
4515 set_disk_ro(mddev->gendisk, 0);
4516 err = do_md_run(mddev);
4517 }
4518 break;
4519 case write_pending:
4520 case active_idle:
4521 case broken:
4522
4523 break;
4524 }
4525
4526 if (!err) {
4527 if (mddev->hold_active == UNTIL_IOCTL)
4528 mddev->hold_active = 0;
4529 sysfs_notify_dirent_safe(mddev->sysfs_state);
4530 }
4531 mddev_unlock(mddev);
4532 return err ?: len;
4533}
4534static struct md_sysfs_entry md_array_state =
4535__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4536
4537static ssize_t
4538max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4539 return sprintf(page, "%d\n",
4540 atomic_read(&mddev->max_corr_read_errors));
4541}
4542
4543static ssize_t
4544max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4545{
4546 unsigned int n;
4547 int rv;
4548
4549 rv = kstrtouint(buf, 10, &n);
4550 if (rv < 0)
4551 return rv;
4552 atomic_set(&mddev->max_corr_read_errors, n);
4553 return len;
4554}
4555
4556static struct md_sysfs_entry max_corr_read_errors =
4557__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4558 max_corrected_read_errors_store);
4559
4560static ssize_t
4561null_show(struct mddev *mddev, char *page)
4562{
4563 return -EINVAL;
4564}
4565
4566
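/* Make sure any pending rdev deletion work queued on md_rdev_misc_wq
 * has completed before we continue.
 */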
4567static void flush_rdev_wq(struct mddev *mddev)
4568{
4569 struct md_rdev *rdev;
4570
4571 rcu_read_lock();
4572 rdev_for_each_rcu(rdev, mddev)
4573 if (work_pending(&rdev->del_work)) {
4574 flush_workqueue(md_rdev_misc_wq);
4575 break;
4576 }
4577 rcu_read_unlock();
4578}
4579
4580static ssize_t
4581new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4582{
	/* buf must be "%d:%d" giving the major and minor numbers
	 * of the device to add.
	 * The new device is imported and bound to the array.  If the
	 * array has a persistent superblock, the superblock is loaded
	 * and validated against an existing member; otherwise only the
	 * checks in bind_rdev_to_array() are performed.
	 */
4590 char *e;
4591 int major = simple_strtoul(buf, &e, 10);
4592 int minor;
4593 dev_t dev;
4594 struct md_rdev *rdev;
4595 int err;
4596
4597 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4598 return -EINVAL;
4599 minor = simple_strtoul(e+1, &e, 10);
4600 if (*e && *e != '\n')
4601 return -EINVAL;
4602 dev = MKDEV(major, minor);
4603 if (major != MAJOR(dev) ||
4604 minor != MINOR(dev))
4605 return -EOVERFLOW;
4606
4607 flush_rdev_wq(mddev);
4608 err = mddev_lock(mddev);
4609 if (err)
4610 return err;
4611 if (mddev->persistent) {
4612 rdev = md_import_device(dev, mddev->major_version,
4613 mddev->minor_version);
4614 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4615 struct md_rdev *rdev0
4616 = list_entry(mddev->disks.next,
4617 struct md_rdev, same_set);
4618 err = super_types[mddev->major_version]
4619 .load_super(rdev, rdev0, mddev->minor_version);
4620 if (err < 0)
4621 goto out;
4622 }
4623 } else if (mddev->external)
4624 rdev = md_import_device(dev, -2, -1);
4625 else
4626 rdev = md_import_device(dev, -1, -1);
4627
4628 if (IS_ERR(rdev)) {
4629 mddev_unlock(mddev);
4630 return PTR_ERR(rdev);
4631 }
4632 err = bind_rdev_to_array(rdev, mddev);
4633 out:
4634 if (err)
4635 export_rdev(rdev);
4636 mddev_unlock(mddev);
4637 if (!err)
4638 md_new_event(mddev);
4639 return err ? err : len;
4640}
4641
4642static struct md_sysfs_entry md_new_device =
4643__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4644
4645static ssize_t
4646bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4647{
4648 char *end;
4649 unsigned long chunk, end_chunk;
4650 int err;
4651
4652 err = mddev_lock(mddev);
4653 if (err)
4654 return err;
4655 if (!mddev->bitmap)
4656 goto out;
4657
4658 while (*buf) {
4659 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4660 if (buf == end) break;
4661 if (*end == '-') {
4662 buf = end + 1;
4663 end_chunk = simple_strtoul(buf, &end, 0);
4664 if (buf == end) break;
4665 }
4666 if (*end && !isspace(*end)) break;
4667 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4668 buf = skip_spaces(end);
4669 }
4670 md_bitmap_unplug(mddev->bitmap);
4671out:
4672 mddev_unlock(mddev);
4673 return len;
4674}
4675
4676static struct md_sysfs_entry md_bitmap =
4677__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4678
4679static ssize_t
4680size_show(struct mddev *mddev, char *page)
4681{
4682 return sprintf(page, "%llu\n",
4683 (unsigned long long)mddev->dev_sectors / 2);
4684}
4685
4686static int update_size(struct mddev *mddev, sector_t num_sectors);
4687
4688static ssize_t
4689size_store(struct mddev *mddev, const char *buf, size_t len)
4690{
	/* If the array is active, we can try an on-line resize.
	 * Otherwise we may only record a smaller device size;
	 * growing an unassembled array is rejected with -ENOSPC.
	 */
4695 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4697
4698 if (err < 0)
4699 return err;
4700 err = mddev_lock(mddev);
4701 if (err)
4702 return err;
4703 if (mddev->pers) {
4704 err = update_size(mddev, sectors);
4705 if (err == 0)
4706 md_update_sb(mddev, 1);
4707 } else {
4708 if (mddev->dev_sectors == 0 ||
4709 mddev->dev_sectors > sectors)
4710 mddev->dev_sectors = sectors;
4711 else
4712 err = -ENOSPC;
4713 }
4714 mddev_unlock(mddev);
4715 return err ? err : len;
4716}
4717
4718static struct md_sysfs_entry md_size =
4719__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4720
4721
4722
4723
4724
4725
4726
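/* The "metadata_version" attribute reports:
 *   "N.M"             - a native superblock of version N.M
 *   "external:<type>" - metadata managed outside the kernel
 *   "none"            - no persistent metadata
 */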
4727static ssize_t
4728metadata_show(struct mddev *mddev, char *page)
4729{
4730 if (mddev->persistent)
4731 return sprintf(page, "%d.%d\n",
4732 mddev->major_version, mddev->minor_version);
4733 else if (mddev->external)
4734 return sprintf(page, "external:%s\n", mddev->metadata_type);
4735 else
4736 return sprintf(page, "none\n");
4737}
4738
4739static ssize_t
4740metadata_store(struct mddev *mddev, const char *buf, size_t len)
4741{
4742 int major, minor;
4743 char *e;
4744 int err;
4745
	/* Changing the details of 'external' metadata is always
	 * permitted.  Otherwise the array must have no component
	 * devices attached.
	 */
4750 err = mddev_lock(mddev);
4751 if (err)
4752 return err;
4753 err = -EBUSY;
4754 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4755 ;
4756 else if (!list_empty(&mddev->disks))
4757 goto out_unlock;
4758
4759 err = 0;
4760 if (cmd_match(buf, "none")) {
4761 mddev->persistent = 0;
4762 mddev->external = 0;
4763 mddev->major_version = 0;
4764 mddev->minor_version = 90;
4765 goto out_unlock;
4766 }
4767 if (strncmp(buf, "external:", 9) == 0) {
4768 size_t namelen = len-9;
4769 if (namelen >= sizeof(mddev->metadata_type))
4770 namelen = sizeof(mddev->metadata_type)-1;
4771 strncpy(mddev->metadata_type, buf+9, namelen);
4772 mddev->metadata_type[namelen] = 0;
4773 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4774 mddev->metadata_type[--namelen] = 0;
4775 mddev->persistent = 0;
4776 mddev->external = 1;
4777 mddev->major_version = 0;
4778 mddev->minor_version = 90;
4779 goto out_unlock;
4780 }
4781 major = simple_strtoul(buf, &e, 10);
4782 err = -EINVAL;
4783 if (e==buf || *e != '.')
4784 goto out_unlock;
4785 buf = e+1;
4786 minor = simple_strtoul(buf, &e, 10);
4787 if (e==buf || (*e && *e != '\n') )
4788 goto out_unlock;
4789 err = -ENOENT;
4790 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4791 goto out_unlock;
4792 mddev->major_version = major;
4793 mddev->minor_version = minor;
4794 mddev->persistent = 1;
4795 mddev->external = 0;
4796 err = 0;
4797out_unlock:
4798 mddev_unlock(mddev);
4799 return err ?: len;
4800}
4801
4802static struct md_sysfs_entry md_metadata =
4803__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4804
4805static ssize_t
4806action_show(struct mddev *mddev, char *page)
4807{
4808 char *type = "idle";
4809 unsigned long recovery = mddev->recovery;
4810 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4811 type = "frozen";
4812 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4813 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4814 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4815 type = "reshape";
4816 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4817 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4818 type = "resync";
4819 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4820 type = "check";
4821 else
4822 type = "repair";
4823 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4824 type = "recover";
4825 else if (mddev->reshape_position != MaxSector)
4826 type = "reshape";
4827 }
4828 return sprintf(page, "%s\n", type);
4829}
4830
4831static ssize_t
4832action_store(struct mddev *mddev, const char *page, size_t len)
4833{
4834 if (!mddev->pers || !mddev->pers->sync_request)
4835 return -EINVAL;
4836
4837
4838 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4839 if (cmd_match(page, "frozen"))
4840 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4841 else
4842 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4843 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4844 mddev_lock(mddev) == 0) {
4845 if (work_pending(&mddev->del_work))
4846 flush_workqueue(md_misc_wq);
4847 if (mddev->sync_thread) {
4848 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4849 md_reap_sync_thread(mddev);
4850 }
4851 mddev_unlock(mddev);
4852 }
4853 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4854 return -EBUSY;
4855 else if (cmd_match(page, "resync"))
4856 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4857 else if (cmd_match(page, "recover")) {
4858 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4859 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4860 } else if (cmd_match(page, "reshape")) {
4861 int err;
4862 if (mddev->pers->start_reshape == NULL)
4863 return -EINVAL;
4864 err = mddev_lock(mddev);
4865 if (!err) {
4866 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4867 err = -EBUSY;
4868 else {
4869 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4870 err = mddev->pers->start_reshape(mddev);
4871 }
4872 mddev_unlock(mddev);
4873 }
4874 if (err)
4875 return err;
4876 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4877 } else {
4878 if (cmd_match(page, "check"))
4879 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4880 else if (!cmd_match(page, "repair"))
4881 return -EINVAL;
4882 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4883 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4884 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4885 }
4886 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * clearing the read-auto state.
		 */
4890 mddev->ro = 0;
4891 md_wakeup_thread(mddev->sync_thread);
4892 }
4893 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4894 md_wakeup_thread(mddev->thread);
4895 sysfs_notify_dirent_safe(mddev->sysfs_action);
4896 return len;
4897}
4898
4899static struct md_sysfs_entry md_scan_mode =
4900__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4901
4902static ssize_t
4903last_sync_action_show(struct mddev *mddev, char *page)
4904{
4905 return sprintf(page, "%s\n", mddev->last_sync_action);
4906}
4907
4908static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4909
4910static ssize_t
4911mismatch_cnt_show(struct mddev *mddev, char *page)
4912{
4913 return sprintf(page, "%llu\n",
4914 (unsigned long long)
4915 atomic64_read(&mddev->resync_mismatches));
4916}
4917
4918static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4919
4920static ssize_t
4921sync_min_show(struct mddev *mddev, char *page)
4922{
4923 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4924 mddev->sync_speed_min ? "local": "system");
4925}
4926
4927static ssize_t
4928sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4929{
4930 unsigned int min;
4931 int rv;
4932
4933 if (strncmp(buf, "system", 6)==0) {
4934 min = 0;
4935 } else {
4936 rv = kstrtouint(buf, 10, &min);
4937 if (rv < 0)
4938 return rv;
4939 if (min == 0)
4940 return -EINVAL;
4941 }
4942 mddev->sync_speed_min = min;
4943 return len;
4944}
4945
4946static struct md_sysfs_entry md_sync_min =
4947__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4948
4949static ssize_t
4950sync_max_show(struct mddev *mddev, char *page)
4951{
4952 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4953 mddev->sync_speed_max ? "local": "system");
4954}
4955
4956static ssize_t
4957sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4958{
4959 unsigned int max;
4960 int rv;
4961
4962 if (strncmp(buf, "system", 6)==0) {
4963 max = 0;
4964 } else {
4965 rv = kstrtouint(buf, 10, &max);
4966 if (rv < 0)
4967 return rv;
4968 if (max == 0)
4969 return -EINVAL;
4970 }
4971 mddev->sync_speed_max = max;
4972 return len;
4973}
4974
4975static struct md_sysfs_entry md_sync_max =
4976__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4977
4978static ssize_t
4979degraded_show(struct mddev *mddev, char *page)
4980{
4981 return sprintf(page, "%d\n", mddev->degraded);
4982}
4983static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4984
4985static ssize_t
4986sync_force_parallel_show(struct mddev *mddev, char *page)
4987{
4988 return sprintf(page, "%d\n", mddev->parallel_resync);
4989}
4990
4991static ssize_t
4992sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4993{
4994 long n;
4995
4996 if (kstrtol(buf, 10, &n))
4997 return -EINVAL;
4998
4999 if (n != 0 && n != 1)
5000 return -EINVAL;
5001
5002 mddev->parallel_resync = n;
5003
5004 if (mddev->sync_thread)
5005 wake_up(&resync_wait);
5006
5007 return len;
5008}
5009
5010
5011static struct md_sysfs_entry md_sync_force_parallel =
5012__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
5013 sync_force_parallel_show, sync_force_parallel_store);
5014
5015static ssize_t
5016sync_speed_show(struct mddev *mddev, char *page)
5017{
5018 unsigned long resync, dt, db;
5019 if (mddev->curr_resync == 0)
5020 return sprintf(page, "none\n");
5021 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5022 dt = (jiffies - mddev->resync_mark) / HZ;
5023 if (!dt) dt++;
5024 db = resync - mddev->resync_mark_cnt;
5025 return sprintf(page, "%lu\n", db/dt/2);
5026}
5027
5028static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
5029
5030static ssize_t
5031sync_completed_show(struct mddev *mddev, char *page)
5032{
5033 unsigned long long max_sectors, resync;
5034
5035 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5036 return sprintf(page, "none\n");
5037
5038 if (mddev->curr_resync == 1 ||
5039 mddev->curr_resync == 2)
5040 return sprintf(page, "delayed\n");
5041
5042 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5043 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5044 max_sectors = mddev->resync_max_sectors;
5045 else
5046 max_sectors = mddev->dev_sectors;
5047
5048 resync = mddev->curr_resync_completed;
5049 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5050}
5051
5052static struct md_sysfs_entry md_sync_completed =
5053 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5054
5055static ssize_t
5056min_sync_show(struct mddev *mddev, char *page)
5057{
5058 return sprintf(page, "%llu\n",
5059 (unsigned long long)mddev->resync_min);
5060}
5061static ssize_t
5062min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5063{
5064 unsigned long long min;
5065 int err;
5066
5067 if (kstrtoull(buf, 10, &min))
5068 return -EINVAL;
5069
5070 spin_lock(&mddev->lock);
5071 err = -EINVAL;
5072 if (min > mddev->resync_max)
5073 goto out_unlock;
5074
5075 err = -EBUSY;
5076 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5077 goto out_unlock;
5078
	/* Round down to a multiple of 8 sectors. */
5080 mddev->resync_min = round_down(min, 8);
5081 err = 0;
5082
5083out_unlock:
5084 spin_unlock(&mddev->lock);
5085 return err ?: len;
5086}
5087
5088static struct md_sysfs_entry md_min_sync =
5089__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5090
5091static ssize_t
5092max_sync_show(struct mddev *mddev, char *page)
5093{
5094 if (mddev->resync_max == MaxSector)
5095 return sprintf(page, "max\n");
5096 else
5097 return sprintf(page, "%llu\n",
5098 (unsigned long long)mddev->resync_max);
5099}
5100static ssize_t
5101max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5102{
5103 int err;
5104 spin_lock(&mddev->lock);
5105 if (strncmp(buf, "max", 3) == 0)
5106 mddev->resync_max = MaxSector;
5107 else {
5108 unsigned long long max;
5109 int chunk;
5110
5111 err = -EINVAL;
5112 if (kstrtoull(buf, 10, &max))
5113 goto out_unlock;
5114 if (max < mddev->resync_min)
5115 goto out_unlock;
5116
5117 err = -EBUSY;
5118 if (max < mddev->resync_max &&
5119 mddev->ro == 0 &&
5120 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5121 goto out_unlock;
5122
		/* "max" must be a multiple of the chunk size. */
5124 chunk = mddev->chunk_sectors;
5125 if (chunk) {
5126 sector_t temp = max;
5127
5128 err = -EINVAL;
5129 if (sector_div(temp, chunk))
5130 goto out_unlock;
5131 }
5132 mddev->resync_max = max;
5133 }
5134 wake_up(&mddev->recovery_wait);
5135 err = 0;
5136out_unlock:
5137 spin_unlock(&mddev->lock);
5138 return err ?: len;
5139}
5140
5141static struct md_sysfs_entry md_max_sync =
5142__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
5143
5144static ssize_t
5145suspend_lo_show(struct mddev *mddev, char *page)
5146{
5147 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5148}
5149
5150static ssize_t
5151suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5152{
5153 unsigned long long new;
5154 int err;
5155
5156 err = kstrtoull(buf, 10, &new);
5157 if (err < 0)
5158 return err;
5159 if (new != (sector_t)new)
5160 return -EINVAL;
5161
5162 err = mddev_lock(mddev);
5163 if (err)
5164 return err;
5165 err = -EINVAL;
5166 if (mddev->pers == NULL ||
5167 mddev->pers->quiesce == NULL)
5168 goto unlock;
5169 mddev_suspend(mddev);
5170 mddev->suspend_lo = new;
5171 mddev_resume(mddev);
5172
5173 err = 0;
5174unlock:
5175 mddev_unlock(mddev);
5176 return err ?: len;
5177}
5178static struct md_sysfs_entry md_suspend_lo =
5179__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5180
5181static ssize_t
5182suspend_hi_show(struct mddev *mddev, char *page)
5183{
5184 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5185}
5186
5187static ssize_t
5188suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5189{
5190 unsigned long long new;
5191 int err;
5192
5193 err = kstrtoull(buf, 10, &new);
5194 if (err < 0)
5195 return err;
5196 if (new != (sector_t)new)
5197 return -EINVAL;
5198
5199 err = mddev_lock(mddev);
5200 if (err)
5201 return err;
5202 err = -EINVAL;
5203 if (mddev->pers == NULL)
5204 goto unlock;
5205
5206 mddev_suspend(mddev);
5207 mddev->suspend_hi = new;
5208 mddev_resume(mddev);
5209
5210 err = 0;
5211unlock:
5212 mddev_unlock(mddev);
5213 return err ?: len;
5214}
5215static struct md_sysfs_entry md_suspend_hi =
5216__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5217
5218static ssize_t
5219reshape_position_show(struct mddev *mddev, char *page)
5220{
5221 if (mddev->reshape_position != MaxSector)
5222 return sprintf(page, "%llu\n",
5223 (unsigned long long)mddev->reshape_position);
5224 strcpy(page, "none\n");
5225 return 5;
5226}
5227
5228static ssize_t
5229reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5230{
5231 struct md_rdev *rdev;
5232 unsigned long long new;
5233 int err;
5234
5235 err = kstrtoull(buf, 10, &new);
5236 if (err < 0)
5237 return err;
5238 if (new != (sector_t)new)
5239 return -EINVAL;
5240 err = mddev_lock(mddev);
5241 if (err)
5242 return err;
5243 err = -EBUSY;
5244 if (mddev->pers)
5245 goto unlock;
5246 mddev->reshape_position = new;
5247 mddev->delta_disks = 0;
5248 mddev->reshape_backwards = 0;
5249 mddev->new_level = mddev->level;
5250 mddev->new_layout = mddev->layout;
5251 mddev->new_chunk_sectors = mddev->chunk_sectors;
5252 rdev_for_each(rdev, mddev)
5253 rdev->new_data_offset = rdev->data_offset;
5254 err = 0;
5255unlock:
5256 mddev_unlock(mddev);
5257 return err ?: len;
5258}
5259
5260static struct md_sysfs_entry md_reshape_position =
5261__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5262 reshape_position_store);
5263
5264static ssize_t
5265reshape_direction_show(struct mddev *mddev, char *page)
5266{
5267 return sprintf(page, "%s\n",
5268 mddev->reshape_backwards ? "backwards" : "forwards");
5269}
5270
5271static ssize_t
5272reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5273{
5274 int backwards = 0;
5275 int err;
5276
5277 if (cmd_match(buf, "forwards"))
5278 backwards = 0;
5279 else if (cmd_match(buf, "backwards"))
5280 backwards = 1;
5281 else
5282 return -EINVAL;
5283 if (mddev->reshape_backwards == backwards)
5284 return len;
5285
5286 err = mddev_lock(mddev);
5287 if (err)
5288 return err;
5289
5290 if (mddev->delta_disks)
5291 err = -EBUSY;
5292 else if (mddev->persistent &&
5293 mddev->major_version == 0)
5294 err = -EINVAL;
5295 else
5296 mddev->reshape_backwards = backwards;
5297 mddev_unlock(mddev);
5298 return err ?: len;
5299}
5300
5301static struct md_sysfs_entry md_reshape_direction =
5302__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5303 reshape_direction_store);
5304
5305static ssize_t
5306array_size_show(struct mddev *mddev, char *page)
5307{
5308 if (mddev->external_size)
5309 return sprintf(page, "%llu\n",
5310 (unsigned long long)mddev->array_sectors/2);
5311 else
5312 return sprintf(page, "default\n");
5313}
5314
5315static ssize_t
5316array_size_store(struct mddev *mddev, const char *buf, size_t len)
5317{
5318 sector_t sectors;
5319 int err;
5320
5321 err = mddev_lock(mddev);
5322 if (err)
5323 return err;
5324
	/* Clustered arrays do not support changing array_size. */
5326 if (mddev_is_clustered(mddev)) {
5327 mddev_unlock(mddev);
5328 return -EINVAL;
5329 }
5330
5331 if (strncmp(buf, "default", 7) == 0) {
5332 if (mddev->pers)
5333 sectors = mddev->pers->size(mddev, 0, 0);
5334 else
5335 sectors = mddev->array_sectors;
5336
5337 mddev->external_size = 0;
5338 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5340 err = -EINVAL;
5341 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5342 err = -E2BIG;
5343 else
5344 mddev->external_size = 1;
5345 }
5346
5347 if (!err) {
5348 mddev->array_sectors = sectors;
5349 if (mddev->pers)
5350 set_capacity_and_notify(mddev->gendisk,
5351 mddev->array_sectors);
5352 }
5353 mddev_unlock(mddev);
5354 return err ?: len;
5355}
5356
5357static struct md_sysfs_entry md_array_size =
5358__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5359 array_size_store);
5360
5361static ssize_t
5362consistency_policy_show(struct mddev *mddev, char *page)
5363{
5364 int ret;
5365
5366 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5367 ret = sprintf(page, "journal\n");
5368 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5369 ret = sprintf(page, "ppl\n");
5370 } else if (mddev->bitmap) {
5371 ret = sprintf(page, "bitmap\n");
5372 } else if (mddev->pers) {
5373 if (mddev->pers->sync_request)
5374 ret = sprintf(page, "resync\n");
5375 else
5376 ret = sprintf(page, "none\n");
5377 } else {
5378 ret = sprintf(page, "unknown\n");
5379 }
5380
5381 return ret;
5382}
5383
5384static ssize_t
5385consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5386{
5387 int err = 0;
5388
5389 if (mddev->pers) {
5390 if (mddev->pers->change_consistency_policy)
5391 err = mddev->pers->change_consistency_policy(mddev, buf);
5392 else
5393 err = -EBUSY;
5394 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5395 set_bit(MD_HAS_PPL, &mddev->flags);
5396 } else {
5397 err = -EINVAL;
5398 }
5399
5400 return err ? err : len;
5401}
5402
5403static struct md_sysfs_entry md_consistency_policy =
5404__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5405 consistency_policy_store);
5406
5407static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5408{
5409 return sprintf(page, "%d\n", mddev->fail_last_dev);
5410}
5411
5412
5413
5414
5415
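/* When fail_last_dev is set, the last working device in the array is
 * allowed to be marked Faulty like any other device.
 */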
5416static ssize_t
5417fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5418{
5419 int ret;
5420 bool value;
5421
5422 ret = kstrtobool(buf, &value);
5423 if (ret)
5424 return ret;
5425
5426 if (value != mddev->fail_last_dev)
5427 mddev->fail_last_dev = value;
5428
5429 return len;
5430}
5431static struct md_sysfs_entry md_fail_last_dev =
5432__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5433 fail_last_dev_store);
5434
5435static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5436{
5437 if (mddev->pers == NULL || (mddev->pers->level != 1))
5438 return sprintf(page, "n/a\n");
5439 else
5440 return sprintf(page, "%d\n", mddev->serialize_policy);
5441}
5442
5443
5444
5445
5446
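/* Enable or disable write serialization (raid1 only).  Toggling
 * suspends the array while the serial info pool is created or
 * destroyed.
 */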
5447static ssize_t
5448serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5449{
5450 int err;
5451 bool value;
5452
5453 err = kstrtobool(buf, &value);
5454 if (err)
5455 return err;
5456
5457 if (value == mddev->serialize_policy)
5458 return len;
5459
5460 err = mddev_lock(mddev);
5461 if (err)
5462 return err;
5463 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5464 pr_err("md: serialize_policy is only effective for raid1\n");
5465 err = -EINVAL;
5466 goto unlock;
5467 }
5468
5469 mddev_suspend(mddev);
5470 if (value)
5471 mddev_create_serial_pool(mddev, NULL, true);
5472 else
5473 mddev_destroy_serial_pool(mddev, NULL, true);
5474 mddev->serialize_policy = value;
5475 mddev_resume(mddev);
5476unlock:
5477 mddev_unlock(mddev);
5478 return err ?: len;
5479}
5480
5481static struct md_sysfs_entry md_serialize_policy =
5482__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5483 serialize_policy_store);
5484
5485
5486static struct attribute *md_default_attrs[] = {
5487 &md_level.attr,
5488 &md_layout.attr,
5489 &md_raid_disks.attr,
5490 &md_uuid.attr,
5491 &md_chunk_size.attr,
5492 &md_size.attr,
5493 &md_resync_start.attr,
5494 &md_metadata.attr,
5495 &md_new_device.attr,
5496 &md_safe_delay.attr,
5497 &md_array_state.attr,
5498 &md_reshape_position.attr,
5499 &md_reshape_direction.attr,
5500 &md_array_size.attr,
5501 &max_corr_read_errors.attr,
5502 &md_consistency_policy.attr,
5503 &md_fail_last_dev.attr,
5504 &md_serialize_policy.attr,
5505 NULL,
5506};
5507
5508static struct attribute *md_redundancy_attrs[] = {
5509 &md_scan_mode.attr,
5510 &md_last_scan_mode.attr,
5511 &md_mismatches.attr,
5512 &md_sync_min.attr,
5513 &md_sync_max.attr,
5514 &md_sync_speed.attr,
5515 &md_sync_force_parallel.attr,
5516 &md_sync_completed.attr,
5517 &md_min_sync.attr,
5518 &md_max_sync.attr,
5519 &md_suspend_lo.attr,
5520 &md_suspend_hi.attr,
5521 &md_bitmap.attr,
5522 &md_degraded.attr,
5523 NULL,
5524};
5525static struct attribute_group md_redundancy_group = {
5526 .name = NULL,
5527 .attrs = md_redundancy_attrs,
5528};
5529
5530static ssize_t
5531md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5532{
5533 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5534 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5535 ssize_t rv;
5536
5537 if (!entry->show)
5538 return -EIO;
5539 spin_lock(&all_mddevs_lock);
5540 if (list_empty(&mddev->all_mddevs)) {
5541 spin_unlock(&all_mddevs_lock);
5542 return -EBUSY;
5543 }
5544 mddev_get(mddev);
5545 spin_unlock(&all_mddevs_lock);
5546
5547 rv = entry->show(mddev, page);
5548 mddev_put(mddev);
5549 return rv;
5550}
5551
5552static ssize_t
5553md_attr_store(struct kobject *kobj, struct attribute *attr,
5554 const char *page, size_t length)
5555{
5556 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5557 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5558 ssize_t rv;
5559
5560 if (!entry->store)
5561 return -EIO;
5562 if (!capable(CAP_SYS_ADMIN))
5563 return -EACCES;
5564 spin_lock(&all_mddevs_lock);
5565 if (list_empty(&mddev->all_mddevs)) {
5566 spin_unlock(&all_mddevs_lock);
5567 return -EBUSY;
5568 }
5569 mddev_get(mddev);
5570 spin_unlock(&all_mddevs_lock);
5571 rv = entry->store(mddev, page, length);
5572 mddev_put(mddev);
5573 return rv;
5574}
5575
5576static void md_free(struct kobject *ko)
5577{
5578 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5579
5580 if (mddev->sysfs_state)
5581 sysfs_put(mddev->sysfs_state);
5582 if (mddev->sysfs_level)
5583 sysfs_put(mddev->sysfs_level);
5584
5585 if (mddev->gendisk)
5586 del_gendisk(mddev->gendisk);
5587 if (mddev->queue)
5588 blk_cleanup_queue(mddev->queue);
5589 if (mddev->gendisk)
5590 put_disk(mddev->gendisk);
5591 percpu_ref_exit(&mddev->writes_pending);
5592
5593 bioset_exit(&mddev->bio_set);
5594 bioset_exit(&mddev->sync_set);
5595 mempool_exit(&mddev->md_io_pool);
5596 kfree(mddev);
5597}
5598
5599static const struct sysfs_ops md_sysfs_ops = {
5600 .show = md_attr_show,
5601 .store = md_attr_store,
5602};
5603static struct kobj_type md_ktype = {
5604 .release = md_free,
5605 .sysfs_ops = &md_sysfs_ops,
5606 .default_attrs = md_default_attrs,
5607};
5608
5609int mdp_major = 0;
5610
5611static void mddev_delayed_delete(struct work_struct *ws)
5612{
5613 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5614
5615 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5616 kobject_del(&mddev->kobj);
5617 kobject_put(&mddev->kobj);
5618}
5619
5620static void no_op(struct percpu_ref *r) {}
5621
5622int mddev_init_writes_pending(struct mddev *mddev)
5623{
5624 if (mddev->writes_pending.percpu_count_ptr)
5625 return 0;
5626 if (percpu_ref_init(&mddev->writes_pending, no_op,
5627 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5628 return -ENOMEM;
5629
5630 percpu_ref_put(&mddev->writes_pending);
5631 return 0;
5632}
5633EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5634
5635static int md_alloc(dev_t dev, char *name)
{
	/*
	 * If dev is zero, "name" gives the name of the device to
	 * allocate, using an arbitrary free minor number ("md_<name>").
	 * If dev is non-zero it must carry a major of MD_MAJOR or
	 * mdp_major; in that case a NULL "name" means the device is
	 * being created by opening a node in /dev, while a non-NULL
	 * "name" means it is being created through add_named_array().
	 */
5646 static DEFINE_MUTEX(disks_mutex);
5647 struct mddev *mddev = mddev_find(dev);
5648 struct gendisk *disk;
5649 int partitioned;
5650 int shift;
5651 int unit;
5652 int error;
5653
5654 if (!mddev)
5655 return -ENODEV;
5656
5657 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5658 shift = partitioned ? MdpMinorShift : 0;
5659 unit = MINOR(mddev->unit) >> shift;
5660
5661
5662
5663
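	/* Wait for any previous instance of this device to be completely
	 * removed (mddev_delayed_delete runs on md_misc_wq).
	 */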
5664 flush_workqueue(md_misc_wq);
5665
5666 mutex_lock(&disks_mutex);
5667 error = -EEXIST;
5668 if (mddev->gendisk)
5669 goto abort;
5670
5671 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5674 struct mddev *mddev2;
5675 spin_lock(&all_mddevs_lock);
5676
5677 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5678 if (mddev2->gendisk &&
5679 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5680 spin_unlock(&all_mddevs_lock);
5681 goto abort;
5682 }
5683 spin_unlock(&all_mddevs_lock);
5684 }
5685 if (name && dev)
		/* An explicit name was supplied for this array, so keep
		 * it allocated until it is explicitly stopped.
		 */
5689 mddev->hold_active = UNTIL_STOP;
5690
5691 error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
5692 sizeof(struct md_io));
5693 if (error)
5694 goto abort;
5695
5696 error = -ENOMEM;
5697 mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
5698 if (!mddev->queue)
5699 goto abort;
5700
5701 blk_set_stacking_limits(&mddev->queue->limits);
5702
5703 disk = alloc_disk(1 << shift);
5704 if (!disk) {
5705 blk_cleanup_queue(mddev->queue);
5706 mddev->queue = NULL;
5707 goto abort;
5708 }
5709 disk->major = MAJOR(mddev->unit);
5710 disk->first_minor = unit << shift;
5711 if (name)
5712 strcpy(disk->disk_name, name);
5713 else if (partitioned)
5714 sprintf(disk->disk_name, "md_d%d", unit);
5715 else
5716 sprintf(disk->disk_name, "md%d", unit);
5717 disk->fops = &md_fops;
5718 disk->private_data = mddev;
5719 disk->queue = mddev->queue;
5720 blk_queue_write_cache(mddev->queue, true, true);
5721
5722
5723
5724
5725 disk->flags |= GENHD_FL_EXT_DEVT;
5726 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5727 mddev->gendisk = disk;
5728
5729
5730
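	/* As soon as add_disk() is called the device can be opened, so
	 * hold ->open_mutex until the sysfs pieces are registered.
	 */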
5731 mutex_lock(&mddev->open_mutex);
5732 add_disk(disk);
5733
5734 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5735 if (error) {
		/* The kobject name is most likely already in use; this
		 * is not fatal, so report it and carry on.
		 */
5739 pr_debug("md: cannot register %s/md - name in use\n",
5740 disk->disk_name);
5741 error = 0;
5742 }
5743 if (mddev->kobj.sd &&
5744 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
		pr_debug("md: cannot register bitmap group for %s\n",
			 disk->disk_name);
5746 mutex_unlock(&mddev->open_mutex);
5747 abort:
5748 mutex_unlock(&disks_mutex);
5749 if (!error && mddev->kobj.sd) {
5750 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5751 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5752 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5753 }
5754 mddev_put(mddev);
5755 return error;
5756}
5757
5758static void md_probe(dev_t dev)
5759{
5760 if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
5761 return;
5762 if (create_on_open)
5763 md_alloc(dev, NULL);
5764}
5765
5766static int add_named_array(const char *val, const struct kernel_param *kp)
{
	/*
	 * val must be "md_<name>" or "mdNNN".
	 * For "md_<name>" an array with an arbitrary free minor number
	 * is allocated and named val; val must not already be in use.
	 * For "mdNNN" an array with minor number NNN is allocated,
	 * which must not already be in use.
	 */
5775 int len = strlen(val);
5776 char buf[DISK_NAME_LEN];
5777 unsigned long devnum;
5778
5779 while (len && val[len-1] == '\n')
5780 len--;
5781 if (len >= DISK_NAME_LEN)
5782 return -E2BIG;
5783 strlcpy(buf, val, len+1);
5784 if (strncmp(buf, "md_", 3) == 0)
5785 return md_alloc(0, buf);
5786 if (strncmp(buf, "md", 2) == 0 &&
5787 isdigit(buf[2]) &&
5788 kstrtoul(buf+2, 10, &devnum) == 0 &&
5789 devnum <= MINORMASK)
5790 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5791
5792 return -EINVAL;
5793}
5794
5795static void md_safemode_timeout(struct timer_list *t)
5796{
5797 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5798
5799 mddev->safemode = 1;
5800 if (mddev->external)
5801 sysfs_notify_dirent_safe(mddev->sysfs_state);
5802
5803 md_wakeup_thread(mddev->thread);
5804}
5805
5806static int start_dirty_degraded;
5807
5808int md_run(struct mddev *mddev)
5809{
5810 int err;
5811 struct md_rdev *rdev;
5812 struct md_personality *pers;
5813
5814 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices */
5816 return -EINVAL;
5817
5818 if (mddev->pers)
5819 return -EBUSY;
5820
5821 if (mddev->sysfs_active)
5822 return -EBUSY;
5823
5824
5825
5826
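	/*
	 * Analyze all RAID superblock(s) if the array is not yet
	 * described (raid_disks == 0).
	 */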
5827 if (!mddev->raid_disks) {
5828 if (!mddev->persistent)
5829 return -EINVAL;
5830 err = analyze_sbs(mddev);
5831 if (err)
5832 return -EINVAL;
5833 }
5834
5835 if (mddev->level != LEVEL_NONE)
5836 request_module("md-level-%d", mddev->level);
5837 else if (mddev->clevel[0])
5838 request_module("md-%s", mddev->clevel);
5839
5840
5841
5842
5843
5844
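	/*
	 * Drop all container device buffers; from now on the only valid
	 * external interface is through the md device.
	 */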
5845 mddev->has_superblocks = false;
5846 rdev_for_each(rdev, mddev) {
5847 if (test_bit(Faulty, &rdev->flags))
5848 continue;
5849 sync_blockdev(rdev->bdev);
5850 invalidate_bdev(rdev->bdev);
5851 if (mddev->ro != 1 && rdev_read_only(rdev)) {
5852 mddev->ro = 1;
5853 if (mddev->gendisk)
5854 set_disk_ro(mddev->gendisk, 1);
5855 }
5856
5857 if (rdev->sb_page)
5858 mddev->has_superblocks = true;
5859
5860
5861
5862
5863
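		/* Sanity check: make sure the device's data and metadata
		 * areas do not overlap.
		 */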
5864 if (rdev->meta_bdev) {
5865 ;
5866 } else if (rdev->data_offset < rdev->sb_start) {
5867 if (mddev->dev_sectors &&
5868 rdev->data_offset + mddev->dev_sectors
5869 > rdev->sb_start) {
5870 pr_warn("md: %s: data overlaps metadata\n",
5871 mdname(mddev));
5872 return -EINVAL;
5873 }
5874 } else {
5875 if (rdev->sb_start + rdev->sb_size/512
5876 > rdev->data_offset) {
5877 pr_warn("md: %s: metadata overlaps data\n",
5878 mdname(mddev));
5879 return -EINVAL;
5880 }
5881 }
5882 sysfs_notify_dirent_safe(rdev->sysfs_state);
5883 }
5884
5885 if (!bioset_initialized(&mddev->bio_set)) {
5886 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5887 if (err)
5888 return err;
5889 }
5890 if (!bioset_initialized(&mddev->sync_set)) {
5891 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5892 if (err)
5893 return err;
5894 }
5895
5896 spin_lock(&pers_lock);
5897 pers = find_pers(mddev->level, mddev->clevel);
5898 if (!pers || !try_module_get(pers->owner)) {
5899 spin_unlock(&pers_lock);
5900 if (mddev->level != LEVEL_NONE)
5901 pr_warn("md: personality for level %d is not loaded!\n",
5902 mddev->level);
5903 else
5904 pr_warn("md: personality for level %s is not loaded!\n",
5905 mddev->clevel);
5906 err = -EINVAL;
5907 goto abort;
5908 }
5909 spin_unlock(&pers_lock);
5910 if (mddev->level != pers->level) {
5911 mddev->level = pers->level;
5912 mddev->new_level = pers->level;
5913 }
5914 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5915
5916 if (mddev->reshape_position != MaxSector &&
5917 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping. */
5919 module_put(pers->owner);
5920 err = -EINVAL;
5921 goto abort;
5922 }
5923
5924 if (pers->sync_request) {
		/* Warn if any two member devices appear to share a
		 * physical disk, as that undermines redundancy.
		 */
5928 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5929 struct md_rdev *rdev2;
5930 int warned = 0;
5931
5932 rdev_for_each(rdev, mddev)
5933 rdev_for_each(rdev2, mddev) {
5934 if (rdev < rdev2 &&
5935 rdev->bdev->bd_disk ==
5936 rdev2->bdev->bd_disk) {
5937 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5938 mdname(mddev),
5939 bdevname(rdev->bdev,b),
5940 bdevname(rdev2->bdev,b2));
5941 warned = 1;
5942 }
5943 }
5944
5945 if (warned)
5946 pr_warn("True protection against single-disk failure might be compromised.\n");
5947 }
5948
5949 mddev->recovery = 0;
5950
5951 mddev->resync_max_sectors = mddev->dev_sectors;
5952
5953 mddev->ok_start_degraded = start_dirty_degraded;
5954
5955 if (start_readonly && mddev->ro == 0)
5956 mddev->ro = 2;
5957
5958 err = pers->run(mddev);
5959 if (err)
5960 pr_warn("md: pers->run() failed ...\n");
5961 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5962 WARN_ONCE(!mddev->external_size,
5963 "%s: default size too small, but 'external_size' not in effect?\n",
5964 __func__);
5965 pr_warn("md: invalid array_size %llu > default size %llu\n",
5966 (unsigned long long)mddev->array_sectors / 2,
5967 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5968 err = -EINVAL;
5969 }
5970 if (err == 0 && pers->sync_request &&
5971 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5972 struct bitmap *bitmap;
5973
5974 bitmap = md_bitmap_create(mddev, -1);
5975 if (IS_ERR(bitmap)) {
5976 err = PTR_ERR(bitmap);
5977 pr_warn("%s: failed to create bitmap (%d)\n",
5978 mdname(mddev), err);
5979 } else
5980 mddev->bitmap = bitmap;
5981
5982 }
5983 if (err)
5984 goto bitmap_abort;
5985
5986 if (mddev->bitmap_info.max_write_behind > 0) {
5987 bool create_pool = false;
5988
5989 rdev_for_each(rdev, mddev) {
5990 if (test_bit(WriteMostly, &rdev->flags) &&
5991 rdev_init_serial(rdev))
5992 create_pool = true;
5993 }
5994 if (create_pool && mddev->serial_info_pool == NULL) {
5995 mddev->serial_info_pool =
5996 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
5997 sizeof(struct serial_info));
5998 if (!mddev->serial_info_pool) {
5999 err = -ENOMEM;
6000 goto bitmap_abort;
6001 }
6002 }
6003 }
6004
6005 if (mddev->queue) {
6006 bool nonrot = true;
6007
6008 rdev_for_each(rdev, mddev) {
6009 if (rdev->raid_disk >= 0 &&
6010 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
6011 nonrot = false;
6012 break;
6013 }
6014 }
6015 if (mddev->degraded)
6016 nonrot = false;
6017 if (nonrot)
6018 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6019 else
6020 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6021 }
6022 if (pers->sync_request) {
6023 if (mddev->kobj.sd &&
6024 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6025 pr_warn("md: cannot register extra attributes for %s\n",
6026 mdname(mddev));
6027 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6028 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6029 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6030 } else if (mddev->ro == 2)
6031 mddev->ro = 0;
6032
6033 atomic_set(&mddev->max_corr_read_errors,
6034 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
6035 mddev->safemode = 0;
6036 if (mddev_is_clustered(mddev))
6037 mddev->safemode_delay = 0;
6038 else
6039 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6040 mddev->in_sync = 1;
6041 smp_wmb();
6042 spin_lock(&mddev->lock);
6043 mddev->pers = pers;
6044 spin_unlock(&mddev->lock);
6045 rdev_for_each(rdev, mddev)
6046 if (rdev->raid_disk >= 0)
6047 sysfs_link_rdev(mddev, rdev);
6048
6049 if (mddev->degraded && !mddev->ro)
		/* Report "recovering" status via sysfs immediately,
		 * until a lack of spares is confirmed.
		 */
6053 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6054 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6055
6056 if (mddev->sb_flags)
6057 md_update_sb(mddev, 0);
6058
6059 md_new_event(mddev);
6060 return 0;
6061
6062bitmap_abort:
6063 mddev_detach(mddev);
6064 if (mddev->private)
6065 pers->free(mddev, mddev->private);
6066 mddev->private = NULL;
6067 module_put(pers->owner);
6068 md_bitmap_destroy(mddev);
6069abort:
6070 bioset_exit(&mddev->bio_set);
6071 bioset_exit(&mddev->sync_set);
6072 return err;
6073}
6074EXPORT_SYMBOL_GPL(md_run);
6075
6076int do_md_run(struct mddev *mddev)
6077{
6078 int err;
6079
6080 set_bit(MD_NOT_READY, &mddev->flags);
6081 err = md_run(mddev);
6082 if (err)
6083 goto out;
6084 err = md_bitmap_load(mddev);
6085 if (err) {
6086 md_bitmap_destroy(mddev);
6087 goto out;
6088 }
6089
6090 if (mddev_is_clustered(mddev))
6091 md_allow_write(mddev);
6092
6093
6094 md_start(mddev);
6095
6096 md_wakeup_thread(mddev->thread);
6097 md_wakeup_thread(mddev->sync_thread);
6098
6099 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6100 clear_bit(MD_NOT_READY, &mddev->flags);
6101 mddev->changed = 1;
6102 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6103 sysfs_notify_dirent_safe(mddev->sysfs_state);
6104 sysfs_notify_dirent_safe(mddev->sysfs_action);
6105 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6106out:
6107 clear_bit(MD_NOT_READY, &mddev->flags);
6108 return err;
6109}
6110
6111int md_start(struct mddev *mddev)
6112{
6113 int ret = 0;
6114
6115 if (mddev->pers->start) {
6116 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6117 md_wakeup_thread(mddev->thread);
6118 ret = mddev->pers->start(mddev);
6119 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6120 md_wakeup_thread(mddev->sync_thread);
6121 }
6122 return ret;
6123}
6124EXPORT_SYMBOL_GPL(md_start);
6125
6126static int restart_array(struct mddev *mddev)
6127{
6128 struct gendisk *disk = mddev->gendisk;
6129 struct md_rdev *rdev;
6130 bool has_journal = false;
6131 bool has_readonly = false;
6132
	/* Complain if it has no devices. */
6134 if (list_empty(&mddev->disks))
6135 return -ENXIO;
6136 if (!mddev->pers)
6137 return -EINVAL;
6138 if (!mddev->ro)
6139 return -EBUSY;
6140
6141 rcu_read_lock();
6142 rdev_for_each_rcu(rdev, mddev) {
6143 if (test_bit(Journal, &rdev->flags) &&
6144 !test_bit(Faulty, &rdev->flags))
6145 has_journal = true;
6146 if (rdev_read_only(rdev))
6147 has_readonly = true;
6148 }
6149 rcu_read_unlock();
6150 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart read-write with the journal missing or faulty. */
6152 return -EINVAL;
6153 if (has_readonly)
6154 return -EROFS;
6155
6156 mddev->safemode = 0;
6157 mddev->ro = 0;
6158 set_disk_ro(disk, 0);
6159 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6160
6161 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6162 md_wakeup_thread(mddev->thread);
6163 md_wakeup_thread(mddev->sync_thread);
6164 sysfs_notify_dirent_safe(mddev->sysfs_state);
6165 return 0;
6166}
6167
6168static void md_clean(struct mddev *mddev)
6169{
6170 mddev->array_sectors = 0;
6171 mddev->external_size = 0;
6172 mddev->dev_sectors = 0;
6173 mddev->raid_disks = 0;
6174 mddev->recovery_cp = 0;
6175 mddev->resync_min = 0;
6176 mddev->resync_max = MaxSector;
6177 mddev->reshape_position = MaxSector;
6178 mddev->external = 0;
6179 mddev->persistent = 0;
6180 mddev->level = LEVEL_NONE;
6181 mddev->clevel[0] = 0;
6182 mddev->flags = 0;
6183 mddev->sb_flags = 0;
6184 mddev->ro = 0;
6185 mddev->metadata_type[0] = 0;
6186 mddev->chunk_sectors = 0;
6187 mddev->ctime = mddev->utime = 0;
6188 mddev->layout = 0;
6189 mddev->max_disks = 0;
6190 mddev->events = 0;
6191 mddev->can_decrease_events = 0;
6192 mddev->delta_disks = 0;
6193 mddev->reshape_backwards = 0;
6194 mddev->new_level = LEVEL_NONE;
6195 mddev->new_layout = 0;
6196 mddev->new_chunk_sectors = 0;
6197 mddev->curr_resync = 0;
6198 atomic64_set(&mddev->resync_mismatches, 0);
6199 mddev->suspend_lo = mddev->suspend_hi = 0;
6200 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6201 mddev->recovery = 0;
6202 mddev->in_sync = 0;
6203 mddev->changed = 0;
6204 mddev->degraded = 0;
6205 mddev->safemode = 0;
6206 mddev->private = NULL;
6207 mddev->cluster_info = NULL;
6208 mddev->bitmap_info.offset = 0;
6209 mddev->bitmap_info.default_offset = 0;
6210 mddev->bitmap_info.default_space = 0;
6211 mddev->bitmap_info.chunksize = 0;
6212 mddev->bitmap_info.daemon_sleep = 0;
6213 mddev->bitmap_info.max_write_behind = 0;
6214 mddev->bitmap_info.nodes = 0;
6215}
6216
6217static void __md_stop_writes(struct mddev *mddev)
6218{
6219 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6220 if (work_pending(&mddev->del_work))
6221 flush_workqueue(md_misc_wq);
6222 if (mddev->sync_thread) {
6223 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6224 md_reap_sync_thread(mddev);
6225 }
6226
6227 del_timer_sync(&mddev->safemode_timer);
6228
6229 if (mddev->pers && mddev->pers->quiesce) {
6230 mddev->pers->quiesce(mddev, 1);
6231 mddev->pers->quiesce(mddev, 0);
6232 }
6233 md_bitmap_flush(mddev);
6234
6235 if (mddev->ro == 0 &&
6236 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6237 mddev->sb_flags)) {
		/* Mark the array as cleanly shut down in the superblock. */
6239 if (!mddev_is_clustered(mddev))
6240 mddev->in_sync = 1;
6241 md_update_sb(mddev, 1);
6242 }
6243
6244 mddev->serialize_policy = 0;
6245 mddev_destroy_serial_pool(mddev, NULL, true);
6246}
6247
6248void md_stop_writes(struct mddev *mddev)
6249{
6250 mddev_lock_nointr(mddev);
6251 __md_stop_writes(mddev);
6252 mddev_unlock(mddev);
6253}
6254EXPORT_SYMBOL_GPL(md_stop_writes);
6255
6256static void mddev_detach(struct mddev *mddev)
6257{
6258 md_bitmap_wait_behind_writes(mddev);
6259 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6260 mddev->pers->quiesce(mddev, 1);
6261 mddev->pers->quiesce(mddev, 0);
6262 }
6263 md_unregister_thread(&mddev->thread);
6264 if (mddev->queue)
6265 blk_sync_queue(mddev->queue);
6266}
6267
6268static void __md_stop(struct mddev *mddev)
6269{
6270 struct md_personality *pers = mddev->pers;
6271 md_bitmap_destroy(mddev);
6272 mddev_detach(mddev);
6273
6274 if (mddev->event_work.func)
6275 flush_workqueue(md_misc_wq);
6276 spin_lock(&mddev->lock);
6277 mddev->pers = NULL;
6278 spin_unlock(&mddev->lock);
6279 pers->free(mddev, mddev->private);
6280 mddev->private = NULL;
6281 if (pers->sync_request && mddev->to_remove == NULL)
6282 mddev->to_remove = &md_redundancy_group;
6283 module_put(pers->owner);
6284 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6285}
6286
6287void md_stop(struct mddev *mddev)
6288{
	/* Stop the array and free the attached data structures; the
	 * bio sets are released as well.
	 */
6292 __md_stop(mddev);
6293 bioset_exit(&mddev->bio_set);
6294 bioset_exit(&mddev->sync_set);
6295}
6296
6297EXPORT_SYMBOL_GPL(md_stop);
6298
6299static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6300{
6301 int err = 0;
6302 int did_freeze = 0;
6303
6304 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6305 did_freeze = 1;
6306 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6307 md_wakeup_thread(mddev->thread);
6308 }
6309 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6310 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6311 if (mddev->sync_thread)
		/* The thread might be blocked waiting for a metadata
		 * update which will now never happen. */
6314 wake_up_process(mddev->sync_thread->tsk);
6315
6316 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6317 return -EBUSY;
6318 mddev_unlock(mddev);
6319 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6320 &mddev->recovery));
6321 wait_event(mddev->sb_wait,
6322 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6323 mddev_lock_nointr(mddev);
6324
6325 mutex_lock(&mddev->open_mutex);
6326 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6327 mddev->sync_thread ||
6328 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6329 pr_warn("md: %s still in use.\n",mdname(mddev));
6330 if (did_freeze) {
6331 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6332 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6333 md_wakeup_thread(mddev->thread);
6334 }
6335 err = -EBUSY;
6336 goto out;
6337 }
6338 if (mddev->pers) {
6339 __md_stop_writes(mddev);
6340
6341 err = -ENXIO;
6342 if (mddev->ro==1)
6343 goto out;
6344 mddev->ro = 1;
6345 set_disk_ro(mddev->gendisk, 1);
6346 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6347 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6348 md_wakeup_thread(mddev->thread);
6349 sysfs_notify_dirent_safe(mddev->sysfs_state);
6350 err = 0;
6351 }
6352out:
6353 mutex_unlock(&mddev->open_mutex);
6354 return err;
6355}
6356
6357
6358
6359
6360
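/* mode:
 *   0 - completely stop and disassemble the array
 *   2 - stop the array but keep it assembled
 */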
6361static int do_md_stop(struct mddev *mddev, int mode,
6362 struct block_device *bdev)
6363{
6364 struct gendisk *disk = mddev->gendisk;
6365 struct md_rdev *rdev;
6366 int did_freeze = 0;
6367
6368 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6369 did_freeze = 1;
6370 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6371 md_wakeup_thread(mddev->thread);
6372 }
6373 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6374 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6375 if (mddev->sync_thread)
		/* The thread might be blocked waiting for a metadata
		 * update which will now never happen. */
6378 wake_up_process(mddev->sync_thread->tsk);
6379
6380 mddev_unlock(mddev);
6381 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6382 !test_bit(MD_RECOVERY_RUNNING,
6383 &mddev->recovery)));
6384 mddev_lock_nointr(mddev);
6385
6386 mutex_lock(&mddev->open_mutex);
6387 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6388 mddev->sysfs_active ||
6389 mddev->sync_thread ||
6390 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6391 pr_warn("md: %s still in use.\n",mdname(mddev));
6392 mutex_unlock(&mddev->open_mutex);
6393 if (did_freeze) {
6394 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6395 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6396 md_wakeup_thread(mddev->thread);
6397 }
6398 return -EBUSY;
6399 }
6400 if (mddev->pers) {
6401 if (mddev->ro)
6402 set_disk_ro(disk, 0);
6403
6404 __md_stop_writes(mddev);
6405 __md_stop(mddev);
6406
6407
6408 sysfs_notify_dirent_safe(mddev->sysfs_state);
6409
6410 rdev_for_each(rdev, mddev)
6411 if (rdev->raid_disk >= 0)
6412 sysfs_unlink_rdev(mddev, rdev);
6413
6414 set_capacity_and_notify(disk, 0);
6415 mutex_unlock(&mddev->open_mutex);
6416 mddev->changed = 1;
6417
6418 if (mddev->ro)
6419 mddev->ro = 0;
6420 } else
6421 mutex_unlock(&mddev->open_mutex);
6422
	/* Free resources and forget the component devices if this
	 * is a final stop (mode 0). */
6425 if (mode == 0) {
6426 pr_info("md: %s stopped.\n", mdname(mddev));
6427
6428 if (mddev->bitmap_info.file) {
6429 struct file *f = mddev->bitmap_info.file;
6430 spin_lock(&mddev->lock);
6431 mddev->bitmap_info.file = NULL;
6432 spin_unlock(&mddev->lock);
6433 fput(f);
6434 }
6435 mddev->bitmap_info.offset = 0;
6436
6437 export_array(mddev);
6438
6439 md_clean(mddev);
6440 if (mddev->hold_active == UNTIL_STOP)
6441 mddev->hold_active = 0;
6442 }
6443 md_new_event(mddev);
6444 sysfs_notify_dirent_safe(mddev->sysfs_state);
6445 return 0;
6446}
6447
6448#ifndef MODULE
6449static void autorun_array(struct mddev *mddev)
6450{
6451 struct md_rdev *rdev;
6452 int err;
6453
6454 if (list_empty(&mddev->disks))
6455 return;
6456
6457 pr_info("md: running: ");
6458
6459 rdev_for_each(rdev, mddev) {
6460 char b[BDEVNAME_SIZE];
6461 pr_cont("<%s>", bdevname(rdev->bdev,b));
6462 }
6463 pr_cont("\n");
6464
6465 err = do_md_run(mddev);
6466 if (err) {
6467 pr_warn("md: do_md_run() returned %d\n", err);
6468 do_md_stop(mddev, 0, NULL);
6469 }
6470}
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
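/*
 * Try to assemble and run arrays from all devices that have been
 * collected so far (pending_raid_disks): pick the first pending
 * device, gather every device whose superblock matches it, create
 * the corresponding md device and attempt to run the array.
 */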
6484static void autorun_devices(int part)
6485{
6486 struct md_rdev *rdev0, *rdev, *tmp;
6487 struct mddev *mddev;
6488 char b[BDEVNAME_SIZE];
6489
6490 pr_info("md: autorun ...\n");
6491 while (!list_empty(&pending_raid_disks)) {
6492 int unit;
6493 dev_t dev;
6494 LIST_HEAD(candidates);
6495 rdev0 = list_entry(pending_raid_disks.next,
6496 struct md_rdev, same_set);
6497
6498 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6499 INIT_LIST_HEAD(&candidates);
6500 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6501 if (super_90_load(rdev, rdev0, 0) >= 0) {
6502 pr_debug("md: adding %s ...\n",
6503 bdevname(rdev->bdev,b));
6504 list_move(&rdev->same_set, &candidates);
6505 }
		/*
		 * We now have a set of devices whose superblocks all
		 * refer to the same array; work out which unit it
		 * should become and try to assemble it.
		 */
6511 if (part) {
6512 dev = MKDEV(mdp_major,
6513 rdev0->preferred_minor << MdpMinorShift);
6514 unit = MINOR(dev) >> MdpMinorShift;
6515 } else {
6516 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6517 unit = MINOR(dev);
6518 }
6519 if (rdev0->preferred_minor != unit) {
6520 pr_warn("md: unit number in %s is bad: %d\n",
6521 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6522 break;
6523 }
6524
6525 md_probe(dev);
6526 mddev = mddev_find(dev);
6527 if (!mddev || !mddev->gendisk) {
6528 if (mddev)
6529 mddev_put(mddev);
6530 break;
6531 }
6532 if (mddev_lock(mddev))
6533 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6534 else if (mddev->raid_disks || mddev->major_version
6535 || !list_empty(&mddev->disks)) {
6536 pr_warn("md: %s already running, cannot run %s\n",
6537 mdname(mddev), bdevname(rdev0->bdev,b));
6538 mddev_unlock(mddev);
6539 } else {
6540 pr_debug("md: created %s\n", mdname(mddev));
6541 mddev->persistent = 1;
6542 rdev_for_each_list(rdev, tmp, &candidates) {
6543 list_del_init(&rdev->same_set);
6544 if (bind_rdev_to_array(rdev, mddev))
6545 export_rdev(rdev);
6546 }
6547 autorun_array(mddev);
6548 mddev_unlock(mddev);
6549 }
		/* On success the candidates list is empty; on failure
		 * any remaining devices are exported again.
		 */
6553 rdev_for_each_list(rdev, tmp, &candidates) {
6554 list_del_init(&rdev->same_set);
6555 export_rdev(rdev);
6556 }
6557 mddev_put(mddev);
6558 }
6559 pr_info("md: ... autorun DONE.\n");
6560}
6561#endif
6562
6563static int get_version(void __user *arg)
6564{
6565 mdu_version_t ver;
6566
6567 ver.major = MD_MAJOR_VERSION;
6568 ver.minor = MD_MINOR_VERSION;
6569 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6570
6571 if (copy_to_user(arg, &ver, sizeof(ver)))
6572 return -EFAULT;
6573
6574 return 0;
6575}
6576
6577static int get_array_info(struct mddev *mddev, void __user *arg)
6578{
6579 mdu_array_info_t info;
	int nr, working, insync, failed, spare;
6581 struct md_rdev *rdev;
6582
6583 nr = working = insync = failed = spare = 0;
6584 rcu_read_lock();
6585 rdev_for_each_rcu(rdev, mddev) {
6586 nr++;
6587 if (test_bit(Faulty, &rdev->flags))
6588 failed++;
6589 else {
6590 working++;
6591 if (test_bit(In_sync, &rdev->flags))
6592 insync++;
6593 else if (test_bit(Journal, &rdev->flags))
				/* Journal devices are not counted as spares. */
6595 ;
6596 else
6597 spare++;
6598 }
6599 }
6600 rcu_read_unlock();
6601
6602 info.major_version = mddev->major_version;
6603 info.minor_version = mddev->minor_version;
6604 info.patch_version = MD_PATCHLEVEL_VERSION;
6605 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6606 info.level = mddev->level;
6607 info.size = mddev->dev_sectors / 2;
6608 if (info.size != mddev->dev_sectors / 2)
6609 info.size = -1;
6610 info.nr_disks = nr;
6611 info.raid_disks = mddev->raid_disks;
6612 info.md_minor = mddev->md_minor;
	info.not_persistent = !mddev->persistent;
6614
6615 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6616 info.state = 0;
6617 if (mddev->in_sync)
6618 info.state = (1<<MD_SB_CLEAN);
6619 if (mddev->bitmap && mddev->bitmap_info.offset)
6620 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6621 if (mddev_is_clustered(mddev))
6622 info.state |= (1<<MD_SB_CLUSTERED);
6623 info.active_disks = insync;
6624 info.working_disks = working;
6625 info.failed_disks = failed;
6626 info.spare_disks = spare;
6627
6628 info.layout = mddev->layout;
6629 info.chunk_size = mddev->chunk_sectors << 9;
6630
6631 if (copy_to_user(arg, &info, sizeof(info)))
6632 return -EFAULT;
6633
6634 return 0;
6635}
6636
6637static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6638{
6639 mdu_bitmap_file_t *file = NULL;
6640 char *ptr;
6641 int err;
6642
6643 file = kzalloc(sizeof(*file), GFP_NOIO);
6644 if (!file)
6645 return -ENOMEM;
6646
6647 err = 0;
6648 spin_lock(&mddev->lock);
6649
6650 if (mddev->bitmap_info.file) {
6651 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6652 sizeof(file->pathname));
6653 if (IS_ERR(ptr))
6654 err = PTR_ERR(ptr);
6655 else
6656 memmove(file->pathname, ptr,
6657 sizeof(file->pathname)-(ptr-file->pathname));
6658 }
6659 spin_unlock(&mddev->lock);
6660
6661 if (err == 0 &&
6662 copy_to_user(arg, file, sizeof(*file)))
6663 err = -EFAULT;
6664
6665 kfree(file);
6666 return err;
6667}
6668
6669static int get_disk_info(struct mddev *mddev, void __user * arg)
6670{
6671 mdu_disk_info_t info;
6672 struct md_rdev *rdev;
6673
6674 if (copy_from_user(&info, arg, sizeof(info)))
6675 return -EFAULT;
6676
6677 rcu_read_lock();
6678 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6679 if (rdev) {
6680 info.major = MAJOR(rdev->bdev->bd_dev);
6681 info.minor = MINOR(rdev->bdev->bd_dev);
6682 info.raid_disk = rdev->raid_disk;
6683 info.state = 0;
6684 if (test_bit(Faulty, &rdev->flags))
6685 info.state |= (1<<MD_DISK_FAULTY);
6686 else if (test_bit(In_sync, &rdev->flags)) {
6687 info.state |= (1<<MD_DISK_ACTIVE);
6688 info.state |= (1<<MD_DISK_SYNC);
6689 }
6690 if (test_bit(Journal, &rdev->flags))
6691 info.state |= (1<<MD_DISK_JOURNAL);
6692 if (test_bit(WriteMostly, &rdev->flags))
6693 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6694 if (test_bit(FailFast, &rdev->flags))
6695 info.state |= (1<<MD_DISK_FAILFAST);
6696 } else {
6697 info.major = info.minor = 0;
6698 info.raid_disk = -1;
6699 info.state = (1<<MD_DISK_REMOVED);
6700 }
6701 rcu_read_unlock();
6702
6703 if (copy_to_user(arg, &info, sizeof(info)))
6704 return -EFAULT;
6705
6706 return 0;
6707}
6708
6709int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
6710{
6711 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6712 struct md_rdev *rdev;
6713 dev_t dev = MKDEV(info->major,info->minor);
6714
6715 if (mddev_is_clustered(mddev) &&
6716 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6717 pr_warn("%s: Cannot add to clustered mddev.\n",
6718 mdname(mddev));
6719 return -EINVAL;
6720 }
6721
6722 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6723 return -EOVERFLOW;
6724
6725 if (!mddev->raid_disks) {
6726 int err;
6727
6728 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6729 if (IS_ERR(rdev)) {
6730 pr_warn("md: md_import_device returned %ld\n",
6731 PTR_ERR(rdev));
6732 return PTR_ERR(rdev);
6733 }
6734 if (!list_empty(&mddev->disks)) {
6735 struct md_rdev *rdev0
6736 = list_entry(mddev->disks.next,
6737 struct md_rdev, same_set);
6738 err = super_types[mddev->major_version]
6739 .load_super(rdev, rdev0, mddev->minor_version);
6740 if (err < 0) {
6741 pr_warn("md: %s has different UUID to %s\n",
6742 bdevname(rdev->bdev,b),
6743 bdevname(rdev0->bdev,b2));
6744 export_rdev(rdev);
6745 return -EINVAL;
6746 }
6747 }
6748 err = bind_rdev_to_array(rdev, mddev);
6749 if (err)
6750 export_rdev(rdev);
6751 return err;
6752 }
6753
	/*
	 * Once the array is running, md_add_new_disk() is used to hot-add
	 * devices such as spares; with persistent metadata they must already
	 * carry a superblock.
	 */
6759 if (mddev->pers) {
6760 int err;
6761 if (!mddev->pers->hot_add_disk) {
6762 pr_warn("%s: personality does not support diskops!\n",
6763 mdname(mddev));
6764 return -EINVAL;
6765 }
6766 if (mddev->persistent)
6767 rdev = md_import_device(dev, mddev->major_version,
6768 mddev->minor_version);
6769 else
6770 rdev = md_import_device(dev, -1, -1);
6771 if (IS_ERR(rdev)) {
6772 pr_warn("md: md_import_device returned %ld\n",
6773 PTR_ERR(rdev));
6774 return PTR_ERR(rdev);
6775 }
6776
6777 if (!mddev->persistent) {
6778 if (info->state & (1<<MD_DISK_SYNC) &&
6779 info->raid_disk < mddev->raid_disks) {
6780 rdev->raid_disk = info->raid_disk;
6781 set_bit(In_sync, &rdev->flags);
6782 clear_bit(Bitmap_sync, &rdev->flags);
6783 } else
6784 rdev->raid_disk = -1;
6785 rdev->saved_raid_disk = rdev->raid_disk;
6786 } else
6787 super_types[mddev->major_version].
6788 validate_super(mddev, rdev);
6789 if ((info->state & (1<<MD_DISK_SYNC)) &&
6790 rdev->raid_disk != info->raid_disk) {
			/*
			 * This was requested as an in-sync re-add to a
			 * specific slot, but superblock validation put the
			 * device elsewhere, so reject it.
			 */
6794 export_rdev(rdev);
6795 return -EINVAL;
6796 }
6797
6798 clear_bit(In_sync, &rdev->flags);
6799 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6800 set_bit(WriteMostly, &rdev->flags);
6801 else
6802 clear_bit(WriteMostly, &rdev->flags);
6803 if (info->state & (1<<MD_DISK_FAILFAST))
6804 set_bit(FailFast, &rdev->flags);
6805 else
6806 clear_bit(FailFast, &rdev->flags);
6807
6808 if (info->state & (1<<MD_DISK_JOURNAL)) {
6809 struct md_rdev *rdev2;
6810 bool has_journal = false;
6811
6812
6813 rdev_for_each(rdev2, mddev) {
6814 if (test_bit(Journal, &rdev2->flags)) {
6815 has_journal = true;
6816 break;
6817 }
6818 }
6819 if (has_journal || mddev->bitmap) {
6820 export_rdev(rdev);
6821 return -EBUSY;
6822 }
6823 set_bit(Journal, &rdev->flags);
6824 }
6825
		/*
		 * Clustered arrays need cluster-wide coordination before the
		 * device is bound: a candidate device is only flagged here,
		 * while a cluster-add is announced to the other nodes first.
		 */
6828 if (mddev_is_clustered(mddev)) {
6829 if (info->state & (1 << MD_DISK_CANDIDATE))
6830 set_bit(Candidate, &rdev->flags);
6831 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6832
6833 err = md_cluster_ops->add_new_disk(mddev, rdev);
6834 if (err) {
6835 export_rdev(rdev);
6836 return err;
6837 }
6838 }
6839 }
6840
6841 rdev->raid_disk = -1;
6842 err = bind_rdev_to_array(rdev, mddev);
6843
6844 if (err)
6845 export_rdev(rdev);
6846
6847 if (mddev_is_clustered(mddev)) {
6848 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6849 if (!err) {
6850 err = md_cluster_ops->new_disk_ack(mddev,
6851 err == 0);
6852 if (err)
6853 md_kick_rdev_from_array(rdev);
6854 }
6855 } else {
6856 if (err)
6857 md_cluster_ops->add_new_disk_cancel(mddev);
6858 else
6859 err = add_bound_rdev(rdev);
6860 }
6861
6862 } else if (!err)
6863 err = add_bound_rdev(rdev);
6864
6865 return err;
6866 }
6867
	/*
	 * Otherwise the array is not running yet: adding disks this way is
	 * only supported for version-0 superblocks.
	 */
6871 if (mddev->major_version != 0) {
6872 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6873 return -EINVAL;
6874 }
6875
6876 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6877 int err;
6878 rdev = md_import_device(dev, -1, 0);
6879 if (IS_ERR(rdev)) {
6880 pr_warn("md: error, md_import_device() returned %ld\n",
6881 PTR_ERR(rdev));
6882 return PTR_ERR(rdev);
6883 }
6884 rdev->desc_nr = info->number;
6885 if (info->raid_disk < mddev->raid_disks)
6886 rdev->raid_disk = info->raid_disk;
6887 else
6888 rdev->raid_disk = -1;
6889
6890 if (rdev->raid_disk < mddev->raid_disks)
6891 if (info->state & (1<<MD_DISK_SYNC))
6892 set_bit(In_sync, &rdev->flags);
6893
6894 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6895 set_bit(WriteMostly, &rdev->flags);
6896 if (info->state & (1<<MD_DISK_FAILFAST))
6897 set_bit(FailFast, &rdev->flags);
6898
6899 if (!mddev->persistent) {
6900 pr_debug("md: nonpersistent superblock ...\n");
6901 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6902 } else
6903 rdev->sb_start = calc_dev_sboffset(rdev);
6904 rdev->sectors = rdev->sb_start;
6905
6906 err = bind_rdev_to_array(rdev, mddev);
6907 if (err) {
6908 export_rdev(rdev);
6909 return err;
6910 }
6911 }
6912
6913 return 0;
6914}
6915
6916static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6917{
6918 char b[BDEVNAME_SIZE];
6919 struct md_rdev *rdev;
6920
6921 if (!mddev->pers)
6922 return -ENODEV;
6923
6924 rdev = find_rdev(mddev, dev);
6925 if (!rdev)
6926 return -ENXIO;
6927
6928 if (rdev->raid_disk < 0)
6929 goto kick_rdev;
6930
6931 clear_bit(Blocked, &rdev->flags);
6932 remove_and_add_spares(mddev, rdev);
6933
6934 if (rdev->raid_disk >= 0)
6935 goto busy;
6936
6937kick_rdev:
6938 if (mddev_is_clustered(mddev)) {
6939 if (md_cluster_ops->remove_disk(mddev, rdev))
6940 goto busy;
6941 }
6942
6943 md_kick_rdev_from_array(rdev);
6944 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6945 if (mddev->thread)
6946 md_wakeup_thread(mddev->thread);
6947 else
6948 md_update_sb(mddev, 1);
6949 md_new_event(mddev);
6950
6951 return 0;
6952busy:
6953 pr_debug("md: cannot remove active disk %s from %s ...\n",
6954 bdevname(rdev->bdev,b), mdname(mddev));
6955 return -EBUSY;
6956}
6957
6958static int hot_add_disk(struct mddev *mddev, dev_t dev)
6959{
6960 char b[BDEVNAME_SIZE];
6961 int err;
6962 struct md_rdev *rdev;
6963
6964 if (!mddev->pers)
6965 return -ENODEV;
6966
6967 if (mddev->major_version != 0) {
6968 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6969 mdname(mddev));
6970 return -EINVAL;
6971 }
6972 if (!mddev->pers->hot_add_disk) {
6973 pr_warn("%s: personality does not support diskops!\n",
6974 mdname(mddev));
6975 return -EINVAL;
6976 }
6977
6978 rdev = md_import_device(dev, -1, 0);
6979 if (IS_ERR(rdev)) {
6980 pr_warn("md: error, md_import_device() returned %ld\n",
6981 PTR_ERR(rdev));
6982 return -EINVAL;
6983 }
6984
6985 if (mddev->persistent)
6986 rdev->sb_start = calc_dev_sboffset(rdev);
6987 else
6988 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6989
6990 rdev->sectors = rdev->sb_start;
6991
6992 if (test_bit(Faulty, &rdev->flags)) {
6993 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6994 bdevname(rdev->bdev,b), mdname(mddev));
6995 err = -EINVAL;
6996 goto abort_export;
6997 }
6998
6999 clear_bit(In_sync, &rdev->flags);
7000 rdev->desc_nr = -1;
7001 rdev->saved_raid_disk = -1;
7002 err = bind_rdev_to_array(rdev, mddev);
7003 if (err)
7004 goto abort_export;
7005
	/*
	 * The device is bound as a spare with no slot assigned; recovery
	 * will pick one for it.
	 */
7011 rdev->raid_disk = -1;
7012
7013 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7014 if (!mddev->thread)
7015 md_update_sb(mddev, 1);
7016
	/*
	 * Kick off recovery so the new spare can be rebuilt into the array
	 * immediately.
	 */
7020 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7021 md_wakeup_thread(mddev->thread);
7022 md_new_event(mddev);
7023 return 0;
7024
7025abort_export:
7026 export_rdev(rdev);
7027 return err;
7028}
7029
7030static int set_bitmap_file(struct mddev *mddev, int fd)
7031{
7032 int err = 0;
7033
7034 if (mddev->pers) {
7035 if (!mddev->pers->quiesce || !mddev->thread)
7036 return -EBUSY;
7037 if (mddev->recovery || mddev->sync_thread)
7038 return -EBUSY;
7039
7040 }
7041
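	/* fd >= 0 attaches a new bitmap file; a negative fd removes the bitmap */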
7042 if (fd >= 0) {
7043 struct inode *inode;
7044 struct file *f;
7045
7046 if (mddev->bitmap || mddev->bitmap_info.file)
7047 return -EEXIST;
7048 f = fget(fd);
7049
7050 if (f == NULL) {
7051 pr_warn("%s: error: failed to get bitmap file\n",
7052 mdname(mddev));
7053 return -EBADF;
7054 }
7055
7056 inode = f->f_mapping->host;
7057 if (!S_ISREG(inode->i_mode)) {
7058 pr_warn("%s: error: bitmap file must be a regular file\n",
7059 mdname(mddev));
7060 err = -EBADF;
7061 } else if (!(f->f_mode & FMODE_WRITE)) {
			pr_warn("%s: error: bitmap file must be opened for write\n",
7063 mdname(mddev));
7064 err = -EBADF;
7065 } else if (atomic_read(&inode->i_writecount) != 1) {
7066 pr_warn("%s: error: bitmap file is already in use\n",
7067 mdname(mddev));
7068 err = -EBUSY;
7069 }
7070 if (err) {
7071 fput(f);
7072 return err;
7073 }
7074 mddev->bitmap_info.file = f;
7075 mddev->bitmap_info.offset = 0;
7076 } else if (mddev->bitmap == NULL)
7077 return -ENOENT;
7078 err = 0;
7079 if (mddev->pers) {
7080 if (fd >= 0) {
7081 struct bitmap *bitmap;
7082
7083 bitmap = md_bitmap_create(mddev, -1);
7084 mddev_suspend(mddev);
7085 if (!IS_ERR(bitmap)) {
7086 mddev->bitmap = bitmap;
7087 err = md_bitmap_load(mddev);
7088 } else
7089 err = PTR_ERR(bitmap);
7090 if (err) {
7091 md_bitmap_destroy(mddev);
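				/* force the cleanup below to drop the bitmap file reference */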
7092 fd = -1;
7093 }
7094 mddev_resume(mddev);
7095 } else if (fd < 0) {
7096 mddev_suspend(mddev);
7097 md_bitmap_destroy(mddev);
7098 mddev_resume(mddev);
7099 }
7100 }
7101 if (fd < 0) {
7102 struct file *f = mddev->bitmap_info.file;
7103 if (f) {
7104 spin_lock(&mddev->lock);
7105 mddev->bitmap_info.file = NULL;
7106 spin_unlock(&mddev->lock);
7107 fput(f);
7108 }
7109 }
7110
7111 return err;
7112}
7113
/*
 * md_set_array_info() is used in two ways:
 *  - with raid_disks == 0 to prepare an array for assembly: only the
 *    superblock version fields and the persistent flag are recorded, so
 *    the right superblock format is looked for on each device;
 *  - with raid_disks > 0 to create a new array, where level, size, layout
 *    and chunk size describe its shape and a 0.90 superblock is assumed.
 */
7127int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
7128{
7129 if (info->raid_disks == 0) {
7130
7131 if (info->major_version < 0 ||
7132 info->major_version >= ARRAY_SIZE(super_types) ||
7133 super_types[info->major_version].name == NULL) {
7134
7135 pr_warn("md: superblock version %d not known\n",
7136 info->major_version);
7137 return -EINVAL;
7138 }
7139 mddev->major_version = info->major_version;
7140 mddev->minor_version = info->minor_version;
7141 mddev->patch_version = info->patch_version;
7142 mddev->persistent = !info->not_persistent;
		/*
		 * Setting ctime gives the mddev a minimal configuration so
		 * that mddev_put() will not discard it.
		 */
7146 mddev->ctime = ktime_get_real_seconds();
7147 return 0;
7148 }
7149 mddev->major_version = MD_MAJOR_VERSION;
7150 mddev->minor_version = MD_MINOR_VERSION;
7151 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7152 mddev->ctime = ktime_get_real_seconds();
7153
7154 mddev->level = info->level;
7155 mddev->clevel[0] = 0;
7156 mddev->dev_sectors = 2 * (sector_t)info->size;
7157 mddev->raid_disks = info->raid_disks;
	/*
	 * md_minor is not set here; it is determined by which /dev/md*
	 * device was opened.
	 */
7161 if (info->state & (1<<MD_SB_CLEAN))
7162 mddev->recovery_cp = MaxSector;
7163 else
7164 mddev->recovery_cp = 0;
	mddev->persistent = !info->not_persistent;
7166 mddev->external = 0;
7167
7168 mddev->layout = info->layout;
7169 if (mddev->level == 0)
7170
7171 mddev->layout = -1;
7172 mddev->chunk_sectors = info->chunk_size >> 9;
7173
7174 if (mddev->persistent) {
7175 mddev->max_disks = MD_SB_DISKS;
7176 mddev->flags = 0;
7177 mddev->sb_flags = 0;
7178 }
7179 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7180
7181 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7182 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7183 mddev->bitmap_info.offset = 0;
7184
7185 mddev->reshape_position = MaxSector;
7186
	/*
	 * Generate a random 128-bit UUID for the new array.
	 */
7190 get_random_bytes(mddev->uuid, 16);
7191
7192 mddev->new_level = mddev->level;
7193 mddev->new_chunk_sectors = mddev->chunk_sectors;
7194 mddev->new_layout = mddev->layout;
7195 mddev->delta_disks = 0;
7196 mddev->reshape_backwards = 0;
7197
7198 return 0;
7199}
7200
7201void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7202{
7203 lockdep_assert_held(&mddev->reconfig_mutex);
7204
7205 if (mddev->external_size)
7206 return;
7207
7208 mddev->array_sectors = array_sectors;
7209}
7210EXPORT_SYMBOL(md_set_array_sectors);
7211
7212static int update_size(struct mddev *mddev, sector_t num_sectors)
7213{
7214 struct md_rdev *rdev;
7215 int rv;
7216 int fit = (num_sectors == 0);
7217 sector_t old_dev_sectors = mddev->dev_sectors;
7218
7219 if (mddev->pers->resize == NULL)
7220 return -EINVAL;
7221
	/*
	 * num_sectors is the amount of each component device to use.  It can
	 * only be changed while no resync or reshape is running, and the new
	 * size must fit on every member device; zero means "use the largest
	 * size that fits everywhere".
	 */
7230 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7231 mddev->sync_thread)
7232 return -EBUSY;
7233 if (mddev->ro)
7234 return -EROFS;
7235
7236 rdev_for_each(rdev, mddev) {
7237 sector_t avail = rdev->sectors;
7238
7239 if (fit && (num_sectors == 0 || num_sectors > avail))
7240 num_sectors = avail;
7241 if (avail < num_sectors)
7242 return -ENOSPC;
7243 }
7244 rv = mddev->pers->resize(mddev, num_sectors);
7245 if (!rv) {
7246 if (mddev_is_clustered(mddev))
7247 md_cluster_ops->update_size(mddev, old_dev_sectors);
7248 else if (mddev->queue) {
7249 set_capacity_and_notify(mddev->gendisk,
7250 mddev->array_sectors);
7251 }
7252 }
7253 return rv;
7254}
7255
7256static int update_raid_disks(struct mddev *mddev, int raid_disks)
7257{
7258 int rv;
7259 struct md_rdev *rdev;
7260
7261 if (mddev->pers->check_reshape == NULL)
7262 return -EINVAL;
7263 if (mddev->ro)
7264 return -EROFS;
7265 if (raid_disks <= 0 ||
7266 (mddev->max_disks && raid_disks >= mddev->max_disks))
7267 return -EINVAL;
7268 if (mddev->sync_thread ||
7269 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7270 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7271 mddev->reshape_position != MaxSector)
7272 return -EBUSY;
7273
7274 rdev_for_each(rdev, mddev) {
7275 if (mddev->raid_disks < raid_disks &&
7276 rdev->data_offset < rdev->new_data_offset)
7277 return -EINVAL;
7278 if (mddev->raid_disks > raid_disks &&
7279 rdev->data_offset > rdev->new_data_offset)
7280 return -EINVAL;
7281 }
7282
7283 mddev->delta_disks = raid_disks - mddev->raid_disks;
7284 if (mddev->delta_disks < 0)
7285 mddev->reshape_backwards = 1;
7286 else if (mddev->delta_disks > 0)
7287 mddev->reshape_backwards = 0;
7288
7289 rv = mddev->pers->check_reshape(mddev);
7290 if (rv < 0) {
7291 mddev->delta_disks = 0;
7292 mddev->reshape_backwards = 0;
7293 }
7294 return rv;
7295}
7296
/*
 * update_array_info() changes the configuration of an active array.
 * The version, ctime, level, persistence and chunk size may not change;
 * at most one of size, raid_disks, layout or the bitmap-present flag may
 * be changed per call.
 */
7305static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7306{
7307 int rv = 0;
7308 int cnt = 0;
7309 int state = 0;
7310
7311
7312 if (mddev->bitmap && mddev->bitmap_info.offset)
7313 state |= (1 << MD_SB_BITMAP_PRESENT);
7314
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore the low state bits; only BITMAP_PRESENT may change */
	    ((state^info->state) & 0xfffffe00))
		return -EINVAL;
7327
7328 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7329 cnt++;
7330 if (mddev->raid_disks != info->raid_disks)
7331 cnt++;
7332 if (mddev->layout != info->layout)
7333 cnt++;
7334 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7335 cnt++;
7336 if (cnt == 0)
7337 return 0;
7338 if (cnt > 1)
7339 return -EINVAL;
7340
7341 if (mddev->layout != info->layout) {
		/*
		 * A layout change needs nothing at the md level; the
		 * personality's check_reshape() does all the work.
		 */
7346 if (mddev->pers->check_reshape == NULL)
7347 return -EINVAL;
7348 else {
7349 mddev->new_layout = info->layout;
7350 rv = mddev->pers->check_reshape(mddev);
7351 if (rv)
7352 mddev->new_layout = mddev->layout;
7353 return rv;
7354 }
7355 }
7356 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7357 rv = update_size(mddev, (sector_t)info->size * 2);
7358
7359 if (mddev->raid_disks != info->raid_disks)
7360 rv = update_raid_disks(mddev, info->raid_disks);
7361
7362 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7363 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7364 rv = -EINVAL;
7365 goto err;
7366 }
7367 if (mddev->recovery || mddev->sync_thread) {
7368 rv = -EBUSY;
7369 goto err;
7370 }
7371 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7372 struct bitmap *bitmap;
7373
7374 if (mddev->bitmap) {
7375 rv = -EEXIST;
7376 goto err;
7377 }
7378 if (mddev->bitmap_info.default_offset == 0) {
7379 rv = -EINVAL;
7380 goto err;
7381 }
7382 mddev->bitmap_info.offset =
7383 mddev->bitmap_info.default_offset;
7384 mddev->bitmap_info.space =
7385 mddev->bitmap_info.default_space;
7386 bitmap = md_bitmap_create(mddev, -1);
7387 mddev_suspend(mddev);
7388 if (!IS_ERR(bitmap)) {
7389 mddev->bitmap = bitmap;
7390 rv = md_bitmap_load(mddev);
7391 } else
7392 rv = PTR_ERR(bitmap);
7393 if (rv)
7394 md_bitmap_destroy(mddev);
7395 mddev_resume(mddev);
7396 } else {
7397
7398 if (!mddev->bitmap) {
7399 rv = -ENOENT;
7400 goto err;
7401 }
7402 if (mddev->bitmap->storage.file) {
7403 rv = -EINVAL;
7404 goto err;
7405 }
7406 if (mddev->bitmap_info.nodes) {
7407
7408 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7409 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7410 rv = -EPERM;
7411 md_cluster_ops->unlock_all_bitmaps(mddev);
7412 goto err;
7413 }
7414
7415 mddev->bitmap_info.nodes = 0;
7416 md_cluster_ops->leave(mddev);
7417 module_put(md_cluster_mod);
7418 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7419 }
7420 mddev_suspend(mddev);
7421 md_bitmap_destroy(mddev);
7422 mddev_resume(mddev);
7423 mddev->bitmap_info.offset = 0;
7424 }
7425 }
7426 md_update_sb(mddev, 1);
7427 return rv;
7428err:
7429 return rv;
7430}
7431
7432static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7433{
7434 struct md_rdev *rdev;
7435 int err = 0;
7436
7437 if (mddev->pers == NULL)
7438 return -ENODEV;
7439
7440 rcu_read_lock();
7441 rdev = md_find_rdev_rcu(mddev, dev);
7442 if (!rdev)
7443 err = -ENODEV;
7444 else {
7445 md_error(mddev, rdev);
7446 if (!test_bit(Faulty, &rdev->flags))
7447 err = -EBUSY;
7448 }
7449 rcu_read_unlock();
7450 return err;
7451}
7452
/*
 * There is no meaningful CHS geometry for an md array, so fake one:
 * 2 heads, 4 sectors per track, and however many cylinders fit.
 */
7459static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7460{
7461 struct mddev *mddev = bdev->bd_disk->private_data;
7462
7463 geo->heads = 2;
7464 geo->sectors = 4;
7465 geo->cylinders = mddev->array_sectors / 8;
7466 return 0;
7467}
7468
7469static inline bool md_ioctl_valid(unsigned int cmd)
7470{
7471 switch (cmd) {
7472 case ADD_NEW_DISK:
7473 case GET_ARRAY_INFO:
7474 case GET_BITMAP_FILE:
7475 case GET_DISK_INFO:
7476 case HOT_ADD_DISK:
7477 case HOT_REMOVE_DISK:
7478 case RAID_VERSION:
7479 case RESTART_ARRAY_RW:
7480 case RUN_ARRAY:
7481 case SET_ARRAY_INFO:
7482 case SET_BITMAP_FILE:
7483 case SET_DISK_FAULTY:
7484 case STOP_ARRAY:
7485 case STOP_ARRAY_RO:
7486 case CLUSTERED_DISK_NACK:
7487 return true;
7488 default:
7489 return false;
7490 }
7491}
7492
7493static int md_ioctl(struct block_device *bdev, fmode_t mode,
7494 unsigned int cmd, unsigned long arg)
7495{
7496 int err = 0;
7497 void __user *argp = (void __user *)arg;
7498 struct mddev *mddev = NULL;
7499 bool did_set_md_closing = false;
7500
7501 if (!md_ioctl_valid(cmd))
7502 return -ENOTTY;
7503
7504 switch (cmd) {
7505 case RAID_VERSION:
7506 case GET_ARRAY_INFO:
7507 case GET_DISK_INFO:
7508 break;
7509 default:
7510 if (!capable(CAP_SYS_ADMIN))
7511 return -EACCES;
7512 }
7513
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7518 switch (cmd) {
7519 case RAID_VERSION:
7520 err = get_version(argp);
7521 goto out;
7522 default:;
7523 }
7524
	/*
	 * All remaining commands operate on a specific array:
	 */
7529 mddev = bdev->bd_disk->private_data;
7530
7531 if (!mddev) {
7532 BUG();
7533 goto out;
7534 }
7535
7536
7537 switch (cmd) {
7538 case GET_ARRAY_INFO:
7539 if (!mddev->raid_disks && !mddev->external)
7540 err = -ENODEV;
7541 else
7542 err = get_array_info(mddev, argp);
7543 goto out;
7544
7545 case GET_DISK_INFO:
7546 if (!mddev->raid_disks && !mddev->external)
7547 err = -ENODEV;
7548 else
7549 err = get_disk_info(mddev, argp);
7550 goto out;
7551
7552 case SET_DISK_FAULTY:
7553 err = set_disk_faulty(mddev, new_decode_dev(arg));
7554 goto out;
7555
7556 case GET_BITMAP_FILE:
7557 err = get_bitmap_file(mddev, argp);
7558 goto out;
7559
7560 }
7561
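	/* make sure previously queued rdev work has finished before adding a disk */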
7562 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7563 flush_rdev_wq(mddev);
7564
7565 if (cmd == HOT_REMOVE_DISK)
7566
7567 wait_event_interruptible_timeout(mddev->sb_wait,
7568 !test_bit(MD_RECOVERY_NEEDED,
7569 &mddev->recovery),
7570 msecs_to_jiffies(5000));
7571 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/*
		 * Stopping needs exclusive access: make sure nobody else has
		 * the array open, then flush dirty pages before proceeding.
		 */
7575 mutex_lock(&mddev->open_mutex);
7576 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7577 mutex_unlock(&mddev->open_mutex);
7578 err = -EBUSY;
7579 goto out;
7580 }
7581 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7582 mutex_unlock(&mddev->open_mutex);
7583 err = -EBUSY;
7584 goto out;
7585 }
7586 did_set_md_closing = true;
7587 mutex_unlock(&mddev->open_mutex);
7588 sync_blockdev(bdev);
7589 }
7590 err = mddev_lock(mddev);
7591 if (err) {
7592 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7593 err, cmd);
7594 goto out;
7595 }
7596
7597 if (cmd == SET_ARRAY_INFO) {
7598 mdu_array_info_t info;
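		/* a NULL argument is treated as an all-zero mdu_array_info_t */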
7599 if (!arg)
7600 memset(&info, 0, sizeof(info));
7601 else if (copy_from_user(&info, argp, sizeof(info))) {
7602 err = -EFAULT;
7603 goto unlock;
7604 }
7605 if (mddev->pers) {
7606 err = update_array_info(mddev, &info);
7607 if (err) {
7608 pr_warn("md: couldn't update array info. %d\n", err);
7609 goto unlock;
7610 }
7611 goto unlock;
7612 }
7613 if (!list_empty(&mddev->disks)) {
7614 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7615 err = -EBUSY;
7616 goto unlock;
7617 }
7618 if (mddev->raid_disks) {
7619 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7620 err = -EBUSY;
7621 goto unlock;
7622 }
7623 err = md_set_array_info(mddev, &info);
7624 if (err) {
7625 pr_warn("md: couldn't set array info. %d\n", err);
7626 goto unlock;
7627 }
7628 goto unlock;
7629 }
7630
	/*
	 * If the array is not yet configured, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE make sense.
	 */
7636 if ((!mddev->raid_disks && !mddev->external)
7637 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7638 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7639 && cmd != GET_BITMAP_FILE) {
7640 err = -ENODEV;
7641 goto unlock;
7642 }
7643
	/*
	 * Commands even a read-only array can execute:
	 */
7647 switch (cmd) {
7648 case RESTART_ARRAY_RW:
7649 err = restart_array(mddev);
7650 goto unlock;
7651
7652 case STOP_ARRAY:
7653 err = do_md_stop(mddev, 0, bdev);
7654 goto unlock;
7655
7656 case STOP_ARRAY_RO:
7657 err = md_set_readonly(mddev, bdev);
7658 goto unlock;
7659
7660 case HOT_REMOVE_DISK:
7661 err = hot_remove_disk(mddev, new_decode_dev(arg));
7662 goto unlock;
7663
7664 case ADD_NEW_DISK:
		/*
		 * Only an in-sync re-add to a running array can be done while
		 * read-only; otherwise fall through to the read-write path.
		 */
7669 if (mddev->pers) {
7670 mdu_disk_info_t info;
7671 if (copy_from_user(&info, argp, sizeof(info)))
7672 err = -EFAULT;
7673 else if (!(info.state & (1<<MD_DISK_SYNC)))
7674
7675 break;
7676 else
7677 err = md_add_new_disk(mddev, &info);
7678 goto unlock;
7679 }
7680 break;
7681 }
7682
	/*
	 * The remaining ioctls change the array state, so a read-only or
	 * auto-read-only array must be switched to read-write first.
	 */
7687 if (mddev->ro && mddev->pers) {
7688 if (mddev->ro == 2) {
7689 mddev->ro = 0;
7690 sysfs_notify_dirent_safe(mddev->sysfs_state);
7691 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/*
			 * If a device failed while the array was read-only,
			 * the superblocks need updating before we continue;
			 * wait for that to finish.
			 */
7696 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7697 mddev_unlock(mddev);
7698 wait_event(mddev->sb_wait,
7699 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7700 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7701 mddev_lock_nointr(mddev);
7702 }
7703 } else {
7704 err = -EROFS;
7705 goto unlock;
7706 }
7707 }
7708
7709 switch (cmd) {
7710 case ADD_NEW_DISK:
7711 {
7712 mdu_disk_info_t info;
7713 if (copy_from_user(&info, argp, sizeof(info)))
7714 err = -EFAULT;
7715 else
7716 err = md_add_new_disk(mddev, &info);
7717 goto unlock;
7718 }
7719
7720 case CLUSTERED_DISK_NACK:
7721 if (mddev_is_clustered(mddev))
7722 md_cluster_ops->new_disk_ack(mddev, false);
7723 else
7724 err = -EINVAL;
7725 goto unlock;
7726
7727 case HOT_ADD_DISK:
7728 err = hot_add_disk(mddev, new_decode_dev(arg));
7729 goto unlock;
7730
7731 case RUN_ARRAY:
7732 err = do_md_run(mddev);
7733 goto unlock;
7734
7735 case SET_BITMAP_FILE:
7736 err = set_bitmap_file(mddev, (int)arg);
7737 goto unlock;
7738
7739 default:
7740 err = -EINVAL;
7741 goto unlock;
7742 }
7743
7744unlock:
7745 if (mddev->hold_active == UNTIL_IOCTL &&
7746 err != -EINVAL)
7747 mddev->hold_active = 0;
7748 mddev_unlock(mddev);
7749out:
	if (did_set_md_closing)
7751 clear_bit(MD_CLOSING, &mddev->flags);
7752 return err;
7753}
7754#ifdef CONFIG_COMPAT
7755static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7756 unsigned int cmd, unsigned long arg)
7757{
7758 switch (cmd) {
7759 case HOT_REMOVE_DISK:
7760 case HOT_ADD_DISK:
7761 case SET_DISK_FAULTY:
7762 case SET_BITMAP_FILE:
		/* these take an integer arg; no pointer conversion needed */
7764 break;
7765 default:
7766 arg = (unsigned long)compat_ptr(arg);
7767 break;
7768 }
7769
7770 return md_ioctl(bdev, mode, cmd, arg);
7771}
7772#endif
7773
7774static int md_set_read_only(struct block_device *bdev, bool ro)
7775{
7776 struct mddev *mddev = bdev->bd_disk->private_data;
7777 int err;
7778
7779 err = mddev_lock(mddev);
7780 if (err)
7781 return err;
7782
7783 if (!mddev->raid_disks && !mddev->external) {
7784 err = -ENODEV;
7785 goto out_unlock;
7786 }
7787
	/*
	 * Clearing read-only on a running read-only array moves it to
	 * "auto-read-only" (ro == 2): it stays read-only until the first
	 * write arrives.
	 */
7792 if (!ro && mddev->ro == 1 && mddev->pers) {
7793 err = restart_array(mddev);
7794 if (err)
7795 goto out_unlock;
7796 mddev->ro = 2;
7797 }
7798
7799out_unlock:
7800 mddev_unlock(mddev);
7801 return err;
7802}
7803
7804static int md_open(struct block_device *bdev, fmode_t mode)
7805{
	/*
	 * Succeed only if we can get a reference to the mddev and it is not
	 * in the middle of being stopped.
	 */
7810 struct mddev *mddev = mddev_find(bdev->bd_dev);
7811 int err;
7812
7813 if (!mddev)
7814 return -ENODEV;
7815
7816 if (mddev->gendisk != bdev->bd_disk) {
		/*
		 * We raced with a concurrent mddev_put(): drop our reference,
		 * wait for any pending deletion work, and retry the open.
		 */
7820 mddev_put(mddev);
7821
7822 if (work_pending(&mddev->del_work))
7823 flush_workqueue(md_misc_wq);
7824
7825 return -ERESTARTSYS;
7826 }
7827 BUG_ON(mddev != bdev->bd_disk->private_data);
7828
7829 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7830 goto out;
7831
7832 if (test_bit(MD_CLOSING, &mddev->flags)) {
7833 mutex_unlock(&mddev->open_mutex);
7834 err = -ENODEV;
7835 goto out;
7836 }
7837
7838 err = 0;
7839 atomic_inc(&mddev->openers);
7840 mutex_unlock(&mddev->open_mutex);
7841
7842 bdev_check_media_change(bdev);
7843 out:
7844 if (err)
7845 mddev_put(mddev);
7846 return err;
7847}
7848
7849static void md_release(struct gendisk *disk, fmode_t mode)
7850{
7851 struct mddev *mddev = disk->private_data;
7852
7853 BUG_ON(!mddev);
7854 atomic_dec(&mddev->openers);
7855 mddev_put(mddev);
7856}
7857
7858static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
7859{
7860 struct mddev *mddev = disk->private_data;
7861 unsigned int ret = 0;
7862
7863 if (mddev->changed)
7864 ret = DISK_EVENT_MEDIA_CHANGE;
7865 mddev->changed = 0;
7866 return ret;
7867}
7868
7869const struct block_device_operations md_fops =
7870{
7871 .owner = THIS_MODULE,
7872 .submit_bio = md_submit_bio,
7873 .open = md_open,
7874 .release = md_release,
7875 .ioctl = md_ioctl,
7876#ifdef CONFIG_COMPAT
7877 .compat_ioctl = md_compat_ioctl,
7878#endif
7879 .getgeo = md_getgeo,
7880 .check_events = md_check_events,
7881 .set_read_only = md_set_read_only,
7882};
7883
7884static int md_thread(void *arg)
7885{
7886 struct md_thread *thread = arg;
7887
	/*
	 * An md thread sleeps until THREAD_WAKEUP is set (or its timeout
	 * expires), runs its ->run handler, and repeats until it is stopped
	 * or parked.  SIGKILL is allowed so the thread can be interrupted,
	 * but signals are otherwise just flushed.
	 */
7900 allow_signal(SIGKILL);
7901 while (!kthread_should_stop()) {
		/*
		 * Sleep interruptibly so the thread does not add to the load
		 * average; that requires no signals be left pending.
		 */
7908 if (signal_pending(current))
7909 flush_signals(current);
7910
7911 wait_event_interruptible_timeout
7912 (thread->wqueue,
7913 test_bit(THREAD_WAKEUP, &thread->flags)
7914 || kthread_should_stop() || kthread_should_park(),
7915 thread->timeout);
7916
7917 clear_bit(THREAD_WAKEUP, &thread->flags);
7918 if (kthread_should_park())
7919 kthread_parkme();
7920 if (!kthread_should_stop())
7921 thread->run(thread);
7922 }
7923
7924 return 0;
7925}
7926
7927void md_wakeup_thread(struct md_thread *thread)
7928{
7929 if (thread) {
7930 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7931 set_bit(THREAD_WAKEUP, &thread->flags);
7932 wake_up(&thread->wqueue);
7933 }
7934}
7935EXPORT_SYMBOL(md_wakeup_thread);
7936
7937struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7938 struct mddev *mddev, const char *name)
7939{
7940 struct md_thread *thread;
7941
7942 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7943 if (!thread)
7944 return NULL;
7945
7946 init_waitqueue_head(&thread->wqueue);
7947
7948 thread->run = run;
7949 thread->mddev = mddev;
7950 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7951 thread->tsk = kthread_run(md_thread, thread,
7952 "%s_%s",
7953 mdname(thread->mddev),
7954 name);
7955 if (IS_ERR(thread->tsk)) {
7956 kfree(thread);
7957 return NULL;
7958 }
7959 return thread;
7960}
7961EXPORT_SYMBOL(md_register_thread);
7962
7963void md_unregister_thread(struct md_thread **threadp)
7964{
7965 struct md_thread *thread = *threadp;
7966 if (!thread)
7967 return;
7968 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/*
	 * Clear *threadp under pers_lock so that md_wakeup_thread() cannot
	 * race with this teardown and wake a thread that no longer exists.
	 */
7972 spin_lock(&pers_lock);
7973 *threadp = NULL;
7974 spin_unlock(&pers_lock);
7975
7976 kthread_stop(thread->tsk);
7977 kfree(thread);
7978}
7979EXPORT_SYMBOL(md_unregister_thread);
7980
7981void md_error(struct mddev *mddev, struct md_rdev *rdev)
7982{
7983 if (!rdev || test_bit(Faulty, &rdev->flags))
7984 return;
7985
7986 if (!mddev->pers || !mddev->pers->error_handler)
7987 return;
7988 mddev->pers->error_handler(mddev,rdev);
7989 if (mddev->degraded)
7990 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7991 sysfs_notify_dirent_safe(rdev->sysfs_state);
7992 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7993 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7994 md_wakeup_thread(mddev->thread);
7995 if (mddev->event_work.func)
7996 queue_work(md_misc_wq, &mddev->event_work);
7997 md_new_event(mddev);
7998}
7999EXPORT_SYMBOL(md_error);
8000
/* seq_file support for /proc/mdstat */
8003static void status_unused(struct seq_file *seq)
8004{
8005 int i = 0;
8006 struct md_rdev *rdev;
8007
8008 seq_printf(seq, "unused devices: ");
8009
8010 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
8011 char b[BDEVNAME_SIZE];
8012 i++;
8013 seq_printf(seq, "%s ",
8014 bdevname(rdev->bdev,b));
8015 }
8016 if (!i)
8017 seq_printf(seq, "<none>");
8018
8019 seq_printf(seq, "\n");
8020}
8021
8022static int status_resync(struct seq_file *seq, struct mddev *mddev)
8023{
8024 sector_t max_sectors, resync, res;
8025 unsigned long dt, db = 0;
8026 sector_t rt, curr_mark_cnt, resync_mark_cnt;
8027 int scale, recovery_active;
8028 unsigned int per_milli;
8029
8030 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8031 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8032 max_sectors = mddev->resync_max_sectors;
8033 else
8034 max_sectors = mddev->dev_sectors;
8035
8036 resync = mddev->curr_resync;
8037 if (resync <= 3) {
8038 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8039
8040 resync = max_sectors;
8041 } else if (resync > max_sectors)
8042 resync = max_sectors;
8043 else
8044 resync -= atomic_read(&mddev->recovery_active);
8045
8046 if (resync == 0) {
8047 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8048 struct md_rdev *rdev;
8049
8050 rdev_for_each(rdev, mddev)
8051 if (rdev->raid_disk >= 0 &&
8052 !test_bit(Faulty, &rdev->flags) &&
8053 rdev->recovery_offset != MaxSector &&
8054 rdev->recovery_offset) {
8055 seq_printf(seq, "\trecover=REMOTE");
8056 return 1;
8057 }
8058 if (mddev->reshape_position != MaxSector)
8059 seq_printf(seq, "\treshape=REMOTE");
8060 else
8061 seq_printf(seq, "\tresync=REMOTE");
8062 return 1;
8063 }
8064 if (mddev->recovery_cp < MaxSector) {
8065 seq_printf(seq, "\tresync=PENDING");
8066 return 1;
8067 }
8068 return 0;
8069 }
8070 if (resync < 3) {
8071 seq_printf(seq, "\tresync=DELAYED");
8072 return 1;
8073 }
8074
8075 WARN_ON(max_sectors == 0);
8076
	/*
	 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t and
	 * (max_sectors>>scale) fits in the u32 that sector_div() requires.
	 */
8081 scale = 10;
8082 if (sizeof(sector_t) > sizeof(unsigned long)) {
8083 while ( max_sectors/2 > (1ULL<<(scale+32)))
8084 scale++;
8085 }
8086 res = (resync>>scale)*1000;
8087 sector_div(res, (u32)((max_sectors>>scale)+1));
8088
8089 per_milli = res;
8090 {
8091 int i, x = per_milli/50, y = 20-x;
8092 seq_printf(seq, "[");
8093 for (i = 0; i < x; i++)
8094 seq_printf(seq, "=");
8095 seq_printf(seq, ">");
8096 for (i = 0; i < y; i++)
8097 seq_printf(seq, ".");
8098 seq_printf(seq, "] ");
8099 }
8100 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8101 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8102 "reshape" :
8103 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8104 "check" :
8105 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8106 "resync" : "recovery"))),
8107 per_milli/10, per_milli % 10,
8108 (unsigned long long) resync/2,
8109 (unsigned long long) max_sectors/2);
8110
	/*
	 * Estimate time to completion from progress since the last mark:
	 *   dt - seconds since the mark
	 *   db - sectors completed since the mark, less I/O still in flight
	 *   rt - estimated time remaining
	 * The divisor is pre-scaled by 32 (and the result shifted back) so
	 * the integer division keeps some precision.
	 */
8128 dt = ((jiffies - mddev->resync_mark) / HZ);
8129 if (!dt) dt++;
8130
8131 curr_mark_cnt = mddev->curr_mark_cnt;
8132 recovery_active = atomic_read(&mddev->recovery_active);
8133 resync_mark_cnt = mddev->resync_mark_cnt;
8134
8135 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8136 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8137
8138 rt = max_sectors - resync;
8139 rt = div64_u64(rt, db/32+1);
8140 rt *= dt;
8141 rt >>= 5;
8142
8143 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8144 ((unsigned long)rt % 60)/6);
8145
8146 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8147 return 1;
8148}
8149
8150static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8151{
8152 struct list_head *tmp;
8153 loff_t l = *pos;
8154 struct mddev *mddev;
8155
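	/*
	 * The iterator uses two sentinel values: (void *)1 for the header
	 * line and (void *)2 for the trailing "unused devices" line; real
	 * entries are mddev pointers.
	 */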
8156 if (l >= 0x10000)
8157 return NULL;
8158 if (!l--)
8159
8160 return (void*)1;
8161
8162 spin_lock(&all_mddevs_lock);
8163 list_for_each(tmp,&all_mddevs)
8164 if (!l--) {
8165 mddev = list_entry(tmp, struct mddev, all_mddevs);
8166 mddev_get(mddev);
8167 spin_unlock(&all_mddevs_lock);
8168 return mddev;
8169 }
8170 spin_unlock(&all_mddevs_lock);
8171 if (!l--)
8172 return (void*)2;
8173 return NULL;
8174}
8175
8176static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8177{
8178 struct list_head *tmp;
8179 struct mddev *next_mddev, *mddev = v;
8180
8181 ++*pos;
8182 if (v == (void*)2)
8183 return NULL;
8184
8185 spin_lock(&all_mddevs_lock);
8186 if (v == (void*)1)
8187 tmp = all_mddevs.next;
8188 else
8189 tmp = mddev->all_mddevs.next;
8190 if (tmp != &all_mddevs)
8191 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8192 else {
8193 next_mddev = (void*)2;
8194 *pos = 0x10000;
8195 }
8196 spin_unlock(&all_mddevs_lock);
8197
8198 if (v != (void*)1)
8199 mddev_put(mddev);
8200 return next_mddev;
8201
8202}
8203
8204static void md_seq_stop(struct seq_file *seq, void *v)
8205{
8206 struct mddev *mddev = v;
8207
8208 if (mddev && v != (void*)1 && v != (void*)2)
8209 mddev_put(mddev);
8210}
8211
8212static int md_seq_show(struct seq_file *seq, void *v)
8213{
8214 struct mddev *mddev = v;
8215 sector_t sectors;
8216 struct md_rdev *rdev;
8217
8218 if (v == (void*)1) {
8219 struct md_personality *pers;
8220 seq_printf(seq, "Personalities : ");
8221 spin_lock(&pers_lock);
8222 list_for_each_entry(pers, &pers_list, list)
8223 seq_printf(seq, "[%s] ", pers->name);
8224
8225 spin_unlock(&pers_lock);
8226 seq_printf(seq, "\n");
8227 seq->poll_event = atomic_read(&md_event_count);
8228 return 0;
8229 }
8230 if (v == (void*)2) {
8231 status_unused(seq);
8232 return 0;
8233 }
8234
8235 spin_lock(&mddev->lock);
8236 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8237 seq_printf(seq, "%s : %sactive", mdname(mddev),
8238 mddev->pers ? "" : "in");
8239 if (mddev->pers) {
8240 if (mddev->ro==1)
8241 seq_printf(seq, " (read-only)");
8242 if (mddev->ro==2)
8243 seq_printf(seq, " (auto-read-only)");
8244 seq_printf(seq, " %s", mddev->pers->name);
8245 }
8246
8247 sectors = 0;
8248 rcu_read_lock();
8249 rdev_for_each_rcu(rdev, mddev) {
8250 char b[BDEVNAME_SIZE];
8251 seq_printf(seq, " %s[%d]",
8252 bdevname(rdev->bdev,b), rdev->desc_nr);
8253 if (test_bit(WriteMostly, &rdev->flags))
8254 seq_printf(seq, "(W)");
8255 if (test_bit(Journal, &rdev->flags))
8256 seq_printf(seq, "(J)");
8257 if (test_bit(Faulty, &rdev->flags)) {
8258 seq_printf(seq, "(F)");
8259 continue;
8260 }
8261 if (rdev->raid_disk < 0)
8262 seq_printf(seq, "(S)");
8263 if (test_bit(Replacement, &rdev->flags))
8264 seq_printf(seq, "(R)");
8265 sectors += rdev->sectors;
8266 }
8267 rcu_read_unlock();
8268
8269 if (!list_empty(&mddev->disks)) {
8270 if (mddev->pers)
8271 seq_printf(seq, "\n %llu blocks",
8272 (unsigned long long)
8273 mddev->array_sectors / 2);
8274 else
8275 seq_printf(seq, "\n %llu blocks",
8276 (unsigned long long)sectors / 2);
8277 }
8278 if (mddev->persistent) {
8279 if (mddev->major_version != 0 ||
8280 mddev->minor_version != 90) {
8281 seq_printf(seq," super %d.%d",
8282 mddev->major_version,
8283 mddev->minor_version);
8284 }
8285 } else if (mddev->external)
8286 seq_printf(seq, " super external:%s",
8287 mddev->metadata_type);
8288 else
8289 seq_printf(seq, " super non-persistent");
8290
8291 if (mddev->pers) {
8292 mddev->pers->status(seq, mddev);
8293 seq_printf(seq, "\n ");
8294 if (mddev->pers->sync_request) {
8295 if (status_resync(seq, mddev))
8296 seq_printf(seq, "\n ");
8297 }
8298 } else
8299 seq_printf(seq, "\n ");
8300
8301 md_bitmap_status(seq, mddev->bitmap);
8302
8303 seq_printf(seq, "\n");
8304 }
8305 spin_unlock(&mddev->lock);
8306
8307 return 0;
8308}
8309
8310static const struct seq_operations md_seq_ops = {
8311 .start = md_seq_start,
8312 .next = md_seq_next,
8313 .stop = md_seq_stop,
8314 .show = md_seq_show,
8315};
8316
8317static int md_seq_open(struct inode *inode, struct file *file)
8318{
8319 struct seq_file *seq;
8320 int error;
8321
8322 error = seq_open(file, &md_seq_ops);
8323 if (error)
8324 return error;
8325
8326 seq = file->private_data;
8327 seq->poll_event = atomic_read(&md_event_count);
8328 return error;
8329}
8330
8331static int md_unloading;
8332static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8333{
8334 struct seq_file *seq = filp->private_data;
8335 __poll_t mask;
8336
8337 if (md_unloading)
8338 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8339 poll_wait(filp, &md_event_waiters, wait);
8340
	/* reading is always possible */
8342 mask = EPOLLIN | EPOLLRDNORM;
8343
8344 if (seq->poll_event != atomic_read(&md_event_count))
8345 mask |= EPOLLERR | EPOLLPRI;
8346 return mask;
8347}
8348
8349static const struct proc_ops mdstat_proc_ops = {
8350 .proc_open = md_seq_open,
8351 .proc_read = seq_read,
8352 .proc_lseek = seq_lseek,
8353 .proc_release = seq_release,
8354 .proc_poll = mdstat_poll,
8355};
8356
8357int register_md_personality(struct md_personality *p)
8358{
8359 pr_debug("md: %s personality registered for level %d\n",
8360 p->name, p->level);
8361 spin_lock(&pers_lock);
8362 list_add_tail(&p->list, &pers_list);
8363 spin_unlock(&pers_lock);
8364 return 0;
8365}
8366EXPORT_SYMBOL(register_md_personality);
8367
8368int unregister_md_personality(struct md_personality *p)
8369{
8370 pr_debug("md: %s personality unregistered\n", p->name);
8371 spin_lock(&pers_lock);
8372 list_del_init(&p->list);
8373 spin_unlock(&pers_lock);
8374 return 0;
8375}
8376EXPORT_SYMBOL(unregister_md_personality);
8377
8378int register_md_cluster_operations(struct md_cluster_operations *ops,
8379 struct module *module)
8380{
8381 int ret = 0;
8382 spin_lock(&pers_lock);
8383 if (md_cluster_ops != NULL)
8384 ret = -EALREADY;
8385 else {
8386 md_cluster_ops = ops;
8387 md_cluster_mod = module;
8388 }
8389 spin_unlock(&pers_lock);
8390 return ret;
8391}
8392EXPORT_SYMBOL(register_md_cluster_operations);
8393
8394int unregister_md_cluster_operations(void)
8395{
8396 spin_lock(&pers_lock);
8397 md_cluster_ops = NULL;
8398 spin_unlock(&pers_lock);
8399 return 0;
8400}
8401EXPORT_SYMBOL(unregister_md_cluster_operations);
8402
8403int md_setup_cluster(struct mddev *mddev, int nodes)
8404{
8405 int ret;
8406 if (!md_cluster_ops)
8407 request_module("md-cluster");
8408 spin_lock(&pers_lock);
	/* take a reference so the md-cluster module cannot be unloaded */
8410 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8412 spin_unlock(&pers_lock);
8413 return -ENOENT;
8414 }
8415 spin_unlock(&pers_lock);
8416
8417 ret = md_cluster_ops->join(mddev, nodes);
8418 if (!ret)
8419 mddev->safemode_delay = 0;
8420 return ret;
8421}
8422
8423void md_cluster_stop(struct mddev *mddev)
8424{
8425 if (!md_cluster_ops)
8426 return;
8427 md_cluster_ops->leave(mddev);
8428 module_put(md_cluster_mod);
8429}
8430
8431static int is_mddev_idle(struct mddev *mddev, int init)
8432{
8433 struct md_rdev *rdev;
8434 int idle;
8435 int curr_events;
8436
8437 idle = 1;
8438 rcu_read_lock();
8439 rdev_for_each_rcu(rdev, mddev) {
8440 struct gendisk *disk = rdev->bdev->bd_disk;
8441 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8442 atomic_read(&disk->sync_io);
		/*
		 * sync_io counts the sectors this array itself has submitted
		 * for resync, while the part_stat figure counts everything,
		 * so curr_events reflects I/O generated by somebody else.
		 * If it has grown by more than a small margin since the last
		 * check, the device is busy with other work and the resync
		 * should back off.  The margin (64 sectors) absorbs counting
		 * skew: sync_io is accounted at submission, part_stat at
		 * completion.
		 */
8465 if (init || curr_events - rdev->last_events > 64) {
8466 rdev->last_events = curr_events;
8467 idle = 0;
8468 }
8469 }
8470 rcu_read_unlock();
8471 return idle;
8472}
8473
8474void md_done_sync(struct mddev *mddev, int blocks, int ok)
8475{
8476
8477 atomic_sub(blocks, &mddev->recovery_active);
8478 wake_up(&mddev->recovery_wait);
8479 if (!ok) {
8480 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8481 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8482 md_wakeup_thread(mddev->thread);
8483
8484 }
8485}
8486EXPORT_SYMBOL(md_done_sync);
8487
/*
 * md_write_start() must be called before every normal write.  It marks
 * the array as active (scheduling a superblock update if necessary) and
 * waits for that update to complete.  A false return means the array is
 * suspended and the write must not proceed.
 */
8495bool md_write_start(struct mddev *mddev, struct bio *bi)
8496{
8497 int did_change = 0;
8498
8499 if (bio_data_dir(bi) != WRITE)
8500 return true;
8501
8502 BUG_ON(mddev->ro == 1);
8503 if (mddev->ro == 2) {
8504
8505 mddev->ro = 0;
8506 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8507 md_wakeup_thread(mddev->thread);
8508 md_wakeup_thread(mddev->sync_thread);
8509 did_change = 1;
8510 }
8511 rcu_read_lock();
8512 percpu_ref_get(&mddev->writes_pending);
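	/* order the writes_pending reference against the in_sync test below */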
8513 smp_mb();
8514 if (mddev->safemode == 1)
8515 mddev->safemode = 0;
8516
8517 if (mddev->in_sync || mddev->sync_checkers) {
8518 spin_lock(&mddev->lock);
8519 if (mddev->in_sync) {
8520 mddev->in_sync = 0;
8521 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8522 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8523 md_wakeup_thread(mddev->thread);
8524 did_change = 1;
8525 }
8526 spin_unlock(&mddev->lock);
8527 }
8528 rcu_read_unlock();
8529 if (did_change)
8530 sysfs_notify_dirent_safe(mddev->sysfs_state);
8531 if (!mddev->has_superblocks)
8532 return true;
8533 wait_event(mddev->sb_wait,
8534 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8535 mddev->suspended);
8536 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8537 percpu_ref_put(&mddev->writes_pending);
8538 return false;
8539 }
8540 return true;
8541}
8542EXPORT_SYMBOL(md_write_start);
8543
/*
 * md_write_inc() takes a further reference on writes_pending for a bio
 * that has already been accounted by md_write_start(); every reference
 * must be dropped again with md_write_end().
 */
8552void md_write_inc(struct mddev *mddev, struct bio *bi)
8553{
8554 if (bio_data_dir(bi) != WRITE)
8555 return;
8556 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8557 percpu_ref_get(&mddev->writes_pending);
8558}
8559EXPORT_SYMBOL(md_write_inc);
8560
8561void md_write_end(struct mddev *mddev)
8562{
8563 percpu_ref_put(&mddev->writes_pending);
8564
8565 if (mddev->safemode == 2)
8566 md_wakeup_thread(mddev->thread);
8567 else if (mddev->safemode_delay)
		/*
		 * The roundup() means the timer is only re-armed once per
		 * safemode_delay interval, however many writes complete.
		 */
8571 mod_timer(&mddev->safemode_timer,
8572 roundup(jiffies, mddev->safemode_delay) +
8573 mddev->safemode_delay);
8574}
8575
8576EXPORT_SYMBOL(md_write_end);
8577
/*
 * md_allow_write() marks the array active so that writes may proceed
 * without first blocking on a superblock update.  Call it before doing a
 * GFP_KERNEL allocation while holding the reconfig mutex.
 */
8584void md_allow_write(struct mddev *mddev)
8585{
8586 if (!mddev->pers)
8587 return;
8588 if (mddev->ro)
8589 return;
8590 if (!mddev->pers->sync_request)
8591 return;
8592
8593 spin_lock(&mddev->lock);
8594 if (mddev->in_sync) {
8595 mddev->in_sync = 0;
8596 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8597 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8598 if (mddev->safemode_delay &&
8599 mddev->safemode == 0)
8600 mddev->safemode = 1;
8601 spin_unlock(&mddev->lock);
8602 md_update_sb(mddev, 0);
8603 sysfs_notify_dirent_safe(mddev->sysfs_state);
8604
8605 wait_event(mddev->sb_wait,
8606 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8607 } else
8608 spin_unlock(&mddev->lock);
8609}
8610EXPORT_SYMBOL_GPL(md_allow_write);
8611
8612#define SYNC_MARKS 10
8613#define SYNC_MARK_STEP (3*HZ)
8614#define UPDATE_FREQUENCY (5*60*HZ)
8615void md_do_sync(struct md_thread *thread)
8616{
8617 struct mddev *mddev = thread->mddev;
8618 struct mddev *mddev2;
8619 unsigned int currspeed = 0, window;
8620 sector_t max_sectors,j, io_sectors, recovery_done;
8621 unsigned long mark[SYNC_MARKS];
8622 unsigned long update_time;
8623 sector_t mark_cnt[SYNC_MARKS];
8624 int last_mark,m;
8625 struct list_head *tmp;
8626 sector_t last_check;
8627 int skipped = 0;
8628 struct md_rdev *rdev;
8629 char *desc, *action = NULL;
8630 struct blk_plug plug;
8631 int ret;
8632
8633
8634 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8635 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8636 return;
8637 if (mddev->ro) {
8638 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8639 return;
8640 }
8641
8642 if (mddev_is_clustered(mddev)) {
8643 ret = md_cluster_ops->resync_start(mddev);
8644 if (ret)
8645 goto skip;
8646
8647 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8648 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8649 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8650 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8651 && ((unsigned long long)mddev->curr_resync_completed
8652 < (unsigned long long)mddev->resync_max_sectors))
8653 goto skip;
8654 }
8655
8656 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8657 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8658 desc = "data-check";
8659 action = "check";
8660 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8661 desc = "requested-resync";
8662 action = "repair";
8663 } else
8664 desc = "resync";
8665 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8666 desc = "reshape";
8667 else
8668 desc = "recovery";
8669
8670 mddev->last_sync_action = action ?: desc;
8671
	/*
	 * curr_resync is overloaded while arbitrating with other arrays that
	 * share physical devices:
	 *   0 - no resync in progress
	 *   2 - checking whether we conflict with another array's resync
	 *   1 - like 2, but we have yielded to the conflicting array
	 *   otherwise - resync is active at that sector
	 * We start at 2 and only proceed once every conflicting array has a
	 * curr_resync below ours; the loser of the (arbitrary, address-based)
	 * tie-break drops to 1 and re-checks from the beginning.
	 */
8688 do {
8689 int mddev2_minor = -1;
8690 mddev->curr_resync = 2;
8691
8692 try_again:
8693 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8694 goto skip;
8695 for_each_mddev(mddev2, tmp) {
8696 if (mddev2 == mddev)
8697 continue;
8698 if (!mddev->parallel_resync
8699 && mddev2->curr_resync
8700 && match_mddev_units(mddev, mddev2)) {
8701 DEFINE_WAIT(wq);
8702 if (mddev < mddev2 && mddev->curr_resync == 2) {
8703
8704 mddev->curr_resync = 1;
8705 wake_up(&resync_wait);
8706 }
8707 if (mddev > mddev2 && mddev->curr_resync == 1)
8708
8709
8710
8711 continue;
8712
				/*
				 * Wait interruptibly so we neither inflate
				 * the load average nor trip the softlockup
				 * detector.
				 */
8716 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8717 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8718 mddev2->curr_resync >= mddev->curr_resync) {
8719 if (mddev2_minor != mddev2->md_minor) {
8720 mddev2_minor = mddev2->md_minor;
8721 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8722 desc, mdname(mddev),
8723 mdname(mddev2));
8724 }
8725 mddev_put(mddev2);
8726 if (signal_pending(current))
8727 flush_signals(current);
8728 schedule();
8729 finish_wait(&resync_wait, &wq);
8730 goto try_again;
8731 }
8732 finish_wait(&resync_wait, &wq);
8733 }
8734 }
8735 } while (mddev->curr_resync < 2);
8736
8737 j = 0;
8738 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/*
		 * A resync covers the range requested by the personality,
		 * which defaults to the device size but may differ.
		 */
8742 max_sectors = mddev->resync_max_sectors;
8743 atomic64_set(&mddev->resync_mismatches, 0);
8744
8745 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8746 j = mddev->resync_min;
8747 else if (!mddev->bitmap)
8748 j = mddev->recovery_cp;
8749
8750 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8751 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the node that started a clustered reshape aborted it,
		 * continue from the recorded reshape_position instead of
		 * starting again from the beginning.
		 */
8757 if (mddev_is_clustered(mddev) &&
8758 mddev->reshape_position != MaxSector)
8759 j = mddev->reshape_position;
8760 } else {
8761
8762 max_sectors = mddev->dev_sectors;
8763 j = MaxSector;
8764 rcu_read_lock();
8765 rdev_for_each_rcu(rdev, mddev)
8766 if (rdev->raid_disk >= 0 &&
8767 !test_bit(Journal, &rdev->flags) &&
8768 !test_bit(Faulty, &rdev->flags) &&
8769 !test_bit(In_sync, &rdev->flags) &&
8770 rdev->recovery_offset < j)
8771 j = rdev->recovery_offset;
8772 rcu_read_unlock();
8773
		/*
		 * If there is a bitmap, briefly quiesce the array so that all
		 * writes started before the spare was added have completed;
		 * otherwise a late write could set a bitmap bit in a region
		 * the recovery has already checked and skipped.
		 */
8782 if (mddev->bitmap) {
8783 mddev->pers->quiesce(mddev, 1);
8784 mddev->pers->quiesce(mddev, 0);
8785 }
8786 }
8787
8788 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8789 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8790 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8791 speed_max(mddev), desc);
8792
8793 is_mddev_idle(mddev, 1);
8794
8795 io_sectors = 0;
8796 for (m = 0; m < SYNC_MARKS; m++) {
8797 mark[m] = jiffies;
8798 mark_cnt[m] = io_sectors;
8799 }
8800 last_mark = 0;
8801 mddev->resync_mark = mark[last_mark];
8802 mddev->resync_mark_cnt = mark_cnt[last_mark];
8803
	/*
	 * Tune reconstruction: re-evaluate progress and speed roughly every
	 * "window" sectors of resync I/O.
	 */
8807 window = 32 * (PAGE_SIZE / 512);
8808 pr_debug("md: using %dk window, over a total of %lluk.\n",
8809 window/2, (unsigned long long)max_sectors/2);
8810
8811 atomic_set(&mddev->recovery_active, 0);
8812 last_check = 0;
8813
8814 if (j>2) {
8815 pr_debug("md: resuming %s of %s from checkpoint.\n",
8816 desc, mdname(mddev));
8817 mddev->curr_resync = j;
8818 } else
8819 mddev->curr_resync = 3;
8820 mddev->curr_resync_completed = j;
8821 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8822 md_new_event(mddev);
8823 update_time = jiffies;
8824
8825 blk_start_plug(&plug);
8826 while (j < max_sectors) {
8827 sector_t sectors;
8828
8829 skipped = 0;
8830
8831 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8832 ((mddev->curr_resync > mddev->curr_resync_completed &&
8833 (mddev->curr_resync - mddev->curr_resync_completed)
8834 > (max_sectors >> 4)) ||
8835 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8836 (j - mddev->curr_resync_completed)*2
8837 >= mddev->resync_max - mddev->curr_resync_completed ||
8838 mddev->curr_resync_completed > mddev->resync_max
8839 )) {
8840
8841 wait_event(mddev->recovery_wait,
8842 atomic_read(&mddev->recovery_active) == 0);
8843 mddev->curr_resync_completed = j;
8844 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8845 j > mddev->recovery_cp)
8846 mddev->recovery_cp = j;
8847 update_time = jiffies;
8848 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8849 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8850 }
8851
8852 while (j >= mddev->resync_max &&
8853 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/*
			 * resync_max is controlled by user space, so we may
			 * wait here indefinitely; sleep interruptibly to keep
			 * the wait off the load average.
			 */
8858 flush_signals(current);
8859 wait_event_interruptible(mddev->recovery_wait,
8860 mddev->resync_max > j
8861 || test_bit(MD_RECOVERY_INTR,
8862 &mddev->recovery));
8863 }
8864
8865 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8866 break;
8867
8868 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8869 if (sectors == 0) {
8870 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8871 break;
8872 }
8873
8874 if (!skipped) {
8875 io_sectors += sectors;
8876 atomic_add(sectors, &mddev->recovery_active);
8877 }
8878
8879 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8880 break;
8881
8882 j += sectors;
8883 if (j > max_sectors)
8884
8885 j = max_sectors;
8886 if (j > 2)
8887 mddev->curr_resync = j;
8888 mddev->curr_mark_cnt = io_sectors;
8889 if (last_check == 0)
			/*
			 * the earliest point at which rebuild progress shows
			 * up in /proc/mdstat
			 */
8893 md_new_event(mddev);
8894
8895 if (last_check + window > io_sectors || j == max_sectors)
8896 continue;
8897
8898 last_check = io_sectors;
8899 repeat:
8900 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8901
8902 int next = (last_mark+1) % SYNC_MARKS;
8903
8904 mddev->resync_mark = mark[next];
8905 mddev->resync_mark_cnt = mark_cnt[next];
8906 mark[next] = jiffies;
8907 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8908 last_mark = next;
8909 }
8910
8911 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8912 break;
8913
		/*
		 * Throttling below cares only about the I/O subsystem, not
		 * CPU load: we slow down when we are above the minimum speed
		 * and either exceed the maximum or the devices are busy with
		 * other I/O.
		 */
8922 cond_resched();
8923
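		/* current speed in KB/sec since the last resync mark */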
8924 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8925 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8926 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8927
8928 if (currspeed > speed_min(mddev)) {
8929 if (currspeed > speed_max(mddev)) {
8930 msleep(500);
8931 goto repeat;
8932 }
8933 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other I/O a chance: wait for our
				 * outstanding resync requests to drain.
				 */
8938 wait_event(mddev->recovery_wait,
8939 !atomic_read(&mddev->recovery_active));
8940 }
8941 }
8942 }
8943 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8944 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8945 ? "interrupted" : "done");
8946
	/* wait for any remaining resync I/O to complete */
8949 blk_finish_plug(&plug);
8950 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8951
8952 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8953 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8954 mddev->curr_resync > 3) {
8955 mddev->curr_resync_completed = mddev->curr_resync;
8956 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8957 }
8958 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8959
8960 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8961 mddev->curr_resync > 3) {
8962 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8963 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8964 if (mddev->curr_resync >= mddev->recovery_cp) {
8965 pr_debug("md: checkpointing %s of %s.\n",
8966 desc, mdname(mddev));
8967 if (test_bit(MD_RECOVERY_ERROR,
8968 &mddev->recovery))
8969 mddev->recovery_cp =
8970 mddev->curr_resync_completed;
8971 else
8972 mddev->recovery_cp =
8973 mddev->curr_resync;
8974 }
8975 } else
8976 mddev->recovery_cp = MaxSector;
8977 } else {
8978 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8979 mddev->curr_resync = MaxSector;
8980 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8981 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8982 rcu_read_lock();
8983 rdev_for_each_rcu(rdev, mddev)
8984 if (rdev->raid_disk >= 0 &&
8985 mddev->delta_disks >= 0 &&
8986 !test_bit(Journal, &rdev->flags) &&
8987 !test_bit(Faulty, &rdev->flags) &&
8988 !test_bit(In_sync, &rdev->flags) &&
8989 rdev->recovery_offset < mddev->curr_resync)
8990 rdev->recovery_offset = mddev->curr_resync;
8991 rcu_read_unlock();
8992 }
8993 }
8994 }
8995 skip:
	/*
	 * Also set CHANGE_PENDING so that other cluster nodes learn about
	 * the pending metadata update; harmless for non-clustered arrays.
	 */
8999 set_mask_bits(&mddev->sb_flags, 0,
9000 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
9001
9002 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9003 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9004 mddev->delta_disks > 0 &&
9005 mddev->pers->finish_reshape &&
9006 mddev->pers->size &&
9007 mddev->queue) {
9008 mddev_lock_nointr(mddev);
9009 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9010 mddev_unlock(mddev);
9011 if (!mddev_is_clustered(mddev))
9012 set_capacity_and_notify(mddev->gendisk,
9013 mddev->array_sectors);
9014 }
9015
9016 spin_lock(&mddev->lock);
9017 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9018
9019 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9020 mddev->resync_min = 0;
9021 mddev->resync_max = MaxSector;
9022 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9023 mddev->resync_min = mddev->curr_resync_completed;
9024 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9025 mddev->curr_resync = 0;
9026 spin_unlock(&mddev->lock);
9027
9028 wake_up(&resync_wait);
9029 md_wakeup_thread(mddev->thread);
9030 return;
9031}
9032EXPORT_SYMBOL_GPL(md_do_sync);
9033
9034static int remove_and_add_spares(struct mddev *mddev,
9035 struct md_rdev *this)
9036{
9037 struct md_rdev *rdev;
9038 int spares = 0;
9039 int removed = 0;
9040 bool remove_some = false;
9041
9042 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9043
9044 return 0;
9045
9046 rdev_for_each(rdev, mddev) {
9047 if ((this == NULL || rdev == this) &&
9048 rdev->raid_disk >= 0 &&
9049 !test_bit(Blocked, &rdev->flags) &&
9050 test_bit(Faulty, &rdev->flags) &&
9051 atomic_read(&rdev->nr_pending)==0) {
			/*
			 * A Faulty, non-Blocked device with nr_pending == 0
			 * can never become busy again, so after one RCU grace
			 * period it can be removed with no further locking.
			 */
9057 remove_some = true;
9058 set_bit(RemoveSynchronized, &rdev->flags);
9059 }
9060 }
9061
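	/*
	 * After the grace period no new references to the flagged devices
	 * can appear, so they can be handed to hot_remove_disk() safely.
	 */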
9062 if (remove_some)
9063 synchronize_rcu();
9064 rdev_for_each(rdev, mddev) {
9065 if ((this == NULL || rdev == this) &&
9066 rdev->raid_disk >= 0 &&
9067 !test_bit(Blocked, &rdev->flags) &&
9068 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9069 (!test_bit(In_sync, &rdev->flags) &&
9070 !test_bit(Journal, &rdev->flags))) &&
9071 atomic_read(&rdev->nr_pending)==0)) {
9072 if (mddev->pers->hot_remove_disk(
9073 mddev, rdev) == 0) {
9074 sysfs_unlink_rdev(mddev, rdev);
9075 rdev->saved_raid_disk = rdev->raid_disk;
9076 rdev->raid_disk = -1;
9077 removed++;
9078 }
9079 }
9080 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9081 clear_bit(RemoveSynchronized, &rdev->flags);
9082 }
9083
9084 if (removed && mddev->kobj.sd)
9085 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9086
9087 if (this && removed)
9088 goto no_add;
9089
9090 rdev_for_each(rdev, mddev) {
9091 if (this && this != rdev)
9092 continue;
9093 if (test_bit(Candidate, &rdev->flags))
9094 continue;
9095 if (rdev->raid_disk >= 0 &&
9096 !test_bit(In_sync, &rdev->flags) &&
9097 !test_bit(Journal, &rdev->flags) &&
9098 !test_bit(Faulty, &rdev->flags))
9099 spares++;
9100 if (rdev->raid_disk >= 0)
9101 continue;
9102 if (test_bit(Faulty, &rdev->flags))
9103 continue;
9104 if (!test_bit(Journal, &rdev->flags)) {
9105 if (mddev->ro &&
9106 ! (rdev->saved_raid_disk >= 0 &&
9107 !test_bit(Bitmap_sync, &rdev->flags)))
9108 continue;
9109
9110 rdev->recovery_offset = 0;
9111 }
9112 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9113 /* failure of sysfs_link_rdev() here is not fatal */
9114 sysfs_link_rdev(mddev, rdev);
9115 if (!test_bit(Journal, &rdev->flags))
9116 spares++;
9117 md_new_event(mddev);
9118 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9119 }
9120 }
9121no_add:
9122 if (removed)
9123 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9124 return spares;
9125}
9126
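/*
 * Worker queued on ->del_work by md_check_recovery(): register the
 * "resync" thread, or clear the recovery state again if that fails.
 */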
9127static void md_start_sync(struct work_struct *ws)
9128{
9129 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9130
9131 mddev->sync_thread = md_register_thread(md_do_sync,
9132 mddev,
9133 "resync");
9134 if (!mddev->sync_thread) {
9135 pr_warn("%s: could not start resync thread...\n",
9136 mdname(mddev));
9137 /* leave the spares where they are, it shouldn't hurt */
9138 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9139 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9140 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9141 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9142 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9143 wake_up(&resync_wait);
9144 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9145 &mddev->recovery))
9146 if (mddev->sysfs_action)
9147 sysfs_notify_dirent_safe(mddev->sysfs_action);
9148 } else
9149 md_wakeup_thread(mddev->sync_thread);
9150 sysfs_notify_dirent_safe(mddev->sysfs_action);
9151 md_new_event(mddev);
9152}
9153
9154/*
9155 * This routine is regularly called by all per-raid-array threads to
9156 * deal with generic issues like resync and super-block update.
9157 * Raid personalities that don't have a thread (linear/raid0) do not
9158 * need this as they never do any recovery or update the superblock.
9159 *
9160 * It does not do any resync itself, but rather "forks" off other
9161 * threads to do that as needed.  When resync is needed it sets
9162 * MD_RECOVERY_RUNNING in ->recovery and creates a thread at
9163 * ->sync_thread.  When that thread finishes it sets MD_RECOVERY_DONE
9164 * and wakes up this routine, which reaps the thread and finishes up.
9165 * This routine also removes any faulty devices (with nr_pending == 0).
9166 *
9167 * The overall approach is:
9168 *  1/ if the superblock needs updating, update it.
9169 *  2/ If a recovery thread is running, don't do anything else.
9170 *  3/ If recovery has finished, clean up, possibly marking spares active.
9171 *  4/ If there is no recovery thread running, start one if needed:
9172 *     - remove any failed drives and/or spares,
9173 *     - add any spares if the array is degraded,
9174 *     - start a resync/recovery as needed.
9175 */
9176void md_check_recovery(struct mddev *mddev)
9177{
9178 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9179 /* Write the superblock - the thread that called mddev_suspend()
9180 * holds reconfig_mutex for us while MD_ALLOW_SB_UPDATE is set.
9181 */
9182 set_bit(MD_UPDATING_SB, &mddev->flags);
9183 smp_mb__after_atomic();
9184 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9185 md_update_sb(mddev, 0);
9186 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9187 wake_up(&mddev->sb_wait);
9188 }
9189
9190 if (mddev->suspended)
9191 return;
9192
9193 if (mddev->bitmap)
9194 md_bitmap_daemon_work(mddev);
9195
9196 if (signal_pending(current)) {
9197 if (mddev->pers->sync_request && !mddev->external) {
9198 pr_debug("md: %s in immediate safe mode\n",
9199 mdname(mddev));
9200 mddev->safemode = 2;
9201 }
9202 flush_signals(current);
9203 }
9204
9205 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9206 return;
9207 if ( ! (
9208 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9209 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9210 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9211 (mddev->external == 0 && mddev->safemode == 1) ||
9212 (mddev->safemode == 2
9213 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9214 ))
9215 return;
9216
9217 if (mddev_trylock(mddev)) {
9218 int spares = 0;
9219 bool try_set_sync = mddev->safemode != 0;
9220
9221 if (!mddev->external && mddev->safemode == 1)
9222 mddev->safemode = 0;
9223
9224 if (mddev->ro) {
9225 struct md_rdev *rdev;
9226 if (!mddev->external && mddev->in_sync)
9227 /* The 'Blocked' flag is not needed here, as failed devices
9228 * will be recorded if the array is switched to read-write.
9229 * Leaving it set would only prevent the device from being
9230 * removed.
9231 */
9232 rdev_for_each(rdev, mddev)
9233 clear_bit(Blocked, &rdev->flags);
9234 /* On a read-only array we can:
9235 * - remove failed devices,
9236 * - not add already-in-sync devices,
9237 * - and never turn the array to read-write.
9238 */
9239
9240
9241 remove_and_add_spares(mddev, NULL);
9242 /* There is no sync thread, but we still call
9243 * md_reap_sync_thread() so that saved_raid_disk and the
9244 * recovery state get cleared. */
9245 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9246 md_reap_sync_thread(mddev);
9247 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9248 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9249 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9250 goto unlock;
9251 }
9252
9253 if (mddev_is_clustered(mddev)) {
9254 struct md_rdev *rdev;
9255 /* Kick the device out of the array if another node
9256 * issued a remove-disk for it.
9257 */
9258 rdev_for_each(rdev, mddev) {
9259 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9260 rdev->raid_disk < 0)
9261 md_kick_rdev_from_array(rdev);
9262 }
9263 }
9264
9265 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9266 spin_lock(&mddev->lock);
9267 set_in_sync(mddev);
9268 spin_unlock(&mddev->lock);
9269 }
9270
9271 if (mddev->sb_flags)
9272 md_update_sb(mddev, 0);
9273
9274 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9275 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9276 /* resync/recovery still happening */
9277 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9278 goto unlock;
9279 }
9280 if (mddev->sync_thread) {
9281 md_reap_sync_thread(mddev);
9282 goto unlock;
9283 }
9284 /* Set RUNNING before clearing NEEDED to avoid any
9285 * transients in the value of "sync_action".
9286 */
9287 mddev->curr_resync_completed = 0;
9288 spin_lock(&mddev->lock);
9289 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9290 spin_unlock(&mddev->lock);
9291
9292 /* Clear some bits that don't mean anything here but might
9293 * be left over from a previous run. */
9294 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9295 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9296
9297 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9298 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9299 goto not_running;
9300 /* No recovery is running.
9301 * Remove any failed drives, then add spares if possible.
9302 * Spares are also removed and re-added, to allow the
9303 * personality to fail the re-add.
9304 */
9305
9306
9307 if (mddev->reshape_position != MaxSector) {
9308 if (mddev->pers->check_reshape == NULL ||
9309 mddev->pers->check_reshape(mddev) != 0)
9310 /* Cannot proceed with the reshape */
9311 goto not_running;
9312 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9313 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9314 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9315 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9316 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9317 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9318 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9319 } else if (mddev->recovery_cp < MaxSector) {
9320 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9321 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9322 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9323 /* nothing to be done */
9324 goto not_running;
9325
9326 if (mddev->pers->sync_request) {
9327 if (spares) {
9328 /* We are adding a device or devices to an array which
9329 * has a bitmap stored on all devices, so make sure all
9330 * bitmap pages get written.
9331 */
9332 md_bitmap_write_all(mddev->bitmap);
9333 }
9334 INIT_WORK(&mddev->del_work, md_start_sync);
9335 queue_work(md_misc_wq, &mddev->del_work);
9336 goto unlock;
9337 }
9338 not_running:
9339 if (!mddev->sync_thread) {
9340 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9341 wake_up(&resync_wait);
9342 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9343 &mddev->recovery))
9344 if (mddev->sysfs_action)
9345 sysfs_notify_dirent_safe(mddev->sysfs_action);
9346 }
9347 unlock:
9348 wake_up(&mddev->sb_wait);
9349 mddev_unlock(mddev);
9350 }
9351}
9352EXPORT_SYMBOL(md_check_recovery);
9353
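/*
 * Called when a sync thread has finished: unregister the thread, activate
 * any spares, finish a reshape if one was running, write out the
 * superblocks and clear the recovery state.
 */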
9354void md_reap_sync_thread(struct mddev *mddev)
9355{
9356 struct md_rdev *rdev;
9357 sector_t old_dev_sectors = mddev->dev_sectors;
9358 bool is_reshaped = false;
9359
9360 /* resync has finished, collect the result */
9361 md_unregister_thread(&mddev->sync_thread);
9362 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9363 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9364 mddev->degraded != mddev->raid_disks) {
9365 /* The sync completed successfully: activate any spares
9366 * that are now in sync. */
9367 if (mddev->pers->spare_active(mddev)) {
9368 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9369 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9370 }
9371 }
9372 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9373 mddev->pers->finish_reshape) {
9374 mddev->pers->finish_reshape(mddev);
9375 if (mddev_is_clustered(mddev))
9376 is_reshaped = true;
9377 }
9378
9379 /* If the array is no longer degraded, then any saved_raid_disk
9380 * information must be scrapped.
9381 */
9382 if (!mddev->degraded)
9383 rdev_for_each(rdev, mddev)
9384 rdev->saved_raid_disk = -1;
9385
9386 md_update_sb(mddev, 1);
9387 /* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we
9388 * can call resync_finish here if MD_CLUSTER_RESYNC_LOCKED was set
9389 * by clustered raid */
9390 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9391 md_cluster_ops->resync_finish(mddev);
9392 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9393 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9394 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9395 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9396 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9397 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9398 /*
9399 * Call md_cluster_ops->update_size here because sync_size could
9400 * have been changed by md_update_sb, and MD_RECOVERY_RESHAPE is
9401 * now cleared, so it is time to update the size across the cluster.
9402 */
9403 if (mddev_is_clustered(mddev) && is_reshaped
9404 && !test_bit(MD_CLOSING, &mddev->flags))
9405 md_cluster_ops->update_size(mddev, old_dev_sectors);
9406 wake_up(&resync_wait);
9407 /* flag recovery needed just to double check */
9408 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9409 sysfs_notify_dirent_safe(mddev->sysfs_action);
9410 md_new_event(mddev);
9411 if (mddev->event_work.func)
9412 queue_work(md_misc_wq, &mddev->event_work);
9413}
9414EXPORT_SYMBOL(md_reap_sync_thread);
9415
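/*
 * Wait (up to five seconds) for a blocked rdev to become unblocked, then
 * drop the pending reference taken by the caller.
 */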
9416void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9417{
9418 sysfs_notify_dirent_safe(rdev->sysfs_state);
9419 wait_event_timeout(rdev->blocked_wait,
9420 !test_bit(Blocked, &rdev->flags) &&
9421 !test_bit(BlockedBadBlocks, &rdev->flags),
9422 msecs_to_jiffies(5000));
9423 rdev_dec_pending(rdev, mddev);
9424}
9425EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9426
9427void md_finish_reshape(struct mddev *mddev)
9428{
9429 /* Commit the new data offset on every member device. */
9430 struct md_rdev *rdev;
9431
9432 rdev_for_each(rdev, mddev) {
9433 if (rdev->data_offset > rdev->new_data_offset)
9434 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9435 else
9436 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9437 rdev->data_offset = rdev->new_data_offset;
9438 }
9439}
9440EXPORT_SYMBOL(md_finish_reshape);
9441
9442/* Bad block management */
9443
9444/* Returns 1 on success, 0 on failure */
9445int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9446 int is_new)
9447{
9448 struct mddev *mddev = rdev->mddev;
9449 int rv;
9450 if (is_new)
9451 s += rdev->new_data_offset;
9452 else
9453 s += rdev->data_offset;
9454 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9455 if (rv == 0) {
9456 /* Make sure the bad-block change gets written out promptly */
9457 if (test_bit(ExternalBbl, &rdev->flags))
9458 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9459 sysfs_notify_dirent_safe(rdev->sysfs_state);
9460 set_mask_bits(&mddev->sb_flags, 0,
9461 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9462 md_wakeup_thread(rdev->mddev->thread);
9463 return 1;
9464 } else
9465 return 0;
9466}
9467EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9468
9469int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9470 int is_new)
9471{
9472 int rv;
9473 if (is_new)
9474 s += rdev->new_data_offset;
9475 else
9476 s += rdev->data_offset;
9477 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9478 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9479 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
9480 return rv;
9481}
9482EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9483
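/*
 * Reboot notifier: stop writes on every array we can lock so the
 * superblocks are consistent across the reboot.
 */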
9484static int md_notify_reboot(struct notifier_block *this,
9485 unsigned long code, void *x)
9486{
9487 struct list_head *tmp;
9488 struct mddev *mddev;
9489 int need_delay = 0;
9490
9491 for_each_mddev(mddev, tmp) {
9492 if (mddev_trylock(mddev)) {
9493 if (mddev->pers)
9494 __md_stop_writes(mddev);
9495 if (mddev->persistent)
9496 mddev->safemode = 2;
9497 mddev_unlock(mddev);
9498 }
9499 need_delay = 1;
9500 }
9501
9502 /*
9503 * Certain devices are known to lose writes that are issued too
9504 * close to a reboot, so give the superblock writes started above
9505 * a moment to reach the media before continuing.
9506 */
9507 if (need_delay)
9508 mdelay(1000*1);
9509
9510 return NOTIFY_DONE;
9511}
9512
9513static struct notifier_block md_notifier = {
9514 .notifier_call = md_notify_reboot,
9515 .next = NULL,
9516 .priority = INT_MAX,
9517};
9518
9519static void md_geninit(void)
9520{
9521 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9522
9523 proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
9524}
9525
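/*
 * Allocate the md workqueues, register the block majors, the reboot
 * notifier and the raid sysctl table.
 */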
9526static int __init md_init(void)
9527{
9528 int ret = -ENOMEM;
9529
9530 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9531 if (!md_wq)
9532 goto err_wq;
9533
9534 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9535 if (!md_misc_wq)
9536 goto err_misc_wq;
9537
9538 md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
9539 if (!md_rdev_misc_wq)
9540 goto err_rdev_misc_wq;
9541
9542 ret = __register_blkdev(MD_MAJOR, "md", md_probe);
9543 if (ret < 0)
9544 goto err_md;
9545
9546 ret = __register_blkdev(0, "mdp", md_probe);
9547 if (ret < 0)
9548 goto err_mdp;
9549 mdp_major = ret;
9550
9551 register_reboot_notifier(&md_notifier);
9552 raid_table_header = register_sysctl_table(raid_root_table);
9553
9554 md_geninit();
9555 return 0;
9556
9557err_mdp:
9558 unregister_blkdev(MD_MAJOR, "md");
9559err_md:
9560 destroy_workqueue(md_rdev_misc_wq);
9561err_rdev_misc_wq:
9562 destroy_workqueue(md_misc_wq);
9563err_misc_wq:
9564 destroy_workqueue(md_wq);
9565err_wq:
9566 return ret;
9567}
9568
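/*
 * Clustered md: apply superblock changes made by another node (array
 * size, device roles, reshape progress) to the local mddev.
 */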
9569static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9570{
9571 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9572 struct md_rdev *rdev2;
9573 int role, ret;
9574 char b[BDEVNAME_SIZE];
9575
9576 /*
9577 * If the array size was changed on another node, we need to
9578 * resize locally as well.
9579 */
9580 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9581 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9582 if (ret)
9583 pr_info("md-cluster: resize failed\n");
9584 else
9585 md_bitmap_update_sb(mddev->bitmap);
9586 }
9587
9588 /* Check for a change of roles in the active devices */
9589 rdev_for_each(rdev2, mddev) {
9590 if (test_bit(Faulty, &rdev2->flags))
9591 continue;
9592
9593 /* Check if this device's role changed */
9594 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9595
9596 if (test_bit(Candidate, &rdev2->flags)) {
9597 if (role == 0xfffe) {
9598 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9599 md_kick_rdev_from_array(rdev2);
9600 continue;
9601 }
9602 else
9603 clear_bit(Candidate, &rdev2->flags);
9604 }
9605
9606 if (role != rdev2->raid_disk) {
9607 /*
9608 * A local spare got activated on another node, unless a
9609 * reshape is in progress. */
9610 if (rdev2->raid_disk == -1 && role != 0xffff &&
9611 !(le32_to_cpu(sb->feature_map) &
9612 MD_FEATURE_RESHAPE_ACTIVE)) {
9613 rdev2->saved_raid_disk = role;
9614 ret = remove_and_add_spares(mddev, rdev2);
9615 pr_info("Activated spare: %s\n",
9616 bdevname(rdev2->bdev,b));
9617 /* Wake up mddev->thread so the array can start resync
9618 * with the newly activated disk */
9619 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9620 md_wakeup_thread(mddev->thread);
9621 }
9622 /*
9623 * The device was failed on the other node.  Just do the
9624 * minimum here to mark the disk faulty; the recovery is
9625 * handled by the node that initiated the error.
9626 */
9627 if ((role == 0xfffe) || (role == 0xfffd)) {
9628 md_error(mddev, rdev2);
9629 clear_bit(Blocked, &rdev2->flags);
9630 }
9631 }
9632 }
9633
9634 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9635 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9636 if (ret)
9637 pr_warn("md: updating array disks failed. %d\n", ret);
9638 }
9639
9640 /*
9641 * mddev->delta_disks has already been updated in
9642 * update_raid_disks(), so now check whether a reshape is needed.
9643 */
9644 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9645 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9646 /*
9647 * A reshape is happening on the remote node: update the local
9648 * reshape_position and start the reshape here as well.
9649 */
9650 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9651 if (mddev->pers->update_reshape_pos)
9652 mddev->pers->update_reshape_pos(mddev);
9653 if (mddev->pers->start_reshape)
9654 mddev->pers->start_reshape(mddev);
9655 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9656 mddev->reshape_position != MaxSector &&
9657 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9658 /* The remote node is finishing a reshape */
9659 mddev->reshape_position = MaxSector;
9660 if (mddev->pers->update_reshape_pos)
9661 mddev->pers->update_reshape_pos(mddev);
9662 }
9663
9664 /* Finally, bring the event count up to date */
9665 mddev->events = le64_to_cpu(sb->events);
9666}
9667
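/*
 * Re-read the superblock of a single member device, restoring the
 * previous copy if the read fails.
 */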
9668static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9669{
9670 int err;
9671 struct page *swapout = rdev->sb_page;
9672 struct mdp_superblock_1 *sb;
9673
9674
9675 /* The sb page was saved in 'swapout' above so it can be
9676 * restored if re-reading the superblock fails. */
9677 rdev->sb_page = NULL;
9678 err = alloc_disk_sb(rdev);
9679 if (err == 0) {
9680 ClearPageUptodate(rdev->sb_page);
9681 rdev->sb_loaded = 0;
9682 err = super_types[mddev->major_version].
9683 load_super(rdev, NULL, mddev->minor_version);
9684 }
9685 if (err < 0) {
9686 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9687 __func__, __LINE__, rdev->desc_nr, err);
9688 if (rdev->sb_page)
9689 put_page(rdev->sb_page);
9690 rdev->sb_page = swapout;
9691 rdev->sb_loaded = 1;
9692 return err;
9693 }
9694
9695 sb = page_address(rdev->sb_page);
9696
9697 /* Pick up the recovery offset recorded by the other node, if
9698 * the superblock says it is valid.
9699 */
9700 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9701 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9702
9703 /* The other node finished recovery: call spare_active() so the
9704 * device is marked In_sync and mddev->degraded is decreased.
9705 */
9706 if (rdev->recovery_offset == MaxSector &&
9707 !test_bit(In_sync, &rdev->flags) &&
9708 mddev->pers->spare_active(mddev))
9709 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9710
9711 put_page(swapout);
9712 return 0;
9713}
9714
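/*
 * Re-read the superblock of the rdev with descriptor number @nr and apply
 * any changes recorded there by another cluster node.
 */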
9715void md_reload_sb(struct mddev *mddev, int nr)
9716{
9717 struct md_rdev *rdev;
9718 int err;
9719
9720 /* Find the rdev with this descriptor number */
9721 rdev_for_each_rcu(rdev, mddev) {
9722 if (rdev->desc_nr == nr)
9723 break;
9724 }
9725
9726 if (!rdev || rdev->desc_nr != nr) {
9727 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9728 return;
9729 }
9730
9731 err = read_rdev(mddev, rdev);
9732 if (err < 0)
9733 return;
9734
9735 check_sb_changes(mddev, rdev);
9736
9737 /* Re-read all rdevs to pick up any changed recovery offsets */
9738 rdev_for_each_rcu(rdev, mddev) {
9739 if (!test_bit(Faulty, &rdev->flags))
9740 read_rdev(mddev, rdev);
9741 }
9742}
9743EXPORT_SYMBOL(md_reload_sb);
9744
9745#ifndef MODULE
9746
9747/*
9748 * Searches all registered partitions for autorun RAID arrays
9749 * at boot time.
9750 */
9751
9752static DEFINE_MUTEX(detected_devices_mutex);
9753static LIST_HEAD(all_detected_devices);
9754struct detected_devices_node {
9755 struct list_head list;
9756 dev_t dev;
9757};
9758
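/*
 * Remember a device detected at boot so md_autostart_arrays() can try to
 * assemble arrays from it later.
 */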
9759void md_autodetect_dev(dev_t dev)
9760{
9761 struct detected_devices_node *node_detected_dev;
9762
9763 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9764 if (node_detected_dev) {
9765 node_detected_dev->dev = dev;
9766 mutex_lock(&detected_devices_mutex);
9767 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9768 mutex_unlock(&detected_devices_mutex);
9769 }
9770}
9771
9772void md_autostart_arrays(int part)
9773{
9774 struct md_rdev *rdev;
9775 struct detected_devices_node *node_detected_dev;
9776 dev_t dev;
9777 int i_scanned, i_passed;
9778
9779 i_scanned = 0;
9780 i_passed = 0;
9781
9782 pr_info("md: Autodetecting RAID arrays.\n");
9783
9784 mutex_lock(&detected_devices_mutex);
9785 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9786 i_scanned++;
9787 node_detected_dev = list_entry(all_detected_devices.next,
9788 struct detected_devices_node, list);
9789 list_del(&node_detected_dev->list);
9790 dev = node_detected_dev->dev;
9791 kfree(node_detected_dev);
9792 mutex_unlock(&detected_devices_mutex);
9793 rdev = md_import_device(dev,0, 90);
9794 mutex_lock(&detected_devices_mutex);
9795 if (IS_ERR(rdev))
9796 continue;
9797
9798 if (test_bit(Faulty, &rdev->flags))
9799 continue;
9800
9801 set_bit(AutoDetected, &rdev->flags);
9802 list_add(&rdev->same_set, &pending_raid_disks);
9803 i_passed++;
9804 }
9805 mutex_unlock(&detected_devices_mutex);
9806
9807 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9808
9809 autorun_devices(part);
9810}
9811
9812#endif
9813
9814static __exit void md_exit(void)
9815{
9816 struct mddev *mddev;
9817 struct list_head *tmp;
9818 int delay = 1;
9819
9820 unregister_blkdev(MD_MAJOR,"md");
9821 unregister_blkdev(mdp_major, "mdp");
9822 unregister_reboot_notifier(&md_notifier);
9823 unregister_sysctl_table(raid_table_header);
9824
9825 /* We cannot unload the module while some process is waiting
9826 * for us in select() or poll() - wake them up first.
9827 */
9828 md_unloading = 1;
9829 while (waitqueue_active(&md_event_waiters)) {
9830 /* not safe to leave yet */
9831 wake_up(&md_event_waiters);
9832 msleep(delay);
9833 delay += delay;
9834 }
9835 remove_proc_entry("mdstat", NULL);
9836
9837 for_each_mddev(mddev, tmp) {
9838 export_array(mddev);
9839 mddev->ctime = 0;
9840 mddev->hold_active = 0;
9841 /*
9842 * for_each_mddev() will call mddev_put() at the end of each
9843 * iteration.  As the mddev is now fully clear, this schedules
9844 * the mddev for destruction by a workqueue, and the
9845 * destroy_workqueue() calls below will wait for that to complete.
9846 */
9847 }
9848 destroy_workqueue(md_rdev_misc_wq);
9849 destroy_workqueue(md_misc_wq);
9850 destroy_workqueue(md_wq);
9851}
9852
9853subsys_initcall(md_init);
9854module_exit(md_exit)
9855
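/* Handlers for the "start_ro" module parameter (global start_readonly). */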
9856static int get_ro(char *buffer, const struct kernel_param *kp)
9857{
9858 return sprintf(buffer, "%d\n", start_readonly);
9859}
9860static int set_ro(const char *val, const struct kernel_param *kp)
9861{
9862 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9863}
9864
9865module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9866module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9867module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9868module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9869
9870MODULE_LICENSE("GPL");
9871MODULE_DESCRIPTION("MD RAID framework");
9872MODULE_ALIAS("md");
9873MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9874