// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * md.c : Multiple Devices driver for Linux
 */
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is a list of registered personalities protected
 * by pers_lock.
 * pers_lock does extra service to protect accesses to
 * mddev->thread when the mutex cannot be held.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
static struct workqueue_struct *md_rdev_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}
130
131static void rdev_uninit_serial(struct md_rdev *rdev)
132{
133 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
134 return;
135
136 kvfree(rdev->serial);
137 rdev->serial = NULL;
138}
139
140static void rdevs_uninit_serial(struct mddev *mddev)
141{
142 struct md_rdev *rdev;
143
144 rdev_for_each(rdev, mddev)
145 rdev_uninit_serial(rdev);
146}
147
148static int rdev_init_serial(struct md_rdev *rdev)
149{
	/* serial_nums equals with BARRIER_BUCKETS_NR */
	int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
152 struct serial_in_rdev *serial = NULL;
153
154 if (test_bit(CollisionCheck, &rdev->flags))
155 return 0;
156
157 serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
158 GFP_KERNEL);
159 if (!serial)
160 return -ENOMEM;
161
162 for (i = 0; i < serial_nums; i++) {
163 struct serial_in_rdev *serial_tmp = &serial[i];
164
165 spin_lock_init(&serial_tmp->serial_lock);
166 serial_tmp->serial_rb = RB_ROOT_CACHED;
167 init_waitqueue_head(&serial_tmp->serial_io_wait);
168 }
169
170 rdev->serial = serial;
171 set_bit(CollisionCheck, &rdev->flags);
172
173 return 0;
174}
175
176static int rdevs_init_serial(struct mddev *mddev)
177{
178 struct md_rdev *rdev;
179 int ret = 0;
180
181 rdev_for_each(rdev, mddev) {
182 ret = rdev_init_serial(rdev);
183 if (ret)
184 break;
185 }
186
187
188 if (ret && !mddev->serial_info_pool)
189 rdevs_uninit_serial(mddev);
190
191 return ret;
192}
193

/*
 * rdev needs to enable serial stuffs if it meets the conditions:
 * 1. it is a multi-queue device flagged with writemostly.
 * 2. the write-behind mode is enabled.
 */
static int rdev_need_serial(struct md_rdev *rdev)
{
	return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
		rdev->bdev->bd_queue->nr_hw_queues != 1 &&
		test_bit(WriteMostly, &rdev->flags));
}

/*
 * Init resource for rdev(s), then create serial_info_pool if:
 * 1. rdev is the first device which return true from rdev_enable_serial.
 * 2. rdev is NULL, means we want to enable serialization for all rdevs.
 */
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			      bool is_suspend)
213{
214 int ret = 0;
215
216 if (rdev && !rdev_need_serial(rdev) &&
217 !test_bit(CollisionCheck, &rdev->flags))
218 return;
219
220 if (!is_suspend)
221 mddev_suspend(mddev);
222
223 if (!rdev)
224 ret = rdevs_init_serial(mddev);
225 else
226 ret = rdev_init_serial(rdev);
227 if (ret)
228 goto abort;
229
	if (mddev->serial_info_pool == NULL) {
		/*
		 * already in memalloc noio context by
		 * mddev_suspend()
		 */
		mddev->serial_info_pool =
236 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
237 sizeof(struct serial_info));
238 if (!mddev->serial_info_pool) {
239 rdevs_uninit_serial(mddev);
240 pr_err("can't alloc memory pool for serialization\n");
241 }
242 }
243
244abort:
245 if (!is_suspend)
246 mddev_resume(mddev);
247}
248
/*
 * Free resource from rdev(s), and destroy serial_info_pool under conditions:
 * 1. rdev is the last device flagged with CollisionCheck.
 * 2. when bitmap is destroyed while policy is not enabled.
 * 3. for disable policy, the pool is destroyed only when no rdev needs it.
 */
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			       bool is_suspend)
257{
258 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
259 return;
260
261 if (mddev->serial_info_pool) {
262 struct md_rdev *temp;
263 int num = 0;
264
265 if (!is_suspend)
266 mddev_suspend(mddev);
267 rdev_for_each(temp, mddev) {
268 if (!rdev) {
269 if (!mddev->serialize_policy ||
270 !rdev_need_serial(temp))
271 rdev_uninit_serial(temp);
272 else
273 num++;
274 } else if (temp != rdev &&
275 test_bit(CollisionCheck, &temp->flags))
276 num++;
277 }
278
279 if (rdev)
280 rdev_uninit_serial(rdev);
281
282 if (num)
283 pr_info("The mempool could be used by other devices\n");
284 else {
285 mempool_destroy(mddev->serial_info_pool);
286 mddev->serial_info_pool = NULL;
287 }
288 if (!is_suspend)
289 mddev_resume(mddev);
290 }
291}
292
293static struct ctl_table_header *raid_table_header;
294
295static struct ctl_table raid_table[] = {
296 {
297 .procname = "speed_limit_min",
298 .data = &sysctl_speed_limit_min,
299 .maxlen = sizeof(int),
300 .mode = S_IRUGO|S_IWUSR,
301 .proc_handler = proc_dointvec,
302 },
303 {
304 .procname = "speed_limit_max",
305 .data = &sysctl_speed_limit_max,
306 .maxlen = sizeof(int),
307 .mode = S_IRUGO|S_IWUSR,
308 .proc_handler = proc_dointvec,
309 },
310 { }
311};
312
313static struct ctl_table raid_dir_table[] = {
314 {
315 .procname = "raid",
316 .maxlen = 0,
317 .mode = S_IRUGO|S_IXUGO,
318 .child = raid_table,
319 },
320 { }
321};
322
323static struct ctl_table raid_root_table[] = {
324 {
325 .procname = "dev",
326 .maxlen = 0,
327 .mode = 0555,
328 .child = raid_dir_table,
329 },
330 { }
331};
332
333static const struct block_device_operations md_fops;
334
335static int start_readonly;

/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter.
 * This can avoid races.
 * Setting create_on_open to false disables the original mechanism
 * so all the races disappear.
 */
static bool create_on_open = true;
346
347struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
348 struct mddev *mddev)
349{
350 if (!mddev || !bioset_initialized(&mddev->bio_set))
351 return bio_alloc(gfp_mask, nr_iovecs);
352
353 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
354}
355EXPORT_SYMBOL_GPL(bio_alloc_mddev);
356
357static struct bio *md_bio_alloc_sync(struct mddev *mddev)
358{
359 if (!mddev || !bioset_initialized(&mddev->sync_set))
360 return bio_alloc(GFP_NOIO, 1);
361
362 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
363}
364
/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
377void md_new_event(struct mddev *mddev)
378{
379 atomic_inc(&md_event_count);
380 wake_up(&md_event_waiters);
381}
382EXPORT_SYMBOL_GPL(md_new_event);
383
/*
 * Enables to iterate over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding
 * a reference to the current mddev must mddev_put it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
412
413
414
415
416
417
418
419
420static bool is_suspended(struct mddev *mddev, struct bio *bio)
421{
422 if (mddev->suspended)
423 return true;
424 if (bio_data_dir(bio) != WRITE)
425 return false;
426 if (mddev->suspend_lo >= mddev->suspend_hi)
427 return false;
428 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
429 return false;
430 if (bio_end_sector(bio) < mddev->suspend_lo)
431 return false;
432 return true;
433}
434
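/*
 * md_handle_request() hands a bio to the personality's make_request
 * method, first waiting while the relevant region of the array is
 * suspended, and keeping active_io elevated for the duration so that
 * mddev_suspend() can wait for all in-flight I/O to drain.
 */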
435void md_handle_request(struct mddev *mddev, struct bio *bio)
436{
437check_suspended:
438 rcu_read_lock();
439 if (is_suspended(mddev, bio)) {
440 DEFINE_WAIT(__wait);
441 for (;;) {
442 prepare_to_wait(&mddev->sb_wait, &__wait,
443 TASK_UNINTERRUPTIBLE);
444 if (!is_suspended(mddev, bio))
445 break;
446 rcu_read_unlock();
447 schedule();
448 rcu_read_lock();
449 }
450 finish_wait(&mddev->sb_wait, &__wait);
451 }
452 atomic_inc(&mddev->active_io);
453 rcu_read_unlock();
454
455 if (!mddev->pers->make_request(mddev, bio)) {
456 atomic_dec(&mddev->active_io);
457 wake_up(&mddev->sb_wait);
458 goto check_suspended;
459 }
460
461 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
462 wake_up(&mddev->sb_wait);
463}
464EXPORT_SYMBOL(md_handle_request);
465
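/*
 * md_make_request() is the block-layer entry point for the array: it
 * rejects writes to broken or read-only arrays, splits oversized bios,
 * passes the request to md_handle_request() and accounts the I/O
 * statistics against the md gendisk.
 */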
466static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
467{
468 const int rw = bio_data_dir(bio);
469 const int sgrp = op_stat_group(bio_op(bio));
470 struct mddev *mddev = bio->bi_disk->private_data;
471 unsigned int sectors;
472
473 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
474 bio_io_error(bio);
475 return BLK_QC_T_NONE;
476 }
477
478 blk_queue_split(q, &bio);
479
480 if (mddev == NULL || mddev->pers == NULL) {
481 bio_io_error(bio);
482 return BLK_QC_T_NONE;
483 }
484 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
485 if (bio_sectors(bio) != 0)
486 bio->bi_status = BLK_STS_IOERR;
487 bio_endio(bio);
488 return BLK_QC_T_NONE;
489 }
490
	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;
498
499 md_handle_request(mddev, bio);
500
501 part_stat_lock();
502 part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
503 part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
504 part_stat_unlock();
505
506 return BLK_QC_T_NONE;
507}
508
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
516{
517 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
518 lockdep_assert_held(&mddev->reconfig_mutex);
519 if (mddev->suspended++)
520 return;
521 synchronize_rcu();
522 wake_up(&mddev->sb_wait);
523 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
524 smp_mb__after_atomic();
525 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
526 mddev->pers->quiesce(mddev, 1);
527 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
528 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
529
530 del_timer_sync(&mddev->safemode_timer);
531
532 mddev->noio_flag = memalloc_noio_save();
533}
534EXPORT_SYMBOL_GPL(mddev_suspend);
535
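/*
 * mddev_resume() undoes one call to mddev_suspend().  Only when the
 * nested suspend count drops to zero is queued I/O restarted and the
 * recovery thread kicked.
 */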
536void mddev_resume(struct mddev *mddev)
537{
	/* entered the memalloc scope from mddev_suspend() */
	memalloc_noio_restore(mddev->noio_flag);
540 lockdep_assert_held(&mddev->reconfig_mutex);
541 if (--mddev->suspended)
542 return;
543 wake_up(&mddev->sb_wait);
544 mddev->pers->quiesce(mddev, 0);
545
546 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
547 md_wakeup_thread(mddev->thread);
548 md_wakeup_thread(mddev->sync_thread);
549}
550EXPORT_SYMBOL_GPL(mddev_resume);
551
552int mddev_congested(struct mddev *mddev, int bits)
553{
554 struct md_personality *pers = mddev->pers;
555 int ret = 0;
556
557 rcu_read_lock();
558 if (mddev->suspended)
559 ret = 1;
560 else if (pers && pers->congested)
561 ret = pers->congested(mddev, bits);
562 rcu_read_unlock();
563 return ret;
564}
565EXPORT_SYMBOL_GPL(mddev_congested);
566static int md_congested(void *data, int bits)
567{
568 struct mddev *mddev = data;
569 return mddev_congested(mddev, bits);
570}
571
/*
 * Generic flush handling for md
 */

static void md_end_flush(struct bio *bio)
577{
578 struct md_rdev *rdev = bio->bi_private;
579 struct mddev *mddev = rdev->mddev;
580
581 rdev_dec_pending(rdev, mddev);
582
583 if (atomic_dec_and_test(&mddev->flush_pending)) {
584
585 queue_work(md_wq, &mddev->flush_work);
586 }
587 bio_put(bio);
588}
589
590static void md_submit_flush_data(struct work_struct *ws);
591
592static void submit_flushes(struct work_struct *ws)
593{
594 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
595 struct md_rdev *rdev;
596
597 mddev->start_flush = ktime_get_boottime();
598 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
599 atomic_set(&mddev->flush_pending, 1);
600 rcu_read_lock();
601 rdev_for_each_rcu(rdev, mddev)
602 if (rdev->raid_disk >= 0 &&
603 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when the request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
611 rcu_read_unlock();
612 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
613 bi->bi_end_io = md_end_flush;
614 bi->bi_private = rdev;
615 bio_set_dev(bi, rdev->bdev);
616 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
617 atomic_inc(&mddev->flush_pending);
618 submit_bio(bi);
619 rcu_read_lock();
620 rdev_dec_pending(rdev, mddev);
621 }
622 rcu_read_unlock();
623 if (atomic_dec_and_test(&mddev->flush_pending))
624 queue_work(md_wq, &mddev->flush_work);
625}
626
627static void md_submit_flush_data(struct work_struct *ws)
628{
629 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
630 struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to avoid a
	 * deadlock, because other bios passed md_handle_request suspend check
	 * could wait for this and below md_handle_request could wait for those
	 * bios because of suspend check
	 */
	mddev->last_flush = mddev->start_flush;
	mddev->flush_bio = NULL;
640 wake_up(&mddev->sb_wait);
641
642 if (bio->bi_iter.bi_size == 0) {
643
644 bio_endio(bio);
645 } else {
646 bio->bi_opf &= ~REQ_PREFLUSH;
647 md_handle_request(mddev, bio);
648 }
649}
650
/*
 * Manages consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH
 */
bool md_flush_request(struct mddev *mddev, struct bio *bio)
658{
659 ktime_t start = ktime_get_boottime();
660 spin_lock_irq(&mddev->lock);
661 wait_event_lock_irq(mddev->sb_wait,
662 !mddev->flush_bio ||
663 ktime_after(mddev->last_flush, start),
664 mddev->lock);
665 if (!ktime_after(mddev->last_flush, start)) {
666 WARN_ON(mddev->flush_bio);
667 mddev->flush_bio = bio;
668 bio = NULL;
669 }
670 spin_unlock_irq(&mddev->lock);
671
672 if (!bio) {
673 INIT_WORK(&mddev->flush_work, submit_flushes);
674 queue_work(md_wq, &mddev->flush_work);
675 } else {
676
677 if (bio->bi_iter.bi_size == 0)
678
679 bio_endio(bio);
680 else {
681 bio->bi_opf &= ~REQ_PREFLUSH;
682 return false;
683 }
684 }
685 return true;
686}
687EXPORT_SYMBOL(md_flush_request);
688
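/*
 * Array reference counting: mddev_get()/mddev_put() pair up around any
 * unlocked use of an mddev.  When the last reference is dropped and the
 * array is not configured and not held active, the device is queued for
 * deletion on md_misc_wq.
 */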
689static inline struct mddev *mddev_get(struct mddev *mddev)
690{
691 atomic_inc(&mddev->active);
692 return mddev;
693}
694
695static void mddev_delayed_delete(struct work_struct *ws);
696
697static void mddev_put(struct mddev *mddev)
698{
699 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
700 return;
701 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
702 mddev->ctime == 0 && !mddev->hold_active) {
703
704
705 list_del_init(&mddev->all_mddevs);
706
707
708
709
710
711
712 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
713 queue_work(md_misc_wq, &mddev->del_work);
714 }
715 spin_unlock(&all_mddevs_lock);
716}
717
718static void md_safemode_timeout(struct timer_list *t);
719
720void mddev_init(struct mddev *mddev)
721{
722 kobject_init(&mddev->kobj, &md_ktype);
723 mutex_init(&mddev->open_mutex);
724 mutex_init(&mddev->reconfig_mutex);
725 mutex_init(&mddev->bitmap_info.mutex);
726 INIT_LIST_HEAD(&mddev->disks);
727 INIT_LIST_HEAD(&mddev->all_mddevs);
728 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
729 atomic_set(&mddev->active, 1);
730 atomic_set(&mddev->openers, 0);
731 atomic_set(&mddev->active_io, 0);
732 spin_lock_init(&mddev->lock);
733 atomic_set(&mddev->flush_pending, 0);
734 init_waitqueue_head(&mddev->sb_wait);
735 init_waitqueue_head(&mddev->recovery_wait);
736 mddev->reshape_position = MaxSector;
737 mddev->reshape_backwards = 0;
738 mddev->last_sync_action = "none";
739 mddev->resync_min = 0;
740 mddev->resync_max = MaxSector;
741 mddev->level = LEVEL_NONE;
742}
743EXPORT_SYMBOL_GPL(mddev_init);
744
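/*
 * mddev_find() returns the mddev for the given unit number, taking a new
 * reference on an existing array or allocating and registering a fresh
 * one.  A unit of 0 requests any unused minor number (starting at 512).
 */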
745static struct mddev *mddev_find(dev_t unit)
746{
747 struct mddev *mddev, *new = NULL;
748
749 if (unit && MAJOR(unit) != MD_MAJOR)
750 unit &= ~((1<<MdpMinorShift)-1);
751
752 retry:
753 spin_lock(&all_mddevs_lock);
754
755 if (unit) {
756 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
757 if (mddev->unit == unit) {
758 mddev_get(mddev);
759 spin_unlock(&all_mddevs_lock);
760 kfree(new);
761 return mddev;
762 }
763
764 if (new) {
765 list_add(&new->all_mddevs, &all_mddevs);
766 spin_unlock(&all_mddevs_lock);
767 new->hold_active = UNTIL_IOCTL;
768 return new;
769 }
770 } else if (new) {
771
772 static int next_minor = 512;
773 int start = next_minor;
774 int is_free = 0;
775 int dev = 0;
776 while (!is_free) {
777 dev = MKDEV(MD_MAJOR, next_minor);
778 next_minor++;
779 if (next_minor > MINORMASK)
780 next_minor = 0;
781 if (next_minor == start) {
782
783 spin_unlock(&all_mddevs_lock);
784 kfree(new);
785 return NULL;
786 }
787
788 is_free = 1;
789 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
790 if (mddev->unit == dev) {
791 is_free = 0;
792 break;
793 }
794 }
795 new->unit = dev;
796 new->md_minor = MINOR(dev);
797 new->hold_active = UNTIL_STOP;
798 list_add(&new->all_mddevs, &all_mddevs);
799 spin_unlock(&all_mddevs_lock);
800 return new;
801 }
802 spin_unlock(&all_mddevs_lock);
803
804 new = kzalloc(sizeof(*new), GFP_KERNEL);
805 if (!new)
806 return NULL;
807
808 new->unit = unit;
809 if (MAJOR(unit) == MD_MAJOR)
810 new->md_minor = MINOR(unit);
811 else
812 new->md_minor = MINOR(unit) >> MdpMinorShift;
813
814 mddev_init(new);
815
816 goto retry;
817}
818
819static struct attribute_group md_redundancy_group;
820
821void mddev_unlock(struct mddev *mddev)
822{
823 if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold set sysfs_active while the remove is happening,
		 * and anything else which might set ->to_remove or may
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
837 mddev->to_remove = NULL;
838 mddev->sysfs_active = 1;
839 mutex_unlock(&mddev->reconfig_mutex);
840
841 if (mddev->kobj.sd) {
842 if (to_remove != &md_redundancy_group)
843 sysfs_remove_group(&mddev->kobj, to_remove);
844 if (mddev->pers == NULL ||
845 mddev->pers->sync_request == NULL) {
846 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
847 if (mddev->sysfs_action)
848 sysfs_put(mddev->sysfs_action);
849 mddev->sysfs_action = NULL;
850 }
851 }
852 mddev->sysfs_active = 0;
853 } else
854 mutex_unlock(&mddev->reconfig_mutex);
855
	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
860 md_wakeup_thread(mddev->thread);
861 wake_up(&mddev->sb_wait);
862 spin_unlock(&pers_lock);
863}
864EXPORT_SYMBOL_GPL(mddev_unlock);
865
866struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
867{
868 struct md_rdev *rdev;
869
870 rdev_for_each_rcu(rdev, mddev)
871 if (rdev->desc_nr == nr)
872 return rdev;
873
874 return NULL;
875}
876EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
877
878static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
879{
880 struct md_rdev *rdev;
881
882 rdev_for_each(rdev, mddev)
883 if (rdev->bdev->bd_dev == dev)
884 return rdev;
885
886 return NULL;
887}
888
889struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
890{
891 struct md_rdev *rdev;
892
893 rdev_for_each_rcu(rdev, mddev)
894 if (rdev->bdev->bd_dev == dev)
895 return rdev;
896
897 return NULL;
898}
899EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
900
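/* Find a registered personality, matching either by numeric level or by name. */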
901static struct md_personality *find_pers(int level, char *clevel)
902{
903 struct md_personality *pers;
904 list_for_each_entry(pers, &pers_list, list) {
905 if (level != LEVEL_NONE && pers->level == level)
906 return pers;
907 if (strcmp(pers->name, clevel)==0)
908 return pers;
909 }
910 return NULL;
911}
912
913
914static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
915{
916 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
917 return MD_NEW_SIZE_SECTORS(num_sectors);
918}
919
920static int alloc_disk_sb(struct md_rdev *rdev)
921{
922 rdev->sb_page = alloc_page(GFP_KERNEL);
923 if (!rdev->sb_page)
924 return -ENOMEM;
925 return 0;
926}
927
928void md_rdev_clear(struct md_rdev *rdev)
929{
930 if (rdev->sb_page) {
931 put_page(rdev->sb_page);
932 rdev->sb_loaded = 0;
933 rdev->sb_page = NULL;
934 rdev->sb_start = 0;
935 rdev->sectors = 0;
936 }
937 if (rdev->bb_page) {
938 put_page(rdev->bb_page);
939 rdev->bb_page = NULL;
940 }
941 badblocks_exit(&rdev->badblocks);
942}
943EXPORT_SYMBOL_GPL(md_rdev_clear);
944
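/*
 * Completion handler for superblock writes issued by md_super_write().
 * On failure the device is reported to md_error(); if a failfast write
 * failed but the device was not marked Faulty (it may be the last
 * working device), a rewrite without MD_FAILFAST is requested instead.
 */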
945static void super_written(struct bio *bio)
946{
947 struct md_rdev *rdev = bio->bi_private;
948 struct mddev *mddev = rdev->mddev;
949
950 if (bio->bi_status) {
951 pr_err("md: super_written gets error=%d\n", bio->bi_status);
952 md_error(mddev, rdev);
953 if (!test_bit(Faulty, &rdev->flags)
954 && (bio->bi_opf & MD_FAILFAST)) {
955 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
956 set_bit(LastDev, &rdev->flags);
957 }
958 } else
959 clear_bit(LastDev, &rdev->flags);
960
961 if (atomic_dec_and_test(&mddev->pending_writes))
962 wake_up(&mddev->sb_wait);
963 rdev_dec_pending(rdev, mddev);
964 bio_put(bio);
965}
966
967void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
968 sector_t sector, int size, struct page *page)
969{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
977 int ff = 0;
978
979 if (!page)
980 return;
981
982 if (test_bit(Faulty, &rdev->flags))
983 return;
984
985 bio = md_bio_alloc_sync(mddev);
986
987 atomic_inc(&rdev->nr_pending);
988
989 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
990 bio->bi_iter.bi_sector = sector;
991 bio_add_page(bio, page, size, 0);
992 bio->bi_private = rdev;
993 bio->bi_end_io = super_written;
994
995 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
996 test_bit(FailFast, &rdev->flags) &&
997 !test_bit(LastDev, &rdev->flags))
998 ff = MD_FAILFAST;
999 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
1000
1001 atomic_inc(&mddev->pending_writes);
1002 submit_bio(bio);
1003}
1004
1005int md_super_wait(struct mddev *mddev)
1006{
1007
1008 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1009 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
1010 return -EAGAIN;
1011 return 0;
1012}
1013
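/*
 * sync_page_io() synchronously reads or writes one page of data at the
 * given sector of an rdev (relative to data_offset, or to sb_start for
 * a metadata_op), returning 1 on success and 0 on error.
 */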
1014int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
1015 struct page *page, int op, int op_flags, bool metadata_op)
1016{
1017 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
1018 int ret;
1019
1020 if (metadata_op && rdev->meta_bdev)
1021 bio_set_dev(bio, rdev->meta_bdev);
1022 else
1023 bio_set_dev(bio, rdev->bdev);
1024 bio_set_op_attrs(bio, op, op_flags);
1025 if (metadata_op)
1026 bio->bi_iter.bi_sector = sector + rdev->sb_start;
1027 else if (rdev->mddev->reshape_position != MaxSector &&
1028 (rdev->mddev->reshape_backwards ==
1029 (sector >= rdev->mddev->reshape_position)))
1030 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
1031 else
1032 bio->bi_iter.bi_sector = sector + rdev->data_offset;
1033 bio_add_page(bio, page, size, 0);
1034
1035 submit_bio_wait(bio);
1036
1037 ret = !bio->bi_status;
1038 bio_put(bio);
1039 return ret;
1040}
1041EXPORT_SYMBOL_GPL(sync_page_io);
1042
1043static int read_disk_sb(struct md_rdev *rdev, int size)
1044{
1045 char b[BDEVNAME_SIZE];
1046
1047 if (rdev->sb_loaded)
1048 return 0;
1049
1050 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1051 goto fail;
1052 rdev->sb_loaded = 1;
1053 return 0;
1054
1055fail:
1056 pr_err("md: disabled device %s, could not read superblock.\n",
1057 bdevname(rdev->bdev,b));
1058 return -EINVAL;
1059}
1060
1061static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1062{
1063 return sb1->set_uuid0 == sb2->set_uuid0 &&
1064 sb1->set_uuid1 == sb2->set_uuid1 &&
1065 sb1->set_uuid2 == sb2->set_uuid2 &&
1066 sb1->set_uuid3 == sb2->set_uuid3;
1067}
1068
1069static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1070{
1071 int ret;
1072 mdp_super_t *tmp1, *tmp2;
1073
1074 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1075 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1076
1077 if (!tmp1 || !tmp2) {
1078 ret = 0;
1079 goto abort;
1080 }
1081
1082 *tmp1 = *sb1;
1083 *tmp2 = *sb2;
1084
1085
1086
1087
1088 tmp1->nr_disks = 0;
1089 tmp2->nr_disks = 0;
1090
1091 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1092abort:
1093 kfree(tmp1);
1094 kfree(tmp2);
1095 return ret;
1096}
1097
1098static u32 md_csum_fold(u32 csum)
1099{
1100 csum = (csum & 0xffff) + (csum >> 16);
1101 return (csum & 0xffff) + (csum >> 16);
1102}
1103
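/*
 * calc_sb_csum() computes the 0.90 superblock checksum over the first
 * MD_SB_BYTES with the sb_csum field treated as zero.  See the #ifdef
 * inside for the historical Alpha folding quirk.
 */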
1104static unsigned int calc_sb_csum(mdp_super_t *sb)
1105{
1106 u64 newcsum = 0;
1107 u32 *sb32 = (u32*)sb;
1108 int i;
1109 unsigned int disk_csum, csum;
1110
1111 disk_csum = sb->sb_csum;
1112 sb->sb_csum = 0;
1113
1114 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1115 newcsum += sb32[i];
1116 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1117
1118#ifdef CONFIG_ALPHA
1119
1120
1121
1122
1123
1124
1125
1126
1127 sb->sb_csum = md_csum_fold(disk_csum);
1128#else
1129 sb->sb_csum = disk_csum;
1130#endif
1131 return csum;
1132}
1133

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 */
struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
1179
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
1189{
1190 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1191 return 0;
1192 pr_warn("%s: bitmaps are not supported for %s\n",
1193 mdname(mddev), mddev->pers->name);
1194 return 1;
1195}
1196EXPORT_SYMBOL(md_check_no_bitmap);
1197
/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1202{
1203 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1204 mdp_super_t *sb;
1205 int ret;
1206 bool spare_disk = true;
1207
	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);
1215
1216 ret = read_disk_sb(rdev, MD_SB_BYTES);
1217 if (ret)
1218 return ret;
1219
1220 ret = -EINVAL;
1221
1222 bdevname(rdev->bdev, b);
1223 sb = page_address(rdev->sb_page);
1224
1225 if (sb->md_magic != MD_SB_MAGIC) {
1226 pr_warn("md: invalid raid superblock magic on %s\n", b);
1227 goto abort;
1228 }
1229
1230 if (sb->major_version != 0 ||
1231 sb->minor_version < 90 ||
1232 sb->minor_version > 91) {
1233 pr_warn("Bad version number %d.%d on %s\n",
1234 sb->major_version, sb->minor_version, b);
1235 goto abort;
1236 }
1237
1238 if (sb->raid_disks <= 0)
1239 goto abort;
1240
1241 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1242 pr_warn("md: invalid superblock checksum on %s\n", b);
1243 goto abort;
1244 }
1245
1246 rdev->preferred_minor = sb->md_minor;
1247 rdev->data_offset = 0;
1248 rdev->new_data_offset = 0;
1249 rdev->sb_size = MD_SB_BYTES;
1250 rdev->badblocks.shift = -1;
1251
1252 if (sb->level == LEVEL_MULTIPATH)
1253 rdev->desc_nr = -1;
1254 else
1255 rdev->desc_nr = sb->this_disk.number;
1256
1257
1258 if (sb->level == LEVEL_MULTIPATH ||
1259 (rdev->desc_nr >= 0 &&
1260 rdev->desc_nr < MD_SB_DISKS &&
1261 sb->disks[rdev->desc_nr].state &
1262 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1263 spare_disk = false;
1264
1265 if (!refdev) {
1266 if (!spare_disk)
1267 ret = 1;
1268 else
1269 ret = 0;
1270 } else {
1271 __u64 ev1, ev2;
1272 mdp_super_t *refsb = page_address(refdev->sb_page);
1273 if (!md_uuid_equal(refsb, sb)) {
1274 pr_warn("md: %s has different UUID to %s\n",
1275 b, bdevname(refdev->bdev,b2));
1276 goto abort;
1277 }
1278 if (!md_sb_equal(refsb, sb)) {
1279 pr_warn("md: %s has same UUID but different superblock to %s\n",
1280 b, bdevname(refdev->bdev, b2));
1281 goto abort;
1282 }
1283 ev1 = md_event(sb);
1284 ev2 = md_event(refsb);
1285
1286 if (!spare_disk && ev1 > ev2)
1287 ret = 1;
1288 else
1289 ret = 0;
1290 }
1291 rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;
1302
1303 abort:
1304 return ret;
1305}
1306
/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1311{
1312 mdp_disk_t *desc;
1313 mdp_super_t *sb = page_address(rdev->sb_page);
1314 __u64 ev1 = md_event(sb);
1315
1316 rdev->raid_disk = -1;
1317 clear_bit(Faulty, &rdev->flags);
1318 clear_bit(In_sync, &rdev->flags);
1319 clear_bit(Bitmap_sync, &rdev->flags);
1320 clear_bit(WriteMostly, &rdev->flags);
1321
1322 if (mddev->raid_disks == 0) {
1323 mddev->major_version = 0;
1324 mddev->minor_version = sb->minor_version;
1325 mddev->patch_version = sb->patch_version;
1326 mddev->external = 0;
1327 mddev->chunk_sectors = sb->chunk_size >> 9;
1328 mddev->ctime = sb->ctime;
1329 mddev->utime = sb->utime;
1330 mddev->level = sb->level;
1331 mddev->clevel[0] = 0;
1332 mddev->layout = sb->layout;
1333 mddev->raid_disks = sb->raid_disks;
1334 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1335 mddev->events = ev1;
1336 mddev->bitmap_info.offset = 0;
1337 mddev->bitmap_info.space = 0;
1338
1339 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1340 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1341 mddev->reshape_backwards = 0;
1342
1343 if (mddev->minor_version >= 91) {
1344 mddev->reshape_position = sb->reshape_position;
1345 mddev->delta_disks = sb->delta_disks;
1346 mddev->new_level = sb->new_level;
1347 mddev->new_layout = sb->new_layout;
1348 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1349 if (mddev->delta_disks < 0)
1350 mddev->reshape_backwards = 1;
1351 } else {
1352 mddev->reshape_position = MaxSector;
1353 mddev->delta_disks = 0;
1354 mddev->new_level = mddev->level;
1355 mddev->new_layout = mddev->layout;
1356 mddev->new_chunk_sectors = mddev->chunk_sectors;
1357 }
1358 if (mddev->level == 0)
1359 mddev->layout = -1;
1360
1361 if (sb->state & (1<<MD_SB_CLEAN))
1362 mddev->recovery_cp = MaxSector;
1363 else {
1364 if (sb->events_hi == sb->cp_events_hi &&
1365 sb->events_lo == sb->cp_events_lo) {
1366 mddev->recovery_cp = sb->recovery_cp;
1367 } else
1368 mddev->recovery_cp = 0;
1369 }
1370
1371 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1372 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1373 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1374 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1375
1376 mddev->max_disks = MD_SB_DISKS;
1377
1378 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1379 mddev->bitmap_info.file == NULL) {
1380 mddev->bitmap_info.offset =
1381 mddev->bitmap_info.default_offset;
1382 mddev->bitmap_info.space =
1383 mddev->bitmap_info.default_space;
1384 }
1385
1386 } else if (mddev->pers == NULL) {
1387
1388
1389 ++ev1;
1390 if (sb->disks[rdev->desc_nr].state & (
1391 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1392 if (ev1 < mddev->events)
1393 return -EINVAL;
1394 } else if (mddev->bitmap) {
1395
1396
1397
1398 if (ev1 < mddev->bitmap->events_cleared)
1399 return 0;
1400 if (ev1 < mddev->events)
1401 set_bit(Bitmap_sync, &rdev->flags);
1402 } else {
1403 if (ev1 < mddev->events)
1404
1405 return 0;
1406 }
1407
1408 if (mddev->level != LEVEL_MULTIPATH) {
1409 desc = sb->disks + rdev->desc_nr;
1410
1411 if (desc->state & (1<<MD_DISK_FAULTY))
1412 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1415 set_bit(In_sync, &rdev->flags);
1416 rdev->raid_disk = desc->raid_disk;
1417 rdev->saved_raid_disk = desc->raid_disk;
1418 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1419
1420
1421
1422 if (mddev->minor_version >= 91) {
1423 rdev->recovery_offset = 0;
1424 rdev->raid_disk = desc->raid_disk;
1425 }
1426 }
1427 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1428 set_bit(WriteMostly, &rdev->flags);
1429 if (desc->state & (1<<MD_DISK_FAILFAST))
1430 set_bit(FailFast, &rdev->flags);
1431 } else
1432 set_bit(In_sync, &rdev->flags);
1433 return 0;
1434}
1435
/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1440{
1441 mdp_super_t *sb;
1442 struct md_rdev *rdev2;
1443 int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;
1457
1458 rdev->sb_size = MD_SB_BYTES;
1459
1460 sb = page_address(rdev->sb_page);
1461
1462 memset(sb, 0, sizeof(*sb));
1463
1464 sb->md_magic = MD_SB_MAGIC;
1465 sb->major_version = mddev->major_version;
1466 sb->patch_version = mddev->patch_version;
1467 sb->gvalid_words = 0;
1468 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1469 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1470 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1471 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1472
1473 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1474 sb->level = mddev->level;
1475 sb->size = mddev->dev_sectors / 2;
1476 sb->raid_disks = mddev->raid_disks;
1477 sb->md_minor = mddev->md_minor;
1478 sb->not_persistent = 0;
1479 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1480 sb->state = 0;
1481 sb->events_hi = (mddev->events>>32);
1482 sb->events_lo = (u32)mddev->events;
1483
1484 if (mddev->reshape_position == MaxSector)
1485 sb->minor_version = 90;
1486 else {
1487 sb->minor_version = 91;
1488 sb->reshape_position = mddev->reshape_position;
1489 sb->new_level = mddev->new_level;
1490 sb->delta_disks = mddev->delta_disks;
1491 sb->new_layout = mddev->new_layout;
1492 sb->new_chunk = mddev->new_chunk_sectors << 9;
1493 }
1494 mddev->minor_version = sb->minor_version;
1495 if (mddev->in_sync)
1496 {
1497 sb->recovery_cp = mddev->recovery_cp;
1498 sb->cp_events_hi = (mddev->events>>32);
1499 sb->cp_events_lo = (u32)mddev->events;
1500 if (mddev->recovery_cp == MaxSector)
1501 sb->state = (1<< MD_SB_CLEAN);
1502 } else
1503 sb->recovery_cp = 0;
1504
1505 sb->layout = mddev->layout;
1506 sb->chunk_size = mddev->chunk_sectors << 9;
1507
1508 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1509 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1510
1511 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1512 rdev_for_each(rdev2, mddev) {
1513 mdp_disk_t *d;
1514 int desc_nr;
1515 int is_active = test_bit(In_sync, &rdev2->flags);
1516
1517 if (rdev2->raid_disk >= 0 &&
1518 sb->minor_version >= 91)
1519
1520
1521
1522
1523 is_active = 1;
1524 if (rdev2->raid_disk < 0 ||
1525 test_bit(Faulty, &rdev2->flags))
1526 is_active = 0;
1527 if (is_active)
1528 desc_nr = rdev2->raid_disk;
1529 else
1530 desc_nr = next_spare++;
1531 rdev2->desc_nr = desc_nr;
1532 d = &sb->disks[rdev2->desc_nr];
1533 nr_disks++;
1534 d->number = rdev2->desc_nr;
1535 d->major = MAJOR(rdev2->bdev->bd_dev);
1536 d->minor = MINOR(rdev2->bdev->bd_dev);
1537 if (is_active)
1538 d->raid_disk = rdev2->raid_disk;
1539 else
1540 d->raid_disk = rdev2->desc_nr;
1541 if (test_bit(Faulty, &rdev2->flags))
1542 d->state = (1<<MD_DISK_FAULTY);
1543 else if (is_active) {
1544 d->state = (1<<MD_DISK_ACTIVE);
1545 if (test_bit(In_sync, &rdev2->flags))
1546 d->state |= (1<<MD_DISK_SYNC);
1547 active++;
1548 working++;
1549 } else {
1550 d->state = 0;
1551 spare++;
1552 working++;
1553 }
1554 if (test_bit(WriteMostly, &rdev2->flags))
1555 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1556 if (test_bit(FailFast, &rdev2->flags))
1557 d->state |= (1<<MD_DISK_FAILFAST);
1558 }
1559
1560 for (i=0 ; i < mddev->raid_disks ; i++) {
1561 mdp_disk_t *d = &sb->disks[i];
1562 if (d->state == 0 && d->number == 0) {
1563 d->number = i;
1564 d->raid_disk = i;
1565 d->state = (1<<MD_DISK_REMOVED);
1566 d->state |= (1<<MD_DISK_FAULTY);
1567 failed++;
1568 }
1569 }
1570 sb->nr_disks = nr_disks;
1571 sb->active_disks = active;
1572 sb->working_disks = working;
1573 sb->failed_disks = failed;
1574 sb->spare_disks = spare;
1575
1576 sb->this_disk = sb->disks[rdev->desc_nr];
1577 sb->sb_csum = calc_sb_csum(sb);
1578}
1579
/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1585{
1586 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1587 return 0;
1588 if (rdev->mddev->bitmap_info.offset)
1589 return 0;
1590 rdev->sb_start = calc_dev_sboffset(rdev);
1591 if (!num_sectors || num_sectors > rdev->sb_start)
1592 num_sectors = rdev->sb_start;
1593
1594
1595
1596 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1597 num_sectors = (sector_t)(2ULL << 32) - 2;
1598 do {
1599 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1600 rdev->sb_page);
1601 } while (md_super_wait(rdev->mddev) < 0);
1602 return num_sectors;
1603}
1604
1605static int
1606super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1607{
1608
1609 return new_offset == 0;
1610}
1611
/*
 * version 1 superblock
 */

static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1617{
1618 __le32 disk_csum;
1619 u32 csum;
1620 unsigned long long newcsum;
1621 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1622 __le32 *isuper = (__le32*)sb;
1623
1624 disk_csum = sb->sb_csum;
1625 sb->sb_csum = 0;
1626 newcsum = 0;
1627 for (; size >= 4; size -= 4)
1628 newcsum += le32_to_cpu(*isuper++);
1629
1630 if (size == 2)
1631 newcsum += le16_to_cpu(*(__le16*) isuper);
1632
1633 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1634 sb->sb_csum = disk_csum;
1635 return cpu_to_le32(csum);
1636}
1637
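/*
 * load_super for the 1.x metadata formats: locate and read the
 * superblock for the given minor_version, sanity-check it, load any
 * bad-block log, and compare event counts against refdev if supplied.
 */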
1638static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1639{
1640 struct mdp_superblock_1 *sb;
1641 int ret;
1642 sector_t sb_start;
1643 sector_t sectors;
1644 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1645 int bmask;
1646 bool spare_disk = true;
1647
	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device.
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
1657 case 0:
1658 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1659 sb_start -= 8*2;
1660 sb_start &= ~(sector_t)(4*2-1);
1661 break;
1662 case 1:
1663 sb_start = 0;
1664 break;
1665 case 2:
1666 sb_start = 8;
1667 break;
1668 default:
1669 return -EINVAL;
1670 }
1671 rdev->sb_start = sb_start;
1672
1673
1674
1675
1676 ret = read_disk_sb(rdev, 4096);
1677 if (ret) return ret;
1678
1679 sb = page_address(rdev->sb_page);
1680
1681 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1682 sb->major_version != cpu_to_le32(1) ||
1683 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1684 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1685 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1686 return -EINVAL;
1687
1688 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1689 pr_warn("md: invalid superblock checksum on %s\n",
1690 bdevname(rdev->bdev,b));
1691 return -EINVAL;
1692 }
1693 if (le64_to_cpu(sb->data_size) < 10) {
1694 pr_warn("md: data_size too small on %s\n",
1695 bdevname(rdev->bdev,b));
1696 return -EINVAL;
1697 }
1698 if (sb->pad0 ||
1699 sb->pad3[0] ||
1700 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1701
1702 return -EINVAL;
1703
1704 rdev->preferred_minor = 0xffff;
1705 rdev->data_offset = le64_to_cpu(sb->data_offset);
1706 rdev->new_data_offset = rdev->data_offset;
1707 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1708 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1709 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1710 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1711
1712 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1713 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1714 if (rdev->sb_size & bmask)
1715 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1716
1717 if (minor_version
1718 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1719 return -EINVAL;
1720 if (minor_version
1721 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1722 return -EINVAL;
1723
1724 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1725 rdev->desc_nr = -1;
1726 else
1727 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1728
1729 if (!rdev->bb_page) {
1730 rdev->bb_page = alloc_page(GFP_KERNEL);
1731 if (!rdev->bb_page)
1732 return -ENOMEM;
1733 }
1734 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1735 rdev->badblocks.count == 0) {
1736
1737
1738
1739 s32 offset;
1740 sector_t bb_sector;
1741 __le64 *bbp;
1742 int i;
1743 int sectors = le16_to_cpu(sb->bblog_size);
1744 if (sectors > (PAGE_SIZE / 512))
1745 return -EINVAL;
1746 offset = le32_to_cpu(sb->bblog_offset);
1747 if (offset == 0)
1748 return -EINVAL;
1749 bb_sector = (long long)offset;
1750 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1751 rdev->bb_page, REQ_OP_READ, 0, true))
1752 return -EIO;
1753 bbp = (__le64 *)page_address(rdev->bb_page);
1754 rdev->badblocks.shift = sb->bblog_shift;
1755 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1756 u64 bb = le64_to_cpu(*bbp);
1757 int count = bb & (0x3ff);
1758 u64 sector = bb >> 10;
1759 sector <<= sb->bblog_shift;
1760 count <<= sb->bblog_shift;
1761 if (bb + 1 == 0)
1762 break;
1763 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1764 return -EINVAL;
1765 }
1766 } else if (sb->bblog_offset != 0)
1767 rdev->badblocks.shift = 0;
1768
1769 if ((le32_to_cpu(sb->feature_map) &
1770 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1771 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1772 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1773 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1774 }
1775
1776 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1777 sb->level != 0)
1778 return -EINVAL;
1779
1780
1781 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1782 (rdev->desc_nr >= 0 &&
1783 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1784 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1785 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1786 spare_disk = false;
1787
1788 if (!refdev) {
1789 if (!spare_disk)
1790 ret = 1;
1791 else
1792 ret = 0;
1793 } else {
1794 __u64 ev1, ev2;
1795 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1796
1797 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1798 sb->level != refsb->level ||
1799 sb->layout != refsb->layout ||
1800 sb->chunksize != refsb->chunksize) {
1801 pr_warn("md: %s has strangely different superblock to %s\n",
1802 bdevname(rdev->bdev,b),
1803 bdevname(refdev->bdev,b2));
1804 return -EINVAL;
1805 }
1806 ev1 = le64_to_cpu(sb->events);
1807 ev2 = le64_to_cpu(refsb->events);
1808
1809 if (!spare_disk && ev1 > ev2)
1810 ret = 1;
1811 else
1812 ret = 0;
1813 }
1814 if (minor_version) {
1815 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1816 sectors -= rdev->data_offset;
1817 } else
1818 sectors = rdev->sb_start;
1819 if (sectors < le64_to_cpu(sb->data_size))
1820 return -EINVAL;
1821 rdev->sectors = le64_to_cpu(sb->data_size);
1822 return ret;
1823}
1824
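/*
 * validate_super for 1.x: on the first device the array parameters are
 * copied from the superblock into the mddev; for later devices the
 * event count is checked and the device's role (spare, faulty, journal
 * or data disk) is established.
 */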
1825static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1826{
1827 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1828 __u64 ev1 = le64_to_cpu(sb->events);
1829
1830 rdev->raid_disk = -1;
1831 clear_bit(Faulty, &rdev->flags);
1832 clear_bit(In_sync, &rdev->flags);
1833 clear_bit(Bitmap_sync, &rdev->flags);
1834 clear_bit(WriteMostly, &rdev->flags);
1835
1836 if (mddev->raid_disks == 0) {
1837 mddev->major_version = 1;
1838 mddev->patch_version = 0;
1839 mddev->external = 0;
1840 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1841 mddev->ctime = le64_to_cpu(sb->ctime);
1842 mddev->utime = le64_to_cpu(sb->utime);
1843 mddev->level = le32_to_cpu(sb->level);
1844 mddev->clevel[0] = 0;
1845 mddev->layout = le32_to_cpu(sb->layout);
1846 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1847 mddev->dev_sectors = le64_to_cpu(sb->size);
1848 mddev->events = ev1;
1849 mddev->bitmap_info.offset = 0;
1850 mddev->bitmap_info.space = 0;
1851
1852
1853
1854 mddev->bitmap_info.default_offset = 1024 >> 9;
1855 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1856 mddev->reshape_backwards = 0;
1857
1858 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1859 memcpy(mddev->uuid, sb->set_uuid, 16);
1860
1861 mddev->max_disks = (4096-256)/2;
1862
1863 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1864 mddev->bitmap_info.file == NULL) {
1865 mddev->bitmap_info.offset =
1866 (__s32)le32_to_cpu(sb->bitmap_offset);
1867
1868
1869
1870
1871
1872 if (mddev->minor_version > 0)
1873 mddev->bitmap_info.space = 0;
1874 else if (mddev->bitmap_info.offset > 0)
1875 mddev->bitmap_info.space =
1876 8 - mddev->bitmap_info.offset;
1877 else
1878 mddev->bitmap_info.space =
1879 -mddev->bitmap_info.offset;
1880 }
1881
1882 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1883 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1884 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1885 mddev->new_level = le32_to_cpu(sb->new_level);
1886 mddev->new_layout = le32_to_cpu(sb->new_layout);
1887 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1888 if (mddev->delta_disks < 0 ||
1889 (mddev->delta_disks == 0 &&
1890 (le32_to_cpu(sb->feature_map)
1891 & MD_FEATURE_RESHAPE_BACKWARDS)))
1892 mddev->reshape_backwards = 1;
1893 } else {
1894 mddev->reshape_position = MaxSector;
1895 mddev->delta_disks = 0;
1896 mddev->new_level = mddev->level;
1897 mddev->new_layout = mddev->layout;
1898 mddev->new_chunk_sectors = mddev->chunk_sectors;
1899 }
1900
1901 if (mddev->level == 0 &&
1902 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1903 mddev->layout = -1;
1904
1905 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1906 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1907
1908 if (le32_to_cpu(sb->feature_map) &
1909 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1910 if (le32_to_cpu(sb->feature_map) &
1911 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1912 return -EINVAL;
1913 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1914 (le32_to_cpu(sb->feature_map) &
1915 MD_FEATURE_MULTIPLE_PPLS))
1916 return -EINVAL;
1917 set_bit(MD_HAS_PPL, &mddev->flags);
1918 }
1919 } else if (mddev->pers == NULL) {
1920
1921
1922 ++ev1;
1923 if (rdev->desc_nr >= 0 &&
1924 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1925 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1926 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1927 if (ev1 < mddev->events)
1928 return -EINVAL;
1929 } else if (mddev->bitmap) {
1930
1931
1932
1933 if (ev1 < mddev->bitmap->events_cleared)
1934 return 0;
1935 if (ev1 < mddev->events)
1936 set_bit(Bitmap_sync, &rdev->flags);
1937 } else {
1938 if (ev1 < mddev->events)
1939
1940 return 0;
1941 }
1942 if (mddev->level != LEVEL_MULTIPATH) {
1943 int role;
1944 if (rdev->desc_nr < 0 ||
1945 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1946 role = MD_DISK_ROLE_SPARE;
1947 rdev->desc_nr = -1;
1948 } else
1949 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1950 switch(role) {
1951 case MD_DISK_ROLE_SPARE:
1952 break;
1953 case MD_DISK_ROLE_FAULTY:
1954 set_bit(Faulty, &rdev->flags);
1955 break;
1956 case MD_DISK_ROLE_JOURNAL:
1957 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1958
1959 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1960 return -EINVAL;
1961 }
1962 set_bit(Journal, &rdev->flags);
1963 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1964 rdev->raid_disk = 0;
1965 break;
1966 default:
1967 rdev->saved_raid_disk = role;
1968 if ((le32_to_cpu(sb->feature_map) &
1969 MD_FEATURE_RECOVERY_OFFSET)) {
1970 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1971 if (!(le32_to_cpu(sb->feature_map) &
1972 MD_FEATURE_RECOVERY_BITMAP))
1973 rdev->saved_raid_disk = -1;
1974 } else {
1975
1976
1977
1978
1979 if (!test_bit(MD_RECOVERY_FROZEN,
1980 &mddev->recovery))
1981 set_bit(In_sync, &rdev->flags);
1982 }
1983 rdev->raid_disk = role;
1984 break;
1985 }
1986 if (sb->devflags & WriteMostly1)
1987 set_bit(WriteMostly, &rdev->flags);
1988 if (sb->devflags & FailFast1)
1989 set_bit(FailFast, &rdev->flags);
1990 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1991 set_bit(Replacement, &rdev->flags);
1992 } else
1993 set_bit(In_sync, &rdev->flags);
1994
1995 return 0;
1996}
1997
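/*
 * sync_super for 1.x: refresh this rdev's in-memory superblock from the
 * current mddev state, including reshape, PPL, journal and bad-block
 * information.  The superblock is not written to disk here.
 */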
1998static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1999{
2000 struct mdp_superblock_1 *sb;
2001 struct md_rdev *rdev2;
2002 int max_dev, i;
2003
2004
2005 sb = page_address(rdev->sb_page);
2006
2007 sb->feature_map = 0;
2008 sb->pad0 = 0;
2009 sb->recovery_offset = cpu_to_le64(0);
2010 memset(sb->pad3, 0, sizeof(sb->pad3));
2011
2012 sb->utime = cpu_to_le64((__u64)mddev->utime);
2013 sb->events = cpu_to_le64(mddev->events);
2014 if (mddev->in_sync)
2015 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2016 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2017 sb->resync_offset = cpu_to_le64(MaxSector);
2018 else
2019 sb->resync_offset = cpu_to_le64(0);
2020
2021 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2022
2023 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2024 sb->size = cpu_to_le64(mddev->dev_sectors);
2025 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2026 sb->level = cpu_to_le32(mddev->level);
2027 sb->layout = cpu_to_le32(mddev->layout);
2028 if (test_bit(FailFast, &rdev->flags))
2029 sb->devflags |= FailFast1;
2030 else
2031 sb->devflags &= ~FailFast1;
2032
2033 if (test_bit(WriteMostly, &rdev->flags))
2034 sb->devflags |= WriteMostly1;
2035 else
2036 sb->devflags &= ~WriteMostly1;
2037 sb->data_offset = cpu_to_le64(rdev->data_offset);
2038 sb->data_size = cpu_to_le64(rdev->sectors);
2039
2040 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2041 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2042 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2043 }
2044
2045 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2046 !test_bit(In_sync, &rdev->flags)) {
2047 sb->feature_map |=
2048 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
2049 sb->recovery_offset =
2050 cpu_to_le64(rdev->recovery_offset);
2051 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2052 sb->feature_map |=
2053 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
2054 }
2055
2056 if (test_bit(Journal, &rdev->flags))
2057 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2058 if (test_bit(Replacement, &rdev->flags))
2059 sb->feature_map |=
2060 cpu_to_le32(MD_FEATURE_REPLACEMENT);
2061
2062 if (mddev->reshape_position != MaxSector) {
2063 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2064 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2065 sb->new_layout = cpu_to_le32(mddev->new_layout);
2066 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2067 sb->new_level = cpu_to_le32(mddev->new_level);
2068 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2069 if (mddev->delta_disks == 0 &&
2070 mddev->reshape_backwards)
2071 sb->feature_map
2072 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
2073 if (rdev->new_data_offset != rdev->data_offset) {
2074 sb->feature_map
2075 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
2076 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2077 - rdev->data_offset));
2078 }
2079 }
2080
2081 if (mddev_is_clustered(mddev))
2082 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2083
2084 if (rdev->badblocks.count == 0)
2085 ;
2086 else if (sb->bblog_offset == 0)
2087
2088 md_error(mddev, rdev);
2089 else {
2090 struct badblocks *bb = &rdev->badblocks;
2091 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2092 u64 *p = bb->page;
2093 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2094 if (bb->changed) {
2095 unsigned seq;
2096
2097retry:
2098 seq = read_seqbegin(&bb->lock);
2099
2100 memset(bbp, 0xff, PAGE_SIZE);
2101
2102 for (i = 0 ; i < bb->count ; i++) {
2103 u64 internal_bb = p[i];
2104 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2105 | BB_LEN(internal_bb));
2106 bbp[i] = cpu_to_le64(store_bb);
2107 }
2108 bb->changed = 0;
2109 if (read_seqretry(&bb->lock, seq))
2110 goto retry;
2111
2112 bb->sector = (rdev->sb_start +
2113 (int)le32_to_cpu(sb->bblog_offset));
2114 bb->size = le16_to_cpu(sb->bblog_size);
2115 }
2116 }
2117
2118 max_dev = 0;
2119 rdev_for_each(rdev2, mddev)
2120 if (rdev2->desc_nr+1 > max_dev)
2121 max_dev = rdev2->desc_nr+1;
2122
2123 if (max_dev > le32_to_cpu(sb->max_dev)) {
2124 int bmask;
2125 sb->max_dev = cpu_to_le32(max_dev);
2126 rdev->sb_size = max_dev * 2 + 256;
2127 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2128 if (rdev->sb_size & bmask)
2129 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2130 } else
2131 max_dev = le32_to_cpu(sb->max_dev);
2132
2133 for (i=0; i<max_dev;i++)
2134 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2135
2136 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2137 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2138
2139 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2140 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2141 sb->feature_map |=
2142 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2143 else
2144 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2145 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2146 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2147 }
2148
2149 rdev_for_each(rdev2, mddev) {
2150 i = rdev2->desc_nr;
2151 if (test_bit(Faulty, &rdev2->flags))
2152 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2153 else if (test_bit(In_sync, &rdev2->flags))
2154 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2155 else if (test_bit(Journal, &rdev2->flags))
2156 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2157 else if (rdev2->raid_disk >= 0)
2158 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2159 else
2160 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2161 }
2162
2163 sb->sb_csum = calc_sb_1_csum(sb);
2164}
2165
2166static unsigned long long
2167super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2168{
2169 struct mdp_superblock_1 *sb;
2170 sector_t max_sectors;
2171 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2172 return 0;
2173 if (rdev->data_offset != rdev->new_data_offset)
2174 return 0;
2175 if (rdev->sb_start < rdev->data_offset) {
2176
2177 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2178 max_sectors -= rdev->data_offset;
2179 if (!num_sectors || num_sectors > max_sectors)
2180 num_sectors = max_sectors;
2181 } else if (rdev->mddev->bitmap_info.offset) {
2182
2183 return 0;
2184 } else {
2185
2186 sector_t sb_start;
2187 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
2188 sb_start &= ~(sector_t)(4*2 - 1);
2189 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
2190 if (!num_sectors || num_sectors > max_sectors)
2191 num_sectors = max_sectors;
2192 rdev->sb_start = sb_start;
2193 }
2194 sb = page_address(rdev->sb_page);
2195 sb->data_size = cpu_to_le64(num_sectors);
2196 sb->super_offset = cpu_to_le64(rdev->sb_start);
2197 sb->sb_csum = calc_sb_1_csum(sb);
2198 do {
2199 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2200 rdev->sb_page);
2201 } while (md_super_wait(rdev->mddev) < 0);
2202 return num_sectors;
2203
2204}
2205
2206static int
2207super_1_allow_new_offset(struct md_rdev *rdev,
2208 unsigned long long new_offset)
2209{
2210
2211 struct bitmap *bitmap;
2212 if (new_offset >= rdev->data_offset)
2213 return 1;
2214
2215
2216
2217 if (rdev->mddev->minor_version == 0)
2218 return 1;
2219
2220
2221
2222
2223
2224
2225
2226 if (rdev->sb_start + (32+4)*2 > new_offset)
2227 return 0;
2228 bitmap = rdev->mddev->bitmap;
2229 if (bitmap && !rdev->mddev->bitmap_info.file &&
2230 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2231 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2232 return 0;
2233 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2234 return 0;
2235
2236 return 1;
2237}
2238
2239static struct super_type super_types[] = {
2240 [0] = {
2241 .name = "0.90.0",
2242 .owner = THIS_MODULE,
2243 .load_super = super_90_load,
2244 .validate_super = super_90_validate,
2245 .sync_super = super_90_sync,
2246 .rdev_size_change = super_90_rdev_size_change,
2247 .allow_new_offset = super_90_allow_new_offset,
2248 },
2249 [1] = {
2250 .name = "md-1",
2251 .owner = THIS_MODULE,
2252 .load_super = super_1_load,
2253 .validate_super = super_1_validate,
2254 .sync_super = super_1_sync,
2255 .rdev_size_change = super_1_rdev_size_change,
2256 .allow_new_offset = super_1_allow_new_offset,
2257 },
2258};
2259
2260static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2261{
2262 if (mddev->sync_super) {
2263 mddev->sync_super(mddev, rdev);
2264 return;
2265 }
2266
2267 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2268
2269 super_types[mddev->major_version].sync_super(mddev, rdev);
2270}
2271
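/*
 * match_mddev_units() returns 1 if the two arrays share a component
 * device, i.e. some rdev of mddev1 and some rdev of mddev2 live on the
 * same underlying disk.
 */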
2272static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2273{
2274 struct md_rdev *rdev, *rdev2;
2275
2276 rcu_read_lock();
2277 rdev_for_each_rcu(rdev, mddev1) {
2278 if (test_bit(Faulty, &rdev->flags) ||
2279 test_bit(Journal, &rdev->flags) ||
2280 rdev->raid_disk == -1)
2281 continue;
2282 rdev_for_each_rcu(rdev2, mddev2) {
2283 if (test_bit(Faulty, &rdev2->flags) ||
2284 test_bit(Journal, &rdev2->flags) ||
2285 rdev2->raid_disk == -1)
2286 continue;
2287 if (rdev->bdev->bd_contains ==
2288 rdev2->bdev->bd_contains) {
2289 rcu_read_unlock();
2290 return 1;
2291 }
2292 }
2293 }
2294 rcu_read_unlock();
2295 return 0;
2296}
2297
2298static LIST_HEAD(pending_raid_disks);
2299
2300
2301
2302
2303
2304
2305
2306
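/*
 * Enable data integrity support on the array: use the first in-sync,
 * non-faulty member as a reference profile, check that every other
 * member is compatible with it, and register that profile for the
 * md gendisk.
 */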
2307int md_integrity_register(struct mddev *mddev)
2308{
2309 struct md_rdev *rdev, *reference = NULL;
2310
2311 if (list_empty(&mddev->disks))
2312 return 0;
2313 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2314 return 0;
2315 rdev_for_each(rdev, mddev) {
2316
2317 if (test_bit(Faulty, &rdev->flags))
2318 continue;
2319 if (rdev->raid_disk < 0)
2320 continue;
2321 if (!reference) {
2322
2323 reference = rdev;
2324 continue;
2325 }
2326
2327 if (blk_integrity_compare(reference->bdev->bd_disk,
2328 rdev->bdev->bd_disk) < 0)
2329 return -EINVAL;
2330 }
2331 if (!reference || !bdev_get_integrity(reference->bdev))
2332 return 0;
2333
2334
2335
2336
2337 blk_integrity_register(mddev->gendisk,
2338 bdev_get_integrity(reference->bdev));
2339
2340 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2341 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2342 pr_err("md: failed to create integrity pool for %s\n",
2343 mdname(mddev));
2344 return -EINVAL;
2345 }
2346 return 0;
2347}
2348EXPORT_SYMBOL(md_integrity_register);
2349
2350
2351
2352
2353
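/*
 * Attempt to add an rdev, but only if it is consistent with the
 * current integrity profile of the array.
 */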
2354int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2355{
2356 struct blk_integrity *bi_mddev;
2357 char name[BDEVNAME_SIZE];
2358
2359 if (!mddev->gendisk)
2360 return 0;
2361
2362 bi_mddev = blk_get_integrity(mddev->gendisk);
2363
2364 if (!bi_mddev)
2365 return 0;
2366
2367 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2368 pr_err("%s: incompatible integrity profile for %s\n",
2369 mdname(mddev), bdevname(rdev->bdev, name));
2370 return -ENXIO;
2371 }
2372
2373 return 0;
2374}
2375EXPORT_SYMBOL(md_integrity_add_rdev);
2376
2377static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2378{
2379 char b[BDEVNAME_SIZE];
2380 struct kobject *ko;
2381 int err;
2382
2383
2384 if (find_rdev(mddev, rdev->bdev->bd_dev))
2385 return -EEXIST;
2386
2387 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2388 mddev->pers)
2389 return -EROFS;
2390
2391
2392 if (!test_bit(Journal, &rdev->flags) &&
2393 rdev->sectors &&
2394 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2395 if (mddev->pers) {
2396
2397
2398
2399
2400 if (mddev->level > 0)
2401 return -ENOSPC;
2402 } else
2403 mddev->dev_sectors = rdev->sectors;
2404 }
2405
2406
2407
2408
2409
2410 rcu_read_lock();
2411 if (rdev->desc_nr < 0) {
2412 int choice = 0;
2413 if (mddev->pers)
2414 choice = mddev->raid_disks;
2415 while (md_find_rdev_nr_rcu(mddev, choice))
2416 choice++;
2417 rdev->desc_nr = choice;
2418 } else {
2419 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2420 rcu_read_unlock();
2421 return -EBUSY;
2422 }
2423 }
2424 rcu_read_unlock();
2425 if (!test_bit(Journal, &rdev->flags) &&
2426 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2427 pr_warn("md: %s: array is limited to %d devices\n",
2428 mdname(mddev), mddev->max_disks);
2429 return -EBUSY;
2430 }
2431 bdevname(rdev->bdev,b);
2432 strreplace(b, '/', '!');
2433
2434 rdev->mddev = mddev;
2435 pr_debug("md: bind<%s>\n", b);
2436
2437 if (mddev->raid_disks)
2438 mddev_create_serial_pool(mddev, rdev, false);
2439
2440 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2441 goto fail;
2442
2443 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2444 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2445 /* failure here is OK */;
2446 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2447
2448 list_add_rcu(&rdev->same_set, &mddev->disks);
2449 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2450
2451
2452 mddev->recovery_disabled++;
2453
2454 return 0;
2455
2456 fail:
2457 pr_warn("md: failed to register dev-%s for %s\n",
2458 b, mdname(mddev));
2459 return err;
2460}
2461
2462static void rdev_delayed_delete(struct work_struct *ws)
2463{
2464 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2465 kobject_del(&rdev->kobj);
2466 kobject_put(&rdev->kobj);
2467}
2468
2469static void unbind_rdev_from_array(struct md_rdev *rdev)
2470{
2471 char b[BDEVNAME_SIZE];
2472
2473 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2474 list_del_rcu(&rdev->same_set);
2475 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2476 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2477 rdev->mddev = NULL;
2478 sysfs_remove_link(&rdev->kobj, "block");
2479 sysfs_put(rdev->sysfs_state);
2480 rdev->sysfs_state = NULL;
2481 rdev->badblocks.count = 0;
2482
2483
2484
2485
2486 synchronize_rcu();
2487 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2488 kobject_get(&rdev->kobj);
2489 queue_work(md_rdev_misc_wq, &rdev->del_work);
2490}
2491
2492
2493
2494
2495
2496
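/*
 * Lock a device: claim it exclusively so it cannot be mounted,
 * repartitioned or otherwise reused by another RAID array (or any
 * other kernel subsystem) while it belongs to this one.
 */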
2497static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2498{
2499 int err = 0;
2500 struct block_device *bdev;
2501
2502 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2503 shared ? (struct md_rdev *)lock_rdev : rdev);
2504 if (IS_ERR(bdev)) {
2505 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2506 MAJOR(dev), MINOR(dev));
2507 return PTR_ERR(bdev);
2508 }
2509 rdev->bdev = bdev;
2510 return err;
2511}
2512
2513static void unlock_rdev(struct md_rdev *rdev)
2514{
2515 struct block_device *bdev = rdev->bdev;
2516 rdev->bdev = NULL;
2517 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2518}
2519
2520void md_autodetect_dev(dev_t dev);
2521
2522static void export_rdev(struct md_rdev *rdev)
2523{
2524 char b[BDEVNAME_SIZE];
2525
2526 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2527 md_rdev_clear(rdev);
2528#ifndef MODULE
2529 if (test_bit(AutoDetected, &rdev->flags))
2530 md_autodetect_dev(rdev->bdev->bd_dev);
2531#endif
2532 unlock_rdev(rdev);
2533 kobject_put(&rdev->kobj);
2534}
2535
2536void md_kick_rdev_from_array(struct md_rdev *rdev)
2537{
2538 unbind_rdev_from_array(rdev);
2539 export_rdev(rdev);
2540}
2541EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2542
2543static void export_array(struct mddev *mddev)
2544{
2545 struct md_rdev *rdev;
2546
2547 while (!list_empty(&mddev->disks)) {
2548 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2549 same_set);
2550 md_kick_rdev_from_array(rdev);
2551 }
2552 mddev->raid_disks = 0;
2553 mddev->major_version = 0;
2554}
2555
2556static bool set_in_sync(struct mddev *mddev)
2557{
2558 lockdep_assert_held(&mddev->lock);
2559 if (!mddev->in_sync) {
2560 mddev->sync_checkers++;
2561 spin_unlock(&mddev->lock);
2562 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2563 spin_lock(&mddev->lock);
2564 if (!mddev->in_sync &&
2565 percpu_ref_is_zero(&mddev->writes_pending)) {
2566 mddev->in_sync = 1;
2567
2568
2569
2570
2571 smp_mb();
2572 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2573 sysfs_notify_dirent_safe(mddev->sysfs_state);
2574 }
2575 if (--mddev->sync_checkers == 0)
2576 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2577 }
2578 if (mddev->safemode == 1)
2579 mddev->safemode = 0;
2580 return mddev->in_sync;
2581}
2582
2583static void sync_sbs(struct mddev *mddev, int nospares)
2584{
2585
2586
2587
2588
2589
2590
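/* Update the in-memory superblock image of each member device.
 * A device is skipped if its superblock already records the current
 * event count, or (when 'nospares' is set) if it is a spare whose
 * event count is just one behind, so spares are not needlessly
 * rewritten on clean<->dirty transitions.
 */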
2591 struct md_rdev *rdev;
2592 rdev_for_each(rdev, mddev) {
2593 if (rdev->sb_events == mddev->events ||
2594 (nospares &&
2595 rdev->raid_disk < 0 &&
2596 rdev->sb_events+1 == mddev->events)) {
2597
2598 rdev->sb_loaded = 2;
2599 } else {
2600 sync_super(mddev, rdev);
2601 rdev->sb_loaded = 1;
2602 }
2603 }
2604}
2605
2606static bool does_sb_need_changing(struct mddev *mddev)
2607{
2608 struct md_rdev *rdev;
2609 struct mdp_superblock_1 *sb;
2610 int role;
2611
2612
2613 rdev_for_each(rdev, mddev)
2614 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2615 break;
2616
2617
2618 if (!rdev)
2619 return false;
2620
2621 sb = page_address(rdev->sb_page);
2622
2623 rdev_for_each(rdev, mddev) {
2624 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2625
2626 if (role == 0xffff && rdev->raid_disk >=0 &&
2627 !test_bit(Faulty, &rdev->flags))
2628 return true;
2629
2630 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2631 return true;
2632 }
2633
2634
2635 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2636 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2637 (mddev->layout != le32_to_cpu(sb->layout)) ||
2638 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2639 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2640 return true;
2641
2642 return false;
2643}
2644
2645void md_update_sb(struct mddev *mddev, int force_change)
2646{
2647 struct md_rdev *rdev;
2648 int sync_req;
2649 int nospares = 0;
2650 int any_badblocks_changed = 0;
2651 int ret = -1;
2652
2653 if (mddev->ro) {
2654 if (force_change)
2655 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2656 return;
2657 }
2658
2659repeat:
2660 if (mddev_is_clustered(mddev)) {
2661 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2662 force_change = 1;
2663 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2664 nospares = 1;
2665 ret = md_cluster_ops->metadata_update_start(mddev);
2666
2667 if (!does_sb_need_changing(mddev)) {
2668 if (ret == 0)
2669 md_cluster_ops->metadata_update_cancel(mddev);
2670 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2671 BIT(MD_SB_CHANGE_DEVS) |
2672 BIT(MD_SB_CHANGE_CLEAN));
2673 return;
2674 }
2675 }
2676
2677
2678
2679
2680
2681
2682
2683 rdev_for_each(rdev, mddev) {
2684 if (rdev->raid_disk >= 0 &&
2685 mddev->delta_disks >= 0 &&
2686 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2687 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2688 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2689 !test_bit(Journal, &rdev->flags) &&
2690 !test_bit(In_sync, &rdev->flags) &&
2691 mddev->curr_resync_completed > rdev->recovery_offset)
2692 rdev->recovery_offset = mddev->curr_resync_completed;
2693
2694 }
2695 if (!mddev->persistent) {
2696 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2697 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2698 if (!mddev->external) {
2699 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2700 rdev_for_each(rdev, mddev) {
2701 if (rdev->badblocks.changed) {
2702 rdev->badblocks.changed = 0;
2703 ack_all_badblocks(&rdev->badblocks);
2704 md_error(mddev, rdev);
2705 }
2706 clear_bit(Blocked, &rdev->flags);
2707 clear_bit(BlockedBadBlocks, &rdev->flags);
2708 wake_up(&rdev->blocked_wait);
2709 }
2710 }
2711 wake_up(&mddev->sb_wait);
2712 return;
2713 }
2714
2715 spin_lock(&mddev->lock);
2716
2717 mddev->utime = ktime_get_real_seconds();
2718
2719 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2720 force_change = 1;
2721 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2722
2723
2724
2725
2726 nospares = 1;
2727 if (force_change)
2728 nospares = 0;
2729 if (mddev->degraded)
2730
2731
2732
2733
2734
2735
2736
2737
2738
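/* If the array is degraded, skipping spare updates is both risky
 * (a removed device could still look up to date and be re-added
 * without a resync) and pointless, so force a normal event-count
 * increase instead.
 */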
2739 nospares = 0;
2740
2741 sync_req = mddev->in_sync;
2742
2743
2744
2745 if (nospares
2746 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2747 && mddev->can_decrease_events
2748 && mddev->events != 1) {
2749 mddev->events--;
2750 mddev->can_decrease_events = 0;
2751 } else {
2752
2753 mddev->events ++;
2754 mddev->can_decrease_events = nospares;
2755 }
2756
2757
2758
2759
2760
2761
2762 WARN_ON(mddev->events == 0);
2763
2764 rdev_for_each(rdev, mddev) {
2765 if (rdev->badblocks.changed)
2766 any_badblocks_changed++;
2767 if (test_bit(Faulty, &rdev->flags))
2768 set_bit(FaultRecorded, &rdev->flags);
2769 }
2770
2771 sync_sbs(mddev, nospares);
2772 spin_unlock(&mddev->lock);
2773
2774 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2775 mdname(mddev), mddev->in_sync);
2776
2777 if (mddev->queue)
2778 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2779rewrite:
2780 md_bitmap_update_sb(mddev->bitmap);
2781 rdev_for_each(rdev, mddev) {
2782 char b[BDEVNAME_SIZE];
2783
2784 if (rdev->sb_loaded != 1)
2785 continue;
2786
2787 if (!test_bit(Faulty, &rdev->flags)) {
2788 md_super_write(mddev,rdev,
2789 rdev->sb_start, rdev->sb_size,
2790 rdev->sb_page);
2791 pr_debug("md: (write) %s's sb offset: %llu\n",
2792 bdevname(rdev->bdev, b),
2793 (unsigned long long)rdev->sb_start);
2794 rdev->sb_events = mddev->events;
2795 if (rdev->badblocks.size) {
2796 md_super_write(mddev, rdev,
2797 rdev->badblocks.sector,
2798 rdev->badblocks.size << 9,
2799 rdev->bb_page);
2800 rdev->badblocks.size = 0;
2801 }
2802
2803 } else
2804 pr_debug("md: %s (skipping faulty)\n",
2805 bdevname(rdev->bdev, b));
2806
2807 if (mddev->level == LEVEL_MULTIPATH)
2808
2809 break;
2810 }
2811 if (md_super_wait(mddev) < 0)
2812 goto rewrite;
2813
2814
2815 if (mddev_is_clustered(mddev) && ret == 0)
2816 md_cluster_ops->metadata_update_finish(mddev);
2817
2818 if (mddev->in_sync != sync_req ||
2819 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2820 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2821
2822 goto repeat;
2823 wake_up(&mddev->sb_wait);
2824 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2825 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2826
2827 rdev_for_each(rdev, mddev) {
2828 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2829 clear_bit(Blocked, &rdev->flags);
2830
2831 if (any_badblocks_changed)
2832 ack_all_badblocks(&rdev->badblocks);
2833 clear_bit(BlockedBadBlocks, &rdev->flags);
2834 wake_up(&rdev->blocked_wait);
2835 }
2836}
2837EXPORT_SYMBOL(md_update_sb);
2838
2839static int add_bound_rdev(struct md_rdev *rdev)
2840{
2841 struct mddev *mddev = rdev->mddev;
2842 int err = 0;
2843 bool add_journal = test_bit(Journal, &rdev->flags);
2844
2845 if (!mddev->pers->hot_remove_disk || add_journal) {
2846
2847
2848
2849
2850 super_types[mddev->major_version].
2851 validate_super(mddev, rdev);
2852 if (add_journal)
2853 mddev_suspend(mddev);
2854 err = mddev->pers->hot_add_disk(mddev, rdev);
2855 if (add_journal)
2856 mddev_resume(mddev);
2857 if (err) {
2858 md_kick_rdev_from_array(rdev);
2859 return err;
2860 }
2861 }
2862 sysfs_notify_dirent_safe(rdev->sysfs_state);
2863
2864 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2865 if (mddev->degraded)
2866 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2867 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2868 md_new_event(mddev);
2869 md_wakeup_thread(mddev->thread);
2870 return 0;
2871}
2872
2873
2874
2875
2876static int cmd_match(const char *cmd, const char *str)
2877{
2878
2879
2880
2881
2882 while (*cmd && *str && *cmd == *str) {
2883 cmd++;
2884 str++;
2885 }
2886 if (*cmd == '\n')
2887 cmd++;
2888 if (*str || *cmd)
2889 return 0;
2890 return 1;
2891}
2892
2893struct rdev_sysfs_entry {
2894 struct attribute attr;
2895 ssize_t (*show)(struct md_rdev *, char *);
2896 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2897};
2898
2899static ssize_t
2900state_show(struct md_rdev *rdev, char *page)
2901{
2902 char *sep = ",";
2903 size_t len = 0;
2904 unsigned long flags = READ_ONCE(rdev->flags);
2905
2906 if (test_bit(Faulty, &flags) ||
2907 (!test_bit(ExternalBbl, &flags) &&
2908 rdev->badblocks.unacked_exist))
2909 len += sprintf(page+len, "faulty%s", sep);
2910 if (test_bit(In_sync, &flags))
2911 len += sprintf(page+len, "in_sync%s", sep);
2912 if (test_bit(Journal, &flags))
2913 len += sprintf(page+len, "journal%s", sep);
2914 if (test_bit(WriteMostly, &flags))
2915 len += sprintf(page+len, "write_mostly%s", sep);
2916 if (test_bit(Blocked, &flags) ||
2917 (rdev->badblocks.unacked_exist
2918 && !test_bit(Faulty, &flags)))
2919 len += sprintf(page+len, "blocked%s", sep);
2920 if (!test_bit(Faulty, &flags) &&
2921 !test_bit(Journal, &flags) &&
2922 !test_bit(In_sync, &flags))
2923 len += sprintf(page+len, "spare%s", sep);
2924 if (test_bit(WriteErrorSeen, &flags))
2925 len += sprintf(page+len, "write_error%s", sep);
2926 if (test_bit(WantReplacement, &flags))
2927 len += sprintf(page+len, "want_replacement%s", sep);
2928 if (test_bit(Replacement, &flags))
2929 len += sprintf(page+len, "replacement%s", sep);
2930 if (test_bit(ExternalBbl, &flags))
2931 len += sprintf(page+len, "external_bbl%s", sep);
2932 if (test_bit(FailFast, &flags))
2933 len += sprintf(page+len, "failfast%s", sep);
2934
2935 if (len)
2936 len -= strlen(sep);
2937
2938 return len+sprintf(page+len, "\n");
2939}
2940
2941static ssize_t
2942state_store(struct md_rdev *rdev, const char *buf, size_t len)
2943{
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
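/* can be set either:
 *  faulty  - simulates an error on the device
 *  remove  - disconnects the device
 *  writemostly - sets write_mostly
 *  -writemostly - clears write_mostly
 *  blocked - sets the Blocked flag
 *  -blocked - clears the Blocked flag and possibly simulates an error
 *  insync - sets In_sync provided the device isn't active
 *  -insync - clears In_sync for a device with a slot assigned,
 *            so that it gets rebuilt based on the bitmap
 *  write_error - sets WriteErrorSeen
 *  -write_error - clears WriteErrorSeen
 *  {,-}failfast - set/clear FailFast
 */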
2958 int err = -EINVAL;
2959 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2960 md_error(rdev->mddev, rdev);
2961 if (test_bit(Faulty, &rdev->flags))
2962 err = 0;
2963 else
2964 err = -EBUSY;
2965 } else if (cmd_match(buf, "remove")) {
2966 if (rdev->mddev->pers) {
2967 clear_bit(Blocked, &rdev->flags);
2968 remove_and_add_spares(rdev->mddev, rdev);
2969 }
2970 if (rdev->raid_disk >= 0)
2971 err = -EBUSY;
2972 else {
2973 struct mddev *mddev = rdev->mddev;
2974 err = 0;
2975 if (mddev_is_clustered(mddev))
2976 err = md_cluster_ops->remove_disk(mddev, rdev);
2977
2978 if (err == 0) {
2979 md_kick_rdev_from_array(rdev);
2980 if (mddev->pers) {
2981 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2982 md_wakeup_thread(mddev->thread);
2983 }
2984 md_new_event(mddev);
2985 }
2986 }
2987 } else if (cmd_match(buf, "writemostly")) {
2988 set_bit(WriteMostly, &rdev->flags);
2989 mddev_create_serial_pool(rdev->mddev, rdev, false);
2990 err = 0;
2991 } else if (cmd_match(buf, "-writemostly")) {
2992 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2993 clear_bit(WriteMostly, &rdev->flags);
2994 err = 0;
2995 } else if (cmd_match(buf, "blocked")) {
2996 set_bit(Blocked, &rdev->flags);
2997 err = 0;
2998 } else if (cmd_match(buf, "-blocked")) {
2999 if (!test_bit(Faulty, &rdev->flags) &&
3000 !test_bit(ExternalBbl, &rdev->flags) &&
3001 rdev->badblocks.unacked_exist) {
3002
3003
3004
3005 md_error(rdev->mddev, rdev);
3006 }
3007 clear_bit(Blocked, &rdev->flags);
3008 clear_bit(BlockedBadBlocks, &rdev->flags);
3009 wake_up(&rdev->blocked_wait);
3010 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3011 md_wakeup_thread(rdev->mddev->thread);
3012
3013 err = 0;
3014 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3015 set_bit(In_sync, &rdev->flags);
3016 err = 0;
3017 } else if (cmd_match(buf, "failfast")) {
3018 set_bit(FailFast, &rdev->flags);
3019 err = 0;
3020 } else if (cmd_match(buf, "-failfast")) {
3021 clear_bit(FailFast, &rdev->flags);
3022 err = 0;
3023 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3024 !test_bit(Journal, &rdev->flags)) {
3025 if (rdev->mddev->pers == NULL) {
3026 clear_bit(In_sync, &rdev->flags);
3027 rdev->saved_raid_disk = rdev->raid_disk;
3028 rdev->raid_disk = -1;
3029 err = 0;
3030 }
3031 } else if (cmd_match(buf, "write_error")) {
3032 set_bit(WriteErrorSeen, &rdev->flags);
3033 err = 0;
3034 } else if (cmd_match(buf, "-write_error")) {
3035 clear_bit(WriteErrorSeen, &rdev->flags);
3036 err = 0;
3037 } else if (cmd_match(buf, "want_replacement")) {
3038
3039
3040
3041
3042 if (rdev->raid_disk >= 0 &&
3043 !test_bit(Journal, &rdev->flags) &&
3044 !test_bit(Replacement, &rdev->flags))
3045 set_bit(WantReplacement, &rdev->flags);
3046 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3047 md_wakeup_thread(rdev->mddev->thread);
3048 err = 0;
3049 } else if (cmd_match(buf, "-want_replacement")) {
3050
3051
3052
3053 err = 0;
3054 clear_bit(WantReplacement, &rdev->flags);
3055 } else if (cmd_match(buf, "replacement")) {
3056
3057
3058
3059
3060 if (rdev->mddev->pers)
3061 err = -EBUSY;
3062 else {
3063 set_bit(Replacement, &rdev->flags);
3064 err = 0;
3065 }
3066 } else if (cmd_match(buf, "-replacement")) {
3067
3068 if (rdev->mddev->pers)
3069 err = -EBUSY;
3070 else {
3071 clear_bit(Replacement, &rdev->flags);
3072 err = 0;
3073 }
3074 } else if (cmd_match(buf, "re-add")) {
3075 if (!rdev->mddev->pers)
3076 err = -EINVAL;
3077 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3078 rdev->saved_raid_disk >= 0) {
3079
3080
3081
3082
3083
3084
3085 if (!mddev_is_clustered(rdev->mddev) ||
3086 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3087 clear_bit(Faulty, &rdev->flags);
3088 err = add_bound_rdev(rdev);
3089 }
3090 } else
3091 err = -EBUSY;
3092 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3093 set_bit(ExternalBbl, &rdev->flags);
3094 rdev->badblocks.shift = 0;
3095 err = 0;
3096 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3097 clear_bit(ExternalBbl, &rdev->flags);
3098 err = 0;
3099 }
3100 if (!err)
3101 sysfs_notify_dirent_safe(rdev->sysfs_state);
3102 return err ? err : len;
3103}
3104static struct rdev_sysfs_entry rdev_state =
3105__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3106
3107static ssize_t
3108errors_show(struct md_rdev *rdev, char *page)
3109{
3110 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3111}
3112
3113static ssize_t
3114errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3115{
3116 unsigned int n;
3117 int rv;
3118
3119 rv = kstrtouint(buf, 10, &n);
3120 if (rv < 0)
3121 return rv;
3122 atomic_set(&rdev->corrected_errors, n);
3123 return len;
3124}
3125static struct rdev_sysfs_entry rdev_errors =
3126__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3127
3128static ssize_t
3129slot_show(struct md_rdev *rdev, char *page)
3130{
3131 if (test_bit(Journal, &rdev->flags))
3132 return sprintf(page, "journal\n");
3133 else if (rdev->raid_disk < 0)
3134 return sprintf(page, "none\n");
3135 else
3136 return sprintf(page, "%d\n", rdev->raid_disk);
3137}
3138
3139static ssize_t
3140slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3141{
3142 int slot;
3143 int err;
3144
3145 if (test_bit(Journal, &rdev->flags))
3146 return -EBUSY;
3147 if (strncmp(buf, "none", 4)==0)
3148 slot = -1;
3149 else {
3150 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3151 if (err < 0)
3152 return err;
3153 }
3154 if (rdev->mddev->pers && slot == -1) {
3155
3156
3157
3158
3159
3160
3161
3162 if (rdev->raid_disk == -1)
3163 return -EEXIST;
3164
3165 if (rdev->mddev->pers->hot_remove_disk == NULL)
3166 return -EINVAL;
3167 clear_bit(Blocked, &rdev->flags);
3168 remove_and_add_spares(rdev->mddev, rdev);
3169 if (rdev->raid_disk >= 0)
3170 return -EBUSY;
3171 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3172 md_wakeup_thread(rdev->mddev->thread);
3173 } else if (rdev->mddev->pers) {
3174
3175
3176
3177 int err;
3178
3179 if (rdev->raid_disk != -1)
3180 return -EBUSY;
3181
3182 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3183 return -EBUSY;
3184
3185 if (rdev->mddev->pers->hot_add_disk == NULL)
3186 return -EINVAL;
3187
3188 if (slot >= rdev->mddev->raid_disks &&
3189 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3190 return -ENOSPC;
3191
3192 rdev->raid_disk = slot;
3193 if (test_bit(In_sync, &rdev->flags))
3194 rdev->saved_raid_disk = slot;
3195 else
3196 rdev->saved_raid_disk = -1;
3197 clear_bit(In_sync, &rdev->flags);
3198 clear_bit(Bitmap_sync, &rdev->flags);
3199 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3200 if (err) {
3201 rdev->raid_disk = -1;
3202 return err;
3203 } else
3204 sysfs_notify_dirent_safe(rdev->sysfs_state);
3205 if (sysfs_link_rdev(rdev->mddev, rdev))
3206 /* failure here is OK */;
3207
3208 } else {
3209 if (slot >= rdev->mddev->raid_disks &&
3210 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3211 return -ENOSPC;
3212 rdev->raid_disk = slot;
3213
3214 clear_bit(Faulty, &rdev->flags);
3215 clear_bit(WriteMostly, &rdev->flags);
3216 set_bit(In_sync, &rdev->flags);
3217 sysfs_notify_dirent_safe(rdev->sysfs_state);
3218 }
3219 return len;
3220}
3221
3222static struct rdev_sysfs_entry rdev_slot =
3223__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3224
3225static ssize_t
3226offset_show(struct md_rdev *rdev, char *page)
3227{
3228 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3229}
3230
3231static ssize_t
3232offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3233{
3234 unsigned long long offset;
3235 if (kstrtoull(buf, 10, &offset) < 0)
3236 return -EINVAL;
3237 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3238 return -EBUSY;
3239 if (rdev->sectors && rdev->mddev->external)
3240
3241
3242 return -EBUSY;
3243 rdev->data_offset = offset;
3244 rdev->new_data_offset = offset;
3245 return len;
3246}
3247
3248static struct rdev_sysfs_entry rdev_offset =
3249__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3250
3251static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3252{
3253 return sprintf(page, "%llu\n",
3254 (unsigned long long)rdev->new_data_offset);
3255}
3256
3257static ssize_t new_offset_store(struct md_rdev *rdev,
3258 const char *buf, size_t len)
3259{
3260 unsigned long long new_offset;
3261 struct mddev *mddev = rdev->mddev;
3262
3263 if (kstrtoull(buf, 10, &new_offset) < 0)
3264 return -EINVAL;
3265
3266 if (mddev->sync_thread ||
3267 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3268 return -EBUSY;
3269 if (new_offset == rdev->data_offset)
3270
3271 ;
3272 else if (new_offset > rdev->data_offset) {
3273
3274 if (new_offset - rdev->data_offset
3275 + mddev->dev_sectors > rdev->sectors)
3276 return -E2BIG;
3277 }
3278
3279
3280
3281
3282
3283 if (new_offset < rdev->data_offset &&
3284 mddev->reshape_backwards)
3285 return -EINVAL;
3286
3287
3288
3289
3290 if (new_offset > rdev->data_offset &&
3291 !mddev->reshape_backwards)
3292 return -EINVAL;
3293
3294 if (mddev->pers && mddev->persistent &&
3295 !super_types[mddev->major_version]
3296 .allow_new_offset(rdev, new_offset))
3297 return -E2BIG;
3298 rdev->new_data_offset = new_offset;
3299 if (new_offset > rdev->data_offset)
3300 mddev->reshape_backwards = 1;
3301 else if (new_offset < rdev->data_offset)
3302 mddev->reshape_backwards = 0;
3303
3304 return len;
3305}
3306static struct rdev_sysfs_entry rdev_new_offset =
3307__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3308
3309static ssize_t
3310rdev_size_show(struct md_rdev *rdev, char *page)
3311{
3312 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3313}
3314
3315static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3316{
3317
3318 if (s1+l1 <= s2)
3319 return 0;
3320 if (s2+l2 <= s1)
3321 return 0;
3322 return 1;
3323}
3324
3325static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3326{
3327 unsigned long long blocks;
3328 sector_t new;
3329
3330 if (kstrtoull(buf, 10, &blocks) < 0)
3331 return -EINVAL;
3332
3333 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3334 return -EINVAL;
3335
3336 new = blocks * 2;
3337 if (new != blocks * 2)
3338 return -EINVAL;
3339
3340 *sectors = new;
3341 return 0;
3342}
3343
3344static ssize_t
3345rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3346{
3347 struct mddev *my_mddev = rdev->mddev;
3348 sector_t oldsectors = rdev->sectors;
3349 sector_t sectors;
3350
3351 if (test_bit(Journal, &rdev->flags))
3352 return -EBUSY;
3353 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3354 return -EINVAL;
3355 if (rdev->data_offset != rdev->new_data_offset)
3356 return -EINVAL;
3357 if (my_mddev->pers && rdev->raid_disk >= 0) {
3358 if (my_mddev->persistent) {
3359 sectors = super_types[my_mddev->major_version].
3360 rdev_size_change(rdev, sectors);
3361 if (!sectors)
3362 return -EBUSY;
3363 } else if (!sectors)
3364 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3365 rdev->data_offset;
3366 if (!my_mddev->pers->resize)
3367
3368 return -EINVAL;
3369 }
3370 if (sectors < my_mddev->dev_sectors)
3371 return -EINVAL;
3372
3373 rdev->sectors = sectors;
3374 if (sectors > oldsectors && my_mddev->external) {
3375
3376
3377
3378
3379
3380
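/* Check that no other rdev using the same underlying bdev overlaps
 * with the new size.  Walking the rdev lists under rcu is sufficient
 * here; this does not give a hard guarantee, it just helps avoid
 * obvious mistakes.
 */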
3381 struct mddev *mddev;
3382 int overlap = 0;
3383 struct list_head *tmp;
3384
3385 rcu_read_lock();
3386 for_each_mddev(mddev, tmp) {
3387 struct md_rdev *rdev2;
3388
3389 rdev_for_each(rdev2, mddev)
3390 if (rdev->bdev == rdev2->bdev &&
3391 rdev != rdev2 &&
3392 overlaps(rdev->data_offset, rdev->sectors,
3393 rdev2->data_offset,
3394 rdev2->sectors)) {
3395 overlap = 1;
3396 break;
3397 }
3398 if (overlap) {
3399 mddev_put(mddev);
3400 break;
3401 }
3402 }
3403 rcu_read_unlock();
3404 if (overlap) {
3405
3406
3407
3408
3409
3410
3411 rdev->sectors = oldsectors;
3412 return -EBUSY;
3413 }
3414 }
3415 return len;
3416}
3417
3418static struct rdev_sysfs_entry rdev_size =
3419__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3420
3421static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3422{
3423 unsigned long long recovery_start = rdev->recovery_offset;
3424
3425 if (test_bit(In_sync, &rdev->flags) ||
3426 recovery_start == MaxSector)
3427 return sprintf(page, "none\n");
3428
3429 return sprintf(page, "%llu\n", recovery_start);
3430}
3431
3432static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3433{
3434 unsigned long long recovery_start;
3435
3436 if (cmd_match(buf, "none"))
3437 recovery_start = MaxSector;
3438 else if (kstrtoull(buf, 10, &recovery_start))
3439 return -EINVAL;
3440
3441 if (rdev->mddev->pers &&
3442 rdev->raid_disk >= 0)
3443 return -EBUSY;
3444
3445 rdev->recovery_offset = recovery_start;
3446 if (recovery_start == MaxSector)
3447 set_bit(In_sync, &rdev->flags);
3448 else
3449 clear_bit(In_sync, &rdev->flags);
3450 return len;
3451}
3452
3453static struct rdev_sysfs_entry rdev_recovery_start =
3454__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
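/*
 * sysfs access to the bad-blocks list: 'bad_blocks' shows ranges that
 * have been recorded (and acknowledged) as bad, truncated to fit in a
 * single sysfs page; writing "sector length" to it records an
 * acknowledged bad range.  'unacknowledged_bad_blocks' shows ranges
 * not yet acknowledged; writing to it records a range without
 * acknowledging it.
 */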
3467static ssize_t bb_show(struct md_rdev *rdev, char *page)
3468{
3469 return badblocks_show(&rdev->badblocks, page, 0);
3470}
3471static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3472{
3473 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3474
3475 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3476 wake_up(&rdev->blocked_wait);
3477 return rv;
3478}
3479static struct rdev_sysfs_entry rdev_bad_blocks =
3480__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3481
3482static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3483{
3484 return badblocks_show(&rdev->badblocks, page, 1);
3485}
3486static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3487{
3488 return badblocks_store(&rdev->badblocks, page, len, 1);
3489}
3490static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3491__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3492
3493static ssize_t
3494ppl_sector_show(struct md_rdev *rdev, char *page)
3495{
3496 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3497}
3498
3499static ssize_t
3500ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3501{
3502 unsigned long long sector;
3503
3504 if (kstrtoull(buf, 10, &sector) < 0)
3505 return -EINVAL;
3506 if (sector != (sector_t)sector)
3507 return -EINVAL;
3508
3509 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3510 rdev->raid_disk >= 0)
3511 return -EBUSY;
3512
3513 if (rdev->mddev->persistent) {
3514 if (rdev->mddev->major_version == 0)
3515 return -EINVAL;
3516 if ((sector > rdev->sb_start &&
3517 sector - rdev->sb_start > S16_MAX) ||
3518 (sector < rdev->sb_start &&
3519 rdev->sb_start - sector > -S16_MIN))
3520 return -EINVAL;
3521 rdev->ppl.offset = sector - rdev->sb_start;
3522 } else if (!rdev->mddev->external) {
3523 return -EBUSY;
3524 }
3525 rdev->ppl.sector = sector;
3526 return len;
3527}
3528
3529static struct rdev_sysfs_entry rdev_ppl_sector =
3530__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3531
3532static ssize_t
3533ppl_size_show(struct md_rdev *rdev, char *page)
3534{
3535 return sprintf(page, "%u\n", rdev->ppl.size);
3536}
3537
3538static ssize_t
3539ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3540{
3541 unsigned int size;
3542
3543 if (kstrtouint(buf, 10, &size) < 0)
3544 return -EINVAL;
3545
3546 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3547 rdev->raid_disk >= 0)
3548 return -EBUSY;
3549
3550 if (rdev->mddev->persistent) {
3551 if (rdev->mddev->major_version == 0)
3552 return -EINVAL;
3553 if (size > U16_MAX)
3554 return -EINVAL;
3555 } else if (!rdev->mddev->external) {
3556 return -EBUSY;
3557 }
3558 rdev->ppl.size = size;
3559 return len;
3560}
3561
3562static struct rdev_sysfs_entry rdev_ppl_size =
3563__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3564
3565static struct attribute *rdev_default_attrs[] = {
3566 &rdev_state.attr,
3567 &rdev_errors.attr,
3568 &rdev_slot.attr,
3569 &rdev_offset.attr,
3570 &rdev_new_offset.attr,
3571 &rdev_size.attr,
3572 &rdev_recovery_start.attr,
3573 &rdev_bad_blocks.attr,
3574 &rdev_unack_bad_blocks.attr,
3575 &rdev_ppl_sector.attr,
3576 &rdev_ppl_size.attr,
3577 NULL,
3578};
3579static ssize_t
3580rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3581{
3582 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3583 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3584
3585 if (!entry->show)
3586 return -EIO;
3587 if (!rdev->mddev)
3588 return -ENODEV;
3589 return entry->show(rdev, page);
3590}
3591
3592static ssize_t
3593rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3594 const char *page, size_t length)
3595{
3596 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3597 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3598 ssize_t rv;
3599 struct mddev *mddev = rdev->mddev;
3600
3601 if (!entry->store)
3602 return -EIO;
3603 if (!capable(CAP_SYS_ADMIN))
3604 return -EACCES;
3605 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3606 if (!rv) {
3607 if (rdev->mddev == NULL)
3608 rv = -ENODEV;
3609 else
3610 rv = entry->store(rdev, page, length);
3611 mddev_unlock(mddev);
3612 }
3613 return rv;
3614}
3615
3616static void rdev_free(struct kobject *ko)
3617{
3618 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3619 kfree(rdev);
3620}
3621static const struct sysfs_ops rdev_sysfs_ops = {
3622 .show = rdev_attr_show,
3623 .store = rdev_attr_store,
3624};
3625static struct kobj_type rdev_ktype = {
3626 .release = rdev_free,
3627 .sysfs_ops = &rdev_sysfs_ops,
3628 .default_attrs = rdev_default_attrs,
3629};
3630
3631int md_rdev_init(struct md_rdev *rdev)
3632{
3633 rdev->desc_nr = -1;
3634 rdev->saved_raid_disk = -1;
3635 rdev->raid_disk = -1;
3636 rdev->flags = 0;
3637 rdev->data_offset = 0;
3638 rdev->new_data_offset = 0;
3639 rdev->sb_events = 0;
3640 rdev->last_read_error = 0;
3641 rdev->sb_loaded = 0;
3642 rdev->bb_page = NULL;
3643 atomic_set(&rdev->nr_pending, 0);
3644 atomic_set(&rdev->read_errors, 0);
3645 atomic_set(&rdev->corrected_errors, 0);
3646
3647 INIT_LIST_HEAD(&rdev->same_set);
3648 init_waitqueue_head(&rdev->blocked_wait);
3649
3650
3651
3652
3653
3654 return badblocks_init(&rdev->badblocks, 0);
3655}
3656EXPORT_SYMBOL_GPL(md_rdev_init);
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
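/*
 * Import a device: allocate an rdev, claim the block device and, if
 * 'super_format' >= 0, load and sanity-check its superblock.
 * Returns ERR_PTR() on failure (zero-size device, unreadable or
 * invalid superblock).
 */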
3667static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3668{
3669 char b[BDEVNAME_SIZE];
3670 int err;
3671 struct md_rdev *rdev;
3672 sector_t size;
3673
3674 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3675 if (!rdev)
3676 return ERR_PTR(-ENOMEM);
3677
3678 err = md_rdev_init(rdev);
3679 if (err)
3680 goto abort_free;
3681 err = alloc_disk_sb(rdev);
3682 if (err)
3683 goto abort_free;
3684
3685 err = lock_rdev(rdev, newdev, super_format == -2);
3686 if (err)
3687 goto abort_free;
3688
3689 kobject_init(&rdev->kobj, &rdev_ktype);
3690
3691 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3692 if (!size) {
3693 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3694 bdevname(rdev->bdev,b));
3695 err = -EINVAL;
3696 goto abort_free;
3697 }
3698
3699 if (super_format >= 0) {
3700 err = super_types[super_format].
3701 load_super(rdev, NULL, super_minor);
3702 if (err == -EINVAL) {
3703 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3704 bdevname(rdev->bdev,b),
3705 super_format, super_minor);
3706 goto abort_free;
3707 }
3708 if (err < 0) {
3709 pr_warn("md: could not read %s's sb, not importing!\n",
3710 bdevname(rdev->bdev,b));
3711 goto abort_free;
3712 }
3713 }
3714
3715 return rdev;
3716
3717abort_free:
3718 if (rdev->bdev)
3719 unlock_rdev(rdev);
3720 md_rdev_clear(rdev);
3721 kfree(rdev);
3722 return ERR_PTR(err);
3723}
3724
3725
3726
3727
3728
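/*
 * Check a full RAID array for plausibility: load every superblock,
 * keep the freshest one as the reference and kick out any device
 * whose superblock is inconsistent or stale.
 */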
3729static int analyze_sbs(struct mddev *mddev)
3730{
3731 int i;
3732 struct md_rdev *rdev, *freshest, *tmp;
3733 char b[BDEVNAME_SIZE];
3734
3735 freshest = NULL;
3736 rdev_for_each_safe(rdev, tmp, mddev)
3737 switch (super_types[mddev->major_version].
3738 load_super(rdev, freshest, mddev->minor_version)) {
3739 case 1:
3740 freshest = rdev;
3741 break;
3742 case 0:
3743 break;
3744 default:
3745 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3746 bdevname(rdev->bdev,b));
3747 md_kick_rdev_from_array(rdev);
3748 }
3749
3750
3751 if (!freshest) {
3752 pr_warn("md: cannot find a valid disk\n");
3753 return -EINVAL;
3754 }
3755
3756 super_types[mddev->major_version].
3757 validate_super(mddev, freshest);
3758
3759 i = 0;
3760 rdev_for_each_safe(rdev, tmp, mddev) {
3761 if (mddev->max_disks &&
3762 (rdev->desc_nr >= mddev->max_disks ||
3763 i > mddev->max_disks)) {
3764 pr_warn("md: %s: %s: only %d devices permitted\n",
3765 mdname(mddev), bdevname(rdev->bdev, b),
3766 mddev->max_disks);
3767 md_kick_rdev_from_array(rdev);
3768 continue;
3769 }
3770 if (rdev != freshest) {
3771 if (super_types[mddev->major_version].
3772 validate_super(mddev, rdev)) {
3773 pr_warn("md: kicking non-fresh %s from array!\n",
3774 bdevname(rdev->bdev,b));
3775 md_kick_rdev_from_array(rdev);
3776 continue;
3777 }
3778 }
3779 if (mddev->level == LEVEL_MULTIPATH) {
3780 rdev->desc_nr = i++;
3781 rdev->raid_disk = rdev->desc_nr;
3782 set_bit(In_sync, &rdev->flags);
3783 } else if (rdev->raid_disk >=
3784 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3785 !test_bit(Journal, &rdev->flags)) {
3786 rdev->raid_disk = -1;
3787 clear_bit(In_sync, &rdev->flags);
3788 }
3789 }
3790
3791 return 0;
3792}
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
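/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds, even though md internally
 * uses a much smaller unit such as milliseconds or jiffies.
 * This function takes a decimal number with an optional fractional
 * component and produces the integer obtained by multiplying it by
 * 10^'scale', all without floating-point arithmetic.
 */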
3804int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3805{
3806 unsigned long result = 0;
3807 long decimals = -1;
3808 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3809 if (*cp == '.')
3810 decimals = 0;
3811 else if (decimals < scale) {
3812 unsigned int value;
3813 value = *cp - '0';
3814 result = result * 10 + value;
3815 if (decimals >= 0)
3816 decimals++;
3817 }
3818 cp++;
3819 }
3820 if (*cp == '\n')
3821 cp++;
3822 if (*cp)
3823 return -EINVAL;
3824 if (decimals < 0)
3825 decimals = 0;
3826 *res = result * int_pow(10, scale - decimals);
3827 return 0;
3828}
3829
3830static ssize_t
3831safe_delay_show(struct mddev *mddev, char *page)
3832{
3833 int msec = (mddev->safemode_delay*1000)/HZ;
3834 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3835}
3836static ssize_t
3837safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3838{
3839 unsigned long msec;
3840
3841 if (mddev_is_clustered(mddev)) {
3842 pr_warn("md: Safemode is disabled for clustered mode\n");
3843 return -EINVAL;
3844 }
3845
3846 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3847 return -EINVAL;
3848 if (msec == 0)
3849 mddev->safemode_delay = 0;
3850 else {
3851 unsigned long old_delay = mddev->safemode_delay;
3852 unsigned long new_delay = (msec*HZ)/1000;
3853
3854 if (new_delay == 0)
3855 new_delay = 1;
3856 mddev->safemode_delay = new_delay;
3857 if (new_delay < old_delay || old_delay == 0)
3858 mod_timer(&mddev->safemode_timer, jiffies+1);
3859 }
3860 return len;
3861}
3862static struct md_sysfs_entry md_safe_delay =
3863__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3864
3865static ssize_t
3866level_show(struct mddev *mddev, char *page)
3867{
3868 struct md_personality *p;
3869 int ret;
3870 spin_lock(&mddev->lock);
3871 p = mddev->pers;
3872 if (p)
3873 ret = sprintf(page, "%s\n", p->name);
3874 else if (mddev->clevel[0])
3875 ret = sprintf(page, "%s\n", mddev->clevel);
3876 else if (mddev->level != LEVEL_NONE)
3877 ret = sprintf(page, "%d\n", mddev->level);
3878 else
3879 ret = 0;
3880 spin_unlock(&mddev->lock);
3881 return ret;
3882}
3883
3884static ssize_t
3885level_store(struct mddev *mddev, const char *buf, size_t len)
3886{
3887 char clevel[16];
3888 ssize_t rv;
3889 size_t slen = len;
3890 struct md_personality *pers, *oldpers;
3891 long level;
3892 void *priv, *oldpriv;
3893 struct md_rdev *rdev;
3894
3895 if (slen == 0 || slen >= sizeof(clevel))
3896 return -EINVAL;
3897
3898 rv = mddev_lock(mddev);
3899 if (rv)
3900 return rv;
3901
3902 if (mddev->pers == NULL) {
3903 strncpy(mddev->clevel, buf, slen);
3904 if (mddev->clevel[slen-1] == '\n')
3905 slen--;
3906 mddev->clevel[slen] = 0;
3907 mddev->level = LEVEL_NONE;
3908 rv = len;
3909 goto out_unlock;
3910 }
3911 rv = -EROFS;
3912 if (mddev->ro)
3913 goto out_unlock;
3914
3915
3916
3917
3918
3919
3920
3921 rv = -EBUSY;
3922 if (mddev->sync_thread ||
3923 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3924 mddev->reshape_position != MaxSector ||
3925 mddev->sysfs_active)
3926 goto out_unlock;
3927
3928 rv = -EINVAL;
3929 if (!mddev->pers->quiesce) {
3930 pr_warn("md: %s: %s does not support online personality change\n",
3931 mdname(mddev), mddev->pers->name);
3932 goto out_unlock;
3933 }
3934
3935
3936 strncpy(clevel, buf, slen);
3937 if (clevel[slen-1] == '\n')
3938 slen--;
3939 clevel[slen] = 0;
3940 if (kstrtol(clevel, 10, &level))
3941 level = LEVEL_NONE;
3942
3943 if (request_module("md-%s", clevel) != 0)
3944 request_module("md-level-%s", clevel);
3945 spin_lock(&pers_lock);
3946 pers = find_pers(level, clevel);
3947 if (!pers || !try_module_get(pers->owner)) {
3948 spin_unlock(&pers_lock);
3949 pr_warn("md: personality %s not loaded\n", clevel);
3950 rv = -EINVAL;
3951 goto out_unlock;
3952 }
3953 spin_unlock(&pers_lock);
3954
3955 if (pers == mddev->pers) {
3956
3957 module_put(pers->owner);
3958 rv = len;
3959 goto out_unlock;
3960 }
3961 if (!pers->takeover) {
3962 module_put(pers->owner);
3963 pr_warn("md: %s: %s does not support personality takeover\n",
3964 mdname(mddev), clevel);
3965 rv = -EINVAL;
3966 goto out_unlock;
3967 }
3968
3969 rdev_for_each(rdev, mddev)
3970 rdev->new_raid_disk = rdev->raid_disk;
3971
3972
3973
3974
3975 priv = pers->takeover(mddev);
3976 if (IS_ERR(priv)) {
3977 mddev->new_level = mddev->level;
3978 mddev->new_layout = mddev->layout;
3979 mddev->new_chunk_sectors = mddev->chunk_sectors;
3980 mddev->raid_disks -= mddev->delta_disks;
3981 mddev->delta_disks = 0;
3982 mddev->reshape_backwards = 0;
3983 module_put(pers->owner);
3984 pr_warn("md: %s: %s would not accept array\n",
3985 mdname(mddev), clevel);
3986 rv = PTR_ERR(priv);
3987 goto out_unlock;
3988 }
3989
3990
3991 mddev_suspend(mddev);
3992 mddev_detach(mddev);
3993
3994 spin_lock(&mddev->lock);
3995 oldpers = mddev->pers;
3996 oldpriv = mddev->private;
3997 mddev->pers = pers;
3998 mddev->private = priv;
3999 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4000 mddev->level = mddev->new_level;
4001 mddev->layout = mddev->new_layout;
4002 mddev->chunk_sectors = mddev->new_chunk_sectors;
4003 mddev->delta_disks = 0;
4004 mddev->reshape_backwards = 0;
4005 mddev->degraded = 0;
4006 spin_unlock(&mddev->lock);
4007
4008 if (oldpers->sync_request == NULL &&
4009 mddev->external) {
4010
4011
4012
4013
4014
4015
4016
4017 mddev->in_sync = 0;
4018 mddev->safemode_delay = 0;
4019 mddev->safemode = 0;
4020 }
4021
4022 oldpers->free(mddev, oldpriv);
4023
4024 if (oldpers->sync_request == NULL &&
4025 pers->sync_request != NULL) {
4026
4027 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4028 pr_warn("md: cannot register extra attributes for %s\n",
4029 mdname(mddev));
4030 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4031 }
4032 if (oldpers->sync_request != NULL &&
4033 pers->sync_request == NULL) {
4034
4035 if (mddev->to_remove == NULL)
4036 mddev->to_remove = &md_redundancy_group;
4037 }
4038
4039 module_put(oldpers->owner);
4040
4041 rdev_for_each(rdev, mddev) {
4042 if (rdev->raid_disk < 0)
4043 continue;
4044 if (rdev->new_raid_disk >= mddev->raid_disks)
4045 rdev->new_raid_disk = -1;
4046 if (rdev->new_raid_disk == rdev->raid_disk)
4047 continue;
4048 sysfs_unlink_rdev(mddev, rdev);
4049 }
4050 rdev_for_each(rdev, mddev) {
4051 if (rdev->raid_disk < 0)
4052 continue;
4053 if (rdev->new_raid_disk == rdev->raid_disk)
4054 continue;
4055 rdev->raid_disk = rdev->new_raid_disk;
4056 if (rdev->raid_disk < 0)
4057 clear_bit(In_sync, &rdev->flags);
4058 else {
4059 if (sysfs_link_rdev(mddev, rdev))
4060 pr_warn("md: cannot register rd%d for %s after level change\n",
4061 rdev->raid_disk, mdname(mddev));
4062 }
4063 }
4064
4065 if (pers->sync_request == NULL) {
4066
4067
4068
4069 mddev->in_sync = 1;
4070 del_timer_sync(&mddev->safemode_timer);
4071 }
4072 blk_set_stacking_limits(&mddev->queue->limits);
4073 pers->run(mddev);
4074 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4075 mddev_resume(mddev);
4076 if (!mddev->thread)
4077 md_update_sb(mddev, 1);
4078 sysfs_notify(&mddev->kobj, NULL, "level");
4079 md_new_event(mddev);
4080 rv = len;
4081out_unlock:
4082 mddev_unlock(mddev);
4083 return rv;
4084}
4085
4086static struct md_sysfs_entry md_level =
4087__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4088
4089static ssize_t
4090layout_show(struct mddev *mddev, char *page)
4091{
4092
4093 if (mddev->reshape_position != MaxSector &&
4094 mddev->layout != mddev->new_layout)
4095 return sprintf(page, "%d (%d)\n",
4096 mddev->new_layout, mddev->layout);
4097 return sprintf(page, "%d\n", mddev->layout);
4098}
4099
4100static ssize_t
4101layout_store(struct mddev *mddev, const char *buf, size_t len)
4102{
4103 unsigned int n;
4104 int err;
4105
4106 err = kstrtouint(buf, 10, &n);
4107 if (err < 0)
4108 return err;
4109 err = mddev_lock(mddev);
4110 if (err)
4111 return err;
4112
4113 if (mddev->pers) {
4114 if (mddev->pers->check_reshape == NULL)
4115 err = -EBUSY;
4116 else if (mddev->ro)
4117 err = -EROFS;
4118 else {
4119 mddev->new_layout = n;
4120 err = mddev->pers->check_reshape(mddev);
4121 if (err)
4122 mddev->new_layout = mddev->layout;
4123 }
4124 } else {
4125 mddev->new_layout = n;
4126 if (mddev->reshape_position == MaxSector)
4127 mddev->layout = n;
4128 }
4129 mddev_unlock(mddev);
4130 return err ?: len;
4131}
4132static struct md_sysfs_entry md_layout =
4133__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4134
4135static ssize_t
4136raid_disks_show(struct mddev *mddev, char *page)
4137{
4138 if (mddev->raid_disks == 0)
4139 return 0;
4140 if (mddev->reshape_position != MaxSector &&
4141 mddev->delta_disks != 0)
4142 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4143 mddev->raid_disks - mddev->delta_disks);
4144 return sprintf(page, "%d\n", mddev->raid_disks);
4145}
4146
4147static int update_raid_disks(struct mddev *mddev, int raid_disks);
4148
4149static ssize_t
4150raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4151{
4152 unsigned int n;
4153 int err;
4154
4155 err = kstrtouint(buf, 10, &n);
4156 if (err < 0)
4157 return err;
4158
4159 err = mddev_lock(mddev);
4160 if (err)
4161 return err;
4162 if (mddev->pers)
4163 err = update_raid_disks(mddev, n);
4164 else if (mddev->reshape_position != MaxSector) {
4165 struct md_rdev *rdev;
4166 int olddisks = mddev->raid_disks - mddev->delta_disks;
4167
4168 err = -EINVAL;
4169 rdev_for_each(rdev, mddev) {
4170 if (olddisks < n &&
4171 rdev->data_offset < rdev->new_data_offset)
4172 goto out_unlock;
4173 if (olddisks > n &&
4174 rdev->data_offset > rdev->new_data_offset)
4175 goto out_unlock;
4176 }
4177 err = 0;
4178 mddev->delta_disks = n - olddisks;
4179 mddev->raid_disks = n;
4180 mddev->reshape_backwards = (mddev->delta_disks < 0);
4181 } else
4182 mddev->raid_disks = n;
4183out_unlock:
4184 mddev_unlock(mddev);
4185 return err ? err : len;
4186}
4187static struct md_sysfs_entry md_raid_disks =
4188__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4189
4190static ssize_t
4191chunk_size_show(struct mddev *mddev, char *page)
4192{
4193 if (mddev->reshape_position != MaxSector &&
4194 mddev->chunk_sectors != mddev->new_chunk_sectors)
4195 return sprintf(page, "%d (%d)\n",
4196 mddev->new_chunk_sectors << 9,
4197 mddev->chunk_sectors << 9);
4198 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4199}
4200
4201static ssize_t
4202chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4203{
4204 unsigned long n;
4205 int err;
4206
4207 err = kstrtoul(buf, 10, &n);
4208 if (err < 0)
4209 return err;
4210
4211 err = mddev_lock(mddev);
4212 if (err)
4213 return err;
4214 if (mddev->pers) {
4215 if (mddev->pers->check_reshape == NULL)
4216 err = -EBUSY;
4217 else if (mddev->ro)
4218 err = -EROFS;
4219 else {
4220 mddev->new_chunk_sectors = n >> 9;
4221 err = mddev->pers->check_reshape(mddev);
4222 if (err)
4223 mddev->new_chunk_sectors = mddev->chunk_sectors;
4224 }
4225 } else {
4226 mddev->new_chunk_sectors = n >> 9;
4227 if (mddev->reshape_position == MaxSector)
4228 mddev->chunk_sectors = n >> 9;
4229 }
4230 mddev_unlock(mddev);
4231 return err ?: len;
4232}
4233static struct md_sysfs_entry md_chunk_size =
4234__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4235
4236static ssize_t
4237resync_start_show(struct mddev *mddev, char *page)
4238{
4239 if (mddev->recovery_cp == MaxSector)
4240 return sprintf(page, "none\n");
4241 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4242}
4243
4244static ssize_t
4245resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4246{
4247 unsigned long long n;
4248 int err;
4249
4250 if (cmd_match(buf, "none"))
4251 n = MaxSector;
4252 else {
4253 err = kstrtoull(buf, 10, &n);
4254 if (err < 0)
4255 return err;
4256 if (n != (sector_t)n)
4257 return -EINVAL;
4258 }
4259
4260 err = mddev_lock(mddev);
4261 if (err)
4262 return err;
4263 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4264 err = -EBUSY;
4265
4266 if (!err) {
4267 mddev->recovery_cp = n;
4268 if (mddev->pers)
4269 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4270 }
4271 mddev_unlock(mddev);
4272 return err ?: len;
4273}
4274static struct md_sysfs_entry md_resync_start =
4275__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4276 resync_start_show, resync_start_store);
4277
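/*
 * The array state can be:
 *
 *     clear         - no devices, no size, no level; equivalent to STOP_ARRAY
 *     inactive      - may have some settings, but the array is not active;
 *                     all IO results in an error
 *     suspended     - (not supported yet) all IO requests will block
 *     readonly      - no resync can happen, no superblocks get written,
 *                     write requests fail
 *     read-auto     - like readonly, but switches to read-write on the first
 *                     write request (behaves like 'clean')
 *     clean         - no pending writes, but otherwise active
 *     active        - fully active: IO and resync can be happening
 *     write-pending - clean, but writes are blocked waiting for 'active'
 *                     to be written
 *     active-idle   - like active, but no writes have been seen for a while
 *     broken        - like clean, but the array is missing a member
 *                     (RAID0/LINEAR only)
 */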
4319enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4320 write_pending, active_idle, broken, bad_word};
4321static char *array_states[] = {
4322 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4323 "write-pending", "active-idle", "broken", NULL };
4324
4325static int match_word(const char *word, char **list)
4326{
4327 int n;
4328 for (n=0; list[n]; n++)
4329 if (cmd_match(word, list[n]))
4330 break;
4331 return n;
4332}
4333
4334static ssize_t
4335array_state_show(struct mddev *mddev, char *page)
4336{
4337 enum array_state st = inactive;
4338
4339 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4340 switch(mddev->ro) {
4341 case 1:
4342 st = readonly;
4343 break;
4344 case 2:
4345 st = read_auto;
4346 break;
4347 case 0:
4348 spin_lock(&mddev->lock);
4349 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4350 st = write_pending;
4351 else if (mddev->in_sync)
4352 st = clean;
4353 else if (mddev->safemode)
4354 st = active_idle;
4355 else
4356 st = active;
4357 spin_unlock(&mddev->lock);
4358 }
4359
4360 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4361 st = broken;
4362 } else {
4363 if (list_empty(&mddev->disks) &&
4364 mddev->raid_disks == 0 &&
4365 mddev->dev_sectors == 0)
4366 st = clear;
4367 else
4368 st = inactive;
4369 }
4370 return sprintf(page, "%s\n", array_states[st]);
4371}
4372
4373static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4374static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4375static int do_md_run(struct mddev *mddev);
4376static int restart_array(struct mddev *mddev);
4377
4378static ssize_t
4379array_state_store(struct mddev *mddev, const char *buf, size_t len)
4380{
4381 int err = 0;
4382 enum array_state st = match_word(buf, array_states);
4383
4384 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4385
4386
4387
4388 spin_lock(&mddev->lock);
4389 if (st == active) {
4390 restart_array(mddev);
4391 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4392 md_wakeup_thread(mddev->thread);
4393 wake_up(&mddev->sb_wait);
4394 } else {
4395 restart_array(mddev);
4396 if (!set_in_sync(mddev))
4397 err = -EBUSY;
4398 }
4399 if (!err)
4400 sysfs_notify_dirent_safe(mddev->sysfs_state);
4401 spin_unlock(&mddev->lock);
4402 return err ?: len;
4403 }
4404 err = mddev_lock(mddev);
4405 if (err)
4406 return err;
4407 err = -EINVAL;
4408 switch(st) {
4409 case bad_word:
4410 break;
4411 case clear:
4412
4413 err = do_md_stop(mddev, 0, NULL);
4414 break;
4415 case inactive:
4416
4417 if (mddev->pers)
4418 err = do_md_stop(mddev, 2, NULL);
4419 else
4420 err = 0;
4421 break;
4422 case suspended:
4423 break;
4424 case readonly:
4425 if (mddev->pers)
4426 err = md_set_readonly(mddev, NULL);
4427 else {
4428 mddev->ro = 1;
4429 set_disk_ro(mddev->gendisk, 1);
4430 err = do_md_run(mddev);
4431 }
4432 break;
4433 case read_auto:
4434 if (mddev->pers) {
4435 if (mddev->ro == 0)
4436 err = md_set_readonly(mddev, NULL);
4437 else if (mddev->ro == 1)
4438 err = restart_array(mddev);
4439 if (err == 0) {
4440 mddev->ro = 2;
4441 set_disk_ro(mddev->gendisk, 0);
4442 }
4443 } else {
4444 mddev->ro = 2;
4445 err = do_md_run(mddev);
4446 }
4447 break;
4448 case clean:
4449 if (mddev->pers) {
4450 err = restart_array(mddev);
4451 if (err)
4452 break;
4453 spin_lock(&mddev->lock);
4454 if (!set_in_sync(mddev))
4455 err = -EBUSY;
4456 spin_unlock(&mddev->lock);
4457 } else
4458 err = -EINVAL;
4459 break;
4460 case active:
4461 if (mddev->pers) {
4462 err = restart_array(mddev);
4463 if (err)
4464 break;
4465 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4466 wake_up(&mddev->sb_wait);
4467 err = 0;
4468 } else {
4469 mddev->ro = 0;
4470 set_disk_ro(mddev->gendisk, 0);
4471 err = do_md_run(mddev);
4472 }
4473 break;
4474 case write_pending:
4475 case active_idle:
4476 case broken:
4477
4478 break;
4479 }
4480
4481 if (!err) {
4482 if (mddev->hold_active == UNTIL_IOCTL)
4483 mddev->hold_active = 0;
4484 sysfs_notify_dirent_safe(mddev->sysfs_state);
4485 }
4486 mddev_unlock(mddev);
4487 return err ?: len;
4488}
4489static struct md_sysfs_entry md_array_state =
4490__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4491
4492static ssize_t
4493max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4494 return sprintf(page, "%d\n",
4495 atomic_read(&mddev->max_corr_read_errors));
4496}
4497
4498static ssize_t
4499max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4500{
4501 unsigned int n;
4502 int rv;
4503
4504 rv = kstrtouint(buf, 10, &n);
4505 if (rv < 0)
4506 return rv;
4507 atomic_set(&mddev->max_corr_read_errors, n);
4508 return len;
4509}
4510
4511static struct md_sysfs_entry max_corr_read_errors =
4512__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4513 max_corrected_read_errors_store);
4514
4515static ssize_t
4516null_show(struct mddev *mddev, char *page)
4517{
4518 return -EINVAL;
4519}
4520
4521
4522static void flush_rdev_wq(struct mddev *mddev)
4523{
4524 struct md_rdev *rdev;
4525
4526 rcu_read_lock();
4527 rdev_for_each_rcu(rdev, mddev)
4528 if (work_pending(&rdev->del_work)) {
4529 flush_workqueue(md_rdev_misc_wq);
4530 break;
4531 }
4532 rcu_read_unlock();
4533}
4534
4535static ssize_t
4536new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4537{
4538
4539
4540
4541
4542
4543
4544
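/* buf must be "%d:%d" (optionally followed by a newline), giving the
 * major and minor numbers of the device to import and bind to the array.
 */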
4545 char *e;
4546 int major = simple_strtoul(buf, &e, 10);
4547 int minor;
4548 dev_t dev;
4549 struct md_rdev *rdev;
4550 int err;
4551
4552 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4553 return -EINVAL;
4554 minor = simple_strtoul(e+1, &e, 10);
4555 if (*e && *e != '\n')
4556 return -EINVAL;
4557 dev = MKDEV(major, minor);
4558 if (major != MAJOR(dev) ||
4559 minor != MINOR(dev))
4560 return -EOVERFLOW;
4561
4562 flush_rdev_wq(mddev);
4563 err = mddev_lock(mddev);
4564 if (err)
4565 return err;
4566 if (mddev->persistent) {
4567 rdev = md_import_device(dev, mddev->major_version,
4568 mddev->minor_version);
4569 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4570 struct md_rdev *rdev0
4571 = list_entry(mddev->disks.next,
4572 struct md_rdev, same_set);
4573 err = super_types[mddev->major_version]
4574 .load_super(rdev, rdev0, mddev->minor_version);
4575 if (err < 0)
4576 goto out;
4577 }
4578 } else if (mddev->external)
4579 rdev = md_import_device(dev, -2, -1);
4580 else
4581 rdev = md_import_device(dev, -1, -1);
4582
4583 if (IS_ERR(rdev)) {
4584 mddev_unlock(mddev);
4585 return PTR_ERR(rdev);
4586 }
4587 err = bind_rdev_to_array(rdev, mddev);
4588 out:
4589 if (err)
4590 export_rdev(rdev);
4591 mddev_unlock(mddev);
4592 if (!err)
4593 md_new_event(mddev);
4594 return err ? err : len;
4595}
4596
4597static struct md_sysfs_entry md_new_device =
4598__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4599
4600static ssize_t
4601bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4602{
4603 char *end;
4604 unsigned long chunk, end_chunk;
4605 int err;
4606
4607 err = mddev_lock(mddev);
4608 if (err)
4609 return err;
4610 if (!mddev->bitmap)
4611 goto out;
4612
4613 while (*buf) {
4614 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4615 if (buf == end) break;
4616 if (*end == '-') {
4617 buf = end + 1;
4618 end_chunk = simple_strtoul(buf, &end, 0);
4619 if (buf == end) break;
4620 }
4621 if (*end && !isspace(*end)) break;
4622 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4623 buf = skip_spaces(end);
4624 }
4625 md_bitmap_unplug(mddev->bitmap);
4626out:
4627 mddev_unlock(mddev);
4628 return len;
4629}
4630
4631static struct md_sysfs_entry md_bitmap =
4632__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4633
4634static ssize_t
4635size_show(struct mddev *mddev, char *page)
4636{
4637 return sprintf(page, "%llu\n",
4638 (unsigned long long)mddev->dev_sectors / 2);
4639}
4640
4641static int update_size(struct mddev *mddev, sector_t num_sectors);
4642
4643static ssize_t
4644size_store(struct mddev *mddev, const char *buf, size_t len)
4645{
4646 /* If array is inactive, we can reduce the component size, but
4647  * not increase it (except from 0).
4648  * If array is active, we can try an on-line resize
4649  */
4650 sector_t sectors;
4651 int err = strict_blocks_to_sectors(buf, &sectors);
4652
4653 if (err < 0)
4654 return err;
4655 err = mddev_lock(mddev);
4656 if (err)
4657 return err;
4658 if (mddev->pers) {
4659 err = update_size(mddev, sectors);
4660 if (err == 0)
4661 md_update_sb(mddev, 1);
4662 } else {
4663 if (mddev->dev_sectors == 0 ||
4664 mddev->dev_sectors > sectors)
4665 mddev->dev_sectors = sectors;
4666 else
4667 err = -ENOSPC;
4668 }
4669 mddev_unlock(mddev);
4670 return err ? err : len;
4671}
4672
4673static struct md_sysfs_entry md_size =
4674__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4675
4676 /* Metadata version.
4677  * This is one of
4678  *   'none' for arrays with no metadata (good luck...)
4679  *   'external' for arrays with externally managed metadata,
4680  * or N.M for internally known formats
4681  */
4682static ssize_t
4683metadata_show(struct mddev *mddev, char *page)
4684{
4685 if (mddev->persistent)
4686 return sprintf(page, "%d.%d\n",
4687 mddev->major_version, mddev->minor_version);
4688 else if (mddev->external)
4689 return sprintf(page, "external:%s\n", mddev->metadata_type);
4690 else
4691 return sprintf(page, "none\n");
4692}
4693
4694static ssize_t
4695metadata_store(struct mddev *mddev, const char *buf, size_t len)
4696{
4697 int major, minor;
4698 char *e;
4699 int err;
4700
4701 /* Changing the details of 'external' metadata is
4702  * always permitted.  Otherwise there must be
4703  * no devices attached to the array.
4704  */
4705 err = mddev_lock(mddev);
4706 if (err)
4707 return err;
4708 err = -EBUSY;
4709 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4710 ;
4711 else if (!list_empty(&mddev->disks))
4712 goto out_unlock;
4713
4714 err = 0;
4715 if (cmd_match(buf, "none")) {
4716 mddev->persistent = 0;
4717 mddev->external = 0;
4718 mddev->major_version = 0;
4719 mddev->minor_version = 90;
4720 goto out_unlock;
4721 }
4722 if (strncmp(buf, "external:", 9) == 0) {
4723 size_t namelen = len-9;
4724 if (namelen >= sizeof(mddev->metadata_type))
4725 namelen = sizeof(mddev->metadata_type)-1;
4726 strncpy(mddev->metadata_type, buf+9, namelen);
4727 mddev->metadata_type[namelen] = 0;
4728 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4729 mddev->metadata_type[--namelen] = 0;
4730 mddev->persistent = 0;
4731 mddev->external = 1;
4732 mddev->major_version = 0;
4733 mddev->minor_version = 90;
4734 goto out_unlock;
4735 }
4736 major = simple_strtoul(buf, &e, 10);
4737 err = -EINVAL;
4738 if (e==buf || *e != '.')
4739 goto out_unlock;
4740 buf = e+1;
4741 minor = simple_strtoul(buf, &e, 10);
4742 if (e==buf || (*e && *e != '\n') )
4743 goto out_unlock;
4744 err = -ENOENT;
4745 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4746 goto out_unlock;
4747 mddev->major_version = major;
4748 mddev->minor_version = minor;
4749 mddev->persistent = 1;
4750 mddev->external = 0;
4751 err = 0;
4752out_unlock:
4753 mddev_unlock(mddev);
4754 return err ?: len;
4755}
4756
4757static struct md_sysfs_entry md_metadata =
4758__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4759
4760static ssize_t
4761action_show(struct mddev *mddev, char *page)
4762{
4763 char *type = "idle";
4764 unsigned long recovery = mddev->recovery;
4765 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4766 type = "frozen";
4767 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4768 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4769 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4770 type = "reshape";
4771 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4772 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4773 type = "resync";
4774 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4775 type = "check";
4776 else
4777 type = "repair";
4778 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4779 type = "recover";
4780 else if (mddev->reshape_position != MaxSector)
4781 type = "reshape";
4782 }
4783 return sprintf(page, "%s\n", type);
4784}
4785
4786static ssize_t
4787action_store(struct mddev *mddev, const char *page, size_t len)
4788{
4789 if (!mddev->pers || !mddev->pers->sync_request)
4790 return -EINVAL;
4791
4792
4793 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4794 if (cmd_match(page, "frozen"))
4795 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4796 else
4797 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4798 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4799 mddev_lock(mddev) == 0) {
4800 if (work_pending(&mddev->del_work))
4801 flush_workqueue(md_misc_wq);
4802 if (mddev->sync_thread) {
4803 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4804 md_reap_sync_thread(mddev);
4805 }
4806 mddev_unlock(mddev);
4807 }
4808 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4809 return -EBUSY;
4810 else if (cmd_match(page, "resync"))
4811 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4812 else if (cmd_match(page, "recover")) {
4813 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4814 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4815 } else if (cmd_match(page, "reshape")) {
4816 int err;
4817 if (mddev->pers->start_reshape == NULL)
4818 return -EINVAL;
4819 err = mddev_lock(mddev);
4820 if (!err) {
4821 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4822 err = -EBUSY;
4823 else {
4824 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4825 err = mddev->pers->start_reshape(mddev);
4826 }
4827 mddev_unlock(mddev);
4828 }
4829 if (err)
4830 return err;
4831 sysfs_notify(&mddev->kobj, NULL, "degraded");
4832 } else {
4833 if (cmd_match(page, "check"))
4834 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4835 else if (!cmd_match(page, "repair"))
4836 return -EINVAL;
4837 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4838 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4839 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4840 }
4841 if (mddev->ro == 2) {
4842 /* A write to sync_action is enough to justify
4843  * canceling read-auto mode
4844  */
4845 mddev->ro = 0;
4846 md_wakeup_thread(mddev->sync_thread);
4847 }
4848 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4849 md_wakeup_thread(mddev->thread);
4850 sysfs_notify_dirent_safe(mddev->sysfs_action);
4851 return len;
4852}
4853
4854static struct md_sysfs_entry md_scan_mode =
4855__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4856
4857static ssize_t
4858last_sync_action_show(struct mddev *mddev, char *page)
4859{
4860 return sprintf(page, "%s\n", mddev->last_sync_action);
4861}
4862
4863static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4864
4865static ssize_t
4866mismatch_cnt_show(struct mddev *mddev, char *page)
4867{
4868 return sprintf(page, "%llu\n",
4869 (unsigned long long)
4870 atomic64_read(&mddev->resync_mismatches));
4871}
4872
4873static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4874
4875static ssize_t
4876sync_min_show(struct mddev *mddev, char *page)
4877{
4878 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4879 mddev->sync_speed_min ? "local": "system");
4880}
4881
4882static ssize_t
4883sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4884{
4885 unsigned int min;
4886 int rv;
4887
4888 if (strncmp(buf, "system", 6)==0) {
4889 min = 0;
4890 } else {
4891 rv = kstrtouint(buf, 10, &min);
4892 if (rv < 0)
4893 return rv;
4894 if (min == 0)
4895 return -EINVAL;
4896 }
4897 mddev->sync_speed_min = min;
4898 return len;
4899}
4900
4901static struct md_sysfs_entry md_sync_min =
4902__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4903
4904static ssize_t
4905sync_max_show(struct mddev *mddev, char *page)
4906{
4907 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4908 mddev->sync_speed_max ? "local": "system");
4909}
4910
4911static ssize_t
4912sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4913{
4914 unsigned int max;
4915 int rv;
4916
4917 if (strncmp(buf, "system", 6)==0) {
4918 max = 0;
4919 } else {
4920 rv = kstrtouint(buf, 10, &max);
4921 if (rv < 0)
4922 return rv;
4923 if (max == 0)
4924 return -EINVAL;
4925 }
4926 mddev->sync_speed_max = max;
4927 return len;
4928}
4929
4930static struct md_sysfs_entry md_sync_max =
4931__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4932
4933static ssize_t
4934degraded_show(struct mddev *mddev, char *page)
4935{
4936 return sprintf(page, "%d\n", mddev->degraded);
4937}
4938static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4939
4940static ssize_t
4941sync_force_parallel_show(struct mddev *mddev, char *page)
4942{
4943 return sprintf(page, "%d\n", mddev->parallel_resync);
4944}
4945
4946static ssize_t
4947sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4948{
4949 long n;
4950
4951 if (kstrtol(buf, 10, &n))
4952 return -EINVAL;
4953
4954 if (n != 0 && n != 1)
4955 return -EINVAL;
4956
4957 mddev->parallel_resync = n;
4958
4959 if (mddev->sync_thread)
4960 wake_up(&resync_wait);
4961
4962 return len;
4963}
4964
4965
4966static struct md_sysfs_entry md_sync_force_parallel =
4967__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4968 sync_force_parallel_show, sync_force_parallel_store);
4969
4970static ssize_t
4971sync_speed_show(struct mddev *mddev, char *page)
4972{
4973 unsigned long resync, dt, db;
4974 if (mddev->curr_resync == 0)
4975 return sprintf(page, "none\n");
4976 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4977 dt = (jiffies - mddev->resync_mark) / HZ;
4978 if (!dt) dt++;
4979 db = resync - mddev->resync_mark_cnt;
4980 return sprintf(page, "%lu\n", db/dt/2);
4981}
4982
4983static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4984
4985static ssize_t
4986sync_completed_show(struct mddev *mddev, char *page)
4987{
4988 unsigned long long max_sectors, resync;
4989
4990 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4991 return sprintf(page, "none\n");
4992
4993 if (mddev->curr_resync == 1 ||
4994 mddev->curr_resync == 2)
4995 return sprintf(page, "delayed\n");
4996
4997 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4998 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4999 max_sectors = mddev->resync_max_sectors;
5000 else
5001 max_sectors = mddev->dev_sectors;
5002
5003 resync = mddev->curr_resync_completed;
5004 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5005}
5006
5007static struct md_sysfs_entry md_sync_completed =
5008 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5009
5010static ssize_t
5011min_sync_show(struct mddev *mddev, char *page)
5012{
5013 return sprintf(page, "%llu\n",
5014 (unsigned long long)mddev->resync_min);
5015}
5016static ssize_t
5017min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5018{
5019 unsigned long long min;
5020 int err;
5021
5022 if (kstrtoull(buf, 10, &min))
5023 return -EINVAL;
5024
5025 spin_lock(&mddev->lock);
5026 err = -EINVAL;
5027 if (min > mddev->resync_max)
5028 goto out_unlock;
5029
5030 err = -EBUSY;
5031 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5032 goto out_unlock;
5033
5034 /* Round down to multiple of 4K for safety */
5035 mddev->resync_min = round_down(min, 8);
5036 err = 0;
5037
5038out_unlock:
5039 spin_unlock(&mddev->lock);
5040 return err ?: len;
5041}
5042
5043static struct md_sysfs_entry md_min_sync =
5044__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5045
5046static ssize_t
5047max_sync_show(struct mddev *mddev, char *page)
5048{
5049 if (mddev->resync_max == MaxSector)
5050 return sprintf(page, "max\n");
5051 else
5052 return sprintf(page, "%llu\n",
5053 (unsigned long long)mddev->resync_max);
5054}
5055static ssize_t
5056max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5057{
5058 int err;
5059 spin_lock(&mddev->lock);
5060 if (strncmp(buf, "max", 3) == 0)
5061 mddev->resync_max = MaxSector;
5062 else {
5063 unsigned long long max;
5064 int chunk;
5065
5066 err = -EINVAL;
5067 if (kstrtoull(buf, 10, &max))
5068 goto out_unlock;
5069 if (max < mddev->resync_min)
5070 goto out_unlock;
5071
5072 err = -EBUSY;
5073 if (max < mddev->resync_max &&
5074 mddev->ro == 0 &&
5075 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5076 goto out_unlock;
5077
5078 /* Must be a multiple of chunk_size */
5079 chunk = mddev->chunk_sectors;
5080 if (chunk) {
5081 sector_t temp = max;
5082
5083 err = -EINVAL;
5084 if (sector_div(temp, chunk))
5085 goto out_unlock;
5086 }
5087 mddev->resync_max = max;
5088 }
5089 wake_up(&mddev->recovery_wait);
5090 err = 0;
5091out_unlock:
5092 spin_unlock(&mddev->lock);
5093 return err ?: len;
5094}
5095
5096static struct md_sysfs_entry md_max_sync =
5097__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
5098
5099static ssize_t
5100suspend_lo_show(struct mddev *mddev, char *page)
5101{
5102 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5103}
5104
5105static ssize_t
5106suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5107{
5108 unsigned long long new;
5109 int err;
5110
5111 err = kstrtoull(buf, 10, &new);
5112 if (err < 0)
5113 return err;
5114 if (new != (sector_t)new)
5115 return -EINVAL;
5116
5117 err = mddev_lock(mddev);
5118 if (err)
5119 return err;
5120 err = -EINVAL;
5121 if (mddev->pers == NULL ||
5122 mddev->pers->quiesce == NULL)
5123 goto unlock;
5124 mddev_suspend(mddev);
5125 mddev->suspend_lo = new;
5126 mddev_resume(mddev);
5127
5128 err = 0;
5129unlock:
5130 mddev_unlock(mddev);
5131 return err ?: len;
5132}
5133static struct md_sysfs_entry md_suspend_lo =
5134__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5135
5136static ssize_t
5137suspend_hi_show(struct mddev *mddev, char *page)
5138{
5139 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5140}
5141
5142static ssize_t
5143suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5144{
5145 unsigned long long new;
5146 int err;
5147
5148 err = kstrtoull(buf, 10, &new);
5149 if (err < 0)
5150 return err;
5151 if (new != (sector_t)new)
5152 return -EINVAL;
5153
5154 err = mddev_lock(mddev);
5155 if (err)
5156 return err;
5157 err = -EINVAL;
5158 if (mddev->pers == NULL)
5159 goto unlock;
5160
5161 mddev_suspend(mddev);
5162 mddev->suspend_hi = new;
5163 mddev_resume(mddev);
5164
5165 err = 0;
5166unlock:
5167 mddev_unlock(mddev);
5168 return err ?: len;
5169}
5170static struct md_sysfs_entry md_suspend_hi =
5171__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5172
5173static ssize_t
5174reshape_position_show(struct mddev *mddev, char *page)
5175{
5176 if (mddev->reshape_position != MaxSector)
5177 return sprintf(page, "%llu\n",
5178 (unsigned long long)mddev->reshape_position);
5179 strcpy(page, "none\n");
5180 return 5;
5181}
5182
5183static ssize_t
5184reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5185{
5186 struct md_rdev *rdev;
5187 unsigned long long new;
5188 int err;
5189
5190 err = kstrtoull(buf, 10, &new);
5191 if (err < 0)
5192 return err;
5193 if (new != (sector_t)new)
5194 return -EINVAL;
5195 err = mddev_lock(mddev);
5196 if (err)
5197 return err;
5198 err = -EBUSY;
5199 if (mddev->pers)
5200 goto unlock;
5201 mddev->reshape_position = new;
5202 mddev->delta_disks = 0;
5203 mddev->reshape_backwards = 0;
5204 mddev->new_level = mddev->level;
5205 mddev->new_layout = mddev->layout;
5206 mddev->new_chunk_sectors = mddev->chunk_sectors;
5207 rdev_for_each(rdev, mddev)
5208 rdev->new_data_offset = rdev->data_offset;
5209 err = 0;
5210unlock:
5211 mddev_unlock(mddev);
5212 return err ?: len;
5213}
5214
5215static struct md_sysfs_entry md_reshape_position =
5216__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5217 reshape_position_store);
5218
5219static ssize_t
5220reshape_direction_show(struct mddev *mddev, char *page)
5221{
5222 return sprintf(page, "%s\n",
5223 mddev->reshape_backwards ? "backwards" : "forwards");
5224}
5225
5226static ssize_t
5227reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5228{
5229 int backwards = 0;
5230 int err;
5231
5232 if (cmd_match(buf, "forwards"))
5233 backwards = 0;
5234 else if (cmd_match(buf, "backwards"))
5235 backwards = 1;
5236 else
5237 return -EINVAL;
5238 if (mddev->reshape_backwards == backwards)
5239 return len;
5240
5241 err = mddev_lock(mddev);
5242 if (err)
5243 return err;
5244
5245 if (mddev->delta_disks)
5246 err = -EBUSY;
5247 else if (mddev->persistent &&
5248 mddev->major_version == 0)
5249 err = -EINVAL;
5250 else
5251 mddev->reshape_backwards = backwards;
5252 mddev_unlock(mddev);
5253 return err ?: len;
5254}
5255
5256static struct md_sysfs_entry md_reshape_direction =
5257__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5258 reshape_direction_store);
5259
5260static ssize_t
5261array_size_show(struct mddev *mddev, char *page)
5262{
5263 if (mddev->external_size)
5264 return sprintf(page, "%llu\n",
5265 (unsigned long long)mddev->array_sectors/2);
5266 else
5267 return sprintf(page, "default\n");
5268}
5269
5270static ssize_t
5271array_size_store(struct mddev *mddev, const char *buf, size_t len)
5272{
5273 sector_t sectors;
5274 int err;
5275
5276 err = mddev_lock(mddev);
5277 if (err)
5278 return err;
5279
5280 /* cluster raid doesn't support changing array_sectors */
5281 if (mddev_is_clustered(mddev)) {
5282 mddev_unlock(mddev);
5283 return -EINVAL;
5284 }
5285
5286 if (strncmp(buf, "default", 7) == 0) {
5287 if (mddev->pers)
5288 sectors = mddev->pers->size(mddev, 0, 0);
5289 else
5290 sectors = mddev->array_sectors;
5291
5292 mddev->external_size = 0;
5293 } else {
5294 if (strict_blocks_to_sectors(buf, &sectors) < 0)
5295 err = -EINVAL;
5296 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5297 err = -E2BIG;
5298 else
5299 mddev->external_size = 1;
5300 }
5301
5302 if (!err) {
5303 mddev->array_sectors = sectors;
5304 if (mddev->pers) {
5305 set_capacity(mddev->gendisk, mddev->array_sectors);
5306 revalidate_disk(mddev->gendisk);
5307 }
5308 }
5309 mddev_unlock(mddev);
5310 return err ?: len;
5311}
5312
5313static struct md_sysfs_entry md_array_size =
5314__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5315 array_size_store);
5316
5317static ssize_t
5318consistency_policy_show(struct mddev *mddev, char *page)
5319{
5320 int ret;
5321
5322 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5323 ret = sprintf(page, "journal\n");
5324 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5325 ret = sprintf(page, "ppl\n");
5326 } else if (mddev->bitmap) {
5327 ret = sprintf(page, "bitmap\n");
5328 } else if (mddev->pers) {
5329 if (mddev->pers->sync_request)
5330 ret = sprintf(page, "resync\n");
5331 else
5332 ret = sprintf(page, "none\n");
5333 } else {
5334 ret = sprintf(page, "unknown\n");
5335 }
5336
5337 return ret;
5338}
5339
5340static ssize_t
5341consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5342{
5343 int err = 0;
5344
5345 if (mddev->pers) {
5346 if (mddev->pers->change_consistency_policy)
5347 err = mddev->pers->change_consistency_policy(mddev, buf);
5348 else
5349 err = -EBUSY;
5350 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5351 set_bit(MD_HAS_PPL, &mddev->flags);
5352 } else {
5353 err = -EINVAL;
5354 }
5355
5356 return err ? err : len;
5357}
5358
5359static struct md_sysfs_entry md_consistency_policy =
5360__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5361 consistency_policy_store);
5362
5363static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5364{
5365 return sprintf(page, "%d\n", mddev->fail_last_dev);
5366}
5367
5368 /*
5369  * Setting fail_last_dev to true allows the last device to be forcibly
5370  * removed from RAID1/RAID10.
5371  */
5372static ssize_t
5373fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5374{
5375 int ret;
5376 bool value;
5377
5378 ret = kstrtobool(buf, &value);
5379 if (ret)
5380 return ret;
5381
5382 if (value != mddev->fail_last_dev)
5383 mddev->fail_last_dev = value;
5384
5385 return len;
5386}
5387static struct md_sysfs_entry md_fail_last_dev =
5388__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5389 fail_last_dev_store);
5390
5391static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5392{
5393 if (mddev->pers == NULL || (mddev->pers->level != 1))
5394 return sprintf(page, "n/a\n");
5395 else
5396 return sprintf(page, "%d\n", mddev->serialize_policy);
5397}
5398
5399 /*
5400  * Setting serialize_policy to true enforces that write IO is not
5401  * reordered for raid1.
5402  */
5403static ssize_t
5404serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5405{
5406 int err;
5407 bool value;
5408
5409 err = kstrtobool(buf, &value);
5410 if (err)
5411 return err;
5412
5413 if (value == mddev->serialize_policy)
5414 return len;
5415
5416 err = mddev_lock(mddev);
5417 if (err)
5418 return err;
5419 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5420 pr_err("md: serialize_policy is only effective for raid1\n");
5421 err = -EINVAL;
5422 goto unlock;
5423 }
5424
5425 mddev_suspend(mddev);
5426 if (value)
5427 mddev_create_serial_pool(mddev, NULL, true);
5428 else
5429 mddev_destroy_serial_pool(mddev, NULL, true);
5430 mddev->serialize_policy = value;
5431 mddev_resume(mddev);
5432unlock:
5433 mddev_unlock(mddev);
5434 return err ?: len;
5435}
5436
5437static struct md_sysfs_entry md_serialize_policy =
5438__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5439 serialize_policy_store);
5440
5441
5442static struct attribute *md_default_attrs[] = {
5443 &md_level.attr,
5444 &md_layout.attr,
5445 &md_raid_disks.attr,
5446 &md_chunk_size.attr,
5447 &md_size.attr,
5448 &md_resync_start.attr,
5449 &md_metadata.attr,
5450 &md_new_device.attr,
5451 &md_safe_delay.attr,
5452 &md_array_state.attr,
5453 &md_reshape_position.attr,
5454 &md_reshape_direction.attr,
5455 &md_array_size.attr,
5456 &max_corr_read_errors.attr,
5457 &md_consistency_policy.attr,
5458 &md_fail_last_dev.attr,
5459 &md_serialize_policy.attr,
5460 NULL,
5461};
5462
5463static struct attribute *md_redundancy_attrs[] = {
5464 &md_scan_mode.attr,
5465 &md_last_scan_mode.attr,
5466 &md_mismatches.attr,
5467 &md_sync_min.attr,
5468 &md_sync_max.attr,
5469 &md_sync_speed.attr,
5470 &md_sync_force_parallel.attr,
5471 &md_sync_completed.attr,
5472 &md_min_sync.attr,
5473 &md_max_sync.attr,
5474 &md_suspend_lo.attr,
5475 &md_suspend_hi.attr,
5476 &md_bitmap.attr,
5477 &md_degraded.attr,
5478 NULL,
5479};
5480static struct attribute_group md_redundancy_group = {
5481 .name = NULL,
5482 .attrs = md_redundancy_attrs,
5483};
5484
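/*
 * sysfs ->show wrapper: pin the mddev (returning -EBUSY if it is already
 * being torn down) before calling the attribute's show method.
 */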
5485static ssize_t
5486md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5487{
5488 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5489 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5490 ssize_t rv;
5491
5492 if (!entry->show)
5493 return -EIO;
5494 spin_lock(&all_mddevs_lock);
5495 if (list_empty(&mddev->all_mddevs)) {
5496 spin_unlock(&all_mddevs_lock);
5497 return -EBUSY;
5498 }
5499 mddev_get(mddev);
5500 spin_unlock(&all_mddevs_lock);
5501
5502 rv = entry->show(mddev, page);
5503 mddev_put(mddev);
5504 return rv;
5505}
5506
5507static ssize_t
5508md_attr_store(struct kobject *kobj, struct attribute *attr,
5509 const char *page, size_t length)
5510{
5511 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5512 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5513 ssize_t rv;
5514
5515 if (!entry->store)
5516 return -EIO;
5517 if (!capable(CAP_SYS_ADMIN))
5518 return -EACCES;
5519 spin_lock(&all_mddevs_lock);
5520 if (list_empty(&mddev->all_mddevs)) {
5521 spin_unlock(&all_mddevs_lock);
5522 return -EBUSY;
5523 }
5524 mddev_get(mddev);
5525 spin_unlock(&all_mddevs_lock);
5526 rv = entry->store(mddev, page, length);
5527 mddev_put(mddev);
5528 return rv;
5529}
5530
5531static void md_free(struct kobject *ko)
5532{
5533 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5534
5535 if (mddev->sysfs_state)
5536 sysfs_put(mddev->sysfs_state);
5537
5538 if (mddev->gendisk)
5539 del_gendisk(mddev->gendisk);
5540 if (mddev->queue)
5541 blk_cleanup_queue(mddev->queue);
5542 if (mddev->gendisk)
5543 put_disk(mddev->gendisk);
5544 percpu_ref_exit(&mddev->writes_pending);
5545
5546 bioset_exit(&mddev->bio_set);
5547 bioset_exit(&mddev->sync_set);
5548 kfree(mddev);
5549}
5550
5551static const struct sysfs_ops md_sysfs_ops = {
5552 .show = md_attr_show,
5553 .store = md_attr_store,
5554};
5555static struct kobj_type md_ktype = {
5556 .release = md_free,
5557 .sysfs_ops = &md_sysfs_ops,
5558 .default_attrs = md_default_attrs,
5559};
5560
5561int mdp_major = 0;
5562
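/*
 * Runs from mddev->del_work: remove the bitmap attribute group and drop the
 * kobject, completing the deferred deletion of an array.
 */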
5563static void mddev_delayed_delete(struct work_struct *ws)
5564{
5565 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5566
5567 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5568 kobject_del(&mddev->kobj);
5569 kobject_put(&mddev->kobj);
5570}
5571
5572static void no_op(struct percpu_ref *r) {}
5573
5574int mddev_init_writes_pending(struct mddev *mddev)
5575{
5576 if (mddev->writes_pending.percpu_count_ptr)
5577 return 0;
5578 if (percpu_ref_init(&mddev->writes_pending, no_op,
5579 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5580 return -ENOMEM;
5581 /* We want to start with the refcount at zero */
5582 percpu_ref_put(&mddev->writes_pending);
5583 return 0;
5584}
5585EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5586
5587static int md_alloc(dev_t dev, char *name)
5588{
5589 /*
5590  * If dev is zero, name is the name of a device to allocate with
5591  * an arbitrary minor number.  It will be "md_???"
5592  * If dev is non-zero it must be a device number with a MAJOR of
5593  * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
5594  * the device is being created by opening a node in /dev.
5595  * If "name" is not NULL, the device is being created by
5596  * writing to /sys/module/md_mod/parameters/new_array.
5597  */
5598 static DEFINE_MUTEX(disks_mutex);
5599 struct mddev *mddev = mddev_find(dev);
5600 struct gendisk *disk;
5601 int partitioned;
5602 int shift;
5603 int unit;
5604 int error;
5605
5606 if (!mddev)
5607 return -ENODEV;
5608
5609 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5610 shift = partitioned ? MdpMinorShift : 0;
5611 unit = MINOR(mddev->unit) >> shift;
5612
5613 /* wait for any previous instance of this device to be
5614  * completely removed (mddev_delayed_delete).
5615  */
5616 flush_workqueue(md_misc_wq);
5617
5618 mutex_lock(&disks_mutex);
5619 error = -EEXIST;
5620 if (mddev->gendisk)
5621 goto abort;
5622
5623 if (name && !dev) {
5624 /* Need to ensure that 'name' is not a duplicate.
5625  */
5626 struct mddev *mddev2;
5627 spin_lock(&all_mddevs_lock);
5628
5629 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5630 if (mddev2->gendisk &&
5631 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5632 spin_unlock(&all_mddevs_lock);
5633 goto abort;
5634 }
5635 spin_unlock(&all_mddevs_lock);
5636 }
5637 if (name && dev)
5638 /*
5639  * Creating /dev/mdNNN via "newarray", so adjust hold_active.
5640  */
5641 mddev->hold_active = UNTIL_STOP;
5642
5643 error = -ENOMEM;
5644 mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
5645 if (!mddev->queue)
5646 goto abort;
5647
5648 blk_set_stacking_limits(&mddev->queue->limits);
5649
5650 disk = alloc_disk(1 << shift);
5651 if (!disk) {
5652 blk_cleanup_queue(mddev->queue);
5653 mddev->queue = NULL;
5654 goto abort;
5655 }
5656 disk->major = MAJOR(mddev->unit);
5657 disk->first_minor = unit << shift;
5658 if (name)
5659 strcpy(disk->disk_name, name);
5660 else if (partitioned)
5661 sprintf(disk->disk_name, "md_d%d", unit);
5662 else
5663 sprintf(disk->disk_name, "md%d", unit);
5664 disk->fops = &md_fops;
5665 disk->private_data = mddev;
5666 disk->queue = mddev->queue;
5667 blk_queue_write_cache(mddev->queue, true, true);
5668 /* Allow extended partitions.  This makes the
5669  * 'mdp' device redundant, but we can't really
5670  * remove it now.
5671  */
5672 disk->flags |= GENHD_FL_EXT_DEVT;
5673 mddev->gendisk = disk;
5674 /* As soon as we call add_disk(), another thread could get
5675  * through to md_open, so make sure it doesn't get too far
5676  */
5677 mutex_lock(&mddev->open_mutex);
5678 add_disk(disk);
5679
5680 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5681 if (error) {
5682 /* This isn't possible, but as kobject_init_and_add is marked
5683  * __must_check, we must do something with the result
5684  */
5685 pr_debug("md: cannot register %s/md - name in use\n",
5686 disk->disk_name);
5687 error = 0;
5688 }
5689 if (mddev->kobj.sd &&
5690 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5691 pr_debug("pointless warning\n");
5692 mutex_unlock(&mddev->open_mutex);
5693 abort:
5694 mutex_unlock(&disks_mutex);
5695 if (!error && mddev->kobj.sd) {
5696 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5697 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5698 }
5699 mddev_put(mddev);
5700 return error;
5701}
5702
5703static struct kobject *md_probe(dev_t dev, int *part, void *data)
5704{
5705 if (create_on_open)
5706 md_alloc(dev, NULL);
5707 return NULL;
5708}
5709
5710static int add_named_array(const char *val, const struct kernel_param *kp)
5711{
5712 /*
5713  * val must be "md_*" or "mdNNN".
5714  * For "md_*" we allocate an array with a large free minor number, and
5715  * set the name to val.  val must not already be an active name.
5716  * For "mdNNN" we allocate an array with the minor number NNN
5717  * which must not already be in use.
5718  */
5719 int len = strlen(val);
5720 char buf[DISK_NAME_LEN];
5721 unsigned long devnum;
5722
5723 while (len && val[len-1] == '\n')
5724 len--;
5725 if (len >= DISK_NAME_LEN)
5726 return -E2BIG;
5727 strlcpy(buf, val, len+1);
5728 if (strncmp(buf, "md_", 3) == 0)
5729 return md_alloc(0, buf);
5730 if (strncmp(buf, "md", 2) == 0 &&
5731 isdigit(buf[2]) &&
5732 kstrtoul(buf+2, 10, &devnum) == 0 &&
5733 devnum <= MINORMASK)
5734 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5735
5736 return -EINVAL;
5737}
5738
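/*
 * Safemode timer expired: flag the array as wanting to go 'clean' and wake
 * the md thread (notifying userspace when the metadata is externally managed).
 */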
5739static void md_safemode_timeout(struct timer_list *t)
5740{
5741 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5742
5743 mddev->safemode = 1;
5744 if (mddev->external)
5745 sysfs_notify_dirent_safe(mddev->sysfs_state);
5746
5747 md_wakeup_thread(mddev->thread);
5748}
5749
5750static int start_dirty_degraded;
5751
5752int md_run(struct mddev *mddev)
5753{
5754 int err;
5755 struct md_rdev *rdev;
5756 struct md_personality *pers;
5757
5758 if (list_empty(&mddev->disks))
5759 /* cannot run an array with no devices.. */
5760 return -EINVAL;
5761
5762 if (mddev->pers)
5763 return -EBUSY;
5764
5765 if (mddev->sysfs_active)
5766 return -EBUSY;
5767
5768 /*
5769  * Analyze all RAID superblock(s)
5770  */
5771 if (!mddev->raid_disks) {
5772 if (!mddev->persistent)
5773 return -EINVAL;
5774 err = analyze_sbs(mddev);
5775 if (err)
5776 return -EINVAL;
5777 }
5778
5779 if (mddev->level != LEVEL_NONE)
5780 request_module("md-level-%d", mddev->level);
5781 else if (mddev->clevel[0])
5782 request_module("md-%s", mddev->clevel);
5783
5784 /*
5785  * Drop all container device buffers, from now on
5786  * the only valid external interface is through the md
5787  * device.
5788  */
5789 mddev->has_superblocks = false;
5790 rdev_for_each(rdev, mddev) {
5791 if (test_bit(Faulty, &rdev->flags))
5792 continue;
5793 sync_blockdev(rdev->bdev);
5794 invalidate_bdev(rdev->bdev);
5795 if (mddev->ro != 1 &&
5796 (bdev_read_only(rdev->bdev) ||
5797 bdev_read_only(rdev->meta_bdev))) {
5798 mddev->ro = 1;
5799 if (mddev->gendisk)
5800 set_disk_ro(mddev->gendisk, 1);
5801 }
5802
5803 if (rdev->sb_page)
5804 mddev->has_superblocks = true;
5805
5806 /* perform some consistency tests on the device.
5807  * We don't want the data to overlap the metadata,
5808  * Internal Bitmap issues have been handled elsewhere.
5809  */
5810 if (rdev->meta_bdev) {
5811 ;
5812 } else if (rdev->data_offset < rdev->sb_start) {
5813 if (mddev->dev_sectors &&
5814 rdev->data_offset + mddev->dev_sectors
5815 > rdev->sb_start) {
5816 pr_warn("md: %s: data overlaps metadata\n",
5817 mdname(mddev));
5818 return -EINVAL;
5819 }
5820 } else {
5821 if (rdev->sb_start + rdev->sb_size/512
5822 > rdev->data_offset) {
5823 pr_warn("md: %s: metadata overlaps data\n",
5824 mdname(mddev));
5825 return -EINVAL;
5826 }
5827 }
5828 sysfs_notify_dirent_safe(rdev->sysfs_state);
5829 }
5830
5831 if (!bioset_initialized(&mddev->bio_set)) {
5832 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5833 if (err)
5834 return err;
5835 }
5836 if (!bioset_initialized(&mddev->sync_set)) {
5837 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5838 if (err)
5839 return err;
5840 }
5841
5842 spin_lock(&pers_lock);
5843 pers = find_pers(mddev->level, mddev->clevel);
5844 if (!pers || !try_module_get(pers->owner)) {
5845 spin_unlock(&pers_lock);
5846 if (mddev->level != LEVEL_NONE)
5847 pr_warn("md: personality for level %d is not loaded!\n",
5848 mddev->level);
5849 else
5850 pr_warn("md: personality for level %s is not loaded!\n",
5851 mddev->clevel);
5852 err = -EINVAL;
5853 goto abort;
5854 }
5855 spin_unlock(&pers_lock);
5856 if (mddev->level != pers->level) {
5857 mddev->level = pers->level;
5858 mddev->new_level = pers->level;
5859 }
5860 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5861
5862 if (mddev->reshape_position != MaxSector &&
5863 pers->start_reshape == NULL) {
5864 /* This personality cannot handle reshaping... */
5865 module_put(pers->owner);
5866 err = -EINVAL;
5867 goto abort;
5868 }
5869
5870 if (pers->sync_request) {
5871 /* Warn if this is a potentially silly
5872  * configuration.
5873  */
5874 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5875 struct md_rdev *rdev2;
5876 int warned = 0;
5877
5878 rdev_for_each(rdev, mddev)
5879 rdev_for_each(rdev2, mddev) {
5880 if (rdev < rdev2 &&
5881 rdev->bdev->bd_contains ==
5882 rdev2->bdev->bd_contains) {
5883 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5884 mdname(mddev),
5885 bdevname(rdev->bdev,b),
5886 bdevname(rdev2->bdev,b2));
5887 warned = 1;
5888 }
5889 }
5890
5891 if (warned)
5892 pr_warn("True protection against single-disk failure might be compromised.\n");
5893 }
5894
5895 mddev->recovery = 0;
5896
5897 mddev->resync_max_sectors = mddev->dev_sectors;
5898
5899 mddev->ok_start_degraded = start_dirty_degraded;
5900
5901 if (start_readonly && mddev->ro == 0)
5902 mddev->ro = 2;
5903
5904 err = pers->run(mddev);
5905 if (err)
5906 pr_warn("md: pers->run() failed ...\n");
5907 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5908 WARN_ONCE(!mddev->external_size,
5909 "%s: default size too small, but 'external_size' not in effect?\n",
5910 __func__);
5911 pr_warn("md: invalid array_size %llu > default size %llu\n",
5912 (unsigned long long)mddev->array_sectors / 2,
5913 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5914 err = -EINVAL;
5915 }
5916 if (err == 0 && pers->sync_request &&
5917 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5918 struct bitmap *bitmap;
5919
5920 bitmap = md_bitmap_create(mddev, -1);
5921 if (IS_ERR(bitmap)) {
5922 err = PTR_ERR(bitmap);
5923 pr_warn("%s: failed to create bitmap (%d)\n",
5924 mdname(mddev), err);
5925 } else
5926 mddev->bitmap = bitmap;
5927
5928 }
5929 if (err)
5930 goto bitmap_abort;
5931
5932 if (mddev->bitmap_info.max_write_behind > 0) {
5933 bool create_pool = false;
5934
5935 rdev_for_each(rdev, mddev) {
5936 if (test_bit(WriteMostly, &rdev->flags) &&
5937 rdev_init_serial(rdev))
5938 create_pool = true;
5939 }
5940 if (create_pool && mddev->serial_info_pool == NULL) {
5941 mddev->serial_info_pool =
5942 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
5943 sizeof(struct serial_info));
5944 if (!mddev->serial_info_pool) {
5945 err = -ENOMEM;
5946 goto bitmap_abort;
5947 }
5948 }
5949 }
5950
5951 if (mddev->queue) {
5952 bool nonrot = true;
5953
5954 rdev_for_each(rdev, mddev) {
5955 if (rdev->raid_disk >= 0 &&
5956 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5957 nonrot = false;
5958 break;
5959 }
5960 }
5961 if (mddev->degraded)
5962 nonrot = false;
5963 if (nonrot)
5964 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5965 else
5966 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5967 mddev->queue->backing_dev_info->congested_data = mddev;
5968 mddev->queue->backing_dev_info->congested_fn = md_congested;
5969 }
5970 if (pers->sync_request) {
5971 if (mddev->kobj.sd &&
5972 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5973 pr_warn("md: cannot register extra attributes for %s\n",
5974 mdname(mddev));
5975 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5976 } else if (mddev->ro == 2)
5977 mddev->ro = 0;
5978
5979 atomic_set(&mddev->max_corr_read_errors,
5980 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5981 mddev->safemode = 0;
5982 if (mddev_is_clustered(mddev))
5983 mddev->safemode_delay = 0;
5984 else
5985 mddev->safemode_delay = (200 * HZ)/1000 +1;
5986 mddev->in_sync = 1;
5987 smp_wmb();
5988 spin_lock(&mddev->lock);
5989 mddev->pers = pers;
5990 spin_unlock(&mddev->lock);
5991 rdev_for_each(rdev, mddev)
5992 if (rdev->raid_disk >= 0)
5993 sysfs_link_rdev(mddev, rdev);
5994
5995 if (mddev->degraded && !mddev->ro)
5996 /* This ensures that recovering status is reported immediately
5997  * via sysfs - until a lack of spares is confirmed.
5998  */
5999 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6000 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6001
6002 if (mddev->sb_flags)
6003 md_update_sb(mddev, 0);
6004
6005 md_new_event(mddev);
6006 return 0;
6007
6008bitmap_abort:
6009 mddev_detach(mddev);
6010 if (mddev->private)
6011 pers->free(mddev, mddev->private);
6012 mddev->private = NULL;
6013 module_put(pers->owner);
6014 md_bitmap_destroy(mddev);
6015abort:
6016 bioset_exit(&mddev->bio_set);
6017 bioset_exit(&mddev->sync_set);
6018 return err;
6019}
6020EXPORT_SYMBOL_GPL(md_run);
6021
6022static int do_md_run(struct mddev *mddev)
6023{
6024 int err;
6025
6026 set_bit(MD_NOT_READY, &mddev->flags);
6027 err = md_run(mddev);
6028 if (err)
6029 goto out;
6030 err = md_bitmap_load(mddev);
6031 if (err) {
6032 md_bitmap_destroy(mddev);
6033 goto out;
6034 }
6035
6036 if (mddev_is_clustered(mddev))
6037 md_allow_write(mddev);
6038
6039 /* run start up tasks that require md_thread */
6040 md_start(mddev);
6041
6042 md_wakeup_thread(mddev->thread);
6043 md_wakeup_thread(mddev->sync_thread);
6044
6045 set_capacity(mddev->gendisk, mddev->array_sectors);
6046 revalidate_disk(mddev->gendisk);
6047 clear_bit(MD_NOT_READY, &mddev->flags);
6048 mddev->changed = 1;
6049 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6050 sysfs_notify_dirent_safe(mddev->sysfs_state);
6051 sysfs_notify_dirent_safe(mddev->sysfs_action);
6052 sysfs_notify(&mddev->kobj, NULL, "degraded");
6053out:
6054 clear_bit(MD_NOT_READY, &mddev->flags);
6055 return err;
6056}
6057
6058int md_start(struct mddev *mddev)
6059{
6060 int ret = 0;
6061
6062 if (mddev->pers->start) {
6063 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6064 md_wakeup_thread(mddev->thread);
6065 ret = mddev->pers->start(mddev);
6066 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6067 md_wakeup_thread(mddev->sync_thread);
6068 }
6069 return ret;
6070}
6071EXPORT_SYMBOL_GPL(md_start);
6072
6073static int restart_array(struct mddev *mddev)
6074{
6075 struct gendisk *disk = mddev->gendisk;
6076 struct md_rdev *rdev;
6077 bool has_journal = false;
6078 bool has_readonly = false;
6079
6080 /* Complain if it has no devices */
6081 if (list_empty(&mddev->disks))
6082 return -ENXIO;
6083 if (!mddev->pers)
6084 return -EINVAL;
6085 if (!mddev->ro)
6086 return -EBUSY;
6087
6088 rcu_read_lock();
6089 rdev_for_each_rcu(rdev, mddev) {
6090 if (test_bit(Journal, &rdev->flags) &&
6091 !test_bit(Faulty, &rdev->flags))
6092 has_journal = true;
6093 if (bdev_read_only(rdev->bdev))
6094 has_readonly = true;
6095 }
6096 rcu_read_unlock();
6097 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6098 /* Don't restart rw with journal missing/faulty */
6099 return -EINVAL;
6100 if (has_readonly)
6101 return -EROFS;
6102
6103 mddev->safemode = 0;
6104 mddev->ro = 0;
6105 set_disk_ro(disk, 0);
6106 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6107
6108 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6109 md_wakeup_thread(mddev->thread);
6110 md_wakeup_thread(mddev->sync_thread);
6111 sysfs_notify_dirent_safe(mddev->sysfs_state);
6112 return 0;
6113}
6114
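/* Reset all mddev state to defaults so the device can be reused for a new array. */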
6115static void md_clean(struct mddev *mddev)
6116{
6117 mddev->array_sectors = 0;
6118 mddev->external_size = 0;
6119 mddev->dev_sectors = 0;
6120 mddev->raid_disks = 0;
6121 mddev->recovery_cp = 0;
6122 mddev->resync_min = 0;
6123 mddev->resync_max = MaxSector;
6124 mddev->reshape_position = MaxSector;
6125 mddev->external = 0;
6126 mddev->persistent = 0;
6127 mddev->level = LEVEL_NONE;
6128 mddev->clevel[0] = 0;
6129 mddev->flags = 0;
6130 mddev->sb_flags = 0;
6131 mddev->ro = 0;
6132 mddev->metadata_type[0] = 0;
6133 mddev->chunk_sectors = 0;
6134 mddev->ctime = mddev->utime = 0;
6135 mddev->layout = 0;
6136 mddev->max_disks = 0;
6137 mddev->events = 0;
6138 mddev->can_decrease_events = 0;
6139 mddev->delta_disks = 0;
6140 mddev->reshape_backwards = 0;
6141 mddev->new_level = LEVEL_NONE;
6142 mddev->new_layout = 0;
6143 mddev->new_chunk_sectors = 0;
6144 mddev->curr_resync = 0;
6145 atomic64_set(&mddev->resync_mismatches, 0);
6146 mddev->suspend_lo = mddev->suspend_hi = 0;
6147 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6148 mddev->recovery = 0;
6149 mddev->in_sync = 0;
6150 mddev->changed = 0;
6151 mddev->degraded = 0;
6152 mddev->safemode = 0;
6153 mddev->private = NULL;
6154 mddev->cluster_info = NULL;
6155 mddev->bitmap_info.offset = 0;
6156 mddev->bitmap_info.default_offset = 0;
6157 mddev->bitmap_info.default_space = 0;
6158 mddev->bitmap_info.chunksize = 0;
6159 mddev->bitmap_info.daemon_sleep = 0;
6160 mddev->bitmap_info.max_write_behind = 0;
6161 mddev->bitmap_info.nodes = 0;
6162}
6163
6164static void __md_stop_writes(struct mddev *mddev)
6165{
6166 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6167 if (work_pending(&mddev->del_work))
6168 flush_workqueue(md_misc_wq);
6169 if (mddev->sync_thread) {
6170 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6171 md_reap_sync_thread(mddev);
6172 }
6173
6174 del_timer_sync(&mddev->safemode_timer);
6175
6176 if (mddev->pers && mddev->pers->quiesce) {
6177 mddev->pers->quiesce(mddev, 1);
6178 mddev->pers->quiesce(mddev, 0);
6179 }
6180 md_bitmap_flush(mddev);
6181
6182 if (mddev->ro == 0 &&
6183 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6184 mddev->sb_flags)) {
6185 /* mark array as shutdown cleanly */
6186 if (!mddev_is_clustered(mddev))
6187 mddev->in_sync = 1;
6188 md_update_sb(mddev, 1);
6189 }
6190
6191 mddev->serialize_policy = 0;
6192 mddev_destroy_serial_pool(mddev, NULL, true);
6193}
6194
6195void md_stop_writes(struct mddev *mddev)
6196{
6197 mddev_lock_nointr(mddev);
6198 __md_stop_writes(mddev);
6199 mddev_unlock(mddev);
6200}
6201EXPORT_SYMBOL_GPL(md_stop_writes);
6202
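/*
 * Detach the array from its personality: wait for bitmap behind-writes,
 * quiesce the personality if it is not already suspended, unregister the
 * md thread and sync the request queue.
 */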
6203static void mddev_detach(struct mddev *mddev)
6204{
6205 md_bitmap_wait_behind_writes(mddev);
6206 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6207 mddev->pers->quiesce(mddev, 1);
6208 mddev->pers->quiesce(mddev, 0);
6209 }
6210 md_unregister_thread(&mddev->thread);
6211 if (mddev->queue)
6212 blk_sync_queue(mddev->queue);
6213}
6214
6215static void __md_stop(struct mddev *mddev)
6216{
6217 struct md_personality *pers = mddev->pers;
6218 md_bitmap_destroy(mddev);
6219 mddev_detach(mddev);
6220
6221 if (mddev->event_work.func)
6222 flush_workqueue(md_misc_wq);
6223 spin_lock(&mddev->lock);
6224 mddev->pers = NULL;
6225 spin_unlock(&mddev->lock);
6226 pers->free(mddev, mddev->private);
6227 mddev->private = NULL;
6228 if (pers->sync_request && mddev->to_remove == NULL)
6229 mddev->to_remove = &md_redundancy_group;
6230 module_put(pers->owner);
6231 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6232}
6233
6234void md_stop(struct mddev *mddev)
6235{
6236 /* stop the array and free any attached data structures.
6237  * This is called from dm-raid
6238  */
6239 __md_stop(mddev);
6240 bioset_exit(&mddev->bio_set);
6241 bioset_exit(&mddev->sync_set);
6242}
6243
6244EXPORT_SYMBOL_GPL(md_stop);
6245
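/*
 * Switch a running array to read-only: interrupt any resync, wait for pending
 * superblock updates, and fail with -EBUSY if the device is still in use.
 */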
6246static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6247{
6248 int err = 0;
6249 int did_freeze = 0;
6250
6251 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6252 did_freeze = 1;
6253 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6254 md_wakeup_thread(mddev->thread);
6255 }
6256 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6257 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6258 if (mddev->sync_thread)
6259 /* Thread might be blocked waiting for metadata update
6260  * which will now never happen */
6261 wake_up_process(mddev->sync_thread->tsk);
6262
6263 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6264 return -EBUSY;
6265 mddev_unlock(mddev);
6266 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6267 &mddev->recovery));
6268 wait_event(mddev->sb_wait,
6269 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6270 mddev_lock_nointr(mddev);
6271
6272 mutex_lock(&mddev->open_mutex);
6273 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6274 mddev->sync_thread ||
6275 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6276 pr_warn("md: %s still in use.\n",mdname(mddev));
6277 if (did_freeze) {
6278 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6279 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6280 md_wakeup_thread(mddev->thread);
6281 }
6282 err = -EBUSY;
6283 goto out;
6284 }
6285 if (mddev->pers) {
6286 __md_stop_writes(mddev);
6287
6288 err = -ENXIO;
6289 if (mddev->ro==1)
6290 goto out;
6291 mddev->ro = 1;
6292 set_disk_ro(mddev->gendisk, 1);
6293 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6294 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6295 md_wakeup_thread(mddev->thread);
6296 sysfs_notify_dirent_safe(mddev->sysfs_state);
6297 err = 0;
6298 }
6299out:
6300 mutex_unlock(&mddev->open_mutex);
6301 return err;
6302}
6303
6304 /* mode:
6305  *   0 - completely stop and dis-assemble array
6306  *   2 - stop but do not disassemble array
6307  */
6308static int do_md_stop(struct mddev *mddev, int mode,
6309 struct block_device *bdev)
6310{
6311 struct gendisk *disk = mddev->gendisk;
6312 struct md_rdev *rdev;
6313 int did_freeze = 0;
6314
6315 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6316 did_freeze = 1;
6317 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6318 md_wakeup_thread(mddev->thread);
6319 }
6320 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6321 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6322 if (mddev->sync_thread)
6323 /* Thread might be blocked waiting for metadata update
6324  * which will now never happen */
6325 wake_up_process(mddev->sync_thread->tsk);
6326
6327 mddev_unlock(mddev);
6328 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6329 !test_bit(MD_RECOVERY_RUNNING,
6330 &mddev->recovery)));
6331 mddev_lock_nointr(mddev);
6332
6333 mutex_lock(&mddev->open_mutex);
6334 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6335 mddev->sysfs_active ||
6336 mddev->sync_thread ||
6337 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6338 pr_warn("md: %s still in use.\n",mdname(mddev));
6339 mutex_unlock(&mddev->open_mutex);
6340 if (did_freeze) {
6341 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6342 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6343 md_wakeup_thread(mddev->thread);
6344 }
6345 return -EBUSY;
6346 }
6347 if (mddev->pers) {
6348 if (mddev->ro)
6349 set_disk_ro(disk, 0);
6350
6351 __md_stop_writes(mddev);
6352 __md_stop(mddev);
6353 mddev->queue->backing_dev_info->congested_fn = NULL;
6354
6355 /* tell userspace to handle 'inactive' */
6356 sysfs_notify_dirent_safe(mddev->sysfs_state);
6357
6358 rdev_for_each(rdev, mddev)
6359 if (rdev->raid_disk >= 0)
6360 sysfs_unlink_rdev(mddev, rdev);
6361
6362 set_capacity(disk, 0);
6363 mutex_unlock(&mddev->open_mutex);
6364 mddev->changed = 1;
6365 revalidate_disk(disk);
6366
6367 if (mddev->ro)
6368 mddev->ro = 0;
6369 } else
6370 mutex_unlock(&mddev->open_mutex);
6371
6372
6373 /* Free resources if final stop */
6374 if (mode == 0) {
6375 pr_info("md: %s stopped.\n", mdname(mddev));
6376
6377 if (mddev->bitmap_info.file) {
6378 struct file *f = mddev->bitmap_info.file;
6379 spin_lock(&mddev->lock);
6380 mddev->bitmap_info.file = NULL;
6381 spin_unlock(&mddev->lock);
6382 fput(f);
6383 }
6384 mddev->bitmap_info.offset = 0;
6385
6386 export_array(mddev);
6387
6388 md_clean(mddev);
6389 if (mddev->hold_active == UNTIL_STOP)
6390 mddev->hold_active = 0;
6391 }
6392 md_new_event(mddev);
6393 sysfs_notify_dirent_safe(mddev->sysfs_state);
6394 return 0;
6395}
6396
6397#ifndef MODULE
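/* Try to start a freshly assembled array; stop it again if do_md_run() fails. */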
6398static void autorun_array(struct mddev *mddev)
6399{
6400 struct md_rdev *rdev;
6401 int err;
6402
6403 if (list_empty(&mddev->disks))
6404 return;
6405
6406 pr_info("md: running: ");
6407
6408 rdev_for_each(rdev, mddev) {
6409 char b[BDEVNAME_SIZE];
6410 pr_cont("<%s>", bdevname(rdev->bdev,b));
6411 }
6412 pr_cont("\n");
6413
6414 err = do_md_run(mddev);
6415 if (err) {
6416 pr_warn("md: do_md_run() returned %d\n", err);
6417 do_md_stop(mddev, 0, NULL);
6418 }
6419}
6420
6421 /*
6422  * let's try to run arrays based on all disks that have arrived
6423  * until now. (those are in pending_raid_disks)
6424  *
6425  * the method: pick the first pending disk, collect all disks with
6426  * the same UUID, remove all from the pending list and put them into
6427  * the 'same_array' list. Then order this list based on superblock
6428  * update time (freshest comes first), kick out 'old' disks and
6429  * compare superblocks. If everything's fine then run it.
6430  *
6431  * If "unit" is allocated, then bump its reference count
6432  */
6433static void autorun_devices(int part)
6434{
6435 struct md_rdev *rdev0, *rdev, *tmp;
6436 struct mddev *mddev;
6437 char b[BDEVNAME_SIZE];
6438
6439 pr_info("md: autorun ...\n");
6440 while (!list_empty(&pending_raid_disks)) {
6441 int unit;
6442 dev_t dev;
6443 LIST_HEAD(candidates);
6444 rdev0 = list_entry(pending_raid_disks.next,
6445 struct md_rdev, same_set);
6446
6447 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6448 INIT_LIST_HEAD(&candidates);
6449 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6450 if (super_90_load(rdev, rdev0, 0) >= 0) {
6451 pr_debug("md: adding %s ...\n",
6452 bdevname(rdev->bdev,b));
6453 list_move(&rdev->same_set, &candidates);
6454 }
6455 /*
6456  * now we have a set of devices, with all of them having
6457  * mostly sane superblocks. It's time to allocate the
6458  * mddev.
6459  */
6460 if (part) {
6461 dev = MKDEV(mdp_major,
6462 rdev0->preferred_minor << MdpMinorShift);
6463 unit = MINOR(dev) >> MdpMinorShift;
6464 } else {
6465 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6466 unit = MINOR(dev);
6467 }
6468 if (rdev0->preferred_minor != unit) {
6469 pr_warn("md: unit number in %s is bad: %d\n",
6470 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6471 break;
6472 }
6473
6474 md_probe(dev, NULL, NULL);
6475 mddev = mddev_find(dev);
6476 if (!mddev || !mddev->gendisk) {
6477 if (mddev)
6478 mddev_put(mddev);
6479 break;
6480 }
6481 if (mddev_lock(mddev))
6482 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6483 else if (mddev->raid_disks || mddev->major_version
6484 || !list_empty(&mddev->disks)) {
6485 pr_warn("md: %s already running, cannot run %s\n",
6486 mdname(mddev), bdevname(rdev0->bdev,b));
6487 mddev_unlock(mddev);
6488 } else {
6489 pr_debug("md: created %s\n", mdname(mddev));
6490 mddev->persistent = 1;
6491 rdev_for_each_list(rdev, tmp, &candidates) {
6492 list_del_init(&rdev->same_set);
6493 if (bind_rdev_to_array(rdev, mddev))
6494 export_rdev(rdev);
6495 }
6496 autorun_array(mddev);
6497 mddev_unlock(mddev);
6498 }
6499 /* on success, candidates will be empty, on error
6500  * it won't...
6501  */
6502 rdev_for_each_list(rdev, tmp, &candidates) {
6503 list_del_init(&rdev->same_set);
6504 export_rdev(rdev);
6505 }
6506 mddev_put(mddev);
6507 }
6508 pr_info("md: ... autorun DONE.\n");
6509}
6510#endif
6511
6512static int get_version(void __user *arg)
6513{
6514 mdu_version_t ver;
6515
6516 ver.major = MD_MAJOR_VERSION;
6517 ver.minor = MD_MINOR_VERSION;
6518 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6519
6520 if (copy_to_user(arg, &ver, sizeof(ver)))
6521 return -EFAULT;
6522
6523 return 0;
6524}
6525
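/*
 * GET_ARRAY_INFO ioctl: gather per-device counts under RCU and copy an
 * mdu_array_info_t describing the array to user space.
 */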
6526static int get_array_info(struct mddev *mddev, void __user *arg)
6527{
6528 mdu_array_info_t info;
6529 int nr,working,insync,failed,spare;
6530 struct md_rdev *rdev;
6531
6532 nr = working = insync = failed = spare = 0;
6533 rcu_read_lock();
6534 rdev_for_each_rcu(rdev, mddev) {
6535 nr++;
6536 if (test_bit(Faulty, &rdev->flags))
6537 failed++;
6538 else {
6539 working++;
6540 if (test_bit(In_sync, &rdev->flags))
6541 insync++;
6542 else if (test_bit(Journal, &rdev->flags))
6543 /* TODO: add journal count to md_u.h */
6544 ;
6545 else
6546 spare++;
6547 }
6548 }
6549 rcu_read_unlock();
6550
6551 info.major_version = mddev->major_version;
6552 info.minor_version = mddev->minor_version;
6553 info.patch_version = MD_PATCHLEVEL_VERSION;
6554 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6555 info.level = mddev->level;
6556 info.size = mddev->dev_sectors / 2;
6557 if (info.size != mddev->dev_sectors / 2) /* overflow */
6558 info.size = -1;
6559 info.nr_disks = nr;
6560 info.raid_disks = mddev->raid_disks;
6561 info.md_minor = mddev->md_minor;
6562 info.not_persistent= !mddev->persistent;
6563
6564 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6565 info.state = 0;
6566 if (mddev->in_sync)
6567 info.state = (1<<MD_SB_CLEAN);
6568 if (mddev->bitmap && mddev->bitmap_info.offset)
6569 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6570 if (mddev_is_clustered(mddev))
6571 info.state |= (1<<MD_SB_CLUSTERED);
6572 info.active_disks = insync;
6573 info.working_disks = working;
6574 info.failed_disks = failed;
6575 info.spare_disks = spare;
6576
6577 info.layout = mddev->layout;
6578 info.chunk_size = mddev->chunk_sectors << 9;
6579
6580 if (copy_to_user(arg, &info, sizeof(info)))
6581 return -EFAULT;
6582
6583 return 0;
6584}
6585
6586static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6587{
6588 mdu_bitmap_file_t *file = NULL;
6589 char *ptr;
6590 int err;
6591
6592 file = kzalloc(sizeof(*file), GFP_NOIO);
6593 if (!file)
6594 return -ENOMEM;
6595
6596 err = 0;
6597 spin_lock(&mddev->lock);
6598
6599 if (mddev->bitmap_info.file) {
6600 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6601 sizeof(file->pathname));
6602 if (IS_ERR(ptr))
6603 err = PTR_ERR(ptr);
6604 else
6605 memmove(file->pathname, ptr,
6606 sizeof(file->pathname)-(ptr-file->pathname));
6607 }
6608 spin_unlock(&mddev->lock);
6609
6610 if (err == 0 &&
6611 copy_to_user(arg, file, sizeof(*file)))
6612 err = -EFAULT;
6613
6614 kfree(file);
6615 return err;
6616}
6617
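/*
 * GET_DISK_INFO ioctl: report the state of the rdev whose number matches
 * info.number, or MD_DISK_REMOVED if no such device exists.
 */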
6618static int get_disk_info(struct mddev *mddev, void __user * arg)
6619{
6620 mdu_disk_info_t info;
6621 struct md_rdev *rdev;
6622
6623 if (copy_from_user(&info, arg, sizeof(info)))
6624 return -EFAULT;
6625
6626 rcu_read_lock();
6627 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6628 if (rdev) {
6629 info.major = MAJOR(rdev->bdev->bd_dev);
6630 info.minor = MINOR(rdev->bdev->bd_dev);
6631 info.raid_disk = rdev->raid_disk;
6632 info.state = 0;
6633 if (test_bit(Faulty, &rdev->flags))
6634 info.state |= (1<<MD_DISK_FAULTY);
6635 else if (test_bit(In_sync, &rdev->flags)) {
6636 info.state |= (1<<MD_DISK_ACTIVE);
6637 info.state |= (1<<MD_DISK_SYNC);
6638 }
6639 if (test_bit(Journal, &rdev->flags))
6640 info.state |= (1<<MD_DISK_JOURNAL);
6641 if (test_bit(WriteMostly, &rdev->flags))
6642 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6643 if (test_bit(FailFast, &rdev->flags))
6644 info.state |= (1<<MD_DISK_FAILFAST);
6645 } else {
6646 info.major = info.minor = 0;
6647 info.raid_disk = -1;
6648 info.state = (1<<MD_DISK_REMOVED);
6649 }
6650 rcu_read_unlock();
6651
6652 if (copy_to_user(arg, &info, sizeof(info)))
6653 return -EFAULT;
6654
6655 return 0;
6656}
6657
6658static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6659{
6660 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6661 struct md_rdev *rdev;
6662 dev_t dev = MKDEV(info->major,info->minor);
6663
6664 if (mddev_is_clustered(mddev) &&
6665 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6666 pr_warn("%s: Cannot add to clustered mddev.\n",
6667 mdname(mddev));
6668 return -EINVAL;
6669 }
6670
6671 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6672 return -EOVERFLOW;
6673
6674 if (!mddev->raid_disks) {
6675 int err;
6676
6677 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6678 if (IS_ERR(rdev)) {
6679 pr_warn("md: md_import_device returned %ld\n",
6680 PTR_ERR(rdev));
6681 return PTR_ERR(rdev);
6682 }
6683 if (!list_empty(&mddev->disks)) {
6684 struct md_rdev *rdev0
6685 = list_entry(mddev->disks.next,
6686 struct md_rdev, same_set);
6687 err = super_types[mddev->major_version]
6688 .load_super(rdev, rdev0, mddev->minor_version);
6689 if (err < 0) {
6690 pr_warn("md: %s has different UUID to %s\n",
6691 bdevname(rdev->bdev,b),
6692 bdevname(rdev0->bdev,b2));
6693 export_rdev(rdev);
6694 return -EINVAL;
6695 }
6696 }
6697 err = bind_rdev_to_array(rdev, mddev);
6698 if (err)
6699 export_rdev(rdev);
6700 return err;
6701 }
6702
6703 /*
6704  * add_new_disk can be used once the array is assembled
6705  * to add "hot spares".  They must already have a superblock
6706  * written
6707  */
6708 if (mddev->pers) {
6709 int err;
6710 if (!mddev->pers->hot_add_disk) {
6711 pr_warn("%s: personality does not support diskops!\n",
6712 mdname(mddev));
6713 return -EINVAL;
6714 }
6715 if (mddev->persistent)
6716 rdev = md_import_device(dev, mddev->major_version,
6717 mddev->minor_version);
6718 else
6719 rdev = md_import_device(dev, -1, -1);
6720 if (IS_ERR(rdev)) {
6721 pr_warn("md: md_import_device returned %ld\n",
6722 PTR_ERR(rdev));
6723 return PTR_ERR(rdev);
6724 }
6725
6726 if (!mddev->persistent) {
6727 if (info->state & (1<<MD_DISK_SYNC) &&
6728 info->raid_disk < mddev->raid_disks) {
6729 rdev->raid_disk = info->raid_disk;
6730 set_bit(In_sync, &rdev->flags);
6731 clear_bit(Bitmap_sync, &rdev->flags);
6732 } else
6733 rdev->raid_disk = -1;
6734 rdev->saved_raid_disk = rdev->raid_disk;
6735 } else
6736 super_types[mddev->major_version].
6737 validate_super(mddev, rdev);
6738 if ((info->state & (1<<MD_DISK_SYNC)) &&
6739 rdev->raid_disk != info->raid_disk) {
			/*
			 * The caller asked for a specific in-sync slot, but
			 * validate_super() assigned a different role, so
			 * reject this re-add request.
			 */
6743 export_rdev(rdev);
6744 return -EINVAL;
6745 }
6746
6747 clear_bit(In_sync, &rdev->flags);
6748 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6749 set_bit(WriteMostly, &rdev->flags);
6750 else
6751 clear_bit(WriteMostly, &rdev->flags);
6752 if (info->state & (1<<MD_DISK_FAILFAST))
6753 set_bit(FailFast, &rdev->flags);
6754 else
6755 clear_bit(FailFast, &rdev->flags);
6756
6757 if (info->state & (1<<MD_DISK_JOURNAL)) {
6758 struct md_rdev *rdev2;
6759 bool has_journal = false;
6760
6761
6762 rdev_for_each(rdev2, mddev) {
6763 if (test_bit(Journal, &rdev2->flags)) {
6764 has_journal = true;
6765 break;
6766 }
6767 }
6768 if (has_journal || mddev->bitmap) {
6769 export_rdev(rdev);
6770 return -EBUSY;
6771 }
6772 set_bit(Journal, &rdev->flags);
6773 }
		/*
		 * For clustered arrays, let the other nodes know about the
		 * device that is being added.
		 */
6777 if (mddev_is_clustered(mddev)) {
6778 if (info->state & (1 << MD_DISK_CANDIDATE))
6779 set_bit(Candidate, &rdev->flags);
6780 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6781
6782 err = md_cluster_ops->add_new_disk(mddev, rdev);
6783 if (err) {
6784 export_rdev(rdev);
6785 return err;
6786 }
6787 }
6788 }
6789
6790 rdev->raid_disk = -1;
6791 err = bind_rdev_to_array(rdev, mddev);
6792
6793 if (err)
6794 export_rdev(rdev);
6795
6796 if (mddev_is_clustered(mddev)) {
6797 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6798 if (!err) {
6799 err = md_cluster_ops->new_disk_ack(mddev,
6800 err == 0);
6801 if (err)
6802 md_kick_rdev_from_array(rdev);
6803 }
6804 } else {
6805 if (err)
6806 md_cluster_ops->add_new_disk_cancel(mddev);
6807 else
6808 err = add_bound_rdev(rdev);
6809 }
6810
6811 } else if (!err)
6812 err = add_bound_rdev(rdev);
6813
6814 return err;
6815 }
6816
	/*
	 * Otherwise (array configured but not started), add_new_disk is only
	 * supported for arrays using a version-0 superblock.
	 */
6820 if (mddev->major_version != 0) {
6821 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6822 return -EINVAL;
6823 }
6824
6825 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6826 int err;
6827 rdev = md_import_device(dev, -1, 0);
6828 if (IS_ERR(rdev)) {
6829 pr_warn("md: error, md_import_device() returned %ld\n",
6830 PTR_ERR(rdev));
6831 return PTR_ERR(rdev);
6832 }
6833 rdev->desc_nr = info->number;
6834 if (info->raid_disk < mddev->raid_disks)
6835 rdev->raid_disk = info->raid_disk;
6836 else
6837 rdev->raid_disk = -1;
6838
6839 if (rdev->raid_disk < mddev->raid_disks)
6840 if (info->state & (1<<MD_DISK_SYNC))
6841 set_bit(In_sync, &rdev->flags);
6842
6843 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6844 set_bit(WriteMostly, &rdev->flags);
6845 if (info->state & (1<<MD_DISK_FAILFAST))
6846 set_bit(FailFast, &rdev->flags);
6847
6848 if (!mddev->persistent) {
6849 pr_debug("md: nonpersistent superblock ...\n");
6850 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6851 } else
6852 rdev->sb_start = calc_dev_sboffset(rdev);
6853 rdev->sectors = rdev->sb_start;
6854
6855 err = bind_rdev_to_array(rdev, mddev);
6856 if (err) {
6857 export_rdev(rdev);
6858 return err;
6859 }
6860 }
6861
6862 return 0;
6863}
6864
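/*
 * HOT_REMOVE_DISK ioctl: detach a component device, provided it is no
 * longer an active member of the array; the superblock is updated
 * afterwards.
 */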
6865static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6866{
6867 char b[BDEVNAME_SIZE];
6868 struct md_rdev *rdev;
6869
6870 if (!mddev->pers)
6871 return -ENODEV;
6872
6873 rdev = find_rdev(mddev, dev);
6874 if (!rdev)
6875 return -ENXIO;
6876
6877 if (rdev->raid_disk < 0)
6878 goto kick_rdev;
6879
6880 clear_bit(Blocked, &rdev->flags);
6881 remove_and_add_spares(mddev, rdev);
6882
6883 if (rdev->raid_disk >= 0)
6884 goto busy;
6885
6886kick_rdev:
6887 if (mddev_is_clustered(mddev))
6888 md_cluster_ops->remove_disk(mddev, rdev);
6889
6890 md_kick_rdev_from_array(rdev);
6891 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6892 if (mddev->thread)
6893 md_wakeup_thread(mddev->thread);
6894 else
6895 md_update_sb(mddev, 1);
6896 md_new_event(mddev);
6897
6898 return 0;
6899busy:
6900 pr_debug("md: cannot remove active disk %s from %s ...\n",
6901 bdevname(rdev->bdev,b), mdname(mddev));
6902 return -EBUSY;
6903}
6904
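/*
 * HOT_ADD_DISK ioctl: add a spare to a running version-0 array.  Actually
 * activating the spare is left to the recovery thread, which is kicked at
 * the end.
 */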
6905static int hot_add_disk(struct mddev *mddev, dev_t dev)
6906{
6907 char b[BDEVNAME_SIZE];
6908 int err;
6909 struct md_rdev *rdev;
6910
6911 if (!mddev->pers)
6912 return -ENODEV;
6913
6914 if (mddev->major_version != 0) {
6915 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6916 mdname(mddev));
6917 return -EINVAL;
6918 }
6919 if (!mddev->pers->hot_add_disk) {
6920 pr_warn("%s: personality does not support diskops!\n",
6921 mdname(mddev));
6922 return -EINVAL;
6923 }
6924
6925 rdev = md_import_device(dev, -1, 0);
6926 if (IS_ERR(rdev)) {
6927 pr_warn("md: error, md_import_device() returned %ld\n",
6928 PTR_ERR(rdev));
6929 return -EINVAL;
6930 }
6931
6932 if (mddev->persistent)
6933 rdev->sb_start = calc_dev_sboffset(rdev);
6934 else
6935 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6936
6937 rdev->sectors = rdev->sb_start;
6938
6939 if (test_bit(Faulty, &rdev->flags)) {
6940 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6941 bdevname(rdev->bdev,b), mdname(mddev));
6942 err = -EINVAL;
6943 goto abort_export;
6944 }
6945
6946 clear_bit(In_sync, &rdev->flags);
6947 rdev->desc_nr = -1;
6948 rdev->saved_raid_disk = -1;
6949 err = bind_rdev_to_array(rdev, mddev);
6950 if (err)
6951 goto abort_export;
6952
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6958 rdev->raid_disk = -1;
6959
6960 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6961 if (!mddev->thread)
6962 md_update_sb(mddev, 1);
6963
	/*
	 * Kick recovery, maybe this spare has to be added to the array
	 * immediately.
	 */
6967 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6968 md_wakeup_thread(mddev->thread);
6969 md_new_event(mddev);
6970 return 0;
6971
6972abort_export:
6973 export_rdev(rdev);
6974 return err;
6975}
6976
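/*
 * SET_BITMAP_FILE ioctl: attach (fd >= 0) or remove (fd < 0) a file-backed
 * write-intent bitmap.  The array is quiesced around bitmap creation or
 * destruction, and the file reference is dropped once the bitmap is gone.
 */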
6977static int set_bitmap_file(struct mddev *mddev, int fd)
6978{
6979 int err = 0;
6980
6981 if (mddev->pers) {
6982 if (!mddev->pers->quiesce || !mddev->thread)
6983 return -EBUSY;
6984 if (mddev->recovery || mddev->sync_thread)
6985 return -EBUSY;
6986
6987 }
6988
6989 if (fd >= 0) {
6990 struct inode *inode;
6991 struct file *f;
6992
6993 if (mddev->bitmap || mddev->bitmap_info.file)
6994 return -EEXIST;
6995 f = fget(fd);
6996
6997 if (f == NULL) {
6998 pr_warn("%s: error: failed to get bitmap file\n",
6999 mdname(mddev));
7000 return -EBADF;
7001 }
7002
7003 inode = f->f_mapping->host;
7004 if (!S_ISREG(inode->i_mode)) {
7005 pr_warn("%s: error: bitmap file must be a regular file\n",
7006 mdname(mddev));
7007 err = -EBADF;
7008 } else if (!(f->f_mode & FMODE_WRITE)) {
7009 pr_warn("%s: error: bitmap file must open for write\n",
7010 mdname(mddev));
7011 err = -EBADF;
7012 } else if (atomic_read(&inode->i_writecount) != 1) {
7013 pr_warn("%s: error: bitmap file is already in use\n",
7014 mdname(mddev));
7015 err = -EBUSY;
7016 }
7017 if (err) {
7018 fput(f);
7019 return err;
7020 }
7021 mddev->bitmap_info.file = f;
7022 mddev->bitmap_info.offset = 0;
7023 } else if (mddev->bitmap == NULL)
7024 return -ENOENT;
7025 err = 0;
7026 if (mddev->pers) {
7027 if (fd >= 0) {
7028 struct bitmap *bitmap;
7029
7030 bitmap = md_bitmap_create(mddev, -1);
7031 mddev_suspend(mddev);
7032 if (!IS_ERR(bitmap)) {
7033 mddev->bitmap = bitmap;
7034 err = md_bitmap_load(mddev);
7035 } else
7036 err = PTR_ERR(bitmap);
7037 if (err) {
7038 md_bitmap_destroy(mddev);
7039 fd = -1;
7040 }
7041 mddev_resume(mddev);
7042 } else if (fd < 0) {
7043 mddev_suspend(mddev);
7044 md_bitmap_destroy(mddev);
7045 mddev_resume(mddev);
7046 }
7047 }
7048 if (fd < 0) {
7049 struct file *f = mddev->bitmap_info.file;
7050 if (f) {
7051 spin_lock(&mddev->lock);
7052 mddev->bitmap_info.file = NULL;
7053 spin_unlock(&mddev->lock);
7054 fput(f);
7055 }
7056 }
7057
7058 return err;
7059}
7060
/*
 * set_array_info is used in two different ways.
 * The original usage is when creating a new array: raid_disks is > 0 and,
 * together with level, size, not_persistent, layout and chunk_size, it
 * determines the shape of the array.  This always creates an array with a
 * type-0.90.0 superblock.
 * The newer usage is when assembling an array: raid_disks is 0 and the
 * major_version field selects which style of superblock is to be found on
 * the member devices.  The minor and patch version numbers are also kept in
 * case the superblock handler wishes to interpret them.
 */
7074static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
7075{
7076
7077 if (info->raid_disks == 0) {
7078
7079 if (info->major_version < 0 ||
7080 info->major_version >= ARRAY_SIZE(super_types) ||
7081 super_types[info->major_version].name == NULL) {
7082
7083 pr_warn("md: superblock version %d not known\n",
7084 info->major_version);
7085 return -EINVAL;
7086 }
7087 mddev->major_version = info->major_version;
7088 mddev->minor_version = info->minor_version;
7089 mddev->patch_version = info->patch_version;
7090 mddev->persistent = !info->not_persistent;
7091
7092
7093
7094 mddev->ctime = ktime_get_real_seconds();
7095 return 0;
7096 }
7097 mddev->major_version = MD_MAJOR_VERSION;
7098 mddev->minor_version = MD_MINOR_VERSION;
7099 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7100 mddev->ctime = ktime_get_real_seconds();
7101
7102 mddev->level = info->level;
7103 mddev->clevel[0] = 0;
7104 mddev->dev_sectors = 2 * (sector_t)info->size;
7105 mddev->raid_disks = info->raid_disks;
7106
7107
7108
7109 if (info->state & (1<<MD_SB_CLEAN))
7110 mddev->recovery_cp = MaxSector;
7111 else
7112 mddev->recovery_cp = 0;
7113 mddev->persistent = ! info->not_persistent;
7114 mddev->external = 0;
7115
7116 mddev->layout = info->layout;
7117 if (mddev->level == 0)
		/* Cannot trust RAID0 layout info here */
7119 mddev->layout = -1;
7120 mddev->chunk_sectors = info->chunk_size >> 9;
7121
7122 if (mddev->persistent) {
7123 mddev->max_disks = MD_SB_DISKS;
7124 mddev->flags = 0;
7125 mddev->sb_flags = 0;
7126 }
7127 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7128
7129 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7130 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7131 mddev->bitmap_info.offset = 0;
7132
7133 mddev->reshape_position = MaxSector;
7134
	/*
	 * Generate a 128 bit UUID
	 */
7138 get_random_bytes(mddev->uuid, 16);
7139
7140 mddev->new_level = mddev->level;
7141 mddev->new_chunk_sectors = mddev->chunk_sectors;
7142 mddev->new_layout = mddev->layout;
7143 mddev->delta_disks = 0;
7144 mddev->reshape_backwards = 0;
7145
7146 return 0;
7147}
7148
7149void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7150{
7151 lockdep_assert_held(&mddev->reconfig_mutex);
7152
7153 if (mddev->external_size)
7154 return;
7155
7156 mddev->array_sectors = array_sectors;
7157}
7158EXPORT_SYMBOL(md_set_array_sectors);
7159
7160static int update_size(struct mddev *mddev, sector_t num_sectors)
7161{
7162 struct md_rdev *rdev;
7163 int rv;
7164 int fit = (num_sectors == 0);
7165 sector_t old_dev_sectors = mddev->dev_sectors;
7166
7167 if (mddev->pers->resize == NULL)
7168 return -EINVAL;
7169
	/*
	 * The "num_sectors" is the number of sectors of each device that is
	 * used.  This can only make sense for arrays with redundancy;
	 * linear and raid0 always use whatever space is available.  We can
	 * only consider changing this number if no resync or reconstruction
	 * is happening, and if the new size is acceptable.  If num_sectors
	 * is zero, we find the largest size that fits on every device.
	 */
7178 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7179 mddev->sync_thread)
7180 return -EBUSY;
7181 if (mddev->ro)
7182 return -EROFS;
7183
7184 rdev_for_each(rdev, mddev) {
7185 sector_t avail = rdev->sectors;
7186
7187 if (fit && (num_sectors == 0 || num_sectors > avail))
7188 num_sectors = avail;
7189 if (avail < num_sectors)
7190 return -ENOSPC;
7191 }
7192 rv = mddev->pers->resize(mddev, num_sectors);
7193 if (!rv) {
7194 if (mddev_is_clustered(mddev))
7195 md_cluster_ops->update_size(mddev, old_dev_sectors);
7196 else if (mddev->queue) {
7197 set_capacity(mddev->gendisk, mddev->array_sectors);
7198 revalidate_disk(mddev->gendisk);
7199 }
7200 }
7201 return rv;
7202}
7203
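/*
 * Change the number of raid disks by setting delta_disks and asking the
 * personality's check_reshape() to start the reshape.  Refused while a
 * resync/reshape is already in progress or on read-only arrays.
 */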
7204static int update_raid_disks(struct mddev *mddev, int raid_disks)
7205{
7206 int rv;
7207 struct md_rdev *rdev;
7208
7209 if (mddev->pers->check_reshape == NULL)
7210 return -EINVAL;
7211 if (mddev->ro)
7212 return -EROFS;
7213 if (raid_disks <= 0 ||
7214 (mddev->max_disks && raid_disks >= mddev->max_disks))
7215 return -EINVAL;
7216 if (mddev->sync_thread ||
7217 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7218 mddev->reshape_position != MaxSector)
7219 return -EBUSY;
7220
7221 rdev_for_each(rdev, mddev) {
7222 if (mddev->raid_disks < raid_disks &&
7223 rdev->data_offset < rdev->new_data_offset)
7224 return -EINVAL;
7225 if (mddev->raid_disks > raid_disks &&
7226 rdev->data_offset > rdev->new_data_offset)
7227 return -EINVAL;
7228 }
7229
7230 mddev->delta_disks = raid_disks - mddev->raid_disks;
7231 if (mddev->delta_disks < 0)
7232 mddev->reshape_backwards = 1;
7233 else if (mddev->delta_disks > 0)
7234 mddev->reshape_backwards = 0;
7235
7236 rv = mddev->pers->check_reshape(mddev);
7237 if (rv < 0) {
7238 mddev->delta_disks = 0;
7239 mddev->reshape_backwards = 0;
7240 }
7241 return rv;
7242}
7243
/*
 * update_array_info is used to change the configuration of an on-line
 * array.  Only one change may be requested at a time, and the changes that
 * are permitted are:
 *  * change the size (of each device)
 *  * change raid_disks
 *  * change the layout
 *  * add/remove an internal bitmap
 * Anything else is an error.
 */
7252static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7253{
7254 int rv = 0;
7255 int cnt = 0;
7256 int state = 0;
7257
7258
7259 if (mddev->bitmap && mddev->bitmap_info.offset)
7260 state |= (1 << MD_SB_BITMAP_PRESENT);
7261
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore the bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state^info->state) & 0xfffffe00))
		return -EINVAL;
7274
7275 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7276 cnt++;
7277 if (mddev->raid_disks != info->raid_disks)
7278 cnt++;
7279 if (mddev->layout != info->layout)
7280 cnt++;
7281 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7282 cnt++;
7283 if (cnt == 0)
7284 return 0;
7285 if (cnt > 1)
7286 return -EINVAL;
7287
7288 if (mddev->layout != info->layout) {
		/*
		 * Changing the layout needs nothing at the md level; the
		 * personality's check_reshape() takes care of it all.
		 */
7293 if (mddev->pers->check_reshape == NULL)
7294 return -EINVAL;
7295 else {
7296 mddev->new_layout = info->layout;
7297 rv = mddev->pers->check_reshape(mddev);
7298 if (rv)
7299 mddev->new_layout = mddev->layout;
7300 return rv;
7301 }
7302 }
7303 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7304 rv = update_size(mddev, (sector_t)info->size * 2);
7305
7306 if (mddev->raid_disks != info->raid_disks)
7307 rv = update_raid_disks(mddev, info->raid_disks);
7308
7309 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7310 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7311 rv = -EINVAL;
7312 goto err;
7313 }
7314 if (mddev->recovery || mddev->sync_thread) {
7315 rv = -EBUSY;
7316 goto err;
7317 }
7318 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7319 struct bitmap *bitmap;
7320
7321 if (mddev->bitmap) {
7322 rv = -EEXIST;
7323 goto err;
7324 }
7325 if (mddev->bitmap_info.default_offset == 0) {
7326 rv = -EINVAL;
7327 goto err;
7328 }
7329 mddev->bitmap_info.offset =
7330 mddev->bitmap_info.default_offset;
7331 mddev->bitmap_info.space =
7332 mddev->bitmap_info.default_space;
7333 bitmap = md_bitmap_create(mddev, -1);
7334 mddev_suspend(mddev);
7335 if (!IS_ERR(bitmap)) {
7336 mddev->bitmap = bitmap;
7337 rv = md_bitmap_load(mddev);
7338 } else
7339 rv = PTR_ERR(bitmap);
7340 if (rv)
7341 md_bitmap_destroy(mddev);
7342 mddev_resume(mddev);
7343 } else {
7344
7345 if (!mddev->bitmap) {
7346 rv = -ENOENT;
7347 goto err;
7348 }
7349 if (mddev->bitmap->storage.file) {
7350 rv = -EINVAL;
7351 goto err;
7352 }
7353 if (mddev->bitmap_info.nodes) {
7354
7355 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7356 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7357 rv = -EPERM;
7358 md_cluster_ops->unlock_all_bitmaps(mddev);
7359 goto err;
7360 }
7361
7362 mddev->bitmap_info.nodes = 0;
7363 md_cluster_ops->leave(mddev);
7364 }
7365 mddev_suspend(mddev);
7366 md_bitmap_destroy(mddev);
7367 mddev_resume(mddev);
7368 mddev->bitmap_info.offset = 0;
7369 }
7370 }
7371 md_update_sb(mddev, 1);
7372 return rv;
7373err:
7374 return rv;
7375}
7376
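/*
 * SET_DISK_FAULTY ioctl: report an error on the given device via md_error().
 * Returns -EBUSY if the personality refused to mark it Faulty (for example,
 * because it is the last working device).
 */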
7377static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7378{
7379 struct md_rdev *rdev;
7380 int err = 0;
7381
7382 if (mddev->pers == NULL)
7383 return -ENODEV;
7384
7385 rcu_read_lock();
7386 rdev = md_find_rdev_rcu(mddev, dev);
7387 if (!rdev)
7388 err = -ENODEV;
7389 else {
7390 md_error(mddev, rdev);
7391 if (!test_bit(Faulty, &rdev->flags))
7392 err = -EBUSY;
7393 }
7394 rcu_read_unlock();
7395 return err;
7396}
7397
/*
 * There is no natural CHS geometry for an MD array, so md_getgeo() reports
 * a fake 2 heads / 4 sectors layout (with a correspondingly large number of
 * cylinders), which keeps tools such as dmraid and EVMS happy.
 */
7404static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7405{
7406 struct mddev *mddev = bdev->bd_disk->private_data;
7407
7408 geo->heads = 2;
7409 geo->sectors = 4;
7410 geo->cylinders = mddev->array_sectors / 8;
7411 return 0;
7412}
7413
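/* Only a fixed set of ioctl commands is accepted by the md driver. */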
7414static inline bool md_ioctl_valid(unsigned int cmd)
7415{
7416 switch (cmd) {
7417 case ADD_NEW_DISK:
7418 case BLKROSET:
7419 case GET_ARRAY_INFO:
7420 case GET_BITMAP_FILE:
7421 case GET_DISK_INFO:
7422 case HOT_ADD_DISK:
7423 case HOT_REMOVE_DISK:
7424 case RAID_AUTORUN:
7425 case RAID_VERSION:
7426 case RESTART_ARRAY_RW:
7427 case RUN_ARRAY:
7428 case SET_ARRAY_INFO:
7429 case SET_BITMAP_FILE:
7430 case SET_DISK_FAULTY:
7431 case STOP_ARRAY:
7432 case STOP_ARRAY_RO:
7433 case CLUSTERED_DISK_NACK:
7434 return true;
7435 default:
7436 return false;
7437 }
7438}
7439
7440static int md_ioctl(struct block_device *bdev, fmode_t mode,
7441 unsigned int cmd, unsigned long arg)
7442{
7443 int err = 0;
7444 void __user *argp = (void __user *)arg;
7445 struct mddev *mddev = NULL;
7446 int ro;
7447 bool did_set_md_closing = false;
7448
7449 if (!md_ioctl_valid(cmd))
7450 return -ENOTTY;
7451
7452 switch (cmd) {
7453 case RAID_VERSION:
7454 case GET_ARRAY_INFO:
7455 case GET_DISK_INFO:
7456 break;
7457 default:
7458 if (!capable(CAP_SYS_ADMIN))
7459 return -EACCES;
7460 }
7461
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7466 switch (cmd) {
7467 case RAID_VERSION:
7468 err = get_version(argp);
7469 goto out;
7470
7471#ifndef MODULE
7472 case RAID_AUTORUN:
7473 err = 0;
7474 autostart_arrays(arg);
7475 goto out;
7476#endif
7477 default:;
7478 }
7479
	/*
	 * Commands creating/starting a new array:
	 */
7484 mddev = bdev->bd_disk->private_data;
7485
7486 if (!mddev) {
7487 BUG();
7488 goto out;
7489 }
7490
7491
7492 switch (cmd) {
7493 case GET_ARRAY_INFO:
7494 if (!mddev->raid_disks && !mddev->external)
7495 err = -ENODEV;
7496 else
7497 err = get_array_info(mddev, argp);
7498 goto out;
7499
7500 case GET_DISK_INFO:
7501 if (!mddev->raid_disks && !mddev->external)
7502 err = -ENODEV;
7503 else
7504 err = get_disk_info(mddev, argp);
7505 goto out;
7506
7507 case SET_DISK_FAULTY:
7508 err = set_disk_faulty(mddev, new_decode_dev(arg));
7509 goto out;
7510
7511 case GET_BITMAP_FILE:
7512 err = get_bitmap_file(mddev, argp);
7513 goto out;
7514
7515 }
7516
7517 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7518 flush_rdev_wq(mddev);
7519
7520 if (cmd == HOT_REMOVE_DISK)
7521
7522 wait_event_interruptible_timeout(mddev->sb_wait,
7523 !test_bit(MD_RECOVERY_NEEDED,
7524 &mddev->recovery),
7525 msecs_to_jiffies(5000));
7526 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/*
		 * Need to flush the page cache, and ensure no-one else opens
		 * the device and writes to it.
		 */
7530 mutex_lock(&mddev->open_mutex);
7531 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7532 mutex_unlock(&mddev->open_mutex);
7533 err = -EBUSY;
7534 goto out;
7535 }
7536 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7537 set_bit(MD_CLOSING, &mddev->flags);
7538 did_set_md_closing = true;
7539 mutex_unlock(&mddev->open_mutex);
7540 sync_blockdev(bdev);
7541 }
7542 err = mddev_lock(mddev);
7543 if (err) {
7544 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7545 err, cmd);
7546 goto out;
7547 }
7548
7549 if (cmd == SET_ARRAY_INFO) {
7550 mdu_array_info_t info;
7551 if (!arg)
7552 memset(&info, 0, sizeof(info));
7553 else if (copy_from_user(&info, argp, sizeof(info))) {
7554 err = -EFAULT;
7555 goto unlock;
7556 }
7557 if (mddev->pers) {
7558 err = update_array_info(mddev, &info);
7559 if (err) {
7560 pr_warn("md: couldn't update array info. %d\n", err);
7561 goto unlock;
7562 }
7563 goto unlock;
7564 }
7565 if (!list_empty(&mddev->disks)) {
7566 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7567 err = -EBUSY;
7568 goto unlock;
7569 }
7570 if (mddev->raid_disks) {
7571 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7572 err = -EBUSY;
7573 goto unlock;
7574 }
7575 err = set_array_info(mddev, &info);
7576 if (err) {
7577 pr_warn("md: couldn't set array info. %d\n", err);
7578 goto unlock;
7579 }
7580 goto unlock;
7581 }
7582
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
7588 if ((!mddev->raid_disks && !mddev->external)
7589 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7590 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7591 && cmd != GET_BITMAP_FILE) {
7592 err = -ENODEV;
7593 goto unlock;
7594 }
7595
	/*
	 * Commands even a read-only array can execute:
	 */
7599 switch (cmd) {
7600 case RESTART_ARRAY_RW:
7601 err = restart_array(mddev);
7602 goto unlock;
7603
7604 case STOP_ARRAY:
7605 err = do_md_stop(mddev, 0, bdev);
7606 goto unlock;
7607
7608 case STOP_ARRAY_RO:
7609 err = md_set_readonly(mddev, bdev);
7610 goto unlock;
7611
7612 case HOT_REMOVE_DISK:
7613 err = hot_remove_disk(mddev, new_decode_dev(arg));
7614 goto unlock;
7615
7616 case ADD_NEW_DISK:
		/*
		 * We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
7621 if (mddev->pers) {
7622 mdu_disk_info_t info;
7623 if (copy_from_user(&info, argp, sizeof(info)))
7624 err = -EFAULT;
7625 else if (!(info.state & (1<<MD_DISK_SYNC)))
7626
7627 break;
7628 else
7629 err = add_new_disk(mddev, &info);
7630 goto unlock;
7631 }
7632 break;
7633
7634 case BLKROSET:
7635 if (get_user(ro, (int __user *)(arg))) {
7636 err = -EFAULT;
7637 goto unlock;
7638 }
7639 err = -EINVAL;
7640
7641
7642
7643
7644 if (ro)
7645 goto unlock;
7646
7647
7648 if (mddev->ro != 1)
7649 goto unlock;
7650
7651
7652
7653
7654 if (mddev->pers) {
7655 err = restart_array(mddev);
7656 if (err == 0) {
7657 mddev->ro = 2;
7658 set_disk_ro(mddev->gendisk, 0);
7659 }
7660 }
7661 goto unlock;
7662 }
7663
	/*
	 * The remaining ioctls are changing the state of the
	 * superblock, so we do not allow them on read-only arrays.
	 */
7668 if (mddev->ro && mddev->pers) {
7669 if (mddev->ro == 2) {
7670 mddev->ro = 0;
7671 sysfs_notify_dirent_safe(mddev->sysfs_state);
7672 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/*
			 * If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
7677 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7678 mddev_unlock(mddev);
7679 wait_event(mddev->sb_wait,
7680 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7681 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7682 mddev_lock_nointr(mddev);
7683 }
7684 } else {
7685 err = -EROFS;
7686 goto unlock;
7687 }
7688 }
7689
7690 switch (cmd) {
7691 case ADD_NEW_DISK:
7692 {
7693 mdu_disk_info_t info;
7694 if (copy_from_user(&info, argp, sizeof(info)))
7695 err = -EFAULT;
7696 else
7697 err = add_new_disk(mddev, &info);
7698 goto unlock;
7699 }
7700
7701 case CLUSTERED_DISK_NACK:
7702 if (mddev_is_clustered(mddev))
7703 md_cluster_ops->new_disk_ack(mddev, false);
7704 else
7705 err = -EINVAL;
7706 goto unlock;
7707
7708 case HOT_ADD_DISK:
7709 err = hot_add_disk(mddev, new_decode_dev(arg));
7710 goto unlock;
7711
7712 case RUN_ARRAY:
7713 err = do_md_run(mddev);
7714 goto unlock;
7715
7716 case SET_BITMAP_FILE:
7717 err = set_bitmap_file(mddev, (int)arg);
7718 goto unlock;
7719
7720 default:
7721 err = -EINVAL;
7722 goto unlock;
7723 }
7724
7725unlock:
7726 if (mddev->hold_active == UNTIL_IOCTL &&
7727 err != -EINVAL)
7728 mddev->hold_active = 0;
7729 mddev_unlock(mddev);
7730out:
7731 if(did_set_md_closing)
7732 clear_bit(MD_CLOSING, &mddev->flags);
7733 return err;
7734}
7735#ifdef CONFIG_COMPAT
7736static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7737 unsigned int cmd, unsigned long arg)
7738{
7739 switch (cmd) {
7740 case HOT_REMOVE_DISK:
7741 case HOT_ADD_DISK:
7742 case SET_DISK_FAULTY:
7743 case SET_BITMAP_FILE:
		/* these take an integer (dev_t or fd) argument, do not convert */
7745 break;
7746 default:
7747 arg = (unsigned long)compat_ptr(arg);
7748 break;
7749 }
7750
7751 return md_ioctl(bdev, mode, cmd, arg);
7752}
7753#endif
7754
7755static int md_open(struct block_device *bdev, fmode_t mode)
7756{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7761 struct mddev *mddev = mddev_find(bdev->bd_dev);
7762 int err;
7763
7764 if (!mddev)
7765 return -ENODEV;
7766
7767 if (mddev->gendisk != bdev->bd_disk) {
		/*
		 * We are racing with mddev_put(), which is almost but not
		 * quite possible, so drop our reference, wait for any
		 * pending deletion to finish, and retry the open.
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		if (work_pending(&mddev->del_work))
			flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
		return -ERESTARTSYS;
7777 }
7778 BUG_ON(mddev != bdev->bd_disk->private_data);
7779
7780 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7781 goto out;
7782
7783 if (test_bit(MD_CLOSING, &mddev->flags)) {
7784 mutex_unlock(&mddev->open_mutex);
7785 err = -ENODEV;
7786 goto out;
7787 }
7788
7789 err = 0;
7790 atomic_inc(&mddev->openers);
7791 mutex_unlock(&mddev->open_mutex);
7792
7793 check_disk_change(bdev);
7794 out:
7795 if (err)
7796 mddev_put(mddev);
7797 return err;
7798}
7799
7800static void md_release(struct gendisk *disk, fmode_t mode)
7801{
7802 struct mddev *mddev = disk->private_data;
7803
7804 BUG_ON(!mddev);
7805 atomic_dec(&mddev->openers);
7806 mddev_put(mddev);
7807}
7808
7809static int md_media_changed(struct gendisk *disk)
7810{
7811 struct mddev *mddev = disk->private_data;
7812
7813 return mddev->changed;
7814}
7815
7816static int md_revalidate(struct gendisk *disk)
7817{
7818 struct mddev *mddev = disk->private_data;
7819
7820 mddev->changed = 0;
7821 return 0;
7822}
7823static const struct block_device_operations md_fops =
7824{
7825 .owner = THIS_MODULE,
7826 .open = md_open,
7827 .release = md_release,
7828 .ioctl = md_ioctl,
7829#ifdef CONFIG_COMPAT
7830 .compat_ioctl = md_compat_ioctl,
7831#endif
7832 .getgeo = md_getgeo,
7833 .media_changed = md_media_changed,
7834 .revalidate_disk= md_revalidate,
7835};
7836
7837static int md_thread(void *arg)
7838{
7839 struct md_thread *thread = arg;
7840
	/*
	 * md_thread is the generic per-array service thread.  It sleeps
	 * until THREAD_WAKEUP is set (or it is asked to park or stop) and
	 * then calls the registered ->run() handler.  SIGKILL is allowed so
	 * that any pending signal can be flushed below instead of leaving
	 * the thread permanently signal-pending.
	 */
7853 allow_signal(SIGKILL);
7854 while (!kthread_should_stop()) {
		/*
		 * We need to wait INTERRUPTIBLE so that we don't add to the
		 * load-average.  That means we need to be sure no signals
		 * are pending.
		 */
7861 if (signal_pending(current))
7862 flush_signals(current);
7863
7864 wait_event_interruptible_timeout
7865 (thread->wqueue,
7866 test_bit(THREAD_WAKEUP, &thread->flags)
7867 || kthread_should_stop() || kthread_should_park(),
7868 thread->timeout);
7869
7870 clear_bit(THREAD_WAKEUP, &thread->flags);
7871 if (kthread_should_park())
7872 kthread_parkme();
7873 if (!kthread_should_stop())
7874 thread->run(thread);
7875 }
7876
7877 return 0;
7878}
7879
7880void md_wakeup_thread(struct md_thread *thread)
7881{
7882 if (thread) {
7883 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7884 set_bit(THREAD_WAKEUP, &thread->flags);
7885 wake_up(&thread->wqueue);
7886 }
7887}
7888EXPORT_SYMBOL(md_wakeup_thread);
7889
7890struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7891 struct mddev *mddev, const char *name)
7892{
7893 struct md_thread *thread;
7894
7895 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7896 if (!thread)
7897 return NULL;
7898
7899 init_waitqueue_head(&thread->wqueue);
7900
7901 thread->run = run;
7902 thread->mddev = mddev;
7903 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7904 thread->tsk = kthread_run(md_thread, thread,
7905 "%s_%s",
7906 mdname(thread->mddev),
7907 name);
7908 if (IS_ERR(thread->tsk)) {
7909 kfree(thread);
7910 return NULL;
7911 }
7912 return thread;
7913}
7914EXPORT_SYMBOL(md_register_thread);
7915
7916void md_unregister_thread(struct md_thread **threadp)
7917{
7918 struct md_thread *thread = *threadp;
7919 if (!thread)
7920 return;
7921 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/*
	 * Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread.
	 */
7925 spin_lock(&pers_lock);
7926 *threadp = NULL;
7927 spin_unlock(&pers_lock);
7928
7929 kthread_stop(thread->tsk);
7930 kfree(thread);
7931}
7932EXPORT_SYMBOL(md_unregister_thread);
7933
7934void md_error(struct mddev *mddev, struct md_rdev *rdev)
7935{
7936 if (!rdev || test_bit(Faulty, &rdev->flags))
7937 return;
7938
7939 if (!mddev->pers || !mddev->pers->error_handler)
7940 return;
7941 mddev->pers->error_handler(mddev,rdev);
7942 if (mddev->degraded)
7943 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7944 sysfs_notify_dirent_safe(rdev->sysfs_state);
7945 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7946 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7947 md_wakeup_thread(mddev->thread);
7948 if (mddev->event_work.func)
7949 queue_work(md_misc_wq, &mddev->event_work);
7950 md_new_event(mddev);
7951}
7952EXPORT_SYMBOL(md_error);
7953
/*
 * seq_file implementation for /proc/mdstat
 */
7956static void status_unused(struct seq_file *seq)
7957{
7958 int i = 0;
7959 struct md_rdev *rdev;
7960
7961 seq_printf(seq, "unused devices: ");
7962
7963 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7964 char b[BDEVNAME_SIZE];
7965 i++;
7966 seq_printf(seq, "%s ",
7967 bdevname(rdev->bdev,b));
7968 }
7969 if (!i)
7970 seq_printf(seq, "<none>");
7971
7972 seq_printf(seq, "\n");
7973}
7974
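/*
 * Emit the resync/recovery progress line for /proc/mdstat: a progress bar,
 * percentage, estimated finish time and current speed.  Returns 1 if a
 * status line was printed, 0 if the array has nothing in progress.
 */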
7975static int status_resync(struct seq_file *seq, struct mddev *mddev)
7976{
7977 sector_t max_sectors, resync, res;
7978 unsigned long dt, db = 0;
7979 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7980 int scale, recovery_active;
7981 unsigned int per_milli;
7982
7983 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7984 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7985 max_sectors = mddev->resync_max_sectors;
7986 else
7987 max_sectors = mddev->dev_sectors;
7988
7989 resync = mddev->curr_resync;
7990 if (resync <= 3) {
7991 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7992
7993 resync = max_sectors;
7994 } else if (resync > max_sectors)
7995 resync = max_sectors;
7996 else
7997 resync -= atomic_read(&mddev->recovery_active);
7998
7999 if (resync == 0) {
8000 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8001 struct md_rdev *rdev;
8002
8003 rdev_for_each(rdev, mddev)
8004 if (rdev->raid_disk >= 0 &&
8005 !test_bit(Faulty, &rdev->flags) &&
8006 rdev->recovery_offset != MaxSector &&
8007 rdev->recovery_offset) {
8008 seq_printf(seq, "\trecover=REMOTE");
8009 return 1;
8010 }
8011 if (mddev->reshape_position != MaxSector)
8012 seq_printf(seq, "\treshape=REMOTE");
8013 else
8014 seq_printf(seq, "\tresync=REMOTE");
8015 return 1;
8016 }
8017 if (mddev->recovery_cp < MaxSector) {
8018 seq_printf(seq, "\tresync=PENDING");
8019 return 1;
8020 }
8021 return 0;
8022 }
8023 if (resync < 3) {
8024 seq_printf(seq, "\tresync=DELAYED");
8025 return 1;
8026 }
8027
8028 WARN_ON(max_sectors == 0);
8029
	/*
	 * Pick 'scale' such that (resync>>scale)*1000 will fit in a
	 * sector_t, and that (max_sectors>>scale)+1 will fit in the u32
	 * divisor of sector_div() below.
	 */
8034 scale = 10;
8035 if (sizeof(sector_t) > sizeof(unsigned long)) {
8036 while ( max_sectors/2 > (1ULL<<(scale+32)))
8037 scale++;
8038 }
8039 res = (resync>>scale)*1000;
8040 sector_div(res, (u32)((max_sectors>>scale)+1));
8041
8042 per_milli = res;
8043 {
8044 int i, x = per_milli/50, y = 20-x;
8045 seq_printf(seq, "[");
8046 for (i = 0; i < x; i++)
8047 seq_printf(seq, "=");
8048 seq_printf(seq, ">");
8049 for (i = 0; i < y; i++)
8050 seq_printf(seq, ".");
8051 seq_printf(seq, "] ");
8052 }
8053 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8054 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8055 "reshape" :
8056 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8057 "check" :
8058 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8059 "resync" : "recovery"))),
8060 per_milli/10, per_milli % 10,
8061 (unsigned long long) resync/2,
8062 (unsigned long long) max_sectors/2);
8063
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, which is always 64bit now.  We are keeping the
	 * original algorithm, but it is not really necessary any more:
	 * we divide before multiplying in case the value is 32bit and close
	 * to the limit, scaling the divisor (db) down by 32 to avoid losing
	 * precision near the end of the resync, and shifting the result
	 * right by 5 to compensate.
	 */
8081 dt = ((jiffies - mddev->resync_mark) / HZ);
8082 if (!dt) dt++;
8083
8084 curr_mark_cnt = mddev->curr_mark_cnt;
8085 recovery_active = atomic_read(&mddev->recovery_active);
8086 resync_mark_cnt = mddev->resync_mark_cnt;
8087
8088 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8089 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8090
8091 rt = max_sectors - resync;
8092 rt = div64_u64(rt, db/32+1);
8093 rt *= dt;
8094 rt >>= 5;
8095
8096 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8097 ((unsigned long)rt % 60)/6);
8098
8099 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8100 return 1;
8101}
8102
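/*
 * Iterator for /proc/mdstat: position 0 is a synthetic header (returned as
 * (void *)1, printed as the "Personalities" line), then each mddev on
 * all_mddevs, and finally a synthetic tail ((void *)2) that lists unused
 * devices.
 */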
8103static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8104{
8105 struct list_head *tmp;
8106 loff_t l = *pos;
8107 struct mddev *mddev;
8108
8109 if (l >= 0x10000)
8110 return NULL;
8111 if (!l--)
8112
8113 return (void*)1;
8114
8115 spin_lock(&all_mddevs_lock);
8116 list_for_each(tmp,&all_mddevs)
8117 if (!l--) {
8118 mddev = list_entry(tmp, struct mddev, all_mddevs);
8119 mddev_get(mddev);
8120 spin_unlock(&all_mddevs_lock);
8121 return mddev;
8122 }
8123 spin_unlock(&all_mddevs_lock);
8124 if (!l--)
8125 return (void*)2;
8126 return NULL;
8127}
8128
8129static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8130{
8131 struct list_head *tmp;
8132 struct mddev *next_mddev, *mddev = v;
8133
8134 ++*pos;
8135 if (v == (void*)2)
8136 return NULL;
8137
8138 spin_lock(&all_mddevs_lock);
8139 if (v == (void*)1)
8140 tmp = all_mddevs.next;
8141 else
8142 tmp = mddev->all_mddevs.next;
8143 if (tmp != &all_mddevs)
8144 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8145 else {
8146 next_mddev = (void*)2;
8147 *pos = 0x10000;
8148 }
8149 spin_unlock(&all_mddevs_lock);
8150
8151 if (v != (void*)1)
8152 mddev_put(mddev);
8153 return next_mddev;
8154
8155}
8156
8157static void md_seq_stop(struct seq_file *seq, void *v)
8158{
8159 struct mddev *mddev = v;
8160
8161 if (mddev && v != (void*)1 && v != (void*)2)
8162 mddev_put(mddev);
8163}
8164
8165static int md_seq_show(struct seq_file *seq, void *v)
8166{
8167 struct mddev *mddev = v;
8168 sector_t sectors;
8169 struct md_rdev *rdev;
8170
8171 if (v == (void*)1) {
8172 struct md_personality *pers;
8173 seq_printf(seq, "Personalities : ");
8174 spin_lock(&pers_lock);
8175 list_for_each_entry(pers, &pers_list, list)
8176 seq_printf(seq, "[%s] ", pers->name);
8177
8178 spin_unlock(&pers_lock);
8179 seq_printf(seq, "\n");
8180 seq->poll_event = atomic_read(&md_event_count);
8181 return 0;
8182 }
8183 if (v == (void*)2) {
8184 status_unused(seq);
8185 return 0;
8186 }
8187
8188 spin_lock(&mddev->lock);
8189 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8190 seq_printf(seq, "%s : %sactive", mdname(mddev),
8191 mddev->pers ? "" : "in");
8192 if (mddev->pers) {
8193 if (mddev->ro==1)
8194 seq_printf(seq, " (read-only)");
8195 if (mddev->ro==2)
8196 seq_printf(seq, " (auto-read-only)");
8197 seq_printf(seq, " %s", mddev->pers->name);
8198 }
8199
8200 sectors = 0;
8201 rcu_read_lock();
8202 rdev_for_each_rcu(rdev, mddev) {
8203 char b[BDEVNAME_SIZE];
8204 seq_printf(seq, " %s[%d]",
8205 bdevname(rdev->bdev,b), rdev->desc_nr);
8206 if (test_bit(WriteMostly, &rdev->flags))
8207 seq_printf(seq, "(W)");
8208 if (test_bit(Journal, &rdev->flags))
8209 seq_printf(seq, "(J)");
8210 if (test_bit(Faulty, &rdev->flags)) {
8211 seq_printf(seq, "(F)");
8212 continue;
8213 }
8214 if (rdev->raid_disk < 0)
8215 seq_printf(seq, "(S)");
8216 if (test_bit(Replacement, &rdev->flags))
8217 seq_printf(seq, "(R)");
8218 sectors += rdev->sectors;
8219 }
8220 rcu_read_unlock();
8221
8222 if (!list_empty(&mddev->disks)) {
8223 if (mddev->pers)
8224 seq_printf(seq, "\n %llu blocks",
8225 (unsigned long long)
8226 mddev->array_sectors / 2);
8227 else
8228 seq_printf(seq, "\n %llu blocks",
8229 (unsigned long long)sectors / 2);
8230 }
8231 if (mddev->persistent) {
8232 if (mddev->major_version != 0 ||
8233 mddev->minor_version != 90) {
8234 seq_printf(seq," super %d.%d",
8235 mddev->major_version,
8236 mddev->minor_version);
8237 }
8238 } else if (mddev->external)
8239 seq_printf(seq, " super external:%s",
8240 mddev->metadata_type);
8241 else
8242 seq_printf(seq, " super non-persistent");
8243
8244 if (mddev->pers) {
8245 mddev->pers->status(seq, mddev);
8246 seq_printf(seq, "\n ");
8247 if (mddev->pers->sync_request) {
8248 if (status_resync(seq, mddev))
8249 seq_printf(seq, "\n ");
8250 }
8251 } else
8252 seq_printf(seq, "\n ");
8253
8254 md_bitmap_status(seq, mddev->bitmap);
8255
8256 seq_printf(seq, "\n");
8257 }
8258 spin_unlock(&mddev->lock);
8259
8260 return 0;
8261}
8262
8263static const struct seq_operations md_seq_ops = {
8264 .start = md_seq_start,
8265 .next = md_seq_next,
8266 .stop = md_seq_stop,
8267 .show = md_seq_show,
8268};
8269
8270static int md_seq_open(struct inode *inode, struct file *file)
8271{
8272 struct seq_file *seq;
8273 int error;
8274
8275 error = seq_open(file, &md_seq_ops);
8276 if (error)
8277 return error;
8278
8279 seq = file->private_data;
8280 seq->poll_event = atomic_read(&md_event_count);
8281 return error;
8282}
8283
8284static int md_unloading;
8285static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8286{
8287 struct seq_file *seq = filp->private_data;
8288 __poll_t mask;
8289
8290 if (md_unloading)
8291 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8292 poll_wait(filp, &md_event_waiters, wait);
8293
8294
8295 mask = EPOLLIN | EPOLLRDNORM;
8296
8297 if (seq->poll_event != atomic_read(&md_event_count))
8298 mask |= EPOLLERR | EPOLLPRI;
8299 return mask;
8300}
8301
8302static const struct proc_ops mdstat_proc_ops = {
8303 .proc_open = md_seq_open,
8304 .proc_read = seq_read,
8305 .proc_lseek = seq_lseek,
8306 .proc_release = seq_release,
8307 .proc_poll = mdstat_poll,
8308};
8309
8310int register_md_personality(struct md_personality *p)
8311{
8312 pr_debug("md: %s personality registered for level %d\n",
8313 p->name, p->level);
8314 spin_lock(&pers_lock);
8315 list_add_tail(&p->list, &pers_list);
8316 spin_unlock(&pers_lock);
8317 return 0;
8318}
8319EXPORT_SYMBOL(register_md_personality);
8320
8321int unregister_md_personality(struct md_personality *p)
8322{
8323 pr_debug("md: %s personality unregistered\n", p->name);
8324 spin_lock(&pers_lock);
8325 list_del_init(&p->list);
8326 spin_unlock(&pers_lock);
8327 return 0;
8328}
8329EXPORT_SYMBOL(unregister_md_personality);
8330
8331int register_md_cluster_operations(struct md_cluster_operations *ops,
8332 struct module *module)
8333{
8334 int ret = 0;
8335 spin_lock(&pers_lock);
8336 if (md_cluster_ops != NULL)
8337 ret = -EALREADY;
8338 else {
8339 md_cluster_ops = ops;
8340 md_cluster_mod = module;
8341 }
8342 spin_unlock(&pers_lock);
8343 return ret;
8344}
8345EXPORT_SYMBOL(register_md_cluster_operations);
8346
8347int unregister_md_cluster_operations(void)
8348{
8349 spin_lock(&pers_lock);
8350 md_cluster_ops = NULL;
8351 spin_unlock(&pers_lock);
8352 return 0;
8353}
8354EXPORT_SYMBOL(unregister_md_cluster_operations);
8355
8356int md_setup_cluster(struct mddev *mddev, int nodes)
8357{
8358 if (!md_cluster_ops)
8359 request_module("md-cluster");
8360 spin_lock(&pers_lock);
	/* ensure the module won't be unloaded while we hold a reference */
	if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8364 spin_unlock(&pers_lock);
8365 return -ENOENT;
8366 }
8367 spin_unlock(&pers_lock);
8368
8369 return md_cluster_ops->join(mddev, nodes);
8370}
8371
8372void md_cluster_stop(struct mddev *mddev)
8373{
8374 if (!md_cluster_ops)
8375 return;
8376 md_cluster_ops->leave(mddev);
8377 module_put(md_cluster_mod);
8378}
8379
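/*
 * Estimate whether the array has seen non-resync I/O recently by comparing
 * each device's total sector count against its sync_io counter.  md_do_sync
 * uses this to decide whether resync may run at full speed or should
 * throttle back towards the minimum guaranteed rate.
 */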
8380static int is_mddev_idle(struct mddev *mddev, int init)
8381{
8382 struct md_rdev *rdev;
8383 int idle;
8384 int curr_events;
8385
8386 idle = 1;
8387 rcu_read_lock();
8388 rdev_for_each_rcu(rdev, mddev) {
8389 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8390 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8391 atomic_read(&disk->sync_io);
		/*
		 * Sync IO will cause sync_io to increase before the
		 * disk_stats, as sync_io is counted when a request starts
		 * and disk_stats when it completes.  So resync activity
		 * will cause curr_events to be smaller than when there was
		 * no such activity.
		 * Non-sync IO will cause disk_stats to increase without
		 * increasing sync_io, so curr_events will (eventually) be
		 * larger than it was before.  Once it becomes substantially
		 * larger, the test below will make the array appear
		 * non-idle and resync will slow down.
		 * If there is a lot of outstanding resync activity when we
		 * set last_events to curr_events, then all that activity
		 * completing might make the array appear non-idle and slow
		 * resync down even though there was no non-resync activity.
		 * This only happens once though: 'last_events' will soon
		 * reflect the state where there is little or no outstanding
		 * resync, and further resync activity will always make
		 * curr_events less than last_events.
		 */
8414 if (init || curr_events - rdev->last_events > 64) {
8415 rdev->last_events = curr_events;
8416 idle = 0;
8417 }
8418 }
8419 rcu_read_unlock();
8420 return idle;
8421}
8422
8423void md_done_sync(struct mddev *mddev, int blocks, int ok)
8424{
8425
8426 atomic_sub(blocks, &mddev->recovery_active);
8427 wake_up(&mddev->recovery_wait);
8428 if (!ok) {
8429 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8430 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8431 md_wakeup_thread(mddev->thread);
8432
8433 }
8434}
8435EXPORT_SYMBOL(md_done_sync);
8436
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. the 'active' flag in the
 * superblock) before writing, schedule a superblock update and wait for it
 * to complete.
 * A return value of 'false' means the metadata update is still pending
 * (the array is being suspended), so the write must not proceed.
 */
8444bool md_write_start(struct mddev *mddev, struct bio *bi)
8445{
8446 int did_change = 0;
8447
8448 if (bio_data_dir(bi) != WRITE)
8449 return true;
8450
8451 BUG_ON(mddev->ro == 1);
8452 if (mddev->ro == 2) {
8453
8454 mddev->ro = 0;
8455 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8456 md_wakeup_thread(mddev->thread);
8457 md_wakeup_thread(mddev->sync_thread);
8458 did_change = 1;
8459 }
8460 rcu_read_lock();
8461 percpu_ref_get(&mddev->writes_pending);
8462 smp_mb();
8463 if (mddev->safemode == 1)
8464 mddev->safemode = 0;
8465
8466 if (mddev->in_sync || mddev->sync_checkers) {
8467 spin_lock(&mddev->lock);
8468 if (mddev->in_sync) {
8469 mddev->in_sync = 0;
8470 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8471 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8472 md_wakeup_thread(mddev->thread);
8473 did_change = 1;
8474 }
8475 spin_unlock(&mddev->lock);
8476 }
8477 rcu_read_unlock();
8478 if (did_change)
8479 sysfs_notify_dirent_safe(mddev->sysfs_state);
8480 if (!mddev->has_superblocks)
8481 return true;
8482 wait_event(mddev->sb_wait,
8483 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8484 mddev->suspended);
8485 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8486 percpu_ref_put(&mddev->writes_pending);
8487 return false;
8488 }
8489 return true;
8490}
8491EXPORT_SYMBOL(md_write_start);
8492
/* md_write_inc can only be called when md_write_start() has already been
 * called at least once for the current request.  It increments the count of
 * pending writes and is useful when a single request is split into several
 * parts: each part needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call md_write_inc() inside a
 * spinlocked region.
 */
8501void md_write_inc(struct mddev *mddev, struct bio *bi)
8502{
8503 if (bio_data_dir(bi) != WRITE)
8504 return;
8505 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8506 percpu_ref_get(&mddev->writes_pending);
8507}
8508EXPORT_SYMBOL(md_write_inc);
8509
8510void md_write_end(struct mddev *mddev)
8511{
8512 percpu_ref_put(&mddev->writes_pending);
8513
8514 if (mddev->safemode == 2)
8515 md_wakeup_thread(mddev->thread);
8516 else if (mddev->safemode_delay)
8517
8518
8519
8520 mod_timer(&mddev->safemode_timer,
8521 roundup(jiffies, mddev->safemode_delay) +
8522 mddev->safemode_delay);
8523}
8524
8525EXPORT_SYMBOL(md_write_end);
8526
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 */
8533void md_allow_write(struct mddev *mddev)
8534{
8535 if (!mddev->pers)
8536 return;
8537 if (mddev->ro)
8538 return;
8539 if (!mddev->pers->sync_request)
8540 return;
8541
8542 spin_lock(&mddev->lock);
8543 if (mddev->in_sync) {
8544 mddev->in_sync = 0;
8545 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8546 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8547 if (mddev->safemode_delay &&
8548 mddev->safemode == 0)
8549 mddev->safemode = 1;
8550 spin_unlock(&mddev->lock);
8551 md_update_sb(mddev, 0);
8552 sysfs_notify_dirent_safe(mddev->sysfs_state);
8553
8554 wait_event(mddev->sb_wait,
8555 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8556 } else
8557 spin_unlock(&mddev->lock);
8558}
8559EXPORT_SYMBOL_GPL(md_allow_write);
8560
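/*
 * md_do_sync() is the body of the resync/recovery/reshape/check thread.
 * SYNC_MARKS/SYNC_MARK_STEP define a ring of timestamps used to estimate
 * recent throughput, and UPDATE_FREQUENCY bounds how often the checkpoint
 * (curr_resync_completed) is pushed out to the superblock.
 */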
8561#define SYNC_MARKS 10
8562#define SYNC_MARK_STEP (3*HZ)
8563#define UPDATE_FREQUENCY (5*60*HZ)
8564void md_do_sync(struct md_thread *thread)
8565{
8566 struct mddev *mddev = thread->mddev;
8567 struct mddev *mddev2;
8568 unsigned int currspeed = 0, window;
8569 sector_t max_sectors,j, io_sectors, recovery_done;
8570 unsigned long mark[SYNC_MARKS];
8571 unsigned long update_time;
8572 sector_t mark_cnt[SYNC_MARKS];
8573 int last_mark,m;
8574 struct list_head *tmp;
8575 sector_t last_check;
8576 int skipped = 0;
8577 struct md_rdev *rdev;
8578 char *desc, *action = NULL;
8579 struct blk_plug plug;
8580 int ret;
8581
8582
8583 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8584 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8585 return;
8586 if (mddev->ro) {
8587 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8588 return;
8589 }
8590
8591 if (mddev_is_clustered(mddev)) {
8592 ret = md_cluster_ops->resync_start(mddev);
8593 if (ret)
8594 goto skip;
8595
8596 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8597 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8598 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8599 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8600 && ((unsigned long long)mddev->curr_resync_completed
8601 < (unsigned long long)mddev->resync_max_sectors))
8602 goto skip;
8603 }
8604
8605 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8606 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8607 desc = "data-check";
8608 action = "check";
8609 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8610 desc = "requested-resync";
8611 action = "repair";
8612 } else
8613 desc = "resync";
8614 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8615 desc = "reshape";
8616 else
8617 desc = "recovery";
8618
8619 mddev->last_sync_action = action ?: desc;
8620
	/*
	 * We overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow a conflicting resync to
	 *	commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to 2, and
	 * then checked that every "conflicting" array has curr_resync
	 * >= ours.  If other arrays don't have that value set, we wait in
	 * the loop below until they do.
	 */
8637 do {
8638 int mddev2_minor = -1;
8639 mddev->curr_resync = 2;
8640
8641 try_again:
8642 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8643 goto skip;
8644 for_each_mddev(mddev2, tmp) {
8645 if (mddev2 == mddev)
8646 continue;
8647 if (!mddev->parallel_resync
8648 && mddev2->curr_resync
8649 && match_mddev_units(mddev, mddev2)) {
8650 DEFINE_WAIT(wq);
8651 if (mddev < mddev2 && mddev->curr_resync == 2) {
8652
8653 mddev->curr_resync = 1;
8654 wake_up(&resync_wait);
8655 }
8656 if (mddev > mddev2 && mddev->curr_resync == 1)
					/*
					 * No need to wait here; we can wait
					 * the next time round, when
					 * curr_resync == 2.
					 */
					continue;
				/*
				 * We need to wait 'interruptible' so as not
				 * to contribute to the load average, and not
				 * to be caught by 'softlockup'.
				 */
8665 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8666 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8667 mddev2->curr_resync >= mddev->curr_resync) {
8668 if (mddev2_minor != mddev2->md_minor) {
8669 mddev2_minor = mddev2->md_minor;
8670 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8671 desc, mdname(mddev),
8672 mdname(mddev2));
8673 }
8674 mddev_put(mddev2);
8675 if (signal_pending(current))
8676 flush_signals(current);
8677 schedule();
8678 finish_wait(&resync_wait, &wq);
8679 goto try_again;
8680 }
8681 finish_wait(&resync_wait, &wq);
8682 }
8683 }
8684 } while (mddev->curr_resync < 2);
8685
8686 j = 0;
8687 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/*
		 * resync follows the size requested by the personality,
		 * which defaults to the physical size but can be virtual.
		 */
8691 max_sectors = mddev->resync_max_sectors;
8692 atomic64_set(&mddev->resync_mismatches, 0);
8693
8694 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8695 j = mddev->resync_min;
8696 else if (!mddev->bitmap)
8697 j = mddev->recovery_cp;
8698
8699 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8700 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restarting the reshape
		 * from the very beginning.
		 */
8706 if (mddev_is_clustered(mddev) &&
8707 mddev->reshape_position != MaxSector)
8708 j = mddev->reshape_position;
8709 } else {
8710
8711 max_sectors = mddev->dev_sectors;
8712 j = MaxSector;
8713 rcu_read_lock();
8714 rdev_for_each_rcu(rdev, mddev)
8715 if (rdev->raid_disk >= 0 &&
8716 !test_bit(Journal, &rdev->flags) &&
8717 !test_bit(Faulty, &rdev->flags) &&
8718 !test_bit(In_sync, &rdev->flags) &&
8719 rdev->recovery_offset < j)
8720 j = rdev->recovery_offset;
8721 rcu_read_unlock();
8722
		/*
		 * If there is a bitmap, we need to make sure all writes that
		 * started before we added a spare complete before we start
		 * doing a recovery.  Otherwise the write might complete and
		 * (via bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that region.
		 */
8731 if (mddev->bitmap) {
8732 mddev->pers->quiesce(mddev, 1);
8733 mddev->pers->quiesce(mddev, 0);
8734 }
8735 }
8736
8737 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8738 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8739 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8740 speed_max(mddev), desc);
8741
8742 is_mddev_idle(mddev, 1);
8743
8744 io_sectors = 0;
8745 for (m = 0; m < SYNC_MARKS; m++) {
8746 mark[m] = jiffies;
8747 mark_cnt[m] = io_sectors;
8748 }
8749 last_mark = 0;
8750 mddev->resync_mark = mark[last_mark];
8751 mddev->resync_mark_cnt = mark_cnt[last_mark];
8752
8753
8754
8755
8756 window = 32 * (PAGE_SIZE / 512);
8757 pr_debug("md: using %dk window, over a total of %lluk.\n",
8758 window/2, (unsigned long long)max_sectors/2);
8759
8760 atomic_set(&mddev->recovery_active, 0);
8761 last_check = 0;
8762
8763 if (j>2) {
8764 pr_debug("md: resuming %s of %s from checkpoint.\n",
8765 desc, mdname(mddev));
8766 mddev->curr_resync = j;
8767 } else
8768 mddev->curr_resync = 3;
8769 mddev->curr_resync_completed = j;
8770 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8771 md_new_event(mddev);
8772 update_time = jiffies;
8773
8774 blk_start_plug(&plug);
8775 while (j < max_sectors) {
8776 sector_t sectors;
8777
8778 skipped = 0;
8779
8780 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8781 ((mddev->curr_resync > mddev->curr_resync_completed &&
8782 (mddev->curr_resync - mddev->curr_resync_completed)
8783 > (max_sectors >> 4)) ||
8784 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8785 (j - mddev->curr_resync_completed)*2
8786 >= mddev->resync_max - mddev->curr_resync_completed ||
8787 mddev->curr_resync_completed > mddev->resync_max
8788 )) {
8789
8790 wait_event(mddev->recovery_wait,
8791 atomic_read(&mddev->recovery_active) == 0);
8792 mddev->curr_resync_completed = j;
8793 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8794 j > mddev->recovery_cp)
8795 mddev->recovery_cp = j;
8796 update_time = jiffies;
8797 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8798 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8799 }
8800
8801 while (j >= mddev->resync_max &&
8802 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/*
			 * As this condition is controlled by user-space, we
			 * can block indefinitely, so use '_interruptible' to
			 * avoid triggering warnings.
			 */
8807 flush_signals(current);
8808 wait_event_interruptible(mddev->recovery_wait,
8809 mddev->resync_max > j
8810 || test_bit(MD_RECOVERY_INTR,
8811 &mddev->recovery));
8812 }
8813
8814 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8815 break;
8816
8817 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8818 if (sectors == 0) {
8819 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8820 break;
8821 }
8822
8823 if (!skipped) {
8824 io_sectors += sectors;
8825 atomic_add(sectors, &mddev->recovery_active);
8826 }
8827
8828 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8829 break;
8830
8831 j += sectors;
8832 if (j > max_sectors)
8833
8834 j = max_sectors;
8835 if (j > 2)
8836 mddev->curr_resync = j;
8837 mddev->curr_mark_cnt = io_sectors;
8838 if (last_check == 0)
8839
8840
8841
8842 md_new_event(mddev);
8843
8844 if (last_check + window > io_sectors || j == max_sectors)
8845 continue;
8846
8847 last_check = io_sectors;
8848 repeat:
8849 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8850
8851 int next = (last_mark+1) % SYNC_MARKS;
8852
8853 mddev->resync_mark = mark[next];
8854 mddev->resync_mark_cnt = mark_cnt[next];
8855 mark[next] = jiffies;
8856 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8857 last_mark = next;
8858 }
8859
8860 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8861 break;
		/*
		 * This loop exits only if either we are slower than the
		 * 'hard' speed limit, or the system was IO-idle for a
		 * jiffy.  The system might be non-idle CPU-wise, but we
		 * only care about not overloading the IO subsystem (things
		 * like an e2fsck being done on the RAID array should
		 * execute fast).
		 */
8871 cond_resched();
8872
8873 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8874 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8875 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8876
8877 if (currspeed > speed_min(mddev)) {
8878 if (currspeed > speed_max(mddev)) {
8879 msleep(500);
8880 goto repeat;
8881 }
8882 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8887 wait_event(mddev->recovery_wait,
8888 !atomic_read(&mddev->recovery_active));
8889 }
8890 }
8891 }
8892 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8893 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8894 ? "interrupted" : "done");
8895
8896
8897
8898 blk_finish_plug(&plug);
8899 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8900
8901 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8902 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8903 mddev->curr_resync > 3) {
8904 mddev->curr_resync_completed = mddev->curr_resync;
8905 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8906 }
8907 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8908
8909 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8910 mddev->curr_resync > 3) {
8911 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8912 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8913 if (mddev->curr_resync >= mddev->recovery_cp) {
8914 pr_debug("md: checkpointing %s of %s.\n",
8915 desc, mdname(mddev));
8916 if (test_bit(MD_RECOVERY_ERROR,
8917 &mddev->recovery))
8918 mddev->recovery_cp =
8919 mddev->curr_resync_completed;
8920 else
8921 mddev->recovery_cp =
8922 mddev->curr_resync;
8923 }
8924 } else
8925 mddev->recovery_cp = MaxSector;
8926 } else {
8927 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8928 mddev->curr_resync = MaxSector;
8929 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8930 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8931 rcu_read_lock();
8932 rdev_for_each_rcu(rdev, mddev)
8933 if (rdev->raid_disk >= 0 &&
8934 mddev->delta_disks >= 0 &&
8935 !test_bit(Journal, &rdev->flags) &&
8936 !test_bit(Faulty, &rdev->flags) &&
8937 !test_bit(In_sync, &rdev->flags) &&
8938 rdev->recovery_offset < mddev->curr_resync)
8939 rdev->recovery_offset = mddev->curr_resync;
8940 rcu_read_unlock();
8941 }
8942 }
8943 }
8944 skip:
	/*
	 * Set CHANGE_PENDING here since another update may be needed, so
	 * other nodes are informed.  It should be harmless for a normal
	 * (non-clustered) raid array.
	 */
8948 set_mask_bits(&mddev->sb_flags, 0,
8949 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8950
8951 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8952 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8953 mddev->delta_disks > 0 &&
8954 mddev->pers->finish_reshape &&
8955 mddev->pers->size &&
8956 mddev->queue) {
8957 mddev_lock_nointr(mddev);
8958 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8959 mddev_unlock(mddev);
8960 if (!mddev_is_clustered(mddev)) {
8961 set_capacity(mddev->gendisk, mddev->array_sectors);
8962 revalidate_disk(mddev->gendisk);
8963 }
8964 }
8965
8966 spin_lock(&mddev->lock);
8967 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8968
8969 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8970 mddev->resync_min = 0;
8971 mddev->resync_max = MaxSector;
8972 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8973 mddev->resync_min = mddev->curr_resync_completed;
8974 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8975 mddev->curr_resync = 0;
8976 spin_unlock(&mddev->lock);
8977
8978 wake_up(&resync_wait);
8979 md_wakeup_thread(mddev->thread);
8980 return;
8981}
8982EXPORT_SYMBOL_GPL(md_do_sync);
8983
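/*
 * Remove any failed (or explicitly requested) devices that are no longer in
 * use, then offer idle spares to the personality via ->hot_add_disk().
 * Returns the number of spares that still need to be synced, which is used
 * to decide whether a recovery thread should be started.
 */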
8984static int remove_and_add_spares(struct mddev *mddev,
8985 struct md_rdev *this)
8986{
8987 struct md_rdev *rdev;
8988 int spares = 0;
8989 int removed = 0;
8990 bool remove_some = false;
8991
8992 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
8993
8994 return 0;
8995
8996 rdev_for_each(rdev, mddev) {
8997 if ((this == NULL || rdev == this) &&
8998 rdev->raid_disk >= 0 &&
8999 !test_bit(Blocked, &rdev->flags) &&
9000 test_bit(Faulty, &rdev->flags) &&
9001 atomic_read(&rdev->nr_pending)==0) {
			/*
			 * Faulty non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented, never get Faulty
			 * cleared, and never get Blocked set.  So we can
			 * safely wait for them to be removed from the list.
			 */
9007 remove_some = true;
9008 set_bit(RemoveSynchronized, &rdev->flags);
9009 }
9010 }
9011
9012 if (remove_some)
9013 synchronize_rcu();
9014 rdev_for_each(rdev, mddev) {
9015 if ((this == NULL || rdev == this) &&
9016 rdev->raid_disk >= 0 &&
9017 !test_bit(Blocked, &rdev->flags) &&
9018 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9019 (!test_bit(In_sync, &rdev->flags) &&
9020 !test_bit(Journal, &rdev->flags))) &&
9021 atomic_read(&rdev->nr_pending)==0)) {
9022 if (mddev->pers->hot_remove_disk(
9023 mddev, rdev) == 0) {
9024 sysfs_unlink_rdev(mddev, rdev);
9025 rdev->saved_raid_disk = rdev->raid_disk;
9026 rdev->raid_disk = -1;
9027 removed++;
9028 }
9029 }
9030 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9031 clear_bit(RemoveSynchronized, &rdev->flags);
9032 }
9033
9034 if (removed && mddev->kobj.sd)
9035 sysfs_notify(&mddev->kobj, NULL, "degraded");
9036
9037 if (this && removed)
9038 goto no_add;
9039
9040 rdev_for_each(rdev, mddev) {
9041 if (this && this != rdev)
9042 continue;
9043 if (test_bit(Candidate, &rdev->flags))
9044 continue;
9045 if (rdev->raid_disk >= 0 &&
9046 !test_bit(In_sync, &rdev->flags) &&
9047 !test_bit(Journal, &rdev->flags) &&
9048 !test_bit(Faulty, &rdev->flags))
9049 spares++;
9050 if (rdev->raid_disk >= 0)
9051 continue;
9052 if (test_bit(Faulty, &rdev->flags))
9053 continue;
9054 if (!test_bit(Journal, &rdev->flags)) {
9055 if (mddev->ro &&
9056 ! (rdev->saved_raid_disk >= 0 &&
9057 !test_bit(Bitmap_sync, &rdev->flags)))
9058 continue;
9059
9060 rdev->recovery_offset = 0;
9061 }
9062 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
9065 if (!test_bit(Journal, &rdev->flags))
9066 spares++;
9067 md_new_event(mddev);
9068 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9069 }
9070 }
9071no_add:
9072 if (removed)
9073 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9074 return spares;
9075}
9076
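/*
 * Deferred work item that actually starts the resync/recovery thread.  If
 * the thread cannot be created, all pending recovery flags are cleared
 * again so the array does not get stuck waiting for it.
 */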
9077static void md_start_sync(struct work_struct *ws)
9078{
9079 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9080
9081 mddev->sync_thread = md_register_thread(md_do_sync,
9082 mddev,
9083 "resync");
9084 if (!mddev->sync_thread) {
9085 pr_warn("%s: could not start resync thread...\n",
9086 mdname(mddev));
9087
9088 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9089 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9090 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9091 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9092 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9093 wake_up(&resync_wait);
9094 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9095 &mddev->recovery))
9096 if (mddev->sysfs_action)
9097 sysfs_notify_dirent_safe(mddev->sysfs_action);
9098 } else
9099 md_wakeup_thread(mddev->sync_thread);
9100 sysfs_notify_dirent_safe(mddev->sysfs_action);
9101 md_new_event(mddev);
9102}
9103
9104 /*
9105  * This routine is called regularly by each array's daemon thread.
9106  * It looks for work that needs doing, such as:
9107  *  - writing out a pending superblock update, even while the array
9108  *    is suspended (MD_ALLOW_SB_UPDATE),
9109  *  - running the periodic bitmap daemon work,
9110  *  - dropping into immediate safe mode when signalled,
9111  *  - removing failed devices and adding available spares,
9112  *  - starting a resync/recovery/reshape thread when one is needed,
9113  *  - reaping a sync thread that has finished.
9114  * All real work is done under the reconfig mutex, taken with
9115  * mddev_trylock() so the daemon thread never blocks here.
9116  */
9126void md_check_recovery(struct mddev *mddev)
9127{
9128 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9129 /* The array is suspended but its superblock still needs writing.
9130  * MD_UPDATING_SB plus the re-check of MD_ALLOW_SB_UPDATE closes the
9131  * race with that flag being cleared; sb_wait waiters are woken after. */
9132 set_bit(MD_UPDATING_SB, &mddev->flags);
9133 smp_mb__after_atomic();
9134 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9135 md_update_sb(mddev, 0);
9136 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9137 wake_up(&mddev->sb_wait);
9138 }
9139
9140 if (mddev->suspended)
9141 return;
9142
9143 if (mddev->bitmap)
9144 md_bitmap_daemon_work(mddev);
9145
9146 if (signal_pending(current)) {
9147 if (mddev->pers->sync_request && !mddev->external) {
9148 pr_debug("md: %s in immediate safe mode\n",
9149 mdname(mddev));
9150 mddev->safemode = 2;
9151 }
9152 flush_signals(current);
9153 }
9154
9155 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9156 return;
9157 if ( ! (
9158 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9159 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9160 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9161 (mddev->external == 0 && mddev->safemode == 1) ||
9162 (mddev->safemode == 2
9163 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9164 ))
9165 return;
9166
9167 if (mddev_trylock(mddev)) {
9168 int spares = 0;
9169 bool try_set_sync = mddev->safemode != 0;
9170
9171 if (!mddev->external && mddev->safemode == 1)
9172 mddev->safemode = 0;
9173
9174 if (mddev->ro) {
9175 struct md_rdev *rdev;
9176 if (!mddev->external && mddev->in_sync)
9177 /* The 'Blocked' flag is not needed here: failed devices will be
9178  * recorded if the array is switched back to read-write.  Leaving
9179  * it set would only prevent the device from being removed. */
9182 rdev_for_each(rdev, mddev)
9183 clear_bit(Blocked, &rdev->flags);
9184
9185 /* On a read-only array we can:
9186  *  - remove failed devices,
9187  *  - add devices that are already in-sync (if the array itself
9188  *    is in-sync), so any spares added here can be activated
9189  *    immediately. */
9191 remove_and_add_spares(mddev, NULL);
9192
9193 /* No sync thread runs here, but ->spare_active() must still be
9194  * called and saved_raid_disk cleared, so reap a fake interrupted sync. */
9195 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9196 md_reap_sync_thread(mddev);
9197 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9198 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9199 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9200 goto unlock;
9201 }
9202
9203 if (mddev_is_clustered(mddev)) {
9204 struct md_rdev *rdev;
9205
9206 /* Kick out any device that another cluster node has asked to
9207  * remove and that is no longer part of the array. */
9208 rdev_for_each(rdev, mddev) {
9209 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9210 rdev->raid_disk < 0)
9211 md_kick_rdev_from_array(rdev);
9212 }
9213 }
9214
9215 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9216 spin_lock(&mddev->lock);
9217 set_in_sync(mddev);
9218 spin_unlock(&mddev->lock);
9219 }
9220
9221 if (mddev->sb_flags)
9222 md_update_sb(mddev, 0);
9223
9224 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9225 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9226 /* resync/recovery still happening */
9227 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9228 goto unlock;
9229 }
9230 if (mddev->sync_thread) {
9231 md_reap_sync_thread(mddev);
9232 goto unlock;
9233 }
9234
9235 /* Set RUNNING before clearing NEEDED to avoid any transient
9236  * value of "sync_action" becoming visible through sysfs. */
9237 mddev->curr_resync_completed = 0;
9238 spin_lock(&mddev->lock);
9239 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9240 spin_unlock(&mddev->lock);
9241
9242 /* Clear bits that mean nothing now but might have been left
9243  * set by an earlier, aborted attempt. */
9244 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9245 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9246
9247 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9248 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9249 goto not_running;
9250
9251 /*
9252  * No recovery is running: remove any failed drives, then add
9253  * spares if possible.  Spares are also removed and re-added so
9254  * that the personality gets a chance to fail the re-add.
9255  */
9256
9257 if (mddev->reshape_position != MaxSector) {
9258 if (mddev->pers->check_reshape == NULL ||
9259 mddev->pers->check_reshape(mddev) != 0)
9260 /* cannot continue the reshape */
9261 goto not_running;
9262 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9263 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9264 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9265 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9266 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9267 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9268 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9269 } else if (mddev->recovery_cp < MaxSector) {
9270 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9271 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9272 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9273 /* nothing to be done */
9274 goto not_running;
9275
9276 if (mddev->pers->sync_request) {
9277 if (spares) {
9278 /* We are adding devices to an array that keeps its bitmap on
9279  * every member, so make sure all bitmap pages get rewritten to
9280  * the newly added devices. */
9282 md_bitmap_write_all(mddev->bitmap);
9283 }
9284 INIT_WORK(&mddev->del_work, md_start_sync);
9285 queue_work(md_misc_wq, &mddev->del_work);
9286 goto unlock;
9287 }
9288 not_running:
9289 if (!mddev->sync_thread) {
9290 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9291 wake_up(&resync_wait);
9292 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9293 &mddev->recovery))
9294 if (mddev->sysfs_action)
9295 sysfs_notify_dirent_safe(mddev->sysfs_action);
9296 }
9297 unlock:
9298 wake_up(&mddev->sb_wait);
9299 mddev_unlock(mddev);
9300 }
9301}
9302EXPORT_SYMBOL(md_check_recovery);
9303
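/*
 * Called with the reconfig mutex held once a resync/recovery/reshape
 * thread has finished: unregister the thread, activate any spares,
 * finish a reshape if one was running, write the superblocks and
 * clear the recovery state bits.
 */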
9304void md_reap_sync_thread(struct mddev *mddev)
9305{
9306 struct md_rdev *rdev;
9307 sector_t old_dev_sectors = mddev->dev_sectors;
9308 bool is_reshaped = false;
9309
9310 /* resync/recovery has finished: collect the result */
9311 md_unregister_thread(&mddev->sync_thread);
9312 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9313 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9314 mddev->degraded != mddev->raid_disks) {
9315 /* success: activate any spares that completed their recovery
9316  * and report any change in "degraded" via sysfs. */
9317 if (mddev->pers->spare_active(mddev)) {
9318 sysfs_notify(&mddev->kobj, NULL,
9319 "degraded");
9320 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9321 }
9322 }
9323 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9324 mddev->pers->finish_reshape) {
9325 mddev->pers->finish_reshape(mddev);
9326 if (mddev_is_clustered(mddev))
9327 is_reshaped = true;
9328 }
9329
9330 /* If the array is no longer degraded, any saved_raid_disk
9331  * information is stale and must be discarded. */
9333 if (!mddev->degraded)
9334 rdev_for_each(rdev, mddev)
9335 rdev->saved_raid_disk = -1;
9336
9337 md_update_sb(mddev, 1);
9338
9339 /* md_update_sb() above cleared MD_SB_CHANGE_PENDING, so a
9340  * clustered resync can now be marked finished. */
9341 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9342 md_cluster_ops->resync_finish(mddev);
9343 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9344 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9345 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9346 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9347 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9348 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9349
9350 /* The array size may have been changed by md_update_sb() and
9351  * MD_RECOVERY_RESHAPE is now clear, so tell the other cluster
9352  * nodes about the new size. */
9354 if (mddev_is_clustered(mddev) && is_reshaped
9355 && !test_bit(MD_CLOSING, &mddev->flags))
9356 md_cluster_ops->update_size(mddev, old_dev_sectors);
9357 wake_up(&resync_wait);
9358 /* flag recovery needed just to double check */
9359 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9360 sysfs_notify_dirent_safe(mddev->sysfs_action);
9361 md_new_event(mddev);
9362 if (mddev->event_work.func)
9363 queue_work(md_misc_wq, &mddev->event_work);
9364}
9365EXPORT_SYMBOL(md_reap_sync_thread);
9366
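/*
 * Wait (up to five seconds) for a device to stop being Blocked or
 * BlockedBadBlocks, then drop the pending reference the caller holds.
 */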
9367void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9368{
9369 sysfs_notify_dirent_safe(rdev->sysfs_state);
9370 wait_event_timeout(rdev->blocked_wait,
9371 !test_bit(Blocked, &rdev->flags) &&
9372 !test_bit(BlockedBadBlocks, &rdev->flags),
9373 msecs_to_jiffies(5000));
9374 rdev_dec_pending(rdev, mddev);
9375}
9376EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9377
9378void md_finish_reshape(struct mddev *mddev)
9379{
9380 /* called by the personality module when a reshape completes */
9381 struct md_rdev *rdev;
9382
9383 rdev_for_each(rdev, mddev) {
9384 if (rdev->data_offset > rdev->new_data_offset)
9385 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9386 else
9387 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9388 rdev->data_offset = rdev->new_data_offset;
9389 }
9390}
9391EXPORT_SYMBOL(md_finish_reshape);
9392
9393 /* Bad-block management: rdev_set_badblocks() records a bad range on a
9394  * device.  It returns 1 if the range was stored (and a superblock update
9395  * scheduled), 0 if it could not be stored, in which case the caller
 * typically has to fail the whole device instead. */
9396int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9397 int is_new)
9398{
9399 struct mddev *mddev = rdev->mddev;
9400 int rv;
9401 if (is_new)
9402 s += rdev->new_data_offset;
9403 else
9404 s += rdev->data_offset;
9405 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9406 if (rv == 0) {
9407 /* recorded: make sure the change gets written out promptly */
9408 if (test_bit(ExternalBbl, &rdev->flags))
9409 sysfs_notify(&rdev->kobj, NULL,
9410 "unacknowledged_bad_blocks");
9411 sysfs_notify_dirent_safe(rdev->sysfs_state);
9412 set_mask_bits(&mddev->sb_flags, 0,
9413 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9414 md_wakeup_thread(rdev->mddev->thread);
9415 return 1;
9416 } else
9417 return 0;
9418}
9419EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9420
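/*
 * Clear a previously recorded bad range, for example after the sectors
 * have been successfully rewritten, and notify sysfs when an external
 * bad-block log is in use.
 */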
9421int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9422 int is_new)
9423{
9424 int rv;
9425 if (is_new)
9426 s += rdev->new_data_offset;
9427 else
9428 s += rdev->data_offset;
9429 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9430 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9431 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
9432 return rv;
9433}
9434EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9435
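/*
 * Reboot notifier: stop writes on every running array and put
 * persistent arrays into immediate safe mode so their superblocks are
 * clean when the machine goes down.
 */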
9436static int md_notify_reboot(struct notifier_block *this,
9437 unsigned long code, void *x)
9438{
9439 struct list_head *tmp;
9440 struct mddev *mddev;
9441 int need_delay = 0;
9442
9443 for_each_mddev(mddev, tmp) {
9444 if (mddev_trylock(mddev)) {
9445 if (mddev->pers)
9446 __md_stop_writes(mddev);
9447 if (mddev->persistent)
9448 mddev->safemode = 2;
9449 mddev_unlock(mddev);
9450 }
9451 need_delay = 1;
9452 }
9453
9454 /*
9455  * Some devices are known to lose writes issued very late in the
9456  * shutdown sequence, so give the superblock writes issued above a
9457  * moment to complete before the machine reboots.
9458  */
9459 if (need_delay)
9460 mdelay(1000*1);
9461
9462 return NOTIFY_DONE;
9463}
9464
9465static struct notifier_block md_notifier = {
9466 .notifier_call = md_notify_reboot,
9467 .next = NULL,
9468 .priority = INT_MAX,
9469};
9470
9471static void md_geninit(void)
9472{
9473 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9474
9475 proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
9476}
9477
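/*
 * Module / boot-time initialisation: create the md workqueues and
 * register the "md" and "mdp" block majors, their block regions, the
 * reboot notifier and the sysctl table.
 */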
9478static int __init md_init(void)
9479{
9480 int ret = -ENOMEM;
9481
9482 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9483 if (!md_wq)
9484 goto err_wq;
9485
9486 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9487 if (!md_misc_wq)
9488 goto err_misc_wq;
9489
9490 md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
9491 if (!md_rdev_misc_wq)
9492 goto err_rdev_misc_wq;
9493
9494 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
9495 goto err_md;
9496
9497 if ((ret = register_blkdev(0, "mdp")) < 0)
9498 goto err_mdp;
9499 mdp_major = ret;
9500
9501 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
9502 md_probe, NULL, NULL);
9503 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
9504 md_probe, NULL, NULL);
9505
9506 register_reboot_notifier(&md_notifier);
9507 raid_table_header = register_sysctl_table(raid_root_table);
9508
9509 md_geninit();
9510 return 0;
9511
9512err_mdp:
9513 unregister_blkdev(MD_MAJOR, "md");
9514err_md:
9515 destroy_workqueue(md_rdev_misc_wq);
9516err_rdev_misc_wq:
9517 destroy_workqueue(md_misc_wq);
9518err_misc_wq:
9519 destroy_workqueue(md_wq);
9520err_wq:
9521 return ret;
9522}
9523
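/*
 * md-cluster helper: compare the superblock just re-read from 'rdev'
 * with the local state and apply whatever another node changed (array
 * size, device roles, reshape position, event count).
 */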
9524static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9525{
9526 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9527 struct md_rdev *rdev2;
9528 int role, ret;
9529 char b[BDEVNAME_SIZE];
9530
9531 /*
9532  * If the array size was changed on another node, resize the
9533  * local array to match and update the bitmap superblock.
9534  */
9535 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9536 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9537 if (ret)
9538 pr_info("md-cluster: resize failed\n");
9539 else
9540 md_bitmap_update_sb(mddev->bitmap);
9541 }
9542
9543 /* Check for a change of role in each active device */
9544 rdev_for_each(rdev2, mddev) {
9545 if (test_bit(Faulty, &rdev2->flags))
9546 continue;
9547
9548 /* role this device has according to the remote superblock */
9549 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9550
9551 if (test_bit(Candidate, &rdev2->flags)) {
9552 if (role == 0xfffe) {
9553 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9554 md_kick_rdev_from_array(rdev2);
9555 continue;
9556 }
9557 else
9558 clear_bit(Candidate, &rdev2->flags);
9559 }
9560
9561 if (role != rdev2->raid_disk) {
9562 /* The device was activated on another node (unless a reshape
9563  * is in progress), so try to activate the corresponding spare
9564  * locally as well. */
9565 if (rdev2->raid_disk == -1 && role != 0xffff &&
9566 !(le32_to_cpu(sb->feature_map) &
9567 MD_FEATURE_RESHAPE_ACTIVE)) {
9568 rdev2->saved_raid_disk = role;
9569 ret = remove_and_add_spares(mddev, rdev2);
9570 pr_info("Activated spare: %s\n",
9571 bdevname(rdev2->bdev,b));
9572 /* Wake up mddev->thread so the array can resync with the
9573  * newly activated disk. */
9574 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9575 md_wakeup_thread(mddev->thread);
9576 }
9577
9578 /* The remote superblock marks this device as failed or removed.
9579  * Do only the minimum needed to record that here; the node that
9580  * raised the error performs the actual recovery. */
9581
9582 if ((role == 0xfffe) || (role == 0xfffd)) {
9583 md_error(mddev, rdev2);
9584 clear_bit(Blocked, &rdev2->flags);
9585 }
9586 }
9587 }
9588
9589 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
9590 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9591
9592 /*
9593  * update_raid_disks() has already refreshed mddev->delta_disks,
9594  * so this is the right time to look at the reshape state. */
9595
9596 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9597 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9598 /*
9599  * A reshape is running on the remote node: pick up its
9600  * reshape_position and start the same reshape locally.
9601  */
9602 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9603 if (mddev->pers->update_reshape_pos)
9604 mddev->pers->update_reshape_pos(mddev);
9605 if (mddev->pers->start_reshape)
9606 mddev->pers->start_reshape(mddev);
9607 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9608 mddev->reshape_position != MaxSector &&
9609 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9610 /* the reshape has just finished on the other node */
9611 mddev->reshape_position = MaxSector;
9612 if (mddev->pers->update_reshape_pos)
9613 mddev->pers->update_reshape_pos(mddev);
9614 }
9615
9616 /* finally, bring the event count up to date */
9617 mddev->events = le64_to_cpu(sb->events);
9618}
9619
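/*
 * Re-read the superblock of a single rdev from disk, restoring the old
 * superblock page if the re-read fails.
 */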
9620static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9621{
9622 int err;
9623 struct page *swapout = rdev->sb_page;
9624 struct mdp_superblock_1 *sb;
9625
9626 /* Keep the old superblock page in 'swapout' so it can be
9627  * restored if re-reading the superblock fails below. */
9629 rdev->sb_page = NULL;
9630 err = alloc_disk_sb(rdev);
9631 if (err == 0) {
9632 ClearPageUptodate(rdev->sb_page);
9633 rdev->sb_loaded = 0;
9634 err = super_types[mddev->major_version].
9635 load_super(rdev, NULL, mddev->minor_version);
9636 }
9637 if (err < 0) {
9638 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9639 __func__, __LINE__, rdev->desc_nr, err);
9640 if (rdev->sb_page)
9641 put_page(rdev->sb_page);
9642 rdev->sb_page = swapout;
9643 rdev->sb_loaded = 1;
9644 return err;
9645 }
9646
9647 sb = page_address(rdev->sb_page);
9648
9649 /* Pick up the recovery offset recorded in the re-read
9650  * superblock, if any. */
9652 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9653 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9654
9655 /* The other node finished recovering this device: call
9656  * spare_active() so it is marked In_sync locally and the
9657  * degraded count is dropped. */
9658 if (rdev->recovery_offset == MaxSector &&
9659 !test_bit(In_sync, &rdev->flags) &&
9660 mddev->pers->spare_active(mddev))
9661 sysfs_notify(&mddev->kobj, NULL, "degraded");
9662
9663 put_page(swapout);
9664 return 0;
9665}
9666
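/*
 * Called when another cluster node has updated the superblock of the
 * device with descriptor number 'nr': re-read that device, apply the
 * changes, then refresh the remaining devices.
 */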
9667void md_reload_sb(struct mddev *mddev, int nr)
9668{
9669 struct md_rdev *rdev;
9670 int err;
9671
9672 /* find the rdev with the given descriptor number */
9673 rdev_for_each_rcu(rdev, mddev) {
9674 if (rdev->desc_nr == nr)
9675 break;
9676 }
9677
9678 if (!rdev || rdev->desc_nr != nr) {
9679 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9680 return;
9681 }
9682
9683 err = read_rdev(mddev, rdev);
9684 if (err < 0)
9685 return;
9686
9687 check_sb_changes(mddev, rdev);
9688
9689 /* re-read every other non-faulty rdev to pick up the new state */
9690 rdev_for_each_rcu(rdev, mddev) {
9691 if (!test_bit(Faulty, &rdev->flags))
9692 read_rdev(mddev, rdev);
9693 }
9694}
9695EXPORT_SYMBOL(md_reload_sb);
9696
9697#ifndef MODULE
9698
9699 /*
9700  * Boot-time autodetection: partitions of type "Linux raid autodetect"
9701  * are collected here and assembled into arrays once md has initialised.
9702  */
9703
9704static DEFINE_MUTEX(detected_devices_mutex);
9705static LIST_HEAD(all_detected_devices);
9706struct detected_devices_node {
9707 struct list_head list;
9708 dev_t dev;
9709};
9710
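/*
 * Record a device found during boot-time partition scanning so that
 * autostart_arrays() can later try to assemble it into an array.
 */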
9711void md_autodetect_dev(dev_t dev)
9712{
9713 struct detected_devices_node *node_detected_dev;
9714
9715 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9716 if (node_detected_dev) {
9717 node_detected_dev->dev = dev;
9718 mutex_lock(&detected_devices_mutex);
9719 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9720 mutex_unlock(&detected_devices_mutex);
9721 }
9722}
9723
9724static void autostart_arrays(int part)
9725{
9726 struct md_rdev *rdev;
9727 struct detected_devices_node *node_detected_dev;
9728 dev_t dev;
9729 int i_scanned, i_passed;
9730
9731 i_scanned = 0;
9732 i_passed = 0;
9733
9734 pr_info("md: Autodetecting RAID arrays.\n");
9735
9736 mutex_lock(&detected_devices_mutex);
9737 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9738 i_scanned++;
9739 node_detected_dev = list_entry(all_detected_devices.next,
9740 struct detected_devices_node, list);
9741 list_del(&node_detected_dev->list);
9742 dev = node_detected_dev->dev;
9743 kfree(node_detected_dev);
9744 mutex_unlock(&detected_devices_mutex);
9745 rdev = md_import_device(dev, 0, 90);
9746 mutex_lock(&detected_devices_mutex);
9747 if (IS_ERR(rdev))
9748 continue;
9749
9750 if (test_bit(Faulty, &rdev->flags))
9751 continue;
9752
9753 set_bit(AutoDetected, &rdev->flags);
9754 list_add(&rdev->same_set, &pending_raid_disks);
9755 i_passed++;
9756 }
9757 mutex_unlock(&detected_devices_mutex);
9758
9759 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9760
9761 autorun_devices(part);
9762}
9763
9764#endif
9765
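/*
 * Module unload: unregister block devices, the reboot notifier and the
 * sysctl table, wake any /proc/mdstat pollers, stop all arrays and
 * finally tear down the workqueues.
 */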
9766static __exit void md_exit(void)
9767{
9768 struct mddev *mddev;
9769 struct list_head *tmp;
9770 int delay = 1;
9771
9772 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
9773 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
9774
9775 unregister_blkdev(MD_MAJOR,"md");
9776 unregister_blkdev(mdp_major, "mdp");
9777 unregister_reboot_notifier(&md_notifier);
9778 unregister_sysctl_table(raid_table_header);
9779
9780 /* The module cannot be unloaded while a process is sleeping in
9781  * select()/poll() on /proc/mdstat, so wake such waiters and give
9782  * them time to leave before tearing everything down. */
9783 md_unloading = 1;
9784 while (waitqueue_active(&md_event_waiters)) {
9785 /* not safe to leave yet */
9786 wake_up(&md_event_waiters);
9787 msleep(delay);
9788 delay += delay;
9789 }
9790 remove_proc_entry("mdstat", NULL);
9791
9792 for_each_mddev(mddev, tmp) {
9793 export_array(mddev);
9794 mddev->ctime = 0;
9795 mddev->hold_active = 0;
9796
9797 /* for_each_mddev() calls mddev_put() at the end of each iteration;
9798  * with the mddev now fully cleared this schedules its destruction on
9799  * a workqueue, which the destroy_workqueue() calls below wait for. */
9801
9802 }
9803 destroy_workqueue(md_rdev_misc_wq);
9804 destroy_workqueue(md_misc_wq);
9805 destroy_workqueue(md_wq);
9806}
9807
9808subsys_initcall(md_init);
9809module_exit(md_exit)
9810
9811static int get_ro(char *buffer, const struct kernel_param *kp)
9812{
9813 return sprintf(buffer, "%d\n", start_readonly);
9814}
9815static int set_ro(const char *val, const struct kernel_param *kp)
9816{
9817 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9818}
9819
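/*
 * Module parameters, typically visible under
 * /sys/module/md_mod/parameters/ when md is built as a module:
 * start_ro makes newly started arrays begin in auto-read-only mode,
 * start_dirty_degraded allows assembling arrays that are both dirty
 * and degraded (e.g. the documented boot option
 * md_mod.start_dirty_degraded=1), new_array creates a named array,
 * and create_on_open controls whether opening a device node creates
 * the array.
 */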
9820module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9821module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9822module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9823module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9824
9825MODULE_LICENSE("GPL");
9826MODULE_DESCRIPTION("MD RAID framework");
9827MODULE_ALIAS("md");
9828MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9829