// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * md.c : Multiple Devices driver for Linux
 */
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/part_stat.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is a list of registered personalities protected
 * by pers_lock.
 * pers_lock does extra service to protect accesses to
 * mddev->thread when the mutex cannot be held.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
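/*
 * A non-zero per-array sync_speed_min/max (set through sysfs) overrides
 * the global sysctl defaults above.
 */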
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

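/* Free the write-serialization state attached to one rdev, if any. */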
static void rdev_uninit_serial(struct md_rdev *rdev)
{
	if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
		return;

	kvfree(rdev->serial);
	rdev->serial = NULL;
}

static void rdevs_uninit_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		rdev_uninit_serial(rdev);
}

static int rdev_init_serial(struct md_rdev *rdev)
{
	/* serial_nums equals with BARRIER_BUCKETS_NR */
	int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
	struct serial_in_rdev *serial = NULL;

	if (test_bit(CollisionCheck, &rdev->flags))
		return 0;

	serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
			  GFP_KERNEL);
	if (!serial)
		return -ENOMEM;

	for (i = 0; i < serial_nums; i++) {
		struct serial_in_rdev *serial_tmp = &serial[i];

		spin_lock_init(&serial_tmp->serial_lock);
		serial_tmp->serial_rb = RB_ROOT_CACHED;
		init_waitqueue_head(&serial_tmp->serial_io_wait);
	}

	rdev->serial = serial;
	set_bit(CollisionCheck, &rdev->flags);

	return 0;
}

static int rdevs_init_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;
	int ret = 0;

	rdev_for_each(rdev, mddev) {
		ret = rdev_init_serial(rdev);
		if (ret)
			break;
	}

	/* Free all resources if the pool has not been created yet */
	if (ret && !mddev->serial_info_pool)
		rdevs_uninit_serial(mddev);

	return ret;
}

/*
 * rdev needs serialization when it is a multi-queue device flagged
 * with WriteMostly and the array has write-behind enabled.
 */
static int rdev_need_serial(struct md_rdev *rdev)
{
	return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
		rdev->bdev->bd_queue->nr_hw_queues != 1 &&
		test_bit(WriteMostly, &rdev->flags));
}

/*
 * Init the serialization resources for @rdev (or for every rdev of
 * @mddev when @rdev is NULL) and create the serial_info mempool on
 * first use.  The array is suspended around the change unless the
 * caller already did so (@is_suspend).
 */
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			      bool is_suspend)
{
	int ret = 0;

	if (rdev && !rdev_need_serial(rdev) &&
	    !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (!is_suspend)
		mddev_suspend(mddev);

	if (!rdev)
		ret = rdevs_init_serial(mddev);
	else
		ret = rdev_init_serial(rdev);
	if (ret)
		goto abort;

	if (mddev->serial_info_pool == NULL) {
		unsigned int noio_flag;

		noio_flag = memalloc_noio_save();
		mddev->serial_info_pool =
			mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
		memalloc_noio_restore(noio_flag);
		if (!mddev->serial_info_pool) {
			rdevs_uninit_serial(mddev);
			pr_err("can't alloc memory pool for serialization\n");
		}
	}

abort:
	if (!is_suspend)
		mddev_resume(mddev);
}

/*
 * Free the serialization resources for @rdev (or, when @rdev is NULL,
 * for every rdev that no longer needs them), and destroy
 * serial_info_pool once no remaining rdev is using it.  The array is
 * suspended around the change unless the caller already did so
 * (@is_suspend).
 */
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			       bool is_suspend)
{
	if (rdev && !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (mddev->serial_info_pool) {
		struct md_rdev *temp;
		int num = 0;

		if (!is_suspend)
			mddev_suspend(mddev);
		rdev_for_each(temp, mddev) {
			if (!rdev) {
				if (!mddev->serialize_policy ||
				    !rdev_need_serial(temp))
					rdev_uninit_serial(temp);
				else
					num++;
			} else if (temp != rdev &&
				   test_bit(CollisionCheck, &temp->flags))
				num++;
		}

		if (rdev)
			rdev_uninit_serial(rdev);

		if (num)
			pr_info("The mempool could be used by other devices\n");
		else {
			mempool_destroy(mddev->serial_info_pool);
			mddev->serial_info_pool = NULL;
		}
		if (!is_suspend)
			mddev_resume(mddev);
	}
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter.
 * This can avoid races.
 * Setting create_on_open to false disables the original mechanism
 * so all the races disappear.
 */
static bool create_on_open = true;

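/*
 * Allocate a bio from the per-array bioset when it is initialized,
 * falling back to plain bio_alloc() otherwise.
 */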
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->bio_set))
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

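/*
 * Single-segment GFP_NOIO bio for synchronous metadata I/O, taken from
 * the array's sync_set bioset when available.
 */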
static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->sync_set))
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables to iterate over all existing md arrays
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while owning
 * a reference to the current mddev must mddev_put it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

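/*
 * Hand a bio to the personality's ->make_request, waiting while the
 * affected range is suspended and holding active_io across the call.
 */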
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

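/*
 * make_request entry point for the md request queue: reject writes to
 * broken or read-only arrays, split oversized bios, then account the
 * I/O statistics after md_handle_request() has routed the bio.
 */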
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	const int sgrp = op_stat_group(bio_op(bio));
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;

	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	blk_queue_split(q, &bio);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	part_stat_lock();
	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

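/*
 * Undo mddev_suspend(): once the last nested suspend is dropped, let the
 * personality resume and kick the array and sync threads.
 */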
void mddev_resume(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}

/*
 * Generic flush handling for md
 */

static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to avoid a
	 * deadlock, because other bios passed md_handle_request suspend check
	 * could wait for this and below md_handle_request could wait for those
	 * bios because of suspend check
	 */
	mddev->last_flush = mddev->start_flush;
	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0) {
		/* an empty barrier - all done */
		bio_endio(bio);
	} else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}

/*
 * Manages consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or is
 * deferred to the flush worker; returns false if the caller still needs
 * to submit the bio, with the flush flag cleared.
 */
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_after(mddev->last_flush, start),
			    mddev->lock);
	if (!ktime_after(mddev->last_flush, start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		/* flush was performed for some other bio while we waited. */
		if (bio->bi_iter.bi_size == 0)
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			return false;
		}
	}
	return true;
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);

		/*
		 * Call queue_work inside the spinlock so that
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

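/*
 * Find the mddev for @unit, taking a reference, or allocate and register
 * a new one.  A zero @unit means "pick a free minor" (>= 512) and hold
 * the array active until it is explicitly stopped.
 */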
static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1 << MdpMinorShift) - 1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold set sysfs_active while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

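/*
 * Look up a registered personality, matching by level when one is given,
 * otherwise by name.  Callers hold pers_lock.
 */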
static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel) == 0)
			return pers;
	}
	return NULL;
}

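/*
 * 0.90 superblocks sit near the end of the device; map the device size
 * to the superblock's sector offset.
 */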
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

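/* Completion handler for superblock writes issued by md_super_write(). */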
static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: super_written gets error=%d\n", bio->bi_status);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = md_bio_alloc_sync(mddev);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}

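/*
 * Synchronously read or write @size bytes at @sector of an rdev (or of
 * its metadata device for metadata I/O).  Returns 1 on success, 0 on
 * failure.
 */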
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = md_bio_alloc_sync(rdev->mddev);
	int ret;

	if (metadata_op && rdev->meta_bdev)
		bio_set_dev(bio, rdev->meta_bdev);
	else
		bio_set_dev(bio, rdev->bdev);
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_status;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev, b));
	return -EINVAL;
}

static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1), GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2), GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

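/* Fold a 32-bit sum into 16 bits, propagating the end-around carry. */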
static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32 *)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * for changing the superblock size in an active array.
 *
 * The common routines are:
 *   int load_super(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on rdev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - rdev has a superblock that is compatible with refdev
 *      1 - rdev has a superblock that is compatible and newer than refdev
 *          so rdev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *rdev)
 *      Verify that rdev is acceptable into mddev.
 *      Return:
 *       0:   rdev is acceptable
 *      -EXXX: rdev is not acceptable
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *rdev)
 *      Update the superblock for rdev with data in mddev.
 *      This does not write to disc.
 */
struct super_type {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;
	bool spare_disk = true;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	/* not spare disk, or LEVEL_MULTIPATH */
	if (sb->level == LEVEL_MULTIPATH ||
	    (rdev->desc_nr >= 0 &&
	     rdev->desc_nr < MD_SB_DISKS &&
	     sb->disks[rdev->desc_nr].state &
	     ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
		spare_disk = false;

	if (!refdev) {
		if (!spare_disk)
			ret = 1;
		else
			ret = 0;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!md_uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		if (!md_sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);

		if (!spare_disk && ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}
		if (mddev->level == 0)
			mddev->layout = -1;

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12, &sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active = 0, working = 0, failed = 0, spare = 0, nr_disks = 0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words  = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12, 4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i = 0; i < mddev->raid_disks; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}

/*
 * version 1 superblock
 */

static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32 *)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16 *) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

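/*
 * load_super for 1.x metadata
 */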
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;
	bool spare_disk = true;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		__le64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (__le64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0; i < (sectors << (9-3)); i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
	    sb->level != 0)
		return -EINVAL;

	/* not spare disk, or LEVEL_MULTIPATH */
	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
	    (rdev->desc_nr >= 0 &&
	     rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
	     (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
	      le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
		spare_disk = false;

	if (!refdev) {
		if (!spare_disk)
			ret = 1;
		else
			ret = 0;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev, b),
				bdevname(refdev->bdev, b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (!spare_disk && ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

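/*
 * validate_super for 1.x metadata
 */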
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (mddev->level == 0 &&
		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
			mddev->layout = -1;

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else {
				/*
				 * If the array is FROZEN, then the device can't
				 * be in_sync with rest of array.
				 */
				if (!test_bit(MD_RECOVERY_FROZEN,
					      &mddev->recovery))
					set_bit(In_sync, &rdev->flags);
			}
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

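/*
 * sync_super for 1.x metadata: rebuild rdev's on-disk superblock image
 * from current mddev state (does not write it out).
 */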
static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */

	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		; /* Nothing to do for bad blocks */
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		__le64 *bbp = (__le64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i = 0; i < max_dev; i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata:
	 * the superblock itself, the bitmap and the bad block log
	 * all live between the superblock and the data, so the new
	 * offset must leave room for each of them.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};

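/*
 * Dispatch to the format-specific sync_super, honouring a per-mddev
 * override when one is set.
 */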
static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

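/*
 * Return 1 if the two arrays have component devices that live on the
 * same underlying disk (compared via bd_contains), 0 otherwise.
 */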
2267static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2268{
2269 struct md_rdev *rdev, *rdev2;
2270
2271 rcu_read_lock();
2272 rdev_for_each_rcu(rdev, mddev1) {
2273 if (test_bit(Faulty, &rdev->flags) ||
2274 test_bit(Journal, &rdev->flags) ||
2275 rdev->raid_disk == -1)
2276 continue;
2277 rdev_for_each_rcu(rdev2, mddev2) {
2278 if (test_bit(Faulty, &rdev2->flags) ||
2279 test_bit(Journal, &rdev2->flags) ||
2280 rdev2->raid_disk == -1)
2281 continue;
2282 if (rdev->bdev->bd_contains ==
2283 rdev2->bdev->bd_contains) {
2284 rcu_read_unlock();
2285 return 1;
2286 }
2287 }
2288 }
2289 rcu_read_unlock();
2290 return 0;
2291}
2292
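/* rdevs that have been imported but not yet bound to an array */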
2293static LIST_HEAD(pending_raid_disks);
2294
/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It can only succeed if all working and active component
 * devices are integrity capable with matching profiles.
 */
2302int md_integrity_register(struct mddev *mddev)
2303{
2304 struct md_rdev *rdev, *reference = NULL;
2305
2306 if (list_empty(&mddev->disks))
2307 return 0;
2308 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2309 return 0;
2310 rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
2312 if (test_bit(Faulty, &rdev->flags))
2313 continue;
2314 if (rdev->raid_disk < 0)
2315 continue;
2316 if (!reference) {
			/* Use the first rdev as the reference */
2318 reference = rdev;
2319 continue;
2320 }
		/* does this rdev's profile match the reference profile? */
2322 if (blk_integrity_compare(reference->bdev->bd_disk,
2323 rdev->bdev->bd_disk) < 0)
2324 return -EINVAL;
2325 }
2326 if (!reference || !bdev_get_integrity(reference->bdev))
2327 return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
2332 blk_integrity_register(mddev->gendisk,
2333 bdev_get_integrity(reference->bdev));
2334
2335 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2336 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2337 pr_err("md: failed to create integrity pool for %s\n",
2338 mdname(mddev));
2339 return -EINVAL;
2340 }
2341 return 0;
2342}
2343EXPORT_SYMBOL(md_integrity_register);
2344
/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
2349int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2350{
2351 struct blk_integrity *bi_mddev;
2352 char name[BDEVNAME_SIZE];
2353
2354 if (!mddev->gendisk)
2355 return 0;
2356
2357 bi_mddev = blk_get_integrity(mddev->gendisk);
2358
2359 if (!bi_mddev)
2360 return 0;
2361
2362 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2363 pr_err("%s: incompatible integrity profile for %s\n",
2364 mdname(mddev), bdevname(rdev->bdev, name));
2365 return -ENXIO;
2366 }
2367
2368 return 0;
2369}
2370EXPORT_SYMBOL(md_integrity_add_rdev);
2371
2372static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2373{
2374 char b[BDEVNAME_SIZE];
2375 struct kobject *ko;
2376 int err;
2377
	/* prevent duplicates */
2379 if (find_rdev(mddev, rdev->bdev->bd_dev))
2380 return -EEXIST;
2381
2382 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2383 mddev->pers)
2384 return -EROFS;
2385
	/* make sure rdev->sectors exceeds mddev->dev_sectors */
2387 if (!test_bit(Journal, &rdev->flags) &&
2388 rdev->sectors &&
2389 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2390 if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
2395 if (mddev->level > 0)
2396 return -ENOSPC;
2397 } else
2398 mddev->dev_sectors = rdev->sectors;
2399 }
2400
	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check that the number is not in use.
	 */
2405 rcu_read_lock();
2406 if (rdev->desc_nr < 0) {
2407 int choice = 0;
2408 if (mddev->pers)
2409 choice = mddev->raid_disks;
2410 while (md_find_rdev_nr_rcu(mddev, choice))
2411 choice++;
2412 rdev->desc_nr = choice;
2413 } else {
2414 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2415 rcu_read_unlock();
2416 return -EBUSY;
2417 }
2418 }
2419 rcu_read_unlock();
2420 if (!test_bit(Journal, &rdev->flags) &&
2421 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2422 pr_warn("md: %s: array is limited to %d devices\n",
2423 mdname(mddev), mddev->max_disks);
2424 return -EBUSY;
2425 }
2426 bdevname(rdev->bdev,b);
2427 strreplace(b, '/', '!');
2428
2429 rdev->mddev = mddev;
2430 pr_debug("md: bind<%s>\n", b);
2431
2432 if (mddev->raid_disks)
2433 mddev_create_serial_pool(mddev, rdev, false);
2434
2435 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2436 goto fail;
2437
2438 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2439 if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
2441 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2442
2443 list_add_rcu(&rdev->same_set, &mddev->disks);
2444 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2445
	/* May as well allow recovery to be retried once */
2447 mddev->recovery_disabled++;
2448
2449 return 0;
2450
2451 fail:
2452 pr_warn("md: failed to register dev-%s for %s\n",
2453 b, mdname(mddev));
2454 return err;
2455}
2456
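/* Deferred kobject removal for an rdev, run from md_misc_wq; queued
 * by unbind_rdev_from_array() after an RCU grace period.
 */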
2457static void md_delayed_delete(struct work_struct *ws)
2458{
2459 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2460 kobject_del(&rdev->kobj);
2461 kobject_put(&rdev->kobj);
2462}
2463
2464static void unbind_rdev_from_array(struct md_rdev *rdev)
2465{
2466 char b[BDEVNAME_SIZE];
2467
2468 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2469 list_del_rcu(&rdev->same_set);
2470 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2471 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2472 rdev->mddev = NULL;
2473 sysfs_remove_link(&rdev->kobj, "block");
2474 sysfs_put(rdev->sysfs_state);
2475 rdev->sysfs_state = NULL;
2476 rdev->badblocks.count = 0;
	/* Readers may still be walking the rdev list under RCU, and a
	 * sysfs handler may still hold the kobject, so wait for a grace
	 * period and defer the final kobject_del() to a work item.
	 */
2481 synchronize_rcu();
2482 INIT_WORK(&rdev->del_work, md_delayed_delete);
2483 kobject_get(&rdev->kobj);
2484 queue_work(md_misc_wq, &rdev->del_work);
2485}
2486
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2492static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2493{
2494 int err = 0;
2495 struct block_device *bdev;
2496
2497 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2498 shared ? (struct md_rdev *)lock_rdev : rdev);
2499 if (IS_ERR(bdev)) {
2500 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2501 MAJOR(dev), MINOR(dev));
2502 return PTR_ERR(bdev);
2503 }
2504 rdev->bdev = bdev;
2505 return err;
2506}
2507
2508static void unlock_rdev(struct md_rdev *rdev)
2509{
2510 struct block_device *bdev = rdev->bdev;
2511 rdev->bdev = NULL;
2512 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2513}
2514
2515void md_autodetect_dev(dev_t dev);
2516
2517static void export_rdev(struct md_rdev *rdev)
2518{
2519 char b[BDEVNAME_SIZE];
2520
2521 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2522 md_rdev_clear(rdev);
2523#ifndef MODULE
2524 if (test_bit(AutoDetected, &rdev->flags))
2525 md_autodetect_dev(rdev->bdev->bd_dev);
2526#endif
2527 unlock_rdev(rdev);
2528 kobject_put(&rdev->kobj);
2529}
2530
2531void md_kick_rdev_from_array(struct md_rdev *rdev)
2532{
2533 unbind_rdev_from_array(rdev);
2534 export_rdev(rdev);
2535}
2536EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2537
2538static void export_array(struct mddev *mddev)
2539{
2540 struct md_rdev *rdev;
2541
2542 while (!list_empty(&mddev->disks)) {
2543 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2544 same_set);
2545 md_kick_rdev_from_array(rdev);
2546 }
2547 mddev->raid_disks = 0;
2548 mddev->major_version = 0;
2549}
2550
2551static bool set_in_sync(struct mddev *mddev)
2552{
2553 lockdep_assert_held(&mddev->lock);
2554 if (!mddev->in_sync) {
2555 mddev->sync_checkers++;
2556 spin_unlock(&mddev->lock);
2557 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2558 spin_lock(&mddev->lock);
2559 if (!mddev->in_sync &&
2560 percpu_ref_is_zero(&mddev->writes_pending)) {
2561 mddev->in_sync = 1;
			/*
			 * Ensure ->in_sync is visible before we clear
			 * ->sync_checkers.
			 */
2566 smp_mb();
2567 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2568 sysfs_notify_dirent_safe(mddev->sysfs_state);
2569 }
2570 if (--mddev->sync_checkers == 0)
2571 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2572 }
2573 if (mddev->safemode == 1)
2574 mddev->safemode = 0;
2575 return mddev->in_sync;
2576}
2577
2578static void sync_sbs(struct mddev *mddev, int nospares)
2579{
	/* Update each superblock (in memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2586 struct md_rdev *rdev;
2587 rdev_for_each(rdev, mddev) {
2588 if (rdev->sb_events == mddev->events ||
2589 (nospares &&
2590 rdev->raid_disk < 0 &&
2591 rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
2593 rdev->sb_loaded = 2;
2594 } else {
2595 sync_super(mddev, rdev);
2596 rdev->sb_loaded = 1;
2597 }
2598 }
2599}
2600
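/*
 * Check whether the freshest on-disk superblock still matches the
 * current array state; clustered md uses this to skip a metadata
 * update when nothing of substance has changed.
 */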
2601static bool does_sb_need_changing(struct mddev *mddev)
2602{
2603 struct md_rdev *rdev;
2604 struct mdp_superblock_1 *sb;
2605 int role;
2606
	/* Find a good rdev */
2608 rdev_for_each(rdev, mddev)
2609 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2610 break;
2611
	/* No good device found. */
2613 if (!rdev)
2614 return false;
2615
2616 sb = page_address(rdev->sb_page);
	/* Check if a device has become faulty or a spare become active */
2618 rdev_for_each(rdev, mddev) {
2619 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated */
		if (role == 0xffff && rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags))
			return true;
		/* Device turned faulty? */
		if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2626 return true;
2627 }
2628
	/* Check if any mddev parameters have changed */
2630 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2631 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2632 (mddev->layout != le32_to_cpu(sb->layout)) ||
2633 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2634 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2635 return true;
2636
2637 return false;
2638}
2639
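/*
 * Synchronise all of the array's superblocks with the in-memory
 * state, retrying the writes until they succeed.  With force_change
 * set, superblocks are rewritten even if no device state changed.
 */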
2640void md_update_sb(struct mddev *mddev, int force_change)
2641{
2642 struct md_rdev *rdev;
2643 int sync_req;
2644 int nospares = 0;
2645 int any_badblocks_changed = 0;
2646 int ret = -1;
2647
2648 if (mddev->ro) {
2649 if (force_change)
2650 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2651 return;
2652 }
2653
2654repeat:
2655 if (mddev_is_clustered(mddev)) {
2656 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2657 force_change = 1;
2658 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2659 nospares = 1;
2660 ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has anyone else updated the superblock in the meantime? */
2662 if (!does_sb_need_changing(mddev)) {
2663 if (ret == 0)
2664 md_cluster_ops->metadata_update_cancel(mddev);
2665 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2666 BIT(MD_SB_CHANGE_DEVS) |
2667 BIT(MD_SB_CHANGE_CLEAN));
2668 return;
2669 }
2670 }
2671
	/*
	 * First make sure individual recovery_offsets are correct.
	 * curr_resync_completed can only be used during recovery.
	 * During reshape/resync it might use array-addresses rather
	 * than device addresses.
	 */
2678 rdev_for_each(rdev, mddev) {
2679 if (rdev->raid_disk >= 0 &&
2680 mddev->delta_disks >= 0 &&
2681 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2682 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2683 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2684 !test_bit(Journal, &rdev->flags) &&
2685 !test_bit(In_sync, &rdev->flags) &&
2686 mddev->curr_resync_completed > rdev->recovery_offset)
2687 rdev->recovery_offset = mddev->curr_resync_completed;
2688
2689 }
2690 if (!mddev->persistent) {
2691 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2692 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2693 if (!mddev->external) {
2694 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2695 rdev_for_each(rdev, mddev) {
2696 if (rdev->badblocks.changed) {
2697 rdev->badblocks.changed = 0;
2698 ack_all_badblocks(&rdev->badblocks);
2699 md_error(mddev, rdev);
2700 }
2701 clear_bit(Blocked, &rdev->flags);
2702 clear_bit(BlockedBadBlocks, &rdev->flags);
2703 wake_up(&rdev->blocked_wait);
2704 }
2705 }
2706 wake_up(&mddev->sb_wait);
2707 return;
2708 }
2709
2710 spin_lock(&mddev->lock);
2711
2712 mddev->utime = ktime_get_real_seconds();
2713
2714 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2715 force_change = 1;
2716 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
2721 nospares = 1;
2722 if (force_change)
2723 nospares = 0;
2724 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Pointless because if there are any spares to skip,
		 * then a recovery will happen and soon that array
		 * won't be degraded any more and the spare can go back
		 * to sleep.
		 */
2734 nospares = 0;
2735
2736 sync_req = mddev->in_sync;
2737
	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
2740 if (nospares
2741 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2742 && mddev->can_decrease_events
2743 && mddev->events != 1) {
2744 mddev->events--;
2745 mddev->can_decrease_events = 0;
	} else {
		/* otherwise we have to go forward and ... */
		mddev->events++;
2749 mddev->can_decrease_events = nospares;
2750 }
2751
	/*
	 * This 64-bit counter should never wrap.
	 * Either we are in around ~1 trillion A.C., assuming
	 * 1 reboot per second, or we have a bug...
	 */
2757 WARN_ON(mddev->events == 0);
2758
2759 rdev_for_each(rdev, mddev) {
2760 if (rdev->badblocks.changed)
2761 any_badblocks_changed++;
2762 if (test_bit(Faulty, &rdev->flags))
2763 set_bit(FaultRecorded, &rdev->flags);
2764 }
2765
2766 sync_sbs(mddev, nospares);
2767 spin_unlock(&mddev->lock);
2768
2769 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2770 mdname(mddev), mddev->in_sync);
2771
2772 if (mddev->queue)
2773 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2774rewrite:
2775 md_bitmap_update_sb(mddev->bitmap);
2776 rdev_for_each(rdev, mddev) {
2777 char b[BDEVNAME_SIZE];
2778
2779 if (rdev->sb_loaded != 1)
2780 continue;
2781
2782 if (!test_bit(Faulty, &rdev->flags)) {
2783 md_super_write(mddev,rdev,
2784 rdev->sb_start, rdev->sb_size,
2785 rdev->sb_page);
2786 pr_debug("md: (write) %s's sb offset: %llu\n",
2787 bdevname(rdev->bdev, b),
2788 (unsigned long long)rdev->sb_start);
2789 rdev->sb_events = mddev->events;
2790 if (rdev->badblocks.size) {
2791 md_super_write(mddev, rdev,
2792 rdev->badblocks.sector,
2793 rdev->badblocks.size << 9,
2794 rdev->bb_page);
2795 rdev->badblocks.size = 0;
2796 }
2797
2798 } else
2799 pr_debug("md: %s (skipping faulty)\n",
2800 bdevname(rdev->bdev, b));
2801
2802 if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
2804 break;
2805 }
2806 if (md_super_wait(mddev) < 0)
2807 goto rewrite;
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2809
2810 if (mddev_is_clustered(mddev) && ret == 0)
2811 md_cluster_ops->metadata_update_finish(mddev);
2812
2813 if (mddev->in_sync != sync_req ||
2814 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2815 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* have to write it out again */
2817 goto repeat;
2818 wake_up(&mddev->sb_wait);
2819 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2820 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2821
2822 rdev_for_each(rdev, mddev) {
2823 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2824 clear_bit(Blocked, &rdev->flags);
2825
2826 if (any_badblocks_changed)
2827 ack_all_badblocks(&rdev->badblocks);
2828 clear_bit(BlockedBadBlocks, &rdev->flags);
2829 wake_up(&rdev->blocked_wait);
2830 }
2831}
2832EXPORT_SYMBOL(md_update_sb);
2833
2834static int add_bound_rdev(struct md_rdev *rdev)
2835{
2836 struct mddev *mddev = rdev->mddev;
2837 int err = 0;
2838 bool add_journal = test_bit(Journal, &rdev->flags);
2839
2840 if (!mddev->pers->hot_remove_disk || add_journal) {
		/* If there is hot_remove_disk but no hot_add_disk
		 * then added disks for geometry changes,
		 * and should be added immediately.
		 */
2845 super_types[mddev->major_version].
2846 validate_super(mddev, rdev);
2847 if (add_journal)
2848 mddev_suspend(mddev);
2849 err = mddev->pers->hot_add_disk(mddev, rdev);
2850 if (add_journal)
2851 mddev_resume(mddev);
2852 if (err) {
2853 md_kick_rdev_from_array(rdev);
2854 return err;
2855 }
2856 }
2857 sysfs_notify_dirent_safe(rdev->sysfs_state);
2858
2859 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2860 if (mddev->degraded)
2861 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2862 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2863 md_new_event(mddev);
2864 md_wakeup_thread(mddev->thread);
2865 return 0;
2866}
2867
/* words written to sysfs files may, or may not, be \n terminated.
 * We want to accept either.  For this we use cmd_match().
 */
2871static int cmd_match(const char *cmd, const char *str)
2872{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline
	 */
2877 while (*cmd && *str && *cmd == *str) {
2878 cmd++;
2879 str++;
2880 }
2881 if (*cmd == '\n')
2882 cmd++;
2883 if (*str || *cmd)
2884 return 0;
2885 return 1;
2886}
2887
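/* Per-rdev sysfs attributes; each entry bundles a show/store pair
 * that operates on the owning md_rdev.
 */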
2888struct rdev_sysfs_entry {
2889 struct attribute attr;
2890 ssize_t (*show)(struct md_rdev *, char *);
2891 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2892};
2893
2894static ssize_t
2895state_show(struct md_rdev *rdev, char *page)
2896{
2897 char *sep = ",";
2898 size_t len = 0;
2899 unsigned long flags = READ_ONCE(rdev->flags);
2900
2901 if (test_bit(Faulty, &flags) ||
2902 (!test_bit(ExternalBbl, &flags) &&
2903 rdev->badblocks.unacked_exist))
2904 len += sprintf(page+len, "faulty%s", sep);
2905 if (test_bit(In_sync, &flags))
2906 len += sprintf(page+len, "in_sync%s", sep);
2907 if (test_bit(Journal, &flags))
2908 len += sprintf(page+len, "journal%s", sep);
2909 if (test_bit(WriteMostly, &flags))
2910 len += sprintf(page+len, "write_mostly%s", sep);
2911 if (test_bit(Blocked, &flags) ||
2912 (rdev->badblocks.unacked_exist
2913 && !test_bit(Faulty, &flags)))
2914 len += sprintf(page+len, "blocked%s", sep);
2915 if (!test_bit(Faulty, &flags) &&
2916 !test_bit(Journal, &flags) &&
2917 !test_bit(In_sync, &flags))
2918 len += sprintf(page+len, "spare%s", sep);
2919 if (test_bit(WriteErrorSeen, &flags))
2920 len += sprintf(page+len, "write_error%s", sep);
2921 if (test_bit(WantReplacement, &flags))
2922 len += sprintf(page+len, "want_replacement%s", sep);
2923 if (test_bit(Replacement, &flags))
2924 len += sprintf(page+len, "replacement%s", sep);
2925 if (test_bit(ExternalBbl, &flags))
2926 len += sprintf(page+len, "external_bbl%s", sep);
2927 if (test_bit(FailFast, &flags))
2928 len += sprintf(page+len, "failfast%s", sep);
2929
2930 if (len)
2931 len -= strlen(sep);
2932
2933 return len+sprintf(page+len, "\n");
2934}
2935
2936static ssize_t
2937state_store(struct md_rdev *rdev, const char *buf, size_t len)
2938{
	/* can write
	 *  faulty  - simulates an error
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flags
	 *  -blocked - clears the Blocked and possibly simulates an error
	 *  insync - sets Insync providing device isn't active
	 *  -insync - clear Insync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 *  {,-}failfast - set/clear FailFast
	 */
2953 int err = -EINVAL;
2954 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2955 md_error(rdev->mddev, rdev);
2956 if (test_bit(Faulty, &rdev->flags))
2957 err = 0;
2958 else
2959 err = -EBUSY;
2960 } else if (cmd_match(buf, "remove")) {
2961 if (rdev->mddev->pers) {
2962 clear_bit(Blocked, &rdev->flags);
2963 remove_and_add_spares(rdev->mddev, rdev);
2964 }
2965 if (rdev->raid_disk >= 0)
2966 err = -EBUSY;
2967 else {
2968 struct mddev *mddev = rdev->mddev;
2969 err = 0;
2970 if (mddev_is_clustered(mddev))
2971 err = md_cluster_ops->remove_disk(mddev, rdev);
2972
2973 if (err == 0) {
2974 md_kick_rdev_from_array(rdev);
2975 if (mddev->pers) {
2976 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2977 md_wakeup_thread(mddev->thread);
2978 }
2979 md_new_event(mddev);
2980 }
2981 }
2982 } else if (cmd_match(buf, "writemostly")) {
2983 set_bit(WriteMostly, &rdev->flags);
2984 mddev_create_serial_pool(rdev->mddev, rdev, false);
2985 err = 0;
2986 } else if (cmd_match(buf, "-writemostly")) {
2987 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2988 clear_bit(WriteMostly, &rdev->flags);
2989 err = 0;
2990 } else if (cmd_match(buf, "blocked")) {
2991 set_bit(Blocked, &rdev->flags);
2992 err = 0;
2993 } else if (cmd_match(buf, "-blocked")) {
2994 if (!test_bit(Faulty, &rdev->flags) &&
2995 !test_bit(ExternalBbl, &rdev->flags) &&
2996 rdev->badblocks.unacked_exist) {
			/* metadata handler doesn't understand badblocks,
			 * so we need to fail the device
			 */
3000 md_error(rdev->mddev, rdev);
3001 }
3002 clear_bit(Blocked, &rdev->flags);
3003 clear_bit(BlockedBadBlocks, &rdev->flags);
3004 wake_up(&rdev->blocked_wait);
3005 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3006 md_wakeup_thread(rdev->mddev->thread);
3007
3008 err = 0;
3009 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3010 set_bit(In_sync, &rdev->flags);
3011 err = 0;
3012 } else if (cmd_match(buf, "failfast")) {
3013 set_bit(FailFast, &rdev->flags);
3014 err = 0;
3015 } else if (cmd_match(buf, "-failfast")) {
3016 clear_bit(FailFast, &rdev->flags);
3017 err = 0;
3018 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3019 !test_bit(Journal, &rdev->flags)) {
3020 if (rdev->mddev->pers == NULL) {
3021 clear_bit(In_sync, &rdev->flags);
3022 rdev->saved_raid_disk = rdev->raid_disk;
3023 rdev->raid_disk = -1;
3024 err = 0;
3025 }
3026 } else if (cmd_match(buf, "write_error")) {
3027 set_bit(WriteErrorSeen, &rdev->flags);
3028 err = 0;
3029 } else if (cmd_match(buf, "-write_error")) {
3030 clear_bit(WriteErrorSeen, &rdev->flags);
3031 err = 0;
3032 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
3037 if (rdev->raid_disk >= 0 &&
3038 !test_bit(Journal, &rdev->flags) &&
3039 !test_bit(Replacement, &rdev->flags))
3040 set_bit(WantReplacement, &rdev->flags);
3041 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3042 md_wakeup_thread(rdev->mddev->thread);
3043 err = 0;
3044 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacement starts it is too late though.
		 */
3048 err = 0;
3049 clear_bit(WantReplacement, &rdev->flags);
3050 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, or by assigning 'slot'.
		 */
3055 if (rdev->mddev->pers)
3056 err = -EBUSY;
3057 else {
3058 set_bit(Replacement, &rdev->flags);
3059 err = 0;
3060 }
3061 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
3063 if (rdev->mddev->pers)
3064 err = -EBUSY;
3065 else {
3066 clear_bit(Replacement, &rdev->flags);
3067 err = 0;
3068 }
3069 } else if (cmd_match(buf, "re-add")) {
3070 if (!rdev->mddev->pers)
3071 err = -EINVAL;
3072 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3073 rdev->saved_raid_disk >= 0) {
			/* A Faulty device that kept its slot history
			 * (saved_raid_disk >= 0) can be re-added.  On a
			 * clustered array the bitmaps must be gathered
			 * from the other nodes first.
			 */
3080 if (!mddev_is_clustered(rdev->mddev) ||
3081 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3082 clear_bit(Faulty, &rdev->flags);
3083 err = add_bound_rdev(rdev);
3084 }
3085 } else
3086 err = -EBUSY;
3087 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3088 set_bit(ExternalBbl, &rdev->flags);
3089 rdev->badblocks.shift = 0;
3090 err = 0;
3091 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3092 clear_bit(ExternalBbl, &rdev->flags);
3093 err = 0;
3094 }
3095 if (!err)
3096 sysfs_notify_dirent_safe(rdev->sysfs_state);
3097 return err ? err : len;
3098}
3099static struct rdev_sysfs_entry rdev_state =
3100__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3101
3102static ssize_t
3103errors_show(struct md_rdev *rdev, char *page)
3104{
3105 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3106}
3107
3108static ssize_t
3109errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3110{
3111 unsigned int n;
3112 int rv;
3113
3114 rv = kstrtouint(buf, 10, &n);
3115 if (rv < 0)
3116 return rv;
3117 atomic_set(&rdev->corrected_errors, n);
3118 return len;
3119}
3120static struct rdev_sysfs_entry rdev_errors =
3121__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3122
3123static ssize_t
3124slot_show(struct md_rdev *rdev, char *page)
3125{
3126 if (test_bit(Journal, &rdev->flags))
3127 return sprintf(page, "journal\n");
3128 else if (rdev->raid_disk < 0)
3129 return sprintf(page, "none\n");
3130 else
3131 return sprintf(page, "%d\n", rdev->raid_disk);
3132}
3133
3134static ssize_t
3135slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3136{
3137 int slot;
3138 int err;
3139
3140 if (test_bit(Journal, &rdev->flags))
3141 return -EBUSY;
3142 if (strncmp(buf, "none", 4)==0)
3143 slot = -1;
3144 else {
3145 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3146 if (err < 0)
3147 return err;
3148 }
3149 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
3157 if (rdev->raid_disk == -1)
3158 return -EEXIST;
		/* personality does all needed checks */
3160 if (rdev->mddev->pers->hot_remove_disk == NULL)
3161 return -EINVAL;
3162 clear_bit(Blocked, &rdev->flags);
3163 remove_and_add_spares(rdev->mddev, rdev);
3164 if (rdev->raid_disk >= 0)
3165 return -EBUSY;
3166 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3167 md_wakeup_thread(rdev->mddev->thread);
3168 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
3172 int err;
3173
3174 if (rdev->raid_disk != -1)
3175 return -EBUSY;
3176
3177 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3178 return -EBUSY;
3179
3180 if (rdev->mddev->pers->hot_add_disk == NULL)
3181 return -EINVAL;
3182
3183 if (slot >= rdev->mddev->raid_disks &&
3184 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3185 return -ENOSPC;
3186
3187 rdev->raid_disk = slot;
3188 if (test_bit(In_sync, &rdev->flags))
3189 rdev->saved_raid_disk = slot;
3190 else
3191 rdev->saved_raid_disk = -1;
3192 clear_bit(In_sync, &rdev->flags);
3193 clear_bit(Bitmap_sync, &rdev->flags);
3194 err = rdev->mddev->pers->
3195 hot_add_disk(rdev->mddev, rdev);
3196 if (err) {
3197 rdev->raid_disk = -1;
3198 return err;
3199 } else
3200 sysfs_notify_dirent_safe(rdev->sysfs_state);
3201 if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
3204 } else {
3205 if (slot >= rdev->mddev->raid_disks &&
3206 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3207 return -ENOSPC;
3208 rdev->raid_disk = slot;
		/* assume it is working */
3210 clear_bit(Faulty, &rdev->flags);
3211 clear_bit(WriteMostly, &rdev->flags);
3212 set_bit(In_sync, &rdev->flags);
3213 sysfs_notify_dirent_safe(rdev->sysfs_state);
3214 }
3215 return len;
3216}
3217
3218static struct rdev_sysfs_entry rdev_slot =
3219__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3220
3221static ssize_t
3222offset_show(struct md_rdev *rdev, char *page)
3223{
3224 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3225}
3226
3227static ssize_t
3228offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3229{
3230 unsigned long long offset;
3231 if (kstrtoull(buf, 10, &offset) < 0)
3232 return -EINVAL;
3233 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3234 return -EBUSY;
3235 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
3238 return -EBUSY;
3239 rdev->data_offset = offset;
3240 rdev->new_data_offset = offset;
3241 return len;
3242}
3243
3244static struct rdev_sysfs_entry rdev_offset =
3245__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3246
3247static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3248{
3249 return sprintf(page, "%llu\n",
3250 (unsigned long long)rdev->new_data_offset);
3251}
3252
3253static ssize_t new_offset_store(struct md_rdev *rdev,
3254 const char *buf, size_t len)
3255{
3256 unsigned long long new_offset;
3257 struct mddev *mddev = rdev->mddev;
3258
3259 if (kstrtoull(buf, 10, &new_offset) < 0)
3260 return -EINVAL;
3261
3262 if (mddev->sync_thread ||
3263 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3264 return -EBUSY;
3265 if (new_offset == rdev->data_offset)
		/* reset is always permitted */
3267 ;
3268 else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev->sectors */
3270 if (new_offset - rdev->data_offset
3271 + mddev->dev_sectors > rdev->sectors)
3272 return -E2BIG;
3273 }
	/* Metadata worries about other space details. */

	/* decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
3279 if (new_offset < rdev->data_offset &&
3280 mddev->reshape_backwards)
3281 return -EINVAL;
3282
	/* Increasing the offset is inconsistent with a forwards
	 * reshape - reshape_direction should be set to 'backwards'
	 * first.
	 */
3286 if (new_offset > rdev->data_offset &&
3287 !mddev->reshape_backwards)
3288 return -EINVAL;
3289
3290 if (mddev->pers && mddev->persistent &&
3291 !super_types[mddev->major_version]
3292 .allow_new_offset(rdev, new_offset))
3293 return -E2BIG;
3294 rdev->new_data_offset = new_offset;
3295 if (new_offset > rdev->data_offset)
3296 mddev->reshape_backwards = 1;
3297 else if (new_offset < rdev->data_offset)
3298 mddev->reshape_backwards = 0;
3299
3300 return len;
3301}
3302static struct rdev_sysfs_entry rdev_new_offset =
3303__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3304
3305static ssize_t
3306rdev_size_show(struct md_rdev *rdev, char *page)
3307{
3308 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3309}
3310
3311static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3312{
	/* check whether two start/length ranges overlap */
3314 if (s1+l1 <= s2)
3315 return 0;
3316 if (s2+l2 <= s1)
3317 return 0;
3318 return 1;
3319}
3320
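/*
 * Parse a decimal KiB count from sysfs and convert it to 512-byte
 * sectors, rejecting values that would overflow the conversion.
 */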
3321static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3322{
3323 unsigned long long blocks;
3324 sector_t new;
3325
3326 if (kstrtoull(buf, 10, &blocks) < 0)
3327 return -EINVAL;
3328
	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL; /* sector conversion overflow */
3331
3332 new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL; /* unsigned long long to sector_t overflow */
3335
3336 *sectors = new;
3337 return 0;
3338}
3339
3340static ssize_t
3341rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3342{
3343 struct mddev *my_mddev = rdev->mddev;
3344 sector_t oldsectors = rdev->sectors;
3345 sector_t sectors;
3346
3347 if (test_bit(Journal, &rdev->flags))
3348 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
3350 return -EINVAL;
3351 if (rdev->data_offset != rdev->new_data_offset)
3352 return -EINVAL;
3353 if (my_mddev->pers && rdev->raid_disk >= 0) {
3354 if (my_mddev->persistent) {
3355 sectors = super_types[my_mddev->major_version].
3356 rdev_size_change(rdev, sectors);
3357 if (!sectors)
3358 return -EBUSY;
3359 } else if (!sectors)
3360 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3361 rdev->data_offset;
		if (!my_mddev->pers->resize)
			/* Cannot change size for this level yet */
			return -EINVAL;
3365 }
	if (sectors < my_mddev->dev_sectors)
		return -EINVAL; /* component must fit device */
3368
3369 rdev->sectors = sectors;
3370 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same
		 * ->bdev do not overlap.  'rcu' is sufficient to walk
		 * the rdev lists safely.
		 * This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
3377 struct mddev *mddev;
3378 int overlap = 0;
3379 struct list_head *tmp;
3380
3381 rcu_read_lock();
3382 for_each_mddev(mddev, tmp) {
3383 struct md_rdev *rdev2;
3384
3385 rdev_for_each(rdev2, mddev)
3386 if (rdev->bdev == rdev2->bdev &&
3387 rdev != rdev2 &&
3388 overlaps(rdev->data_offset, rdev->sectors,
3389 rdev2->data_offset,
3390 rdev2->sectors)) {
3391 overlap = 1;
3392 break;
3393 }
3394 if (overlap) {
3395 mddev_put(mddev);
3396 break;
3397 }
3398 }
3399 rcu_read_unlock();
3400 if (overlap) {
			/* Someone else could have slipped in a size
			 * change here, but doing so is just silly.
			 * We put oldsectors back because we *know* it is
			 * safe, and trust userspace not to race with
			 * itself.
			 */
3407 rdev->sectors = oldsectors;
3408 return -EBUSY;
3409 }
3410 }
3411 return len;
3412}
3413
3414static struct rdev_sysfs_entry rdev_size =
3415__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3416
3417static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3418{
3419 unsigned long long recovery_start = rdev->recovery_offset;
3420
3421 if (test_bit(In_sync, &rdev->flags) ||
3422 recovery_start == MaxSector)
3423 return sprintf(page, "none\n");
3424
3425 return sprintf(page, "%llu\n", recovery_start);
3426}
3427
3428static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3429{
3430 unsigned long long recovery_start;
3431
3432 if (cmd_match(buf, "none"))
3433 recovery_start = MaxSector;
3434 else if (kstrtoull(buf, 10, &recovery_start))
3435 return -EINVAL;
3436
3437 if (rdev->mddev->pers &&
3438 rdev->raid_disk >= 0)
3439 return -EBUSY;
3440
3441 rdev->recovery_offset = recovery_start;
3442 if (recovery_start == MaxSector)
3443 set_bit(In_sync, &rdev->flags);
3444 else
3445 clear_bit(In_sync, &rdev->flags);
3446 return len;
3447}
3448
3449static struct rdev_sysfs_entry rdev_recovery_start =
3450__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3451
/* sysfs access to bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad.  The list is truncated to fit within
 *    a PAGE, so only the first few get listed if there are lots.
 *    Writing "sector length" to this file adds an acknowledged
 *    bad block range.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged.  Writing to this file adds bad blocks
 *    without acknowledging them.  This is largely for testing.
 */
3463static ssize_t bb_show(struct md_rdev *rdev, char *page)
3464{
3465 return badblocks_show(&rdev->badblocks, page, 0);
3466}
3467static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3468{
3469 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3470
3471 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3472 wake_up(&rdev->blocked_wait);
3473 return rv;
3474}
3475static struct rdev_sysfs_entry rdev_bad_blocks =
3476__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3477
3478static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3479{
3480 return badblocks_show(&rdev->badblocks, page, 1);
3481}
3482static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3483{
3484 return badblocks_store(&rdev->badblocks, page, len, 1);
3485}
3486static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3487__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3488
3489static ssize_t
3490ppl_sector_show(struct md_rdev *rdev, char *page)
3491{
3492 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3493}
3494
3495static ssize_t
3496ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3497{
3498 unsigned long long sector;
3499
	if (kstrtoull(buf, 10, &sector) < 0)
3501 return -EINVAL;
3502 if (sector != (sector_t)sector)
3503 return -EINVAL;
3504
3505 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3506 rdev->raid_disk >= 0)
3507 return -EBUSY;
3508
3509 if (rdev->mddev->persistent) {
3510 if (rdev->mddev->major_version == 0)
3511 return -EINVAL;
3512 if ((sector > rdev->sb_start &&
3513 sector - rdev->sb_start > S16_MAX) ||
3514 (sector < rdev->sb_start &&
3515 rdev->sb_start - sector > -S16_MIN))
3516 return -EINVAL;
3517 rdev->ppl.offset = sector - rdev->sb_start;
3518 } else if (!rdev->mddev->external) {
3519 return -EBUSY;
3520 }
3521 rdev->ppl.sector = sector;
3522 return len;
3523}
3524
3525static struct rdev_sysfs_entry rdev_ppl_sector =
3526__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3527
3528static ssize_t
3529ppl_size_show(struct md_rdev *rdev, char *page)
3530{
3531 return sprintf(page, "%u\n", rdev->ppl.size);
3532}
3533
3534static ssize_t
3535ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3536{
3537 unsigned int size;
3538
3539 if (kstrtouint(buf, 10, &size) < 0)
3540 return -EINVAL;
3541
3542 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3543 rdev->raid_disk >= 0)
3544 return -EBUSY;
3545
3546 if (rdev->mddev->persistent) {
3547 if (rdev->mddev->major_version == 0)
3548 return -EINVAL;
3549 if (size > U16_MAX)
3550 return -EINVAL;
3551 } else if (!rdev->mddev->external) {
3552 return -EBUSY;
3553 }
3554 rdev->ppl.size = size;
3555 return len;
3556}
3557
3558static struct rdev_sysfs_entry rdev_ppl_size =
3559__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3560
3561static struct attribute *rdev_default_attrs[] = {
3562 &rdev_state.attr,
3563 &rdev_errors.attr,
3564 &rdev_slot.attr,
3565 &rdev_offset.attr,
3566 &rdev_new_offset.attr,
3567 &rdev_size.attr,
3568 &rdev_recovery_start.attr,
3569 &rdev_bad_blocks.attr,
3570 &rdev_unack_bad_blocks.attr,
3571 &rdev_ppl_sector.attr,
3572 &rdev_ppl_size.attr,
3573 NULL,
3574};
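/* Generic ->show/->store dispatch for the per-rdev sysfs attributes
 * above.  Stores take the owning array's reconfig mutex and re-check
 * that the rdev is still bound before calling the handler.
 */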
3575static ssize_t
3576rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3577{
3578 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3579 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3580
3581 if (!entry->show)
3582 return -EIO;
3583 if (!rdev->mddev)
3584 return -ENODEV;
3585 return entry->show(rdev, page);
3586}
3587
3588static ssize_t
3589rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3590 const char *page, size_t length)
3591{
3592 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3593 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3594 ssize_t rv;
3595 struct mddev *mddev = rdev->mddev;
3596
3597 if (!entry->store)
3598 return -EIO;
3599 if (!capable(CAP_SYS_ADMIN))
3600 return -EACCES;
3601 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3602 if (!rv) {
3603 if (rdev->mddev == NULL)
3604 rv = -ENODEV;
3605 else
3606 rv = entry->store(rdev, page, length);
3607 mddev_unlock(mddev);
3608 }
3609 return rv;
3610}
3611
3612static void rdev_free(struct kobject *ko)
3613{
3614 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3615 kfree(rdev);
3616}
3617static const struct sysfs_ops rdev_sysfs_ops = {
3618 .show = rdev_attr_show,
3619 .store = rdev_attr_store,
3620};
3621static struct kobj_type rdev_ktype = {
3622 .release = rdev_free,
3623 .sysfs_ops = &rdev_sysfs_ops,
3624 .default_attrs = rdev_default_attrs,
3625};
3626
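/*
 * Initialise a newly allocated md_rdev to the "not part of any array"
 * state and allocate its bad-block list.
 */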
3627int md_rdev_init(struct md_rdev *rdev)
3628{
3629 rdev->desc_nr = -1;
3630 rdev->saved_raid_disk = -1;
3631 rdev->raid_disk = -1;
3632 rdev->flags = 0;
3633 rdev->data_offset = 0;
3634 rdev->new_data_offset = 0;
3635 rdev->sb_events = 0;
3636 rdev->last_read_error = 0;
3637 rdev->sb_loaded = 0;
3638 rdev->bb_page = NULL;
3639 atomic_set(&rdev->nr_pending, 0);
3640 atomic_set(&rdev->read_errors, 0);
3641 atomic_set(&rdev->corrected_errors, 0);
3642
3643 INIT_LIST_HEAD(&rdev->same_set);
3644 init_waitqueue_head(&rdev->blocked_wait);
3645
	/* Add space to store bad block list.
	 * This reserves the space even on arrays where it cannot
	 * be used - I wonder if that matters
	 */
3650 return badblocks_init(&rdev->badblocks, 0);
3651}
3652EXPORT_SYMBOL_GPL(md_rdev_init);
3653
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock
 *
 * mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
3663static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3664{
3665 char b[BDEVNAME_SIZE];
3666 int err;
3667 struct md_rdev *rdev;
3668 sector_t size;
3669
3670 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3671 if (!rdev)
3672 return ERR_PTR(-ENOMEM);
3673
3674 err = md_rdev_init(rdev);
3675 if (err)
3676 goto abort_free;
3677 err = alloc_disk_sb(rdev);
3678 if (err)
3679 goto abort_free;
3680
3681 err = lock_rdev(rdev, newdev, super_format == -2);
3682 if (err)
3683 goto abort_free;
3684
3685 kobject_init(&rdev->kobj, &rdev_ktype);
3686
3687 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3688 if (!size) {
3689 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3690 bdevname(rdev->bdev,b));
3691 err = -EINVAL;
3692 goto abort_free;
3693 }
3694
3695 if (super_format >= 0) {
3696 err = super_types[super_format].
3697 load_super(rdev, NULL, super_minor);
3698 if (err == -EINVAL) {
3699 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3700 bdevname(rdev->bdev,b),
3701 super_format, super_minor);
3702 goto abort_free;
3703 }
3704 if (err < 0) {
3705 pr_warn("md: could not read %s's sb, not importing!\n",
3706 bdevname(rdev->bdev,b));
3707 goto abort_free;
3708 }
3709 }
3710
3711 return rdev;
3712
3713abort_free:
3714 if (rdev->bdev)
3715 unlock_rdev(rdev);
3716 md_rdev_clear(rdev);
3717 kfree(rdev);
3718 return ERR_PTR(err);
3719}
3720
/*
 * Check a full RAID array for plausibility
 */
3725static int analyze_sbs(struct mddev *mddev)
3726{
3727 int i;
3728 struct md_rdev *rdev, *freshest, *tmp;
3729 char b[BDEVNAME_SIZE];
3730
3731 freshest = NULL;
3732 rdev_for_each_safe(rdev, tmp, mddev)
3733 switch (super_types[mddev->major_version].
3734 load_super(rdev, freshest, mddev->minor_version)) {
3735 case 1:
3736 freshest = rdev;
3737 break;
3738 case 0:
3739 break;
3740 default:
3741 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3742 bdevname(rdev->bdev,b));
3743 md_kick_rdev_from_array(rdev);
3744 }
3745
	/* Cannot find a valid fresh disk */
3747 if (!freshest) {
3748 pr_warn("md: cannot find a valid disk\n");
3749 return -EINVAL;
3750 }
3751
3752 super_types[mddev->major_version].
3753 validate_super(mddev, freshest);
3754
3755 i = 0;
3756 rdev_for_each_safe(rdev, tmp, mddev) {
3757 if (mddev->max_disks &&
3758 (rdev->desc_nr >= mddev->max_disks ||
3759 i > mddev->max_disks)) {
3760 pr_warn("md: %s: %s: only %d devices permitted\n",
3761 mdname(mddev), bdevname(rdev->bdev, b),
3762 mddev->max_disks);
3763 md_kick_rdev_from_array(rdev);
3764 continue;
3765 }
3766 if (rdev != freshest) {
3767 if (super_types[mddev->major_version].
3768 validate_super(mddev, rdev)) {
3769 pr_warn("md: kicking non-fresh %s from array!\n",
3770 bdevname(rdev->bdev,b));
3771 md_kick_rdev_from_array(rdev);
3772 continue;
3773 }
3774 }
3775 if (mddev->level == LEVEL_MULTIPATH) {
3776 rdev->desc_nr = i++;
3777 rdev->raid_disk = rdev->desc_nr;
3778 set_bit(In_sync, &rdev->flags);
3779 } else if (rdev->raid_disk >=
3780 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3781 !test_bit(Journal, &rdev->flags)) {
3782 rdev->raid_disk = -1;
3783 clear_bit(In_sync, &rdev->flags);
3784 }
3785 }
3786
3787 return 0;
3788}
3789
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * all without any floating-point arithmetic.
 */
3800int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3801{
3802 unsigned long result = 0;
3803 long decimals = -1;
3804 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3805 if (*cp == '.')
3806 decimals = 0;
3807 else if (decimals < scale) {
3808 unsigned int value;
3809 value = *cp - '0';
3810 result = result * 10 + value;
3811 if (decimals >= 0)
3812 decimals++;
3813 }
3814 cp++;
3815 }
3816 if (*cp == '\n')
3817 cp++;
3818 if (*cp)
3819 return -EINVAL;
3820 if (decimals < 0)
3821 decimals = 0;
3822 *res = result * int_pow(10, scale - decimals);
3823 return 0;
3824}
3825
3826static ssize_t
3827safe_delay_show(struct mddev *mddev, char *page)
3828{
3829 int msec = (mddev->safemode_delay*1000)/HZ;
3830 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3831}
3832static ssize_t
3833safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3834{
3835 unsigned long msec;
3836
3837 if (mddev_is_clustered(mddev)) {
3838 pr_warn("md: Safemode is disabled for clustered mode\n");
3839 return -EINVAL;
3840 }
3841
3842 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3843 return -EINVAL;
3844 if (msec == 0)
3845 mddev->safemode_delay = 0;
3846 else {
3847 unsigned long old_delay = mddev->safemode_delay;
3848 unsigned long new_delay = (msec*HZ)/1000;
3849
3850 if (new_delay == 0)
3851 new_delay = 1;
3852 mddev->safemode_delay = new_delay;
3853 if (new_delay < old_delay || old_delay == 0)
3854 mod_timer(&mddev->safemode_timer, jiffies+1);
3855 }
3856 return len;
3857}
3858static struct md_sysfs_entry md_safe_delay =
3859__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3860
3861static ssize_t
3862level_show(struct mddev *mddev, char *page)
3863{
3864 struct md_personality *p;
3865 int ret;
3866 spin_lock(&mddev->lock);
3867 p = mddev->pers;
3868 if (p)
3869 ret = sprintf(page, "%s\n", p->name);
3870 else if (mddev->clevel[0])
3871 ret = sprintf(page, "%s\n", mddev->clevel);
3872 else if (mddev->level != LEVEL_NONE)
3873 ret = sprintf(page, "%d\n", mddev->level);
3874 else
3875 ret = 0;
3876 spin_unlock(&mddev->lock);
3877 return ret;
3878}
3879
3880static ssize_t
3881level_store(struct mddev *mddev, const char *buf, size_t len)
3882{
3883 char clevel[16];
3884 ssize_t rv;
3885 size_t slen = len;
3886 struct md_personality *pers, *oldpers;
3887 long level;
3888 void *priv, *oldpriv;
3889 struct md_rdev *rdev;
3890
3891 if (slen == 0 || slen >= sizeof(clevel))
3892 return -EINVAL;
3893
3894 rv = mddev_lock(mddev);
3895 if (rv)
3896 return rv;
3897
3898 if (mddev->pers == NULL) {
3899 strncpy(mddev->clevel, buf, slen);
3900 if (mddev->clevel[slen-1] == '\n')
3901 slen--;
3902 mddev->clevel[slen] = 0;
3903 mddev->level = LEVEL_NONE;
3904 rv = len;
3905 goto out_unlock;
3906 }
3907 rv = -EROFS;
3908 if (mddev->ro)
3909 goto out_unlock;
3910
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
3917 rv = -EBUSY;
3918 if (mddev->sync_thread ||
3919 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3920 mddev->reshape_position != MaxSector ||
3921 mddev->sysfs_active)
3922 goto out_unlock;
3923
3924 rv = -EINVAL;
3925 if (!mddev->pers->quiesce) {
3926 pr_warn("md: %s: %s does not support online personality change\n",
3927 mdname(mddev), mddev->pers->name);
3928 goto out_unlock;
3929 }
3930
	/* Now find the new personality */
3932 strncpy(clevel, buf, slen);
3933 if (clevel[slen-1] == '\n')
3934 slen--;
3935 clevel[slen] = 0;
3936 if (kstrtol(clevel, 10, &level))
3937 level = LEVEL_NONE;
3938
3939 if (request_module("md-%s", clevel) != 0)
3940 request_module("md-level-%s", clevel);
3941 spin_lock(&pers_lock);
3942 pers = find_pers(level, clevel);
3943 if (!pers || !try_module_get(pers->owner)) {
3944 spin_unlock(&pers_lock);
3945 pr_warn("md: personality %s not loaded\n", clevel);
3946 rv = -EINVAL;
3947 goto out_unlock;
3948 }
3949 spin_unlock(&pers_lock);
3950
3951 if (pers == mddev->pers) {
		/* Nothing to do! */
3953 module_put(pers->owner);
3954 rv = len;
3955 goto out_unlock;
3956 }
3957 if (!pers->takeover) {
3958 module_put(pers->owner);
3959 pr_warn("md: %s: %s does not support personality takeover\n",
3960 mdname(mddev), clevel);
3961 rv = -EINVAL;
3962 goto out_unlock;
3963 }
3964
3965 rdev_for_each(rdev, mddev)
3966 rdev->new_raid_disk = rdev->raid_disk;
3967
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
3971 priv = pers->takeover(mddev);
3972 if (IS_ERR(priv)) {
3973 mddev->new_level = mddev->level;
3974 mddev->new_layout = mddev->layout;
3975 mddev->new_chunk_sectors = mddev->chunk_sectors;
3976 mddev->raid_disks -= mddev->delta_disks;
3977 mddev->delta_disks = 0;
3978 mddev->reshape_backwards = 0;
3979 module_put(pers->owner);
3980 pr_warn("md: %s: %s would not accept array\n",
3981 mdname(mddev), clevel);
3982 rv = PTR_ERR(priv);
3983 goto out_unlock;
3984 }
3985
	/* Looks like we have a winner */
3987 mddev_suspend(mddev);
3988 mddev_detach(mddev);
3989
3990 spin_lock(&mddev->lock);
3991 oldpers = mddev->pers;
3992 oldpriv = mddev->private;
3993 mddev->pers = pers;
3994 mddev->private = priv;
3995 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3996 mddev->level = mddev->new_level;
3997 mddev->layout = mddev->new_layout;
3998 mddev->chunk_sectors = mddev->new_chunk_sectors;
3999 mddev->delta_disks = 0;
4000 mddev->reshape_backwards = 0;
4001 mddev->degraded = 0;
4002 spin_unlock(&mddev->lock);
4003
4004 if (oldpers->sync_request == NULL &&
4005 mddev->external) {
		/* We are converting from a no-redundancy array
		 * to a redundancy array and metadata is managed
		 * externally so we need to be sure that writes
		 * won't block due to a need to update the metadata
		 * outside of md's control, so disable safemode and
		 * mark the array as no longer in_sync.
		 */
4013 mddev->in_sync = 0;
4014 mddev->safemode_delay = 0;
4015 mddev->safemode = 0;
4016 }
4017
4018 oldpers->free(mddev, oldpriv);
4019
4020 if (oldpers->sync_request == NULL &&
4021 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
4023 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4024 pr_warn("md: cannot register extra attributes for %s\n",
4025 mdname(mddev));
4026 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4027 }
4028 if (oldpers->sync_request != NULL &&
4029 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
4031 if (mddev->to_remove == NULL)
4032 mddev->to_remove = &md_redundancy_group;
4033 }
4034
4035 module_put(oldpers->owner);
4036
4037 rdev_for_each(rdev, mddev) {
4038 if (rdev->raid_disk < 0)
4039 continue;
4040 if (rdev->new_raid_disk >= mddev->raid_disks)
4041 rdev->new_raid_disk = -1;
4042 if (rdev->new_raid_disk == rdev->raid_disk)
4043 continue;
4044 sysfs_unlink_rdev(mddev, rdev);
4045 }
4046 rdev_for_each(rdev, mddev) {
4047 if (rdev->raid_disk < 0)
4048 continue;
4049 if (rdev->new_raid_disk == rdev->raid_disk)
4050 continue;
4051 rdev->raid_disk = rdev->new_raid_disk;
4052 if (rdev->raid_disk < 0)
4053 clear_bit(In_sync, &rdev->flags);
4054 else {
4055 if (sysfs_link_rdev(mddev, rdev))
4056 pr_warn("md: cannot register rd%d for %s after level change\n",
4057 rdev->raid_disk, mdname(mddev));
4058 }
4059 }
4060
4061 if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
4065 mddev->in_sync = 1;
4066 del_timer_sync(&mddev->safemode_timer);
4067 }
4068 blk_set_stacking_limits(&mddev->queue->limits);
4069 pers->run(mddev);
4070 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4071 mddev_resume(mddev);
4072 if (!mddev->thread)
4073 md_update_sb(mddev, 1);
4074 sysfs_notify(&mddev->kobj, NULL, "level");
4075 md_new_event(mddev);
4076 rv = len;
4077out_unlock:
4078 mddev_unlock(mddev);
4079 return rv;
4080}
4081
4082static struct md_sysfs_entry md_level =
4083__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4084
4085static ssize_t
4086layout_show(struct mddev *mddev, char *page)
4087{
	/* just a number, not meaningful for all levels */
4089 if (mddev->reshape_position != MaxSector &&
4090 mddev->layout != mddev->new_layout)
4091 return sprintf(page, "%d (%d)\n",
4092 mddev->new_layout, mddev->layout);
4093 return sprintf(page, "%d\n", mddev->layout);
4094}
4095
4096static ssize_t
4097layout_store(struct mddev *mddev, const char *buf, size_t len)
4098{
4099 unsigned int n;
4100 int err;
4101
4102 err = kstrtouint(buf, 10, &n);
4103 if (err < 0)
4104 return err;
4105 err = mddev_lock(mddev);
4106 if (err)
4107 return err;
4108
4109 if (mddev->pers) {
4110 if (mddev->pers->check_reshape == NULL)
4111 err = -EBUSY;
4112 else if (mddev->ro)
4113 err = -EROFS;
4114 else {
4115 mddev->new_layout = n;
4116 err = mddev->pers->check_reshape(mddev);
4117 if (err)
4118 mddev->new_layout = mddev->layout;
4119 }
4120 } else {
4121 mddev->new_layout = n;
4122 if (mddev->reshape_position == MaxSector)
4123 mddev->layout = n;
4124 }
4125 mddev_unlock(mddev);
4126 return err ?: len;
4127}
4128static struct md_sysfs_entry md_layout =
4129__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4130
4131static ssize_t
4132raid_disks_show(struct mddev *mddev, char *page)
4133{
4134 if (mddev->raid_disks == 0)
4135 return 0;
4136 if (mddev->reshape_position != MaxSector &&
4137 mddev->delta_disks != 0)
4138 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4139 mddev->raid_disks - mddev->delta_disks);
4140 return sprintf(page, "%d\n", mddev->raid_disks);
4141}
4142
4143static int update_raid_disks(struct mddev *mddev, int raid_disks);
4144
4145static ssize_t
4146raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4147{
4148 unsigned int n;
4149 int err;
4150
4151 err = kstrtouint(buf, 10, &n);
4152 if (err < 0)
4153 return err;
4154
4155 err = mddev_lock(mddev);
4156 if (err)
4157 return err;
4158 if (mddev->pers)
4159 err = update_raid_disks(mddev, n);
4160 else if (mddev->reshape_position != MaxSector) {
4161 struct md_rdev *rdev;
4162 int olddisks = mddev->raid_disks - mddev->delta_disks;
4163
4164 err = -EINVAL;
4165 rdev_for_each(rdev, mddev) {
4166 if (olddisks < n &&
4167 rdev->data_offset < rdev->new_data_offset)
4168 goto out_unlock;
4169 if (olddisks > n &&
4170 rdev->data_offset > rdev->new_data_offset)
4171 goto out_unlock;
4172 }
4173 err = 0;
4174 mddev->delta_disks = n - olddisks;
4175 mddev->raid_disks = n;
4176 mddev->reshape_backwards = (mddev->delta_disks < 0);
4177 } else
4178 mddev->raid_disks = n;
4179out_unlock:
4180 mddev_unlock(mddev);
4181 return err ? err : len;
4182}
4183static struct md_sysfs_entry md_raid_disks =
4184__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4185
4186static ssize_t
4187chunk_size_show(struct mddev *mddev, char *page)
4188{
4189 if (mddev->reshape_position != MaxSector &&
4190 mddev->chunk_sectors != mddev->new_chunk_sectors)
4191 return sprintf(page, "%d (%d)\n",
4192 mddev->new_chunk_sectors << 9,
4193 mddev->chunk_sectors << 9);
4194 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4195}
4196
4197static ssize_t
4198chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4199{
4200 unsigned long n;
4201 int err;
4202
4203 err = kstrtoul(buf, 10, &n);
4204 if (err < 0)
4205 return err;
4206
4207 err = mddev_lock(mddev);
4208 if (err)
4209 return err;
4210 if (mddev->pers) {
4211 if (mddev->pers->check_reshape == NULL)
4212 err = -EBUSY;
4213 else if (mddev->ro)
4214 err = -EROFS;
4215 else {
4216 mddev->new_chunk_sectors = n >> 9;
4217 err = mddev->pers->check_reshape(mddev);
4218 if (err)
4219 mddev->new_chunk_sectors = mddev->chunk_sectors;
4220 }
4221 } else {
4222 mddev->new_chunk_sectors = n >> 9;
4223 if (mddev->reshape_position == MaxSector)
4224 mddev->chunk_sectors = n >> 9;
4225 }
4226 mddev_unlock(mddev);
4227 return err ?: len;
4228}
4229static struct md_sysfs_entry md_chunk_size =
4230__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4231
4232static ssize_t
4233resync_start_show(struct mddev *mddev, char *page)
4234{
4235 if (mddev->recovery_cp == MaxSector)
4236 return sprintf(page, "none\n");
4237 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4238}
4239
4240static ssize_t
4241resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4242{
4243 unsigned long long n;
4244 int err;
4245
4246 if (cmd_match(buf, "none"))
4247 n = MaxSector;
4248 else {
4249 err = kstrtoull(buf, 10, &n);
4250 if (err < 0)
4251 return err;
4252 if (n != (sector_t)n)
4253 return -EINVAL;
4254 }
4255
4256 err = mddev_lock(mddev);
4257 if (err)
4258 return err;
4259 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4260 err = -EBUSY;
4261
4262 if (!err) {
4263 mddev->recovery_cp = n;
4264 if (mddev->pers)
4265 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4266 }
4267 mddev_unlock(mddev);
4268 return err ?: len;
4269}
4270static struct md_sysfs_entry md_resync_start =
4271__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4272 resync_start_show, resync_start_store);
4273
/*
 * The array state can be:
 *
 * clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 * inactive
 *     May have some settings, like chunk size, but array is not active
 *     all IO results in error
 *     When written, doesn't tear down array, but just stops it
 * suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 * readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 * read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 * clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 * active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 * write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 * active-idle
 *     like active, but no writes have been seen for a while (100msec).
 *
 * broken
 *     RAID0/LINEAR-only: same as clean, but array is missing a member.
 *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
 *     when a member is gone, so this state will at least alert the
 *     user that something is wrong.
 */
4315enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4316 write_pending, active_idle, broken, bad_word};
4317static char *array_states[] = {
4318 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4319 "write-pending", "active-idle", "broken", NULL };
4320
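/* Return the index of the first entry in 'list' that matches 'word',
 * or the index of the terminating NULL when nothing matches.
 */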
4321static int match_word(const char *word, char **list)
4322{
4323 int n;
4324 for (n=0; list[n]; n++)
4325 if (cmd_match(word, list[n]))
4326 break;
4327 return n;
4328}
4329
4330static ssize_t
4331array_state_show(struct mddev *mddev, char *page)
4332{
4333 enum array_state st = inactive;
4334
4335 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4336 switch(mddev->ro) {
4337 case 1:
4338 st = readonly;
4339 break;
4340 case 2:
4341 st = read_auto;
4342 break;
4343 case 0:
4344 spin_lock(&mddev->lock);
4345 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4346 st = write_pending;
4347 else if (mddev->in_sync)
4348 st = clean;
4349 else if (mddev->safemode)
4350 st = active_idle;
4351 else
4352 st = active;
4353 spin_unlock(&mddev->lock);
4354 }
4355
4356 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4357 st = broken;
4358 } else {
4359 if (list_empty(&mddev->disks) &&
4360 mddev->raid_disks == 0 &&
4361 mddev->dev_sectors == 0)
4362 st = clear;
4363 else
4364 st = inactive;
4365 }
4366 return sprintf(page, "%s\n", array_states[st]);
4367}
4368
4369static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4370static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4371static int do_md_run(struct mddev *mddev);
4372static int restart_array(struct mddev *mddev);
4373
4374static ssize_t
4375array_state_store(struct mddev *mddev, const char *buf, size_t len)
4376{
4377 int err = 0;
4378 enum array_state st = match_word(buf, array_states);
4379
4380 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
4384 spin_lock(&mddev->lock);
4385 if (st == active) {
4386 restart_array(mddev);
4387 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4388 md_wakeup_thread(mddev->thread);
4389 wake_up(&mddev->sb_wait);
4390 } else {
4391 restart_array(mddev);
4392 if (!set_in_sync(mddev))
4393 err = -EBUSY;
4394 }
4395 if (!err)
4396 sysfs_notify_dirent_safe(mddev->sysfs_state);
4397 spin_unlock(&mddev->lock);
4398 return err ?: len;
4399 }
4400 err = mddev_lock(mddev);
4401 if (err)
4402 return err;
4403 err = -EINVAL;
4404 switch(st) {
4405 case bad_word:
4406 break;
4407 case clear:
		/* stopping an active array */
4409 err = do_md_stop(mddev, 0, NULL);
4410 break;
4411 case inactive:
		/* stopping an active array */
4413 if (mddev->pers)
4414 err = do_md_stop(mddev, 2, NULL);
4415 else
			err = 0; /* already inactive */
4417 break;
4418 case suspended:
4419 break;
4420 case readonly:
4421 if (mddev->pers)
4422 err = md_set_readonly(mddev, NULL);
4423 else {
4424 mddev->ro = 1;
4425 set_disk_ro(mddev->gendisk, 1);
4426 err = do_md_run(mddev);
4427 }
4428 break;
4429 case read_auto:
4430 if (mddev->pers) {
4431 if (mddev->ro == 0)
4432 err = md_set_readonly(mddev, NULL);
4433 else if (mddev->ro == 1)
4434 err = restart_array(mddev);
4435 if (err == 0) {
4436 mddev->ro = 2;
4437 set_disk_ro(mddev->gendisk, 0);
4438 }
4439 } else {
4440 mddev->ro = 2;
4441 err = do_md_run(mddev);
4442 }
4443 break;
4444 case clean:
4445 if (mddev->pers) {
4446 err = restart_array(mddev);
4447 if (err)
4448 break;
4449 spin_lock(&mddev->lock);
4450 if (!set_in_sync(mddev))
4451 err = -EBUSY;
4452 spin_unlock(&mddev->lock);
4453 } else
4454 err = -EINVAL;
4455 break;
4456 case active:
4457 if (mddev->pers) {
4458 err = restart_array(mddev);
4459 if (err)
4460 break;
4461 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4462 wake_up(&mddev->sb_wait);
4463 err = 0;
4464 } else {
4465 mddev->ro = 0;
4466 set_disk_ro(mddev->gendisk, 0);
4467 err = do_md_run(mddev);
4468 }
4469 break;
4470 case write_pending:
4471 case active_idle:
4472 case broken:
		/* these cannot be set */
4474 break;
4475 }
4476
4477 if (!err) {
4478 if (mddev->hold_active == UNTIL_IOCTL)
4479 mddev->hold_active = 0;
4480 sysfs_notify_dirent_safe(mddev->sysfs_state);
4481 }
4482 mddev_unlock(mddev);
4483 return err ?: len;
4484}
4485static struct md_sysfs_entry md_array_state =
4486__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4487
static ssize_t
max_corrected_read_errors_show(struct mddev *mddev, char *page)
{
4490 return sprintf(page, "%d\n",
4491 atomic_read(&mddev->max_corr_read_errors));
4492}
4493
4494static ssize_t
4495max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4496{
4497 unsigned int n;
4498 int rv;
4499
4500 rv = kstrtouint(buf, 10, &n);
4501 if (rv < 0)
4502 return rv;
4503 atomic_set(&mddev->max_corr_read_errors, n);
4504 return len;
4505}
4506
4507static struct md_sysfs_entry max_corr_read_errors =
4508__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4509 max_corrected_read_errors_store);
4510
4511static ssize_t
4512null_show(struct mddev *mddev, char *page)
4513{
4514 return -EINVAL;
4515}
4516
4517static ssize_t
4518new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4519{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
4527 char *e;
4528 int major = simple_strtoul(buf, &e, 10);
4529 int minor;
4530 dev_t dev;
4531 struct md_rdev *rdev;
4532 int err;
4533
4534 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4535 return -EINVAL;
4536 minor = simple_strtoul(e+1, &e, 10);
4537 if (*e && *e != '\n')
4538 return -EINVAL;
4539 dev = MKDEV(major, minor);
4540 if (major != MAJOR(dev) ||
4541 minor != MINOR(dev))
4542 return -EOVERFLOW;
4543
4544 flush_workqueue(md_misc_wq);
4545
4546 err = mddev_lock(mddev);
4547 if (err)
4548 return err;
4549 if (mddev->persistent) {
4550 rdev = md_import_device(dev, mddev->major_version,
4551 mddev->minor_version);
4552 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4553 struct md_rdev *rdev0
4554 = list_entry(mddev->disks.next,
4555 struct md_rdev, same_set);
4556 err = super_types[mddev->major_version]
4557 .load_super(rdev, rdev0, mddev->minor_version);
4558 if (err < 0)
4559 goto out;
4560 }
4561 } else if (mddev->external)
4562 rdev = md_import_device(dev, -2, -1);
4563 else
4564 rdev = md_import_device(dev, -1, -1);
4565
4566 if (IS_ERR(rdev)) {
4567 mddev_unlock(mddev);
4568 return PTR_ERR(rdev);
4569 }
4570 err = bind_rdev_to_array(rdev, mddev);
4571 out:
4572 if (err)
4573 export_rdev(rdev);
4574 mddev_unlock(mddev);
4575 if (!err)
4576 md_new_event(mddev);
4577 return err ? err : len;
4578}
4579
4580static struct md_sysfs_entry md_new_device =
4581__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4582
4583static ssize_t
4584bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4585{
4586 char *end;
4587 unsigned long chunk, end_chunk;
4588 int err;
4589
4590 err = mddev_lock(mddev);
4591 if (err)
4592 return err;
4593 if (!mddev->bitmap)
4594 goto out;
4595
4596 while (*buf) {
4597 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4598 if (buf == end) break;
4599 if (*end == '-') {
4600 buf = end + 1;
4601 end_chunk = simple_strtoul(buf, &end, 0);
4602 if (buf == end) break;
4603 }
4604 if (*end && !isspace(*end)) break;
4605 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4606 buf = skip_spaces(end);
4607 }
4608 md_bitmap_unplug(mddev->bitmap);
4609out:
4610 mddev_unlock(mddev);
4611 return len;
4612}
4613
4614static struct md_sysfs_entry md_bitmap =
4615__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4616
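/*
 * component_size is reported in 1K blocks: dev_sectors counts 512-byte
 * sectors, hence the division by two in size_show() below.
 */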
4617static ssize_t
4618size_show(struct mddev *mddev, char *page)
4619{
4620 return sprintf(page, "%llu\n",
4621 (unsigned long long)mddev->dev_sectors / 2);
4622}
4623
4624static int update_size(struct mddev *mddev, sector_t num_sectors);
4625
4626static ssize_t
4627size_store(struct mddev *mddev, const char *buf, size_t len)
4628{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
	sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4635
4636 if (err < 0)
4637 return err;
4638 err = mddev_lock(mddev);
4639 if (err)
4640 return err;
4641 if (mddev->pers) {
4642 err = update_size(mddev, sectors);
4643 if (err == 0)
4644 md_update_sb(mddev, 1);
4645 } else {
4646 if (mddev->dev_sectors == 0 ||
4647 mddev->dev_sectors > sectors)
4648 mddev->dev_sectors = sectors;
4649 else
4650 err = -ENOSPC;
4651 }
4652 mddev_unlock(mddev);
4653 return err ? err : len;
4654}
4655
4656static struct md_sysfs_entry md_size =
4657__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4658
/* Metadata version.
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4665static ssize_t
4666metadata_show(struct mddev *mddev, char *page)
4667{
4668 if (mddev->persistent)
4669 return sprintf(page, "%d.%d\n",
4670 mddev->major_version, mddev->minor_version);
4671 else if (mddev->external)
4672 return sprintf(page, "external:%s\n", mddev->metadata_type);
4673 else
4674 return sprintf(page, "none\n");
4675}
4676
4677static ssize_t
4678metadata_store(struct mddev *mddev, const char *buf, size_t len)
4679{
4680 int major, minor;
4681 char *e;
4682 int err;
4683
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4688 err = mddev_lock(mddev);
4689 if (err)
4690 return err;
4691 err = -EBUSY;
4692 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4693 ;
4694 else if (!list_empty(&mddev->disks))
4695 goto out_unlock;
4696
4697 err = 0;
4698 if (cmd_match(buf, "none")) {
4699 mddev->persistent = 0;
4700 mddev->external = 0;
4701 mddev->major_version = 0;
4702 mddev->minor_version = 90;
4703 goto out_unlock;
4704 }
4705 if (strncmp(buf, "external:", 9) == 0) {
4706 size_t namelen = len-9;
4707 if (namelen >= sizeof(mddev->metadata_type))
4708 namelen = sizeof(mddev->metadata_type)-1;
4709 strncpy(mddev->metadata_type, buf+9, namelen);
4710 mddev->metadata_type[namelen] = 0;
4711 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4712 mddev->metadata_type[--namelen] = 0;
4713 mddev->persistent = 0;
4714 mddev->external = 1;
4715 mddev->major_version = 0;
4716 mddev->minor_version = 90;
4717 goto out_unlock;
4718 }
4719 major = simple_strtoul(buf, &e, 10);
4720 err = -EINVAL;
4721 if (e==buf || *e != '.')
4722 goto out_unlock;
4723 buf = e+1;
4724 minor = simple_strtoul(buf, &e, 10);
4725 if (e==buf || (*e && *e != '\n') )
4726 goto out_unlock;
4727 err = -ENOENT;
4728 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4729 goto out_unlock;
4730 mddev->major_version = major;
4731 mddev->minor_version = minor;
4732 mddev->persistent = 1;
4733 mddev->external = 0;
4734 err = 0;
4735out_unlock:
4736 mddev_unlock(mddev);
4737 return err ?: len;
4738}
4739
4740static struct md_sysfs_entry md_metadata =
4741__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4742
4743static ssize_t
4744action_show(struct mddev *mddev, char *page)
4745{
4746 char *type = "idle";
4747 unsigned long recovery = mddev->recovery;
4748 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4749 type = "frozen";
4750 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4751 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4752 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4753 type = "reshape";
4754 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4755 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4756 type = "resync";
4757 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4758 type = "check";
4759 else
4760 type = "repair";
4761 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4762 type = "recover";
4763 else if (mddev->reshape_position != MaxSector)
4764 type = "reshape";
4765 }
4766 return sprintf(page, "%s\n", type);
4767}
4768
4769static ssize_t
4770action_store(struct mddev *mddev, const char *page, size_t len)
4771{
4772 if (!mddev->pers || !mddev->pers->sync_request)
4773 return -EINVAL;
4774
4775
4776 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4777 if (cmd_match(page, "frozen"))
4778 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4779 else
4780 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4781 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4782 mddev_lock(mddev) == 0) {
4783 flush_workqueue(md_misc_wq);
4784 if (mddev->sync_thread) {
4785 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4786 md_reap_sync_thread(mddev);
4787 }
4788 mddev_unlock(mddev);
4789 }
4790 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4791 return -EBUSY;
4792 else if (cmd_match(page, "resync"))
4793 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4794 else if (cmd_match(page, "recover")) {
4795 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4796 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4797 } else if (cmd_match(page, "reshape")) {
4798 int err;
4799 if (mddev->pers->start_reshape == NULL)
4800 return -EINVAL;
4801 err = mddev_lock(mddev);
4802 if (!err) {
4803 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4804 err = -EBUSY;
4805 else {
4806 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4807 err = mddev->pers->start_reshape(mddev);
4808 }
4809 mddev_unlock(mddev);
4810 }
4811 if (err)
4812 return err;
4813 sysfs_notify(&mddev->kobj, NULL, "degraded");
4814 } else {
4815 if (cmd_match(page, "check"))
4816 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4817 else if (!cmd_match(page, "repair"))
4818 return -EINVAL;
4819 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4820 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4821 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4822 }
4823 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4827 mddev->ro = 0;
4828 md_wakeup_thread(mddev->sync_thread);
4829 }
4830 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4831 md_wakeup_thread(mddev->thread);
4832 sysfs_notify_dirent_safe(mddev->sysfs_action);
4833 return len;
4834}
4835
4836static struct md_sysfs_entry md_scan_mode =
4837__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
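/*
 * Typical usage from userspace (device name here is only an example):
 *
 *   echo check  > /sys/block/md0/md/sync_action
 *   echo frozen > /sys/block/md0/md/sync_action
 *
 * Reads of the same attribute report the current action via action_show().
 */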
4838
4839static ssize_t
4840last_sync_action_show(struct mddev *mddev, char *page)
4841{
4842 return sprintf(page, "%s\n", mddev->last_sync_action);
4843}
4844
4845static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4846
4847static ssize_t
4848mismatch_cnt_show(struct mddev *mddev, char *page)
4849{
4850 return sprintf(page, "%llu\n",
4851 (unsigned long long)
4852 atomic64_read(&mddev->resync_mismatches));
4853}
4854
4855static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4856
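/*
 * For sync_speed_min/sync_speed_max, a per-array value of 0 means "use
 * the system-wide sysctl default"; the show handlers report which of the
 * two is currently in effect.
 */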
4857static ssize_t
4858sync_min_show(struct mddev *mddev, char *page)
4859{
4860 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4861 mddev->sync_speed_min ? "local": "system");
4862}
4863
4864static ssize_t
4865sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4866{
4867 unsigned int min;
4868 int rv;
4869
4870 if (strncmp(buf, "system", 6)==0) {
4871 min = 0;
4872 } else {
4873 rv = kstrtouint(buf, 10, &min);
4874 if (rv < 0)
4875 return rv;
4876 if (min == 0)
4877 return -EINVAL;
4878 }
4879 mddev->sync_speed_min = min;
4880 return len;
4881}
4882
4883static struct md_sysfs_entry md_sync_min =
4884__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4885
4886static ssize_t
4887sync_max_show(struct mddev *mddev, char *page)
4888{
4889 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4890 mddev->sync_speed_max ? "local": "system");
4891}
4892
4893static ssize_t
4894sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4895{
4896 unsigned int max;
4897 int rv;
4898
4899 if (strncmp(buf, "system", 6)==0) {
4900 max = 0;
4901 } else {
4902 rv = kstrtouint(buf, 10, &max);
4903 if (rv < 0)
4904 return rv;
4905 if (max == 0)
4906 return -EINVAL;
4907 }
4908 mddev->sync_speed_max = max;
4909 return len;
4910}
4911
4912static struct md_sysfs_entry md_sync_max =
4913__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4914
4915static ssize_t
4916degraded_show(struct mddev *mddev, char *page)
4917{
4918 return sprintf(page, "%d\n", mddev->degraded);
4919}
4920static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4921
4922static ssize_t
4923sync_force_parallel_show(struct mddev *mddev, char *page)
4924{
4925 return sprintf(page, "%d\n", mddev->parallel_resync);
4926}
4927
4928static ssize_t
4929sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4930{
4931 long n;
4932
4933 if (kstrtol(buf, 10, &n))
4934 return -EINVAL;
4935
4936 if (n != 0 && n != 1)
4937 return -EINVAL;
4938
4939 mddev->parallel_resync = n;
4940
4941 if (mddev->sync_thread)
4942 wake_up(&resync_wait);
4943
4944 return len;
4945}
4946
4947
4948static struct md_sysfs_entry md_sync_force_parallel =
4949__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4950 sync_force_parallel_show, sync_force_parallel_store);
4951
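/*
 * sync_speed reports the recent resync rate in KiB/s: sectors completed
 * since the last mark (db) divided by the elapsed seconds (dt), halved
 * to convert 512-byte sectors to KiB.
 */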
4952static ssize_t
4953sync_speed_show(struct mddev *mddev, char *page)
4954{
4955 unsigned long resync, dt, db;
4956 if (mddev->curr_resync == 0)
4957 return sprintf(page, "none\n");
4958 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4959 dt = (jiffies - mddev->resync_mark) / HZ;
4960 if (!dt) dt++;
4961 db = resync - mddev->resync_mark_cnt;
4962 return sprintf(page, "%lu\n", db/dt/2);
4963}
4964
4965static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4966
4967static ssize_t
4968sync_completed_show(struct mddev *mddev, char *page)
4969{
4970 unsigned long long max_sectors, resync;
4971
4972 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4973 return sprintf(page, "none\n");
4974
4975 if (mddev->curr_resync == 1 ||
4976 mddev->curr_resync == 2)
4977 return sprintf(page, "delayed\n");
4978
4979 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4980 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4981 max_sectors = mddev->resync_max_sectors;
4982 else
4983 max_sectors = mddev->dev_sectors;
4984
4985 resync = mddev->curr_resync_completed;
4986 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4987}
4988
4989static struct md_sysfs_entry md_sync_completed =
4990 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4991
4992static ssize_t
4993min_sync_show(struct mddev *mddev, char *page)
4994{
4995 return sprintf(page, "%llu\n",
4996 (unsigned long long)mddev->resync_min);
4997}
4998static ssize_t
4999min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5000{
5001 unsigned long long min;
5002 int err;
5003
5004 if (kstrtoull(buf, 10, &min))
5005 return -EINVAL;
5006
5007 spin_lock(&mddev->lock);
5008 err = -EINVAL;
5009 if (min > mddev->resync_max)
5010 goto out_unlock;
5011
5012 err = -EBUSY;
5013 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5014 goto out_unlock;
5015
	/* Round down to multiple of 4K for safety */
5017 mddev->resync_min = round_down(min, 8);
5018 err = 0;
5019
5020out_unlock:
5021 spin_unlock(&mddev->lock);
5022 return err ?: len;
5023}
5024
5025static struct md_sysfs_entry md_min_sync =
5026__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5027
5028static ssize_t
5029max_sync_show(struct mddev *mddev, char *page)
5030{
5031 if (mddev->resync_max == MaxSector)
5032 return sprintf(page, "max\n");
5033 else
5034 return sprintf(page, "%llu\n",
5035 (unsigned long long)mddev->resync_max);
5036}
5037static ssize_t
5038max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5039{
5040 int err;
5041 spin_lock(&mddev->lock);
5042 if (strncmp(buf, "max", 3) == 0)
5043 mddev->resync_max = MaxSector;
5044 else {
5045 unsigned long long max;
5046 int chunk;
5047
5048 err = -EINVAL;
5049 if (kstrtoull(buf, 10, &max))
5050 goto out_unlock;
5051 if (max < mddev->resync_min)
5052 goto out_unlock;
5053
5054 err = -EBUSY;
5055 if (max < mddev->resync_max &&
5056 mddev->ro == 0 &&
5057 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5058 goto out_unlock;
5059
		/* Must be a multiple of chunk_size */
5061 chunk = mddev->chunk_sectors;
5062 if (chunk) {
5063 sector_t temp = max;
5064
5065 err = -EINVAL;
5066 if (sector_div(temp, chunk))
5067 goto out_unlock;
5068 }
5069 mddev->resync_max = max;
5070 }
5071 wake_up(&mddev->recovery_wait);
5072 err = 0;
5073out_unlock:
5074 spin_unlock(&mddev->lock);
5075 return err ?: len;
5076}
5077
5078static struct md_sysfs_entry md_max_sync =
5079__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
5080
5081static ssize_t
5082suspend_lo_show(struct mddev *mddev, char *page)
5083{
5084 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5085}
5086
5087static ssize_t
5088suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5089{
5090 unsigned long long new;
5091 int err;
5092
5093 err = kstrtoull(buf, 10, &new);
5094 if (err < 0)
5095 return err;
5096 if (new != (sector_t)new)
5097 return -EINVAL;
5098
5099 err = mddev_lock(mddev);
5100 if (err)
5101 return err;
5102 err = -EINVAL;
5103 if (mddev->pers == NULL ||
5104 mddev->pers->quiesce == NULL)
5105 goto unlock;
5106 mddev_suspend(mddev);
5107 mddev->suspend_lo = new;
5108 mddev_resume(mddev);
5109
5110 err = 0;
5111unlock:
5112 mddev_unlock(mddev);
5113 return err ?: len;
5114}
5115static struct md_sysfs_entry md_suspend_lo =
5116__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5117
5118static ssize_t
5119suspend_hi_show(struct mddev *mddev, char *page)
5120{
5121 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5122}
5123
5124static ssize_t
5125suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5126{
5127 unsigned long long new;
5128 int err;
5129
5130 err = kstrtoull(buf, 10, &new);
5131 if (err < 0)
5132 return err;
5133 if (new != (sector_t)new)
5134 return -EINVAL;
5135
5136 err = mddev_lock(mddev);
5137 if (err)
5138 return err;
5139 err = -EINVAL;
5140 if (mddev->pers == NULL)
5141 goto unlock;
5142
5143 mddev_suspend(mddev);
5144 mddev->suspend_hi = new;
5145 mddev_resume(mddev);
5146
5147 err = 0;
5148unlock:
5149 mddev_unlock(mddev);
5150 return err ?: len;
5151}
5152static struct md_sysfs_entry md_suspend_hi =
5153__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5154
5155static ssize_t
5156reshape_position_show(struct mddev *mddev, char *page)
5157{
5158 if (mddev->reshape_position != MaxSector)
5159 return sprintf(page, "%llu\n",
5160 (unsigned long long)mddev->reshape_position);
5161 strcpy(page, "none\n");
5162 return 5;
5163}
5164
5165static ssize_t
5166reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5167{
5168 struct md_rdev *rdev;
5169 unsigned long long new;
5170 int err;
5171
5172 err = kstrtoull(buf, 10, &new);
5173 if (err < 0)
5174 return err;
5175 if (new != (sector_t)new)
5176 return -EINVAL;
5177 err = mddev_lock(mddev);
5178 if (err)
5179 return err;
5180 err = -EBUSY;
5181 if (mddev->pers)
5182 goto unlock;
5183 mddev->reshape_position = new;
5184 mddev->delta_disks = 0;
5185 mddev->reshape_backwards = 0;
5186 mddev->new_level = mddev->level;
5187 mddev->new_layout = mddev->layout;
5188 mddev->new_chunk_sectors = mddev->chunk_sectors;
5189 rdev_for_each(rdev, mddev)
5190 rdev->new_data_offset = rdev->data_offset;
5191 err = 0;
5192unlock:
5193 mddev_unlock(mddev);
5194 return err ?: len;
5195}
5196
5197static struct md_sysfs_entry md_reshape_position =
5198__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5199 reshape_position_store);
5200
5201static ssize_t
5202reshape_direction_show(struct mddev *mddev, char *page)
5203{
5204 return sprintf(page, "%s\n",
5205 mddev->reshape_backwards ? "backwards" : "forwards");
5206}
5207
5208static ssize_t
5209reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5210{
5211 int backwards = 0;
5212 int err;
5213
5214 if (cmd_match(buf, "forwards"))
5215 backwards = 0;
5216 else if (cmd_match(buf, "backwards"))
5217 backwards = 1;
5218 else
5219 return -EINVAL;
5220 if (mddev->reshape_backwards == backwards)
5221 return len;
5222
5223 err = mddev_lock(mddev);
5224 if (err)
5225 return err;
5226
5227 if (mddev->delta_disks)
5228 err = -EBUSY;
5229 else if (mddev->persistent &&
5230 mddev->major_version == 0)
5231 err = -EINVAL;
5232 else
5233 mddev->reshape_backwards = backwards;
5234 mddev_unlock(mddev);
5235 return err ?: len;
5236}
5237
5238static struct md_sysfs_entry md_reshape_direction =
5239__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5240 reshape_direction_store);
5241
5242static ssize_t
5243array_size_show(struct mddev *mddev, char *page)
5244{
5245 if (mddev->external_size)
5246 return sprintf(page, "%llu\n",
5247 (unsigned long long)mddev->array_sectors/2);
5248 else
5249 return sprintf(page, "default\n");
5250}
5251
5252static ssize_t
5253array_size_store(struct mddev *mddev, const char *buf, size_t len)
5254{
5255 sector_t sectors;
5256 int err;
5257
5258 err = mddev_lock(mddev);
5259 if (err)
5260 return err;
5261
	/* cluster raid doesn't support change array_sectors */
5263 if (mddev_is_clustered(mddev)) {
5264 mddev_unlock(mddev);
5265 return -EINVAL;
5266 }
5267
5268 if (strncmp(buf, "default", 7) == 0) {
5269 if (mddev->pers)
5270 sectors = mddev->pers->size(mddev, 0, 0);
5271 else
5272 sectors = mddev->array_sectors;
5273
5274 mddev->external_size = 0;
5275 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5277 err = -EINVAL;
5278 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5279 err = -E2BIG;
5280 else
5281 mddev->external_size = 1;
5282 }
5283
5284 if (!err) {
5285 mddev->array_sectors = sectors;
5286 if (mddev->pers) {
5287 set_capacity(mddev->gendisk, mddev->array_sectors);
5288 revalidate_disk(mddev->gendisk);
5289 }
5290 }
5291 mddev_unlock(mddev);
5292 return err ?: len;
5293}
5294
5295static struct md_sysfs_entry md_array_size =
5296__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5297 array_size_store);
5298
5299static ssize_t
5300consistency_policy_show(struct mddev *mddev, char *page)
5301{
5302 int ret;
5303
5304 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5305 ret = sprintf(page, "journal\n");
5306 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5307 ret = sprintf(page, "ppl\n");
5308 } else if (mddev->bitmap) {
5309 ret = sprintf(page, "bitmap\n");
5310 } else if (mddev->pers) {
5311 if (mddev->pers->sync_request)
5312 ret = sprintf(page, "resync\n");
5313 else
5314 ret = sprintf(page, "none\n");
5315 } else {
5316 ret = sprintf(page, "unknown\n");
5317 }
5318
5319 return ret;
5320}
5321
5322static ssize_t
5323consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5324{
5325 int err = 0;
5326
5327 if (mddev->pers) {
5328 if (mddev->pers->change_consistency_policy)
5329 err = mddev->pers->change_consistency_policy(mddev, buf);
5330 else
5331 err = -EBUSY;
5332 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5333 set_bit(MD_HAS_PPL, &mddev->flags);
5334 } else {
5335 err = -EINVAL;
5336 }
5337
5338 return err ? err : len;
5339}
5340
5341static struct md_sysfs_entry md_consistency_policy =
5342__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5343 consistency_policy_store);
5344
5345static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5346{
5347 return sprintf(page, "%d\n", mddev->fail_last_dev);
5348}
5349
/*
 * Setting fail_last_dev to true to allow last device to be forcibly removed
 * from RAID1/RAID10.
 */
5354static ssize_t
5355fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5356{
5357 int ret;
5358 bool value;
5359
5360 ret = kstrtobool(buf, &value);
5361 if (ret)
5362 return ret;
5363
5364 if (value != mddev->fail_last_dev)
5365 mddev->fail_last_dev = value;
5366
5367 return len;
5368}
5369static struct md_sysfs_entry md_fail_last_dev =
5370__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5371 fail_last_dev_store);
5372
5373static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5374{
5375 if (mddev->pers == NULL || (mddev->pers->level != 1))
5376 return sprintf(page, "n/a\n");
5377 else
5378 return sprintf(page, "%d\n", mddev->serialize_policy);
5379}
5380
/*
 * Setting serialize_policy to true to enforce write IO is not reordered
 * for raid1.
 */
5385static ssize_t
5386serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5387{
5388 int err;
5389 bool value;
5390
5391 err = kstrtobool(buf, &value);
5392 if (err)
5393 return err;
5394
5395 if (value == mddev->serialize_policy)
5396 return len;
5397
5398 err = mddev_lock(mddev);
5399 if (err)
5400 return err;
5401 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5402 pr_err("md: serialize_policy is only effective for raid1\n");
5403 err = -EINVAL;
5404 goto unlock;
5405 }
5406
5407 mddev_suspend(mddev);
5408 if (value)
5409 mddev_create_serial_pool(mddev, NULL, true);
5410 else
5411 mddev_destroy_serial_pool(mddev, NULL, true);
5412 mddev->serialize_policy = value;
5413 mddev_resume(mddev);
5414unlock:
5415 mddev_unlock(mddev);
5416 return err ?: len;
5417}
5418
5419static struct md_sysfs_entry md_serialize_policy =
5420__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5421 serialize_policy_store);
5422
5423
5424static struct attribute *md_default_attrs[] = {
5425 &md_level.attr,
5426 &md_layout.attr,
5427 &md_raid_disks.attr,
5428 &md_chunk_size.attr,
5429 &md_size.attr,
5430 &md_resync_start.attr,
5431 &md_metadata.attr,
5432 &md_new_device.attr,
5433 &md_safe_delay.attr,
5434 &md_array_state.attr,
5435 &md_reshape_position.attr,
5436 &md_reshape_direction.attr,
5437 &md_array_size.attr,
5438 &max_corr_read_errors.attr,
5439 &md_consistency_policy.attr,
5440 &md_fail_last_dev.attr,
5441 &md_serialize_policy.attr,
5442 NULL,
5443};
5444
5445static struct attribute *md_redundancy_attrs[] = {
5446 &md_scan_mode.attr,
5447 &md_last_scan_mode.attr,
5448 &md_mismatches.attr,
5449 &md_sync_min.attr,
5450 &md_sync_max.attr,
5451 &md_sync_speed.attr,
5452 &md_sync_force_parallel.attr,
5453 &md_sync_completed.attr,
5454 &md_min_sync.attr,
5455 &md_max_sync.attr,
5456 &md_suspend_lo.attr,
5457 &md_suspend_hi.attr,
5458 &md_bitmap.attr,
5459 &md_degraded.attr,
5460 NULL,
5461};
5462static struct attribute_group md_redundancy_group = {
5463 .name = NULL,
5464 .attrs = md_redundancy_attrs,
5465};
5466
5467static ssize_t
5468md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5469{
5470 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5471 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5472 ssize_t rv;
5473
5474 if (!entry->show)
5475 return -EIO;
5476 spin_lock(&all_mddevs_lock);
5477 if (list_empty(&mddev->all_mddevs)) {
5478 spin_unlock(&all_mddevs_lock);
5479 return -EBUSY;
5480 }
5481 mddev_get(mddev);
5482 spin_unlock(&all_mddevs_lock);
5483
5484 rv = entry->show(mddev, page);
5485 mddev_put(mddev);
5486 return rv;
5487}
5488
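/*
 * Like md_attr_show(), take a temporary reference on the mddev so it
 * cannot be freed while ->store runs; writers additionally need
 * CAP_SYS_ADMIN.
 */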
5489static ssize_t
5490md_attr_store(struct kobject *kobj, struct attribute *attr,
5491 const char *page, size_t length)
5492{
5493 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5494 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5495 ssize_t rv;
5496
5497 if (!entry->store)
5498 return -EIO;
5499 if (!capable(CAP_SYS_ADMIN))
5500 return -EACCES;
5501 spin_lock(&all_mddevs_lock);
5502 if (list_empty(&mddev->all_mddevs)) {
5503 spin_unlock(&all_mddevs_lock);
5504 return -EBUSY;
5505 }
5506 mddev_get(mddev);
5507 spin_unlock(&all_mddevs_lock);
5508 rv = entry->store(mddev, page, length);
5509 mddev_put(mddev);
5510 return rv;
5511}
5512
5513static void md_free(struct kobject *ko)
5514{
5515 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5516
5517 if (mddev->sysfs_state)
5518 sysfs_put(mddev->sysfs_state);
5519
5520 if (mddev->gendisk)
5521 del_gendisk(mddev->gendisk);
5522 if (mddev->queue)
5523 blk_cleanup_queue(mddev->queue);
5524 if (mddev->gendisk)
5525 put_disk(mddev->gendisk);
5526 percpu_ref_exit(&mddev->writes_pending);
5527
5528 bioset_exit(&mddev->bio_set);
5529 bioset_exit(&mddev->sync_set);
5530 kfree(mddev);
5531}
5532
5533static const struct sysfs_ops md_sysfs_ops = {
5534 .show = md_attr_show,
5535 .store = md_attr_store,
5536};
5537static struct kobj_type md_ktype = {
5538 .release = md_free,
5539 .sysfs_ops = &md_sysfs_ops,
5540 .default_attrs = md_default_attrs,
5541};
5542
5543int mdp_major = 0;
5544
5545static void mddev_delayed_delete(struct work_struct *ws)
5546{
5547 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5548
5549 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5550 kobject_del(&mddev->kobj);
5551 kobject_put(&mddev->kobj);
5552}
5553
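/* percpu_ref release callback: writes_pending needs no action on final put */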
5554static void no_op(struct percpu_ref *r) {}
5555
5556int mddev_init_writes_pending(struct mddev *mddev)
5557{
5558 if (mddev->writes_pending.percpu_count_ptr)
5559 return 0;
5560 if (percpu_ref_init(&mddev->writes_pending, no_op,
5561 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5562 return -ENOMEM;
5563
5564 percpu_ref_put(&mddev->writes_pending);
5565 return 0;
5566}
5567EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5568
5569static int md_alloc(dev_t dev, char *name)
{
	/*
	 * If dev is zero, name is the name of a device to allocate with
	 * an arbitrary minor number.  It will be "md_???"
	 * If dev is non-zero it must be a device number with a MAJOR of
	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
	 * the device is being created by opening a node in /dev.
	 * If "name" is not NULL, the device is being created by
	 * writing to /sys/module/md_mod/parameters/new_array.
	 */
5580 static DEFINE_MUTEX(disks_mutex);
5581 struct mddev *mddev = mddev_find(dev);
5582 struct gendisk *disk;
5583 int partitioned;
5584 int shift;
5585 int unit;
5586 int error;
5587
5588 if (!mddev)
5589 return -ENODEV;
5590
5591 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5592 shift = partitioned ? MdpMinorShift : 0;
5593 unit = MINOR(mddev->unit) >> shift;
5594
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
5599
5600 mutex_lock(&disks_mutex);
5601 error = -EEXIST;
5602 if (mddev->gendisk)
5603 goto abort;
5604
5605 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5608 struct mddev *mddev2;
5609 spin_lock(&all_mddevs_lock);
5610
5611 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5612 if (mddev2->gendisk &&
5613 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5614 spin_unlock(&all_mddevs_lock);
5615 goto abort;
5616 }
5617 spin_unlock(&all_mddevs_lock);
5618 }
5619 if (name && dev)
		/*
		 * Creating /dev/md/... rather than /dev/mdX.
		 */
5623 mddev->hold_active = UNTIL_STOP;
5624
5625 error = -ENOMEM;
5626 mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
5627 if (!mddev->queue)
5628 goto abort;
5629 mddev->queue->queuedata = mddev;
5630
5631 blk_set_stacking_limits(&mddev->queue->limits);
5632
5633 disk = alloc_disk(1 << shift);
5634 if (!disk) {
5635 blk_cleanup_queue(mddev->queue);
5636 mddev->queue = NULL;
5637 goto abort;
5638 }
5639 disk->major = MAJOR(mddev->unit);
5640 disk->first_minor = unit << shift;
5641 if (name)
5642 strcpy(disk->disk_name, name);
5643 else if (partitioned)
5644 sprintf(disk->disk_name, "md_d%d", unit);
5645 else
5646 sprintf(disk->disk_name, "md%d", unit);
5647 disk->fops = &md_fops;
5648 disk->private_data = mddev;
5649 disk->queue = mddev->queue;
5650 blk_queue_write_cache(mddev->queue, true, true);
5651
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
5655 disk->flags |= GENHD_FL_EXT_DEVT;
5656 mddev->gendisk = disk;
5657
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
5660 mutex_lock(&mddev->open_mutex);
5661 add_disk(disk);
5662
5663 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5664 if (error) {
		/* This isn't possible, but as kobject_add is marked
		 * __must_check, we must do something with the result
		 */
5668 pr_debug("md: cannot register %s/md - name in use\n",
5669 disk->disk_name);
5670 error = 0;
5671 }
5672 if (mddev->kobj.sd &&
5673 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5674 pr_debug("pointless warning\n");
5675 mutex_unlock(&mddev->open_mutex);
5676 abort:
5677 mutex_unlock(&disks_mutex);
5678 if (!error && mddev->kobj.sd) {
5679 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5680 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5681 }
5682 mddev_put(mddev);
5683 return error;
5684}
5685
5686static struct kobject *md_probe(dev_t dev, int *part, void *data)
5687{
5688 if (create_on_open)
5689 md_alloc(dev, NULL);
5690 return NULL;
5691}
5692
5693static int add_named_array(const char *val, const struct kernel_param *kp)
{
	/*
	 * val must be "md_*" or "mdNNN".
	 * For "md_*" we allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 * For "mdNNN" we allocate an array with the minor number NNN
	 * which must not already be in use.
	 */
5702 int len = strlen(val);
5703 char buf[DISK_NAME_LEN];
5704 unsigned long devnum;
5705
5706 while (len && val[len-1] == '\n')
5707 len--;
5708 if (len >= DISK_NAME_LEN)
5709 return -E2BIG;
5710 strlcpy(buf, val, len+1);
5711 if (strncmp(buf, "md_", 3) == 0)
5712 return md_alloc(0, buf);
5713 if (strncmp(buf, "md", 2) == 0 &&
5714 isdigit(buf[2]) &&
5715 kstrtoul(buf+2, 10, &devnum) == 0 &&
5716 devnum <= MINORMASK)
5717 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5718
5719 return -EINVAL;
5720}
5721
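/*
 * Safemode timer: fires after a period without writes so the array can
 * be marked clean; the md thread does the actual superblock update.
 */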
5722static void md_safemode_timeout(struct timer_list *t)
5723{
5724 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5725
5726 mddev->safemode = 1;
5727 if (mddev->external)
5728 sysfs_notify_dirent_safe(mddev->sysfs_state);
5729
5730 md_wakeup_thread(mddev->thread);
5731}
5732
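/*
 * Non-zero allows starting an array that is both dirty and degraded, at
 * the risk of data corruption; normally exposed as a module parameter
 * elsewhere in this file.
 */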
5733static int start_dirty_degraded;
5734
5735int md_run(struct mddev *mddev)
5736{
5737 int err;
5738 struct md_rdev *rdev;
5739 struct md_personality *pers;
5740
5741 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
5743 return -EINVAL;
5744
5745 if (mddev->pers)
5746 return -EBUSY;
5747
5748 if (mddev->sysfs_active)
5749 return -EBUSY;
5750
	/*
	 * Analyze all RAID superblock(s)
	 */
5754 if (!mddev->raid_disks) {
5755 if (!mddev->persistent)
5756 return -EINVAL;
5757 err = analyze_sbs(mddev);
5758 if (err)
5759 return -EINVAL;
5760 }
5761
5762 if (mddev->level != LEVEL_NONE)
5763 request_module("md-level-%d", mddev->level);
5764 else if (mddev->clevel[0])
5765 request_module("md-%s", mddev->clevel);
5766
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5772 mddev->has_superblocks = false;
5773 rdev_for_each(rdev, mddev) {
5774 if (test_bit(Faulty, &rdev->flags))
5775 continue;
5776 sync_blockdev(rdev->bdev);
5777 invalidate_bdev(rdev->bdev);
5778 if (mddev->ro != 1 &&
5779 (bdev_read_only(rdev->bdev) ||
5780 bdev_read_only(rdev->meta_bdev))) {
5781 mddev->ro = 1;
5782 if (mddev->gendisk)
5783 set_disk_ro(mddev->gendisk, 1);
5784 }
5785
5786 if (rdev->sb_page)
5787 mddev->has_superblocks = true;
5788
		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled as well.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to check */;
5795 } else if (rdev->data_offset < rdev->sb_start) {
5796 if (mddev->dev_sectors &&
5797 rdev->data_offset + mddev->dev_sectors
5798 > rdev->sb_start) {
5799 pr_warn("md: %s: data overlaps metadata\n",
5800 mdname(mddev));
5801 return -EINVAL;
5802 }
5803 } else {
5804 if (rdev->sb_start + rdev->sb_size/512
5805 > rdev->data_offset) {
5806 pr_warn("md: %s: metadata overlaps data\n",
5807 mdname(mddev));
5808 return -EINVAL;
5809 }
5810 }
5811 sysfs_notify_dirent_safe(rdev->sysfs_state);
5812 }
5813
5814 if (!bioset_initialized(&mddev->bio_set)) {
5815 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5816 if (err)
5817 return err;
5818 }
5819 if (!bioset_initialized(&mddev->sync_set)) {
5820 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5821 if (err)
5822 return err;
5823 }
5824
5825 spin_lock(&pers_lock);
5826 pers = find_pers(mddev->level, mddev->clevel);
5827 if (!pers || !try_module_get(pers->owner)) {
5828 spin_unlock(&pers_lock);
5829 if (mddev->level != LEVEL_NONE)
5830 pr_warn("md: personality for level %d is not loaded!\n",
5831 mddev->level);
5832 else
5833 pr_warn("md: personality for level %s is not loaded!\n",
5834 mddev->clevel);
5835 err = -EINVAL;
5836 goto abort;
5837 }
5838 spin_unlock(&pers_lock);
5839 if (mddev->level != pers->level) {
5840 mddev->level = pers->level;
5841 mddev->new_level = pers->level;
5842 }
5843 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5844
5845 if (mddev->reshape_position != MaxSector &&
5846 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5848 module_put(pers->owner);
5849 err = -EINVAL;
5850 goto abort;
5851 }
5852
5853 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
5857 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5858 struct md_rdev *rdev2;
5859 int warned = 0;
5860
5861 rdev_for_each(rdev, mddev)
5862 rdev_for_each(rdev2, mddev) {
5863 if (rdev < rdev2 &&
5864 rdev->bdev->bd_contains ==
5865 rdev2->bdev->bd_contains) {
5866 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5867 mdname(mddev),
5868 bdevname(rdev->bdev,b),
5869 bdevname(rdev2->bdev,b2));
5870 warned = 1;
5871 }
5872 }
5873
5874 if (warned)
5875 pr_warn("True protection against single-disk failure might be compromised.\n");
5876 }
5877
5878 mddev->recovery = 0;
	/* may be over-ridden by personality */
	mddev->resync_max_sectors = mddev->dev_sectors;
5881
5882 mddev->ok_start_degraded = start_dirty_degraded;
5883
5884 if (start_readonly && mddev->ro == 0)
5885 mddev->ro = 2;
5886
5887 err = pers->run(mddev);
5888 if (err)
5889 pr_warn("md: pers->run() failed ...\n");
5890 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5891 WARN_ONCE(!mddev->external_size,
5892 "%s: default size too small, but 'external_size' not in effect?\n",
5893 __func__);
5894 pr_warn("md: invalid array_size %llu > default size %llu\n",
5895 (unsigned long long)mddev->array_sectors / 2,
5896 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5897 err = -EINVAL;
5898 }
5899 if (err == 0 && pers->sync_request &&
5900 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5901 struct bitmap *bitmap;
5902
5903 bitmap = md_bitmap_create(mddev, -1);
5904 if (IS_ERR(bitmap)) {
5905 err = PTR_ERR(bitmap);
5906 pr_warn("%s: failed to create bitmap (%d)\n",
5907 mdname(mddev), err);
5908 } else
5909 mddev->bitmap = bitmap;
5910
5911 }
5912 if (err)
5913 goto bitmap_abort;
5914
5915 if (mddev->bitmap_info.max_write_behind > 0) {
5916 bool create_pool = false;
5917
5918 rdev_for_each(rdev, mddev) {
5919 if (test_bit(WriteMostly, &rdev->flags) &&
5920 rdev_init_serial(rdev))
5921 create_pool = true;
5922 }
5923 if (create_pool && mddev->serial_info_pool == NULL) {
5924 mddev->serial_info_pool =
5925 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
5926 sizeof(struct serial_info));
5927 if (!mddev->serial_info_pool) {
5928 err = -ENOMEM;
5929 goto bitmap_abort;
5930 }
5931 }
5932 }
5933
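	/*
	 * Advertise the md queue as non-rotational only if every active
	 * member device is non-rotational and the array is not degraded.
	 */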
5934 if (mddev->queue) {
5935 bool nonrot = true;
5936
5937 rdev_for_each(rdev, mddev) {
5938 if (rdev->raid_disk >= 0 &&
5939 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5940 nonrot = false;
5941 break;
5942 }
5943 }
5944 if (mddev->degraded)
5945 nonrot = false;
5946 if (nonrot)
5947 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5948 else
5949 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5950 mddev->queue->backing_dev_info->congested_data = mddev;
5951 mddev->queue->backing_dev_info->congested_fn = md_congested;
5952 }
5953 if (pers->sync_request) {
5954 if (mddev->kobj.sd &&
5955 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5956 pr_warn("md: cannot register extra attributes for %s\n",
5957 mdname(mddev));
5958 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5959 } else if (mddev->ro == 2)
5960 mddev->ro = 0;
5961
5962 atomic_set(&mddev->max_corr_read_errors,
5963 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5964 mddev->safemode = 0;
5965 if (mddev_is_clustered(mddev))
5966 mddev->safemode_delay = 0;
5967 else
		mddev->safemode_delay = (200 * HZ) / 1000 + 1;
5969 mddev->in_sync = 1;
5970 smp_wmb();
5971 spin_lock(&mddev->lock);
5972 mddev->pers = pers;
5973 spin_unlock(&mddev->lock);
5974 rdev_for_each(rdev, mddev)
5975 if (rdev->raid_disk >= 0)
5976 sysfs_link_rdev(mddev, rdev);
5977
5978 if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
5982 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5983 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5984
5985 if (mddev->sb_flags)
5986 md_update_sb(mddev, 0);
5987
5988 md_new_event(mddev);
5989 return 0;
5990
5991bitmap_abort:
5992 mddev_detach(mddev);
5993 if (mddev->private)
5994 pers->free(mddev, mddev->private);
5995 mddev->private = NULL;
5996 module_put(pers->owner);
5997 md_bitmap_destroy(mddev);
5998abort:
5999 bioset_exit(&mddev->bio_set);
6000 bioset_exit(&mddev->sync_set);
6001 return err;
6002}
6003EXPORT_SYMBOL_GPL(md_run);
6004
6005static int do_md_run(struct mddev *mddev)
6006{
6007 int err;
6008
6009 set_bit(MD_NOT_READY, &mddev->flags);
6010 err = md_run(mddev);
6011 if (err)
6012 goto out;
6013 err = md_bitmap_load(mddev);
6014 if (err) {
6015 md_bitmap_destroy(mddev);
6016 goto out;
6017 }
6018
6019 if (mddev_is_clustered(mddev))
6020 md_allow_write(mddev);
6021
	/* run start up tasks that require md_thread */
6023 md_start(mddev);
6024
6025 md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
6027
6028 set_capacity(mddev->gendisk, mddev->array_sectors);
6029 revalidate_disk(mddev->gendisk);
6030 clear_bit(MD_NOT_READY, &mddev->flags);
6031 mddev->changed = 1;
6032 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6033 sysfs_notify_dirent_safe(mddev->sysfs_state);
6034 sysfs_notify_dirent_safe(mddev->sysfs_action);
6035 sysfs_notify(&mddev->kobj, NULL, "degraded");
6036out:
6037 clear_bit(MD_NOT_READY, &mddev->flags);
6038 return err;
6039}
6040
6041int md_start(struct mddev *mddev)
6042{
6043 int ret = 0;
6044
6045 if (mddev->pers->start) {
6046 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6047 md_wakeup_thread(mddev->thread);
6048 ret = mddev->pers->start(mddev);
6049 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6050 md_wakeup_thread(mddev->sync_thread);
6051 }
6052 return ret;
6053}
6054EXPORT_SYMBOL_GPL(md_start);
6055
6056static int restart_array(struct mddev *mddev)
6057{
6058 struct gendisk *disk = mddev->gendisk;
6059 struct md_rdev *rdev;
6060 bool has_journal = false;
6061 bool has_readonly = false;
6062
	/* Complain if it has no devices */
6064 if (list_empty(&mddev->disks))
6065 return -ENXIO;
6066 if (!mddev->pers)
6067 return -EINVAL;
6068 if (!mddev->ro)
6069 return -EBUSY;
6070
6071 rcu_read_lock();
6072 rdev_for_each_rcu(rdev, mddev) {
6073 if (test_bit(Journal, &rdev->flags) &&
6074 !test_bit(Faulty, &rdev->flags))
6075 has_journal = true;
6076 if (bdev_read_only(rdev->bdev))
6077 has_readonly = true;
6078 }
6079 rcu_read_unlock();
6080 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart rw with journal missing/faulty */
6082 return -EINVAL;
6083 if (has_readonly)
6084 return -EROFS;
6085
6086 mddev->safemode = 0;
6087 mddev->ro = 0;
6088 set_disk_ro(disk, 0);
6089 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6090
6091 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6092 md_wakeup_thread(mddev->thread);
6093 md_wakeup_thread(mddev->sync_thread);
6094 sysfs_notify_dirent_safe(mddev->sysfs_state);
6095 return 0;
6096}
6097
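/*
 * Reset all per-array state to defaults so a stopped mddev can be
 * reused; callers must already have stopped the personality.
 */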
6098static void md_clean(struct mddev *mddev)
6099{
6100 mddev->array_sectors = 0;
6101 mddev->external_size = 0;
6102 mddev->dev_sectors = 0;
6103 mddev->raid_disks = 0;
6104 mddev->recovery_cp = 0;
6105 mddev->resync_min = 0;
6106 mddev->resync_max = MaxSector;
6107 mddev->reshape_position = MaxSector;
6108 mddev->external = 0;
6109 mddev->persistent = 0;
6110 mddev->level = LEVEL_NONE;
6111 mddev->clevel[0] = 0;
6112 mddev->flags = 0;
6113 mddev->sb_flags = 0;
6114 mddev->ro = 0;
6115 mddev->metadata_type[0] = 0;
6116 mddev->chunk_sectors = 0;
6117 mddev->ctime = mddev->utime = 0;
6118 mddev->layout = 0;
6119 mddev->max_disks = 0;
6120 mddev->events = 0;
6121 mddev->can_decrease_events = 0;
6122 mddev->delta_disks = 0;
6123 mddev->reshape_backwards = 0;
6124 mddev->new_level = LEVEL_NONE;
6125 mddev->new_layout = 0;
6126 mddev->new_chunk_sectors = 0;
6127 mddev->curr_resync = 0;
6128 atomic64_set(&mddev->resync_mismatches, 0);
6129 mddev->suspend_lo = mddev->suspend_hi = 0;
6130 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6131 mddev->recovery = 0;
6132 mddev->in_sync = 0;
6133 mddev->changed = 0;
6134 mddev->degraded = 0;
6135 mddev->safemode = 0;
6136 mddev->private = NULL;
6137 mddev->cluster_info = NULL;
6138 mddev->bitmap_info.offset = 0;
6139 mddev->bitmap_info.default_offset = 0;
6140 mddev->bitmap_info.default_space = 0;
6141 mddev->bitmap_info.chunksize = 0;
6142 mddev->bitmap_info.daemon_sleep = 0;
6143 mddev->bitmap_info.max_write_behind = 0;
6144 mddev->bitmap_info.nodes = 0;
6145}
6146
6147static void __md_stop_writes(struct mddev *mddev)
6148{
6149 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6150 flush_workqueue(md_misc_wq);
6151 if (mddev->sync_thread) {
6152 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6153 md_reap_sync_thread(mddev);
6154 }
6155
6156 del_timer_sync(&mddev->safemode_timer);
6157
6158 if (mddev->pers && mddev->pers->quiesce) {
6159 mddev->pers->quiesce(mddev, 1);
6160 mddev->pers->quiesce(mddev, 0);
6161 }
6162 md_bitmap_flush(mddev);
6163
6164 if (mddev->ro == 0 &&
6165 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6166 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
6168 if (!mddev_is_clustered(mddev))
6169 mddev->in_sync = 1;
6170 md_update_sb(mddev, 1);
6171 }
6172
6173 mddev->serialize_policy = 0;
6174 mddev_destroy_serial_pool(mddev, NULL, true);
6175}
6176
6177void md_stop_writes(struct mddev *mddev)
6178{
6179 mddev_lock_nointr(mddev);
6180 __md_stop_writes(mddev);
6181 mddev_unlock(mddev);
6182}
6183EXPORT_SYMBOL_GPL(md_stop_writes);
6184
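/*
 * Detach the array from its personality: wait for behind writes, flush
 * in-flight IO via quiesce, and stop the per-array md thread.
 */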
6185static void mddev_detach(struct mddev *mddev)
6186{
6187 md_bitmap_wait_behind_writes(mddev);
6188 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6189 mddev->pers->quiesce(mddev, 1);
6190 mddev->pers->quiesce(mddev, 0);
6191 }
6192 md_unregister_thread(&mddev->thread);
6193 if (mddev->queue)
6194 blk_sync_queue(mddev->queue);
6195}
6196
6197static void __md_stop(struct mddev *mddev)
6198{
6199 struct md_personality *pers = mddev->pers;
6200 md_bitmap_destroy(mddev);
6201 mddev_detach(mddev);
6202
6203 flush_workqueue(md_misc_wq);
6204 spin_lock(&mddev->lock);
6205 mddev->pers = NULL;
6206 spin_unlock(&mddev->lock);
6207 pers->free(mddev, mddev->private);
6208 mddev->private = NULL;
6209 if (pers->sync_request && mddev->to_remove == NULL)
6210 mddev->to_remove = &md_redundancy_group;
6211 module_put(pers->owner);
6212 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6213}
6214
6215void md_stop(struct mddev *mddev)
6216{
	/* stop the array and free an attached data structures.
	 * This is called from dm-raid
	 */
6220 __md_stop(mddev);
6221 bioset_exit(&mddev->bio_set);
6222 bioset_exit(&mddev->sync_set);
6223}
6224
6225EXPORT_SYMBOL_GPL(md_stop);
6226
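/*
 * Switch a running array to read-only (ro == 1).  Fails with -EBUSY if
 * someone else holds the device open or a sync thread cannot be stopped.
 */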
6227static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6228{
6229 int err = 0;
6230 int did_freeze = 0;
6231
6232 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6233 did_freeze = 1;
6234 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6235 md_wakeup_thread(mddev->thread);
6236 }
6237 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6238 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6239 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6242 wake_up_process(mddev->sync_thread->tsk);
6243
6244 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6245 return -EBUSY;
6246 mddev_unlock(mddev);
6247 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6248 &mddev->recovery));
6249 wait_event(mddev->sb_wait,
6250 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6251 mddev_lock_nointr(mddev);
6252
6253 mutex_lock(&mddev->open_mutex);
6254 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6255 mddev->sync_thread ||
6256 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
		pr_warn("md: %s still in use.\n", mdname(mddev));
6258 if (did_freeze) {
6259 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6260 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6261 md_wakeup_thread(mddev->thread);
6262 }
6263 err = -EBUSY;
6264 goto out;
6265 }
6266 if (mddev->pers) {
6267 __md_stop_writes(mddev);
6268
6269 err = -ENXIO;
6270 if (mddev->ro==1)
6271 goto out;
6272 mddev->ro = 1;
6273 set_disk_ro(mddev->gendisk, 1);
6274 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6275 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6276 md_wakeup_thread(mddev->thread);
6277 sysfs_notify_dirent_safe(mddev->sysfs_state);
6278 err = 0;
6279 }
6280out:
6281 mutex_unlock(&mddev->open_mutex);
6282 return err;
6283}
6284
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
6289static int do_md_stop(struct mddev *mddev, int mode,
6290 struct block_device *bdev)
6291{
6292 struct gendisk *disk = mddev->gendisk;
6293 struct md_rdev *rdev;
6294 int did_freeze = 0;
6295
6296 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6297 did_freeze = 1;
6298 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6299 md_wakeup_thread(mddev->thread);
6300 }
6301 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6302 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6303 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6306 wake_up_process(mddev->sync_thread->tsk);
6307
6308 mddev_unlock(mddev);
6309 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6310 !test_bit(MD_RECOVERY_RUNNING,
6311 &mddev->recovery)));
6312 mddev_lock_nointr(mddev);
6313
6314 mutex_lock(&mddev->open_mutex);
6315 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6316 mddev->sysfs_active ||
6317 mddev->sync_thread ||
6318 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
		pr_warn("md: %s still in use.\n", mdname(mddev));
6320 mutex_unlock(&mddev->open_mutex);
6321 if (did_freeze) {
6322 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6323 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6324 md_wakeup_thread(mddev->thread);
6325 }
6326 return -EBUSY;
6327 }
6328 if (mddev->pers) {
6329 if (mddev->ro)
6330 set_disk_ro(disk, 0);
6331
6332 __md_stop_writes(mddev);
6333 __md_stop(mddev);
6334 mddev->queue->backing_dev_info->congested_fn = NULL;
6335
		/* tell userspace to handle 'inactive' */
6337 sysfs_notify_dirent_safe(mddev->sysfs_state);
6338
6339 rdev_for_each(rdev, mddev)
6340 if (rdev->raid_disk >= 0)
6341 sysfs_unlink_rdev(mddev, rdev);
6342
6343 set_capacity(disk, 0);
6344 mutex_unlock(&mddev->open_mutex);
6345 mddev->changed = 1;
6346 revalidate_disk(disk);
6347
6348 if (mddev->ro)
6349 mddev->ro = 0;
6350 } else
6351 mutex_unlock(&mddev->open_mutex);
6352
	/*
	 * Free resources if final stop
	 */
6355 if (mode == 0) {
6356 pr_info("md: %s stopped.\n", mdname(mddev));
6357
6358 if (mddev->bitmap_info.file) {
6359 struct file *f = mddev->bitmap_info.file;
6360 spin_lock(&mddev->lock);
6361 mddev->bitmap_info.file = NULL;
6362 spin_unlock(&mddev->lock);
6363 fput(f);
6364 }
6365 mddev->bitmap_info.offset = 0;
6366
6367 export_array(mddev);
6368
6369 md_clean(mddev);
6370 if (mddev->hold_active == UNTIL_STOP)
6371 mddev->hold_active = 0;
6372 }
6373 md_new_event(mddev);
6374 sysfs_notify_dirent_safe(mddev->sysfs_state);
6375 return 0;
6376}
6377
6378#ifndef MODULE
6379static void autorun_array(struct mddev *mddev)
6380{
6381 struct md_rdev *rdev;
6382 int err;
6383
6384 if (list_empty(&mddev->disks))
6385 return;
6386
6387 pr_info("md: running: ");
6388
6389 rdev_for_each(rdev, mddev) {
6390 char b[BDEVNAME_SIZE];
6391 pr_cont("<%s>", bdevname(rdev->bdev,b));
6392 }
6393 pr_cont("\n");
6394
6395 err = do_md_run(mddev);
6396 if (err) {
6397 pr_warn("md: do_md_run() returned %d\n", err);
6398 do_md_stop(mddev, 0, NULL);
6399 }
6400}
6401
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks list)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
6414static void autorun_devices(int part)
6415{
6416 struct md_rdev *rdev0, *rdev, *tmp;
6417 struct mddev *mddev;
6418 char b[BDEVNAME_SIZE];
6419
6420 pr_info("md: autorun ...\n");
6421 while (!list_empty(&pending_raid_disks)) {
6422 int unit;
6423 dev_t dev;
6424 LIST_HEAD(candidates);
6425 rdev0 = list_entry(pending_raid_disks.next,
6426 struct md_rdev, same_set);
6427
6428 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6429 INIT_LIST_HEAD(&candidates);
6430 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6431 if (super_90_load(rdev, rdev0, 0) >= 0) {
6432 pr_debug("md: adding %s ...\n",
6433 bdevname(rdev->bdev,b));
6434 list_move(&rdev->same_set, &candidates);
6435 }
6436
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
6441 if (part) {
6442 dev = MKDEV(mdp_major,
6443 rdev0->preferred_minor << MdpMinorShift);
6444 unit = MINOR(dev) >> MdpMinorShift;
6445 } else {
6446 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6447 unit = MINOR(dev);
6448 }
6449 if (rdev0->preferred_minor != unit) {
6450 pr_warn("md: unit number in %s is bad: %d\n",
6451 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6452 break;
6453 }
6454
6455 md_probe(dev, NULL, NULL);
6456 mddev = mddev_find(dev);
6457 if (!mddev || !mddev->gendisk) {
6458 if (mddev)
6459 mddev_put(mddev);
6460 break;
6461 }
6462 if (mddev_lock(mddev))
6463 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6464 else if (mddev->raid_disks || mddev->major_version
6465 || !list_empty(&mddev->disks)) {
6466 pr_warn("md: %s already running, cannot run %s\n",
6467 mdname(mddev), bdevname(rdev0->bdev,b));
6468 mddev_unlock(mddev);
6469 } else {
6470 pr_debug("md: created %s\n", mdname(mddev));
6471 mddev->persistent = 1;
6472 rdev_for_each_list(rdev, tmp, &candidates) {
6473 list_del_init(&rdev->same_set);
6474 if (bind_rdev_to_array(rdev, mddev))
6475 export_rdev(rdev);
6476 }
6477 autorun_array(mddev);
6478 mddev_unlock(mddev);
6479 }
6480
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
6483 rdev_for_each_list(rdev, tmp, &candidates) {
6484 list_del_init(&rdev->same_set);
6485 export_rdev(rdev);
6486 }
6487 mddev_put(mddev);
6488 }
6489 pr_info("md: ... autorun DONE.\n");
6490}
6491#endif
6492
6493static int get_version(void __user *arg)
6494{
6495 mdu_version_t ver;
6496
6497 ver.major = MD_MAJOR_VERSION;
6498 ver.minor = MD_MINOR_VERSION;
6499 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6500
6501 if (copy_to_user(arg, &ver, sizeof(ver)))
6502 return -EFAULT;
6503
6504 return 0;
6505}
6506
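/* GET_ARRAY_INFO ioctl: snapshot the array geometry and per-state disk counts */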
6507static int get_array_info(struct mddev *mddev, void __user *arg)
6508{
6509 mdu_array_info_t info;
	int nr, working, insync, failed, spare;
6511 struct md_rdev *rdev;
6512
6513 nr = working = insync = failed = spare = 0;
6514 rcu_read_lock();
6515 rdev_for_each_rcu(rdev, mddev) {
6516 nr++;
6517 if (test_bit(Faulty, &rdev->flags))
6518 failed++;
6519 else {
6520 working++;
6521 if (test_bit(In_sync, &rdev->flags))
6522 insync++;
6523 else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
6525 ;
6526 else
6527 spare++;
6528 }
6529 }
6530 rcu_read_unlock();
6531
6532 info.major_version = mddev->major_version;
6533 info.minor_version = mddev->minor_version;
6534 info.patch_version = MD_PATCHLEVEL_VERSION;
6535 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6536 info.level = mddev->level;
6537 info.size = mddev->dev_sectors / 2;
6538 if (info.size != mddev->dev_sectors / 2)
6539 info.size = -1;
6540 info.nr_disks = nr;
6541 info.raid_disks = mddev->raid_disks;
6542 info.md_minor = mddev->md_minor;
	info.not_persistent = !mddev->persistent;
6544
6545 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6546 info.state = 0;
6547 if (mddev->in_sync)
6548 info.state = (1<<MD_SB_CLEAN);
6549 if (mddev->bitmap && mddev->bitmap_info.offset)
6550 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6551 if (mddev_is_clustered(mddev))
6552 info.state |= (1<<MD_SB_CLUSTERED);
6553 info.active_disks = insync;
6554 info.working_disks = working;
6555 info.failed_disks = failed;
6556 info.spare_disks = spare;
6557
6558 info.layout = mddev->layout;
6559 info.chunk_size = mddev->chunk_sectors << 9;
6560
6561 if (copy_to_user(arg, &info, sizeof(info)))
6562 return -EFAULT;
6563
6564 return 0;
6565}
6566
6567static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6568{
6569 mdu_bitmap_file_t *file = NULL;
6570 char *ptr;
6571 int err;
6572
6573 file = kzalloc(sizeof(*file), GFP_NOIO);
6574 if (!file)
6575 return -ENOMEM;
6576
6577 err = 0;
6578 spin_lock(&mddev->lock);
	/* bitmap enabled */
6580 if (mddev->bitmap_info.file) {
6581 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6582 sizeof(file->pathname));
6583 if (IS_ERR(ptr))
6584 err = PTR_ERR(ptr);
6585 else
6586 memmove(file->pathname, ptr,
6587 sizeof(file->pathname)-(ptr-file->pathname));
6588 }
6589 spin_unlock(&mddev->lock);
6590
6591 if (err == 0 &&
6592 copy_to_user(arg, file, sizeof(*file)))
6593 err = -EFAULT;
6594
6595 kfree(file);
6596 return err;
6597}
6598
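/* GET_DISK_INFO ioctl: look up an rdev by info.number and report its state bits */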
6599static int get_disk_info(struct mddev *mddev, void __user * arg)
6600{
6601 mdu_disk_info_t info;
6602 struct md_rdev *rdev;
6603
6604 if (copy_from_user(&info, arg, sizeof(info)))
6605 return -EFAULT;
6606
6607 rcu_read_lock();
6608 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6609 if (rdev) {
6610 info.major = MAJOR(rdev->bdev->bd_dev);
6611 info.minor = MINOR(rdev->bdev->bd_dev);
6612 info.raid_disk = rdev->raid_disk;
6613 info.state = 0;
6614 if (test_bit(Faulty, &rdev->flags))
6615 info.state |= (1<<MD_DISK_FAULTY);
6616 else if (test_bit(In_sync, &rdev->flags)) {
6617 info.state |= (1<<MD_DISK_ACTIVE);
6618 info.state |= (1<<MD_DISK_SYNC);
6619 }
6620 if (test_bit(Journal, &rdev->flags))
6621 info.state |= (1<<MD_DISK_JOURNAL);
6622 if (test_bit(WriteMostly, &rdev->flags))
6623 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6624 if (test_bit(FailFast, &rdev->flags))
6625 info.state |= (1<<MD_DISK_FAILFAST);
6626 } else {
6627 info.major = info.minor = 0;
6628 info.raid_disk = -1;
6629 info.state = (1<<MD_DISK_REMOVED);
6630 }
6631 rcu_read_unlock();
6632
6633 if (copy_to_user(arg, &info, sizeof(info)))
6634 return -EFAULT;
6635
6636 return 0;
6637}
6638
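/*
 * ADD_NEW_DISK handling.  Three cases: assembling a not-yet-active array
 * (superblock loaded and cross-checked against the first disk), hot-adding
 * to a running array through the personality's hot_add_disk, and the
 * legacy path for 0.90 superblocks at the bottom of the function.
 */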
6639static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6640{
6641 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6642 struct md_rdev *rdev;
6643 dev_t dev = MKDEV(info->major,info->minor);
6644
6645 if (mddev_is_clustered(mddev) &&
6646 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6647 pr_warn("%s: Cannot add to clustered mddev.\n",
6648 mdname(mddev));
6649 return -EINVAL;
6650 }
6651
6652 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6653 return -EOVERFLOW;
6654
6655 if (!mddev->raid_disks) {
6656 int err;
6657
6658 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6659 if (IS_ERR(rdev)) {
6660 pr_warn("md: md_import_device returned %ld\n",
6661 PTR_ERR(rdev));
6662 return PTR_ERR(rdev);
6663 }
6664 if (!list_empty(&mddev->disks)) {
6665 struct md_rdev *rdev0
6666 = list_entry(mddev->disks.next,
6667 struct md_rdev, same_set);
6668 err = super_types[mddev->major_version]
6669 .load_super(rdev, rdev0, mddev->minor_version);
6670 if (err < 0) {
6671 pr_warn("md: %s has different UUID to %s\n",
6672 bdevname(rdev->bdev,b),
6673 bdevname(rdev0->bdev,b2));
6674 export_rdev(rdev);
6675 return -EINVAL;
6676 }
6677 }
6678 err = bind_rdev_to_array(rdev, mddev);
6679 if (err)
6680 export_rdev(rdev);
6681 return err;
6682 }
6683
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
6689 if (mddev->pers) {
6690 int err;
6691 if (!mddev->pers->hot_add_disk) {
6692 pr_warn("%s: personality does not support diskops!\n",
6693 mdname(mddev));
6694 return -EINVAL;
6695 }
6696 if (mddev->persistent)
6697 rdev = md_import_device(dev, mddev->major_version,
6698 mddev->minor_version);
6699 else
6700 rdev = md_import_device(dev, -1, -1);
6701 if (IS_ERR(rdev)) {
6702 pr_warn("md: md_import_device returned %ld\n",
6703 PTR_ERR(rdev));
6704 return PTR_ERR(rdev);
6705 }
6706
6707 if (!mddev->persistent) {
6708 if (info->state & (1<<MD_DISK_SYNC) &&
6709 info->raid_disk < mddev->raid_disks) {
6710 rdev->raid_disk = info->raid_disk;
6711 set_bit(In_sync, &rdev->flags);
6712 clear_bit(Bitmap_sync, &rdev->flags);
6713 } else
6714 rdev->raid_disk = -1;
6715 rdev->saved_raid_disk = rdev->raid_disk;
6716 } else
6717 super_types[mddev->major_version].
6718 validate_super(mddev, rdev);
6719 if ((info->state & (1<<MD_DISK_SYNC)) &&
6720 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
6724 export_rdev(rdev);
6725 return -EINVAL;
6726 }
6727
		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6729 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6730 set_bit(WriteMostly, &rdev->flags);
6731 else
6732 clear_bit(WriteMostly, &rdev->flags);
6733 if (info->state & (1<<MD_DISK_FAILFAST))
6734 set_bit(FailFast, &rdev->flags);
6735 else
6736 clear_bit(FailFast, &rdev->flags);
6737
6738 if (info->state & (1<<MD_DISK_JOURNAL)) {
6739 struct md_rdev *rdev2;
6740 bool has_journal = false;
6741
			/* make sure no existing journal disk */
6743 rdev_for_each(rdev2, mddev) {
6744 if (test_bit(Journal, &rdev2->flags)) {
6745 has_journal = true;
6746 break;
6747 }
6748 }
6749 if (has_journal || mddev->bitmap) {
6750 export_rdev(rdev);
6751 return -EBUSY;
6752 }
6753 set_bit(Journal, &rdev->flags);
6754 }
		/*
		 * check whether the device shows up in other nodes
		 */
6758 if (mddev_is_clustered(mddev)) {
6759 if (info->state & (1 << MD_DISK_CANDIDATE))
6760 set_bit(Candidate, &rdev->flags);
6761 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6763 err = md_cluster_ops->add_new_disk(mddev, rdev);
6764 if (err) {
6765 export_rdev(rdev);
6766 return err;
6767 }
6768 }
6769 }
6770
6771 rdev->raid_disk = -1;
6772 err = bind_rdev_to_array(rdev, mddev);
6773
6774 if (err)
6775 export_rdev(rdev);
6776
6777 if (mddev_is_clustered(mddev)) {
6778 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6779 if (!err) {
6780 err = md_cluster_ops->new_disk_ack(mddev,
6781 err == 0);
6782 if (err)
6783 md_kick_rdev_from_array(rdev);
6784 }
6785 } else {
6786 if (err)
6787 md_cluster_ops->add_new_disk_cancel(mddev);
6788 else
6789 err = add_bound_rdev(rdev);
6790 }
6791
6792 } else if (!err)
6793 err = add_bound_rdev(rdev);
6794
6795 return err;
6796 }
6797
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6801 if (mddev->major_version != 0) {
6802 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6803 return -EINVAL;
6804 }
6805
6806 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6807 int err;
6808 rdev = md_import_device(dev, -1, 0);
6809 if (IS_ERR(rdev)) {
6810 pr_warn("md: error, md_import_device() returned %ld\n",
6811 PTR_ERR(rdev));
6812 return PTR_ERR(rdev);
6813 }
6814 rdev->desc_nr = info->number;
6815 if (info->raid_disk < mddev->raid_disks)
6816 rdev->raid_disk = info->raid_disk;
6817 else
6818 rdev->raid_disk = -1;
6819
6820 if (rdev->raid_disk < mddev->raid_disks)
6821 if (info->state & (1<<MD_DISK_SYNC))
6822 set_bit(In_sync, &rdev->flags);
6823
6824 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6825 set_bit(WriteMostly, &rdev->flags);
6826 if (info->state & (1<<MD_DISK_FAILFAST))
6827 set_bit(FailFast, &rdev->flags);
6828
6829 if (!mddev->persistent) {
6830 pr_debug("md: nonpersistent superblock ...\n");
6831 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6832 } else
6833 rdev->sb_start = calc_dev_sboffset(rdev);
6834 rdev->sectors = rdev->sb_start;
6835
6836 err = bind_rdev_to_array(rdev, mddev);
6837 if (err) {
6838 export_rdev(rdev);
6839 return err;
6840 }
6841 }
6842
6843 return 0;
6844}
6845
6846static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6847{
6848 char b[BDEVNAME_SIZE];
6849 struct md_rdev *rdev;
6850
6851 if (!mddev->pers)
6852 return -ENODEV;
6853
6854 rdev = find_rdev(mddev, dev);
6855 if (!rdev)
6856 return -ENXIO;
6857
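	/* A device that is not an active array member can be kicked
	 * out immediately; an active one must first be failed and
	 * drained via remove_and_add_spares() below.
	 */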
6858 if (rdev->raid_disk < 0)
6859 goto kick_rdev;
6860
6861 clear_bit(Blocked, &rdev->flags);
6862 remove_and_add_spares(mddev, rdev);
6863
6864 if (rdev->raid_disk >= 0)
6865 goto busy;
6866
6867kick_rdev:
6868 if (mddev_is_clustered(mddev))
6869 md_cluster_ops->remove_disk(mddev, rdev);
6870
6871 md_kick_rdev_from_array(rdev);
6872 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6873 if (mddev->thread)
6874 md_wakeup_thread(mddev->thread);
6875 else
6876 md_update_sb(mddev, 1);
6877 md_new_event(mddev);
6878
6879 return 0;
6880busy:
6881 pr_debug("md: cannot remove active disk %s from %s ...\n",
6882 bdevname(rdev->bdev,b), mdname(mddev));
6883 return -EBUSY;
6884}
6885
6886static int hot_add_disk(struct mddev *mddev, dev_t dev)
6887{
6888 char b[BDEVNAME_SIZE];
6889 int err;
6890 struct md_rdev *rdev;
6891
6892 if (!mddev->pers)
6893 return -ENODEV;
6894
6895 if (mddev->major_version != 0) {
6896 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6897 mdname(mddev));
6898 return -EINVAL;
6899 }
6900 if (!mddev->pers->hot_add_disk) {
6901 pr_warn("%s: personality does not support diskops!\n",
6902 mdname(mddev));
6903 return -EINVAL;
6904 }
6905
6906 rdev = md_import_device(dev, -1, 0);
6907 if (IS_ERR(rdev)) {
6908 pr_warn("md: error, md_import_device() returned %ld\n",
6909 PTR_ERR(rdev));
6910 return -EINVAL;
6911 }
6912
6913 if (mddev->persistent)
6914 rdev->sb_start = calc_dev_sboffset(rdev);
6915 else
6916 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6917
6918 rdev->sectors = rdev->sb_start;
6919
6920 if (test_bit(Faulty, &rdev->flags)) {
6921 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6922 bdevname(rdev->bdev,b), mdname(mddev));
6923 err = -EINVAL;
6924 goto abort_export;
6925 }
6926
6927 clear_bit(In_sync, &rdev->flags);
6928 rdev->desc_nr = -1;
6929 rdev->saved_raid_disk = -1;
6930 err = bind_rdev_to_array(rdev, mddev);
6931 if (err)
6932 goto abort_export;
6933
	/*
	 * The rest had better be atomic: we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6939 rdev->raid_disk = -1;
6940
6941 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6942 if (!mddev->thread)
6943 md_update_sb(mddev, 1);
6944
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
6948 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6949 md_wakeup_thread(mddev->thread);
6950 md_new_event(mddev);
6951 return 0;
6952
6953abort_export:
6954 export_rdev(rdev);
6955 return err;
6956}
6957
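/* Attach (fd >= 0) or detach (fd < 0) a file-backed write-intent bitmap. */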
6958static int set_bitmap_file(struct mddev *mddev, int fd)
6959{
6960 int err = 0;
6961
6962 if (mddev->pers) {
6963 if (!mddev->pers->quiesce || !mddev->thread)
6964 return -EBUSY;
6965 if (mddev->recovery || mddev->sync_thread)
6966 return -EBUSY;
		/* we should be able to change the bitmap. */
6968 }
6969
6970 if (fd >= 0) {
6971 struct inode *inode;
6972 struct file *f;
6973
6974 if (mddev->bitmap || mddev->bitmap_info.file)
6975 return -EEXIST;
6976 f = fget(fd);
6977
6978 if (f == NULL) {
6979 pr_warn("%s: error: failed to get bitmap file\n",
6980 mdname(mddev));
6981 return -EBADF;
6982 }
6983
6984 inode = f->f_mapping->host;
6985 if (!S_ISREG(inode->i_mode)) {
6986 pr_warn("%s: error: bitmap file must be a regular file\n",
6987 mdname(mddev));
6988 err = -EBADF;
6989 } else if (!(f->f_mode & FMODE_WRITE)) {
			pr_warn("%s: error: bitmap file must be opened for write\n",
6991 mdname(mddev));
6992 err = -EBADF;
6993 } else if (atomic_read(&inode->i_writecount) != 1) {
6994 pr_warn("%s: error: bitmap file is already in use\n",
6995 mdname(mddev));
6996 err = -EBUSY;
6997 }
6998 if (err) {
6999 fput(f);
7000 return err;
7001 }
7002 mddev->bitmap_info.file = f;
7003 mddev->bitmap_info.offset = 0;
7004 } else if (mddev->bitmap == NULL)
		return -ENOENT; /* cannot remove what isn't there */
7006 err = 0;
7007 if (mddev->pers) {
7008 if (fd >= 0) {
7009 struct bitmap *bitmap;
7010
7011 bitmap = md_bitmap_create(mddev, -1);
7012 mddev_suspend(mddev);
7013 if (!IS_ERR(bitmap)) {
7014 mddev->bitmap = bitmap;
7015 err = md_bitmap_load(mddev);
7016 } else
7017 err = PTR_ERR(bitmap);
7018 if (err) {
7019 md_bitmap_destroy(mddev);
7020 fd = -1;
7021 }
7022 mddev_resume(mddev);
7023 } else if (fd < 0) {
7024 mddev_suspend(mddev);
7025 md_bitmap_destroy(mddev);
7026 mddev_resume(mddev);
7027 }
7028 }
7029 if (fd < 0) {
7030 struct file *f = mddev->bitmap_info.file;
7031 if (f) {
7032 spin_lock(&mddev->lock);
7033 mddev->bitmap_info.file = NULL;
7034 spin_unlock(&mddev->lock);
7035 fput(f);
7036 }
7037 }
7038
7039 return err;
7040}
7041
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the
 *  devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
7055static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
7056{
7057
7058 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
7060 if (info->major_version < 0 ||
7061 info->major_version >= ARRAY_SIZE(super_types) ||
7062 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
7064 pr_warn("md: superblock version %d not known\n",
7065 info->major_version);
7066 return -EINVAL;
7067 }
7068 mddev->major_version = info->major_version;
7069 mddev->minor_version = info->minor_version;
7070 mddev->patch_version = info->patch_version;
7071 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
7075 mddev->ctime = ktime_get_real_seconds();
7076 return 0;
7077 }
7078 mddev->major_version = MD_MAJOR_VERSION;
7079 mddev->minor_version = MD_MINOR_VERSION;
7080 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7081 mddev->ctime = ktime_get_real_seconds();
7082
7083 mddev->level = info->level;
7084 mddev->clevel[0] = 0;
7085 mddev->dev_sectors = 2 * (sector_t)info->size;
7086 mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
7090 if (info->state & (1<<MD_SB_CLEAN))
7091 mddev->recovery_cp = MaxSector;
7092 else
7093 mddev->recovery_cp = 0;
7094 mddev->persistent = ! info->not_persistent;
7095 mddev->external = 0;
7096
7097 mddev->layout = info->layout;
7098 if (mddev->level == 0)
		/* Cannot trust RAID0 layout info here */
7100 mddev->layout = -1;
7101 mddev->chunk_sectors = info->chunk_size >> 9;
7102
7103 if (mddev->persistent) {
7104 mddev->max_disks = MD_SB_DISKS;
7105 mddev->flags = 0;
7106 mddev->sb_flags = 0;
7107 }
7108 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7109
7110 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7111 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7112 mddev->bitmap_info.offset = 0;
7113
7114 mddev->reshape_position = MaxSector;
7115
	/*
	 * Generate a 128 bit UUID
	 */
7119 get_random_bytes(mddev->uuid, 16);
7120
7121 mddev->new_level = mddev->level;
7122 mddev->new_chunk_sectors = mddev->chunk_sectors;
7123 mddev->new_layout = mddev->layout;
7124 mddev->delta_disks = 0;
7125 mddev->reshape_backwards = 0;
7126
7127 return 0;
7128}
7129
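/* Record the exported size of the array, unless userspace has pinned
 * an explicit size through sysfs (external_size), in which case the
 * personality's calculation is ignored.
 */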
7130void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7131{
7132 lockdep_assert_held(&mddev->reconfig_mutex);
7133
7134 if (mddev->external_size)
7135 return;
7136
7137 mddev->array_sectors = array_sectors;
7138}
7139EXPORT_SYMBOL(md_set_array_sectors);
7140
7141static int update_size(struct mddev *mddev, sector_t num_sectors)
7142{
7143 struct md_rdev *rdev;
7144 int rv;
7145 int fit = (num_sectors == 0);
7146 sector_t old_dev_sectors = mddev->dev_sectors;
7147
7148 if (mddev->pers->resize == NULL)
7149 return -EINVAL;
7150
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
7159 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7160 mddev->sync_thread)
7161 return -EBUSY;
7162 if (mddev->ro)
7163 return -EROFS;
7164
7165 rdev_for_each(rdev, mddev) {
7166 sector_t avail = rdev->sectors;
7167
7168 if (fit && (num_sectors == 0 || num_sectors > avail))
7169 num_sectors = avail;
7170 if (avail < num_sectors)
7171 return -ENOSPC;
7172 }
7173 rv = mddev->pers->resize(mddev, num_sectors);
7174 if (!rv) {
7175 if (mddev_is_clustered(mddev))
7176 md_cluster_ops->update_size(mddev, old_dev_sectors);
7177 else if (mddev->queue) {
7178 set_capacity(mddev->gendisk, mddev->array_sectors);
7179 revalidate_disk(mddev->gendisk);
7180 }
7181 }
7182 return rv;
7183}
7184
7185static int update_raid_disks(struct mddev *mddev, int raid_disks)
7186{
7187 int rv;
7188 struct md_rdev *rdev;
7189
7190 if (mddev->pers->check_reshape == NULL)
7191 return -EINVAL;
7192 if (mddev->ro)
7193 return -EROFS;
7194 if (raid_disks <= 0 ||
7195 (mddev->max_disks && raid_disks >= mddev->max_disks))
7196 return -EINVAL;
7197 if (mddev->sync_thread ||
7198 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7199 mddev->reshape_position != MaxSector)
7200 return -EBUSY;
7201
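	/* Growing the array must not require data to move to a higher
	 * data_offset, and shrinking must not require it to move lower:
	 * either would risk overwriting live data during the reshape.
	 */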
7202 rdev_for_each(rdev, mddev) {
7203 if (mddev->raid_disks < raid_disks &&
7204 rdev->data_offset < rdev->new_data_offset)
7205 return -EINVAL;
7206 if (mddev->raid_disks > raid_disks &&
7207 rdev->data_offset > rdev->new_data_offset)
7208 return -EINVAL;
7209 }
7210
7211 mddev->delta_disks = raid_disks - mddev->raid_disks;
7212 if (mddev->delta_disks < 0)
7213 mddev->reshape_backwards = 1;
7214 else if (mddev->delta_disks > 0)
7215 mddev->reshape_backwards = 0;
7216
7217 rv = mddev->pers->check_reshape(mddev);
7218 if (rv < 0) {
7219 mddev->delta_disks = 0;
7220 mddev->reshape_backwards = 0;
7221 }
7222 return rv;
7223}
7224
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, size, raid_disks, layout and chunk_size
 * can only be changed with an active array.
 * We can change the major version of an existing array
 * if appropriate, and minor version (that just stores it in
 * the superblock)
 */
7233static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7234{
7235 int rv = 0;
7236 int cnt = 0;
7237 int state = 0;
7238
	/* calculate expected state, ignoring low bits */
7240 if (mddev->bitmap && mddev->bitmap_info.offset)
7241 state |= (1 << MD_SB_BITMAP_PRESENT);
7242
7243 if (mddev->major_version != info->major_version ||
7244 mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
7246 mddev->ctime != info->ctime ||
7247 mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
7249 mddev->persistent != !info->not_persistent ||
7250 mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
7252 ((state^info->state) & 0xfffffe00)
7253 )
7254 return -EINVAL;
	/* Check there is only one change */
7256 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7257 cnt++;
7258 if (mddev->raid_disks != info->raid_disks)
7259 cnt++;
7260 if (mddev->layout != info->layout)
7261 cnt++;
7262 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7263 cnt++;
7264 if (cnt == 0)
7265 return 0;
7266 if (cnt > 1)
7267 return -EINVAL;
7268
7269 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
7274 if (mddev->pers->check_reshape == NULL)
7275 return -EINVAL;
7276 else {
7277 mddev->new_layout = info->layout;
7278 rv = mddev->pers->check_reshape(mddev);
7279 if (rv)
7280 mddev->new_layout = mddev->layout;
7281 return rv;
7282 }
7283 }
7284 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7285 rv = update_size(mddev, (sector_t)info->size * 2);
7286
7287 if (mddev->raid_disks != info->raid_disks)
7288 rv = update_raid_disks(mddev, info->raid_disks);
7289
7290 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7291 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7292 rv = -EINVAL;
7293 goto err;
7294 }
7295 if (mddev->recovery || mddev->sync_thread) {
7296 rv = -EBUSY;
7297 goto err;
7298 }
7299 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7300 struct bitmap *bitmap;
7301
7302 if (mddev->bitmap) {
7303 rv = -EEXIST;
7304 goto err;
7305 }
7306 if (mddev->bitmap_info.default_offset == 0) {
7307 rv = -EINVAL;
7308 goto err;
7309 }
7310 mddev->bitmap_info.offset =
7311 mddev->bitmap_info.default_offset;
7312 mddev->bitmap_info.space =
7313 mddev->bitmap_info.default_space;
7314 bitmap = md_bitmap_create(mddev, -1);
7315 mddev_suspend(mddev);
7316 if (!IS_ERR(bitmap)) {
7317 mddev->bitmap = bitmap;
7318 rv = md_bitmap_load(mddev);
7319 } else
7320 rv = PTR_ERR(bitmap);
7321 if (rv)
7322 md_bitmap_destroy(mddev);
7323 mddev_resume(mddev);
7324 } else {
			/* remove the bitmap */
7326 if (!mddev->bitmap) {
7327 rv = -ENOENT;
7328 goto err;
7329 }
7330 if (mddev->bitmap->storage.file) {
7331 rv = -EINVAL;
7332 goto err;
7333 }
7334 if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap locks */
7336 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7337 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7338 rv = -EPERM;
7339 md_cluster_ops->unlock_all_bitmaps(mddev);
7340 goto err;
7341 }
7342
7343 mddev->bitmap_info.nodes = 0;
7344 md_cluster_ops->leave(mddev);
7345 }
7346 mddev_suspend(mddev);
7347 md_bitmap_destroy(mddev);
7348 mddev_resume(mddev);
7349 mddev->bitmap_info.offset = 0;
7350 }
7351 }
7352 md_update_sb(mddev, 1);
7353 return rv;
7354err:
7355 return rv;
7356}
7357
7358static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7359{
7360 struct md_rdev *rdev;
7361 int err = 0;
7362
7363 if (mddev->pers == NULL)
7364 return -ENODEV;
7365
7366 rcu_read_lock();
7367 rdev = md_find_rdev_rcu(mddev, dev);
7368 if (!rdev)
7369 err = -ENODEV;
7370 else {
7371 md_error(mddev, rdev);
7372 if (!test_bit(Faulty, &rdev->flags))
7373 err = -EBUSY;
7374 }
7375 rcu_read_unlock();
7376 return err;
7377}
7378
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dosfs just mad... ;-)
 */
7385static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7386{
7387 struct mddev *mddev = bdev->bd_disk->private_data;
7388
7389 geo->heads = 2;
7390 geo->sectors = 4;
7391 geo->cylinders = mddev->array_sectors / 8;
7392 return 0;
7393}
7394
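/* Whitelist of ioctl commands that md_ioctl() is prepared to handle. */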
7395static inline bool md_ioctl_valid(unsigned int cmd)
7396{
7397 switch (cmd) {
7398 case ADD_NEW_DISK:
7399 case BLKROSET:
7400 case GET_ARRAY_INFO:
7401 case GET_BITMAP_FILE:
7402 case GET_DISK_INFO:
7403 case HOT_ADD_DISK:
7404 case HOT_REMOVE_DISK:
7405 case RAID_AUTORUN:
7406 case RAID_VERSION:
7407 case RESTART_ARRAY_RW:
7408 case RUN_ARRAY:
7409 case SET_ARRAY_INFO:
7410 case SET_BITMAP_FILE:
7411 case SET_DISK_FAULTY:
7412 case STOP_ARRAY:
7413 case STOP_ARRAY_RO:
7414 case CLUSTERED_DISK_NACK:
7415 return true;
7416 default:
7417 return false;
7418 }
7419}
7420
7421static int md_ioctl(struct block_device *bdev, fmode_t mode,
7422 unsigned int cmd, unsigned long arg)
7423{
7424 int err = 0;
7425 void __user *argp = (void __user *)arg;
7426 struct mddev *mddev = NULL;
7427 int ro;
7428 bool did_set_md_closing = false;
7429
7430 if (!md_ioctl_valid(cmd))
7431 return -ENOTTY;
7432
7433 switch (cmd) {
7434 case RAID_VERSION:
7435 case GET_ARRAY_INFO:
7436 case GET_DISK_INFO:
7437 break;
7438 default:
7439 if (!capable(CAP_SYS_ADMIN))
7440 return -EACCES;
7441 }
7442
7443
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7447 switch (cmd) {
7448 case RAID_VERSION:
7449 err = get_version(argp);
7450 goto out;
7451
7452#ifndef MODULE
7453 case RAID_AUTORUN:
7454 err = 0;
7455 autostart_arrays(arg);
7456 goto out;
7457#endif
7458 default:;
7459 }
7460
	/*
	 * Commands creating/starting a new array:
	 */

7465 mddev = bdev->bd_disk->private_data;
7466
7467 if (!mddev) {
7468 BUG();
7469 goto out;
7470 }
7471
	/* Some actions do not require the mutex */
7473 switch (cmd) {
7474 case GET_ARRAY_INFO:
7475 if (!mddev->raid_disks && !mddev->external)
7476 err = -ENODEV;
7477 else
7478 err = get_array_info(mddev, argp);
7479 goto out;
7480
7481 case GET_DISK_INFO:
7482 if (!mddev->raid_disks && !mddev->external)
7483 err = -ENODEV;
7484 else
7485 err = get_disk_info(mddev, argp);
7486 goto out;
7487
7488 case SET_DISK_FAULTY:
7489 err = set_disk_faulty(mddev, new_decode_dev(arg));
7490 goto out;
7491
7492 case GET_BITMAP_FILE:
7493 err = get_bitmap_file(mddev, argp);
7494 goto out;
7495
7496 }
7497
7498 if (cmd == ADD_NEW_DISK)
		/* need to ensure md_delayed_delete() has completed */
7500 flush_workqueue(md_misc_wq);
7501
7502 if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
7504 wait_event_interruptible_timeout(mddev->sb_wait,
7505 !test_bit(MD_RECOVERY_NEEDED,
7506 &mddev->recovery),
7507 msecs_to_jiffies(5000));
7508 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
7512 mutex_lock(&mddev->open_mutex);
7513 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7514 mutex_unlock(&mddev->open_mutex);
7515 err = -EBUSY;
7516 goto out;
7517 }
7518 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7519 set_bit(MD_CLOSING, &mddev->flags);
7520 did_set_md_closing = true;
7521 mutex_unlock(&mddev->open_mutex);
7522 sync_blockdev(bdev);
7523 }
7524 err = mddev_lock(mddev);
7525 if (err) {
7526 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7527 err, cmd);
7528 goto out;
7529 }
7530
7531 if (cmd == SET_ARRAY_INFO) {
7532 mdu_array_info_t info;
7533 if (!arg)
7534 memset(&info, 0, sizeof(info));
7535 else if (copy_from_user(&info, argp, sizeof(info))) {
7536 err = -EFAULT;
7537 goto unlock;
7538 }
7539 if (mddev->pers) {
7540 err = update_array_info(mddev, &info);
7541 if (err) {
7542 pr_warn("md: couldn't update array info. %d\n", err);
7543 goto unlock;
7544 }
7545 goto unlock;
7546 }
7547 if (!list_empty(&mddev->disks)) {
7548 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7549 err = -EBUSY;
7550 goto unlock;
7551 }
7552 if (mddev->raid_disks) {
7553 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7554 err = -EBUSY;
7555 goto unlock;
7556 }
7557 err = set_array_info(mddev, &info);
7558 if (err) {
7559 pr_warn("md: couldn't set array info. %d\n", err);
7560 goto unlock;
7561 }
7562 goto unlock;
7563 }
7564
	/*
	 * The remaining ioctls are only allowed on an
	 * active, unsuspended array
	 */
7570 if ((!mddev->raid_disks && !mddev->external)
7571 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7572 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7573 && cmd != GET_BITMAP_FILE) {
7574 err = -ENODEV;
7575 goto unlock;
7576 }
7577
	/*
	 * Commands even a read-only array can execute:
	 */
7581 switch (cmd) {
7582 case RESTART_ARRAY_RW:
7583 err = restart_array(mddev);
7584 goto unlock;
7585
7586 case STOP_ARRAY:
7587 err = do_md_stop(mddev, 0, bdev);
7588 goto unlock;
7589
7590 case STOP_ARRAY_RO:
7591 err = md_set_readonly(mddev, bdev);
7592 goto unlock;
7593
7594 case HOT_REMOVE_DISK:
7595 err = hot_remove_disk(mddev, new_decode_dev(arg));
7596 goto unlock;
7597
7598 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
7603 if (mddev->pers) {
7604 mdu_disk_info_t info;
7605 if (copy_from_user(&info, argp, sizeof(info)))
7606 err = -EFAULT;
7607 else if (!(info.state & (1<<MD_DISK_SYNC)))
				/* not a re-add of an In_sync device: needs
				 * the read-write path below */
7609 break;
7610 else
7611 err = add_new_disk(mddev, &info);
7612 goto unlock;
7613 }
7614 break;
7615
7616 case BLKROSET:
7617 if (get_user(ro, (int __user *)(arg))) {
7618 err = -EFAULT;
7619 goto unlock;
7620 }
7621 err = -EINVAL;

		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
7626 if (ro)
7627 goto unlock;

		/* are we already prepared for writes? */
7630 if (mddev->ro != 1)
7631 goto unlock;
7632
		/* transitioning to readauto need only happen for
		 * arrays that call md_write_start
		 */
7636 if (mddev->pers) {
7637 err = restart_array(mddev);
7638 if (err == 0) {
7639 mddev->ro = 2;
7640 set_disk_ro(mddev->gendisk, 0);
7641 }
7642 }
7643 goto unlock;
7644 }
7645
	/* The remaining commands need a writable array: promote an
	 * auto-read-only array to read-write here, and reject a
	 * genuinely read-only one with -EROFS.
	 */
7650 if (mddev->ro && mddev->pers) {
7651 if (mddev->ro == 2) {
7652 mddev->ro = 0;
7653 sysfs_notify_dirent_safe(mddev->sysfs_state);
7654 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* mddev_unlock will wake thread */
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
7659 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7660 mddev_unlock(mddev);
7661 wait_event(mddev->sb_wait,
7662 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7663 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7664 mddev_lock_nointr(mddev);
7665 }
7666 } else {
7667 err = -EROFS;
7668 goto unlock;
7669 }
7670 }
7671
7672 switch (cmd) {
7673 case ADD_NEW_DISK:
7674 {
7675 mdu_disk_info_t info;
7676 if (copy_from_user(&info, argp, sizeof(info)))
7677 err = -EFAULT;
7678 else
7679 err = add_new_disk(mddev, &info);
7680 goto unlock;
7681 }
7682
7683 case CLUSTERED_DISK_NACK:
7684 if (mddev_is_clustered(mddev))
7685 md_cluster_ops->new_disk_ack(mddev, false);
7686 else
7687 err = -EINVAL;
7688 goto unlock;
7689
7690 case HOT_ADD_DISK:
7691 err = hot_add_disk(mddev, new_decode_dev(arg));
7692 goto unlock;
7693
7694 case RUN_ARRAY:
7695 err = do_md_run(mddev);
7696 goto unlock;
7697
7698 case SET_BITMAP_FILE:
7699 err = set_bitmap_file(mddev, (int)arg);
7700 goto unlock;
7701
7702 default:
7703 err = -EINVAL;
7704 goto unlock;
7705 }
7706
7707unlock:
7708 if (mddev->hold_active == UNTIL_IOCTL &&
7709 err != -EINVAL)
7710 mddev->hold_active = 0;
7711 mddev_unlock(mddev);
7712out:
	if (did_set_md_closing)
7714 clear_bit(MD_CLOSING, &mddev->flags);
7715 return err;
7716}
7717#ifdef CONFIG_COMPAT
7718static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7719 unsigned int cmd, unsigned long arg)
7720{
7721 switch (cmd) {
7722 case HOT_REMOVE_DISK:
7723 case HOT_ADD_DISK:
7724 case SET_DISK_FAULTY:
7725 case SET_BITMAP_FILE:
		/* These take an integer argument, not a pointer */
7727 break;
7728 default:
7729 arg = (unsigned long)compat_ptr(arg);
7730 break;
7731 }
7732
7733 return md_ioctl(bdev, mode, cmd, arg);
7734}
7735#endif
7736
7737static int md_open(struct block_device *bdev, fmode_t mode)
7738{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7743 struct mddev *mddev = mddev_find(bdev->bd_dev);
7744 int err;
7745
7746 if (!mddev)
7747 return -ENODEV;
7748
7749 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
7757 return -ERESTARTSYS;
7758 }
7759 BUG_ON(mddev != bdev->bd_disk->private_data);
7760
7761 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7762 goto out;
7763
7764 if (test_bit(MD_CLOSING, &mddev->flags)) {
7765 mutex_unlock(&mddev->open_mutex);
7766 err = -ENODEV;
7767 goto out;
7768 }
7769
7770 err = 0;
7771 atomic_inc(&mddev->openers);
7772 mutex_unlock(&mddev->open_mutex);
7773
7774 check_disk_change(bdev);
7775 out:
7776 if (err)
7777 mddev_put(mddev);
7778 return err;
7779}
7780
7781static void md_release(struct gendisk *disk, fmode_t mode)
7782{
7783 struct mddev *mddev = disk->private_data;
7784
7785 BUG_ON(!mddev);
7786 atomic_dec(&mddev->openers);
7787 mddev_put(mddev);
7788}
7789
7790static int md_media_changed(struct gendisk *disk)
7791{
7792 struct mddev *mddev = disk->private_data;
7793
7794 return mddev->changed;
7795}
7796
7797static int md_revalidate(struct gendisk *disk)
7798{
7799 struct mddev *mddev = disk->private_data;
7800
7801 mddev->changed = 0;
7802 return 0;
7803}
7804static const struct block_device_operations md_fops =
7805{
7806 .owner = THIS_MODULE,
7807 .open = md_open,
7808 .release = md_release,
7809 .ioctl = md_ioctl,
7810#ifdef CONFIG_COMPAT
7811 .compat_ioctl = md_compat_ioctl,
7812#endif
7813 .getgeo = md_getgeo,
7814 .media_changed = md_media_changed,
7815 .revalidate_disk= md_revalidate,
7816};
7817
7818static int md_thread(void *arg)
7819{
7820 struct md_thread *thread = arg;
7821
	/*
	 * md_thread is a 'system-thread': its priority should be very
	 * high.  We avoid resource deadlocks individually in each
	 * thread.
	 *
	 * The loop below sleeps until THREAD_WAKEUP is set or the
	 * timeout expires, then calls the personality's handler,
	 * thread->run().  SIGKILL is allowed so that user space can
	 * interrupt a running resync.
	 */
7834 allow_signal(SIGKILL);
7835 while (!kthread_should_stop()) {
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7842 if (signal_pending(current))
7843 flush_signals(current);
7844
7845 wait_event_interruptible_timeout
7846 (thread->wqueue,
7847 test_bit(THREAD_WAKEUP, &thread->flags)
7848 || kthread_should_stop() || kthread_should_park(),
7849 thread->timeout);
7850
7851 clear_bit(THREAD_WAKEUP, &thread->flags);
7852 if (kthread_should_park())
7853 kthread_parkme();
7854 if (!kthread_should_stop())
7855 thread->run(thread);
7856 }
7857
7858 return 0;
7859}
7860
7861void md_wakeup_thread(struct md_thread *thread)
7862{
7863 if (thread) {
7864 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7865 set_bit(THREAD_WAKEUP, &thread->flags);
7866 wake_up(&thread->wqueue);
7867 }
7868}
7869EXPORT_SYMBOL(md_wakeup_thread);
7870
7871struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7872 struct mddev *mddev, const char *name)
7873{
7874 struct md_thread *thread;
7875
7876 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7877 if (!thread)
7878 return NULL;
7879
7880 init_waitqueue_head(&thread->wqueue);
7881
7882 thread->run = run;
7883 thread->mddev = mddev;
7884 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7885 thread->tsk = kthread_run(md_thread, thread,
7886 "%s_%s",
7887 mdname(thread->mddev),
7888 name);
7889 if (IS_ERR(thread->tsk)) {
7890 kfree(thread);
7891 return NULL;
7892 }
7893 return thread;
7894}
7895EXPORT_SYMBOL(md_register_thread);
7896
7897void md_unregister_thread(struct md_thread **threadp)
7898{
7899 struct md_thread *thread = *threadp;
7900 if (!thread)
7901 return;
7902 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
7906 spin_lock(&pers_lock);
7907 *threadp = NULL;
7908 spin_unlock(&pers_lock);
7909
7910 kthread_stop(thread->tsk);
7911 kfree(thread);
7912}
7913EXPORT_SYMBOL(md_unregister_thread);
7914
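/* Report a device failure: hand the device to the personality's error
 * handler and schedule recovery so a spare can take over if available.
 */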
7915void md_error(struct mddev *mddev, struct md_rdev *rdev)
7916{
7917 if (!rdev || test_bit(Faulty, &rdev->flags))
7918 return;
7919
7920 if (!mddev->pers || !mddev->pers->error_handler)
7921 return;
	mddev->pers->error_handler(mddev, rdev);
7923 if (mddev->degraded)
7924 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7925 sysfs_notify_dirent_safe(rdev->sysfs_state);
7926 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7927 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7928 md_wakeup_thread(mddev->thread);
7929 if (mddev->event_work.func)
7930 queue_work(md_misc_wq, &mddev->event_work);
7931 md_new_event(mddev);
7932}
7933EXPORT_SYMBOL(md_error);
7934
/* seq_file implementation for /proc/mdstat */
7937static void status_unused(struct seq_file *seq)
7938{
7939 int i = 0;
7940 struct md_rdev *rdev;
7941
7942 seq_printf(seq, "unused devices: ");
7943
7944 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7945 char b[BDEVNAME_SIZE];
7946 i++;
7947 seq_printf(seq, "%s ",
7948 bdevname(rdev->bdev,b));
7949 }
7950 if (!i)
7951 seq_printf(seq, "<none>");
7952
7953 seq_printf(seq, "\n");
7954}
7955
7956static int status_resync(struct seq_file *seq, struct mddev *mddev)
7957{
7958 sector_t max_sectors, resync, res;
7959 unsigned long dt, db = 0;
7960 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7961 int scale, recovery_active;
7962 unsigned int per_milli;
7963
7964 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7965 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7966 max_sectors = mddev->resync_max_sectors;
7967 else
7968 max_sectors = mddev->dev_sectors;
7969
7970 resync = mddev->curr_resync;
7971 if (resync <= 3) {
7972 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
7974 resync = max_sectors;
7975 } else if (resync > max_sectors)
7976 resync = max_sectors;
7977 else
7978 resync -= atomic_read(&mddev->recovery_active);
7979
7980 if (resync == 0) {
7981 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7982 struct md_rdev *rdev;
7983
7984 rdev_for_each(rdev, mddev)
7985 if (rdev->raid_disk >= 0 &&
7986 !test_bit(Faulty, &rdev->flags) &&
7987 rdev->recovery_offset != MaxSector &&
7988 rdev->recovery_offset) {
7989 seq_printf(seq, "\trecover=REMOTE");
7990 return 1;
7991 }
7992 if (mddev->reshape_position != MaxSector)
7993 seq_printf(seq, "\treshape=REMOTE");
7994 else
7995 seq_printf(seq, "\tresync=REMOTE");
7996 return 1;
7997 }
7998 if (mddev->recovery_cp < MaxSector) {
7999 seq_printf(seq, "\tresync=PENDING");
8000 return 1;
8001 }
8002 return 0;
8003 }
8004 if (resync < 3) {
8005 seq_printf(seq, "\tresync=DELAYED");
8006 return 1;
8007 }
8008
8009 WARN_ON(max_sectors == 0);
8010
	/* Pick 'scale' such that (max_sectors>>scale) fits in the
	 * 32-bit divisor that sector_div() requires, while keeping
	 * as much precision as possible.
	 */
8015 scale = 10;
8016 if (sizeof(sector_t) > sizeof(unsigned long)) {
8017 while ( max_sectors/2 > (1ULL<<(scale+32)))
8018 scale++;
8019 }
8020 res = (resync>>scale)*1000;
8021 sector_div(res, (u32)((max_sectors>>scale)+1));
8022
8023 per_milli = res;
8024 {
8025 int i, x = per_milli/50, y = 20-x;
8026 seq_printf(seq, "[");
8027 for (i = 0; i < x; i++)
8028 seq_printf(seq, "=");
8029 seq_printf(seq, ">");
8030 for (i = 0; i < y; i++)
8031 seq_printf(seq, ".");
8032 seq_printf(seq, "] ");
8033 }
8034 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8035 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8036 "reshape" :
8037 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8038 "check" :
8039 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8040 "resync" : "recovery"))),
8041 per_milli/10, per_milli % 10,
8042 (unsigned long long) resync/2,
8043 (unsigned long long) max_sectors/2);
8044
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is computed as remaining_sectors / db * dt, i.e. the
	 * time needed to write the remaining sectors at the recent
	 * rate.  db is scaled down by 32 (and rt shifted back by 5
	 * afterwards) so the division stays in range, and the '+1'
	 * avoids a division by zero when db is very small.
	 */
8062 dt = ((jiffies - mddev->resync_mark) / HZ);
8063 if (!dt) dt++;
8064
8065 curr_mark_cnt = mddev->curr_mark_cnt;
8066 recovery_active = atomic_read(&mddev->recovery_active);
8067 resync_mark_cnt = mddev->resync_mark_cnt;
8068
8069 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8070 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8071
	rt = max_sectors - resync;    /* number of remaining sectors */
8073 rt = div64_u64(rt, db/32+1);
8074 rt *= dt;
8075 rt >>= 5;
8076
8077 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8078 ((unsigned long)rt % 60)/6);
8079
8080 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8081 return 1;
8082}
8083
8084static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8085{
8086 struct list_head *tmp;
8087 loff_t l = *pos;
8088 struct mddev *mddev;
8089
8090 if (l >= 0x10000)
8091 return NULL;
8092 if (!l--)
		/* header */
8094 return (void*)1;
8095
8096 spin_lock(&all_mddevs_lock);
8097 list_for_each(tmp,&all_mddevs)
8098 if (!l--) {
8099 mddev = list_entry(tmp, struct mddev, all_mddevs);
8100 mddev_get(mddev);
8101 spin_unlock(&all_mddevs_lock);
8102 return mddev;
8103 }
8104 spin_unlock(&all_mddevs_lock);
8105 if (!l--)
		return (void*)2; /* tail */
8107 return NULL;
8108}
8109
8110static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8111{
8112 struct list_head *tmp;
8113 struct mddev *next_mddev, *mddev = v;
8114
8115 ++*pos;
8116 if (v == (void*)2)
8117 return NULL;
8118
8119 spin_lock(&all_mddevs_lock);
8120 if (v == (void*)1)
8121 tmp = all_mddevs.next;
8122 else
8123 tmp = mddev->all_mddevs.next;
8124 if (tmp != &all_mddevs)
8125 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8126 else {
8127 next_mddev = (void*)2;
8128 *pos = 0x10000;
8129 }
8130 spin_unlock(&all_mddevs_lock);
8131
8132 if (v != (void*)1)
8133 mddev_put(mddev);
8134 return next_mddev;
8135
8136}
8137
8138static void md_seq_stop(struct seq_file *seq, void *v)
8139{
8140 struct mddev *mddev = v;
8141
8142 if (mddev && v != (void*)1 && v != (void*)2)
8143 mddev_put(mddev);
8144}
8145
8146static int md_seq_show(struct seq_file *seq, void *v)
8147{
8148 struct mddev *mddev = v;
8149 sector_t sectors;
8150 struct md_rdev *rdev;
8151
8152 if (v == (void*)1) {
8153 struct md_personality *pers;
8154 seq_printf(seq, "Personalities : ");
8155 spin_lock(&pers_lock);
8156 list_for_each_entry(pers, &pers_list, list)
8157 seq_printf(seq, "[%s] ", pers->name);
8158
8159 spin_unlock(&pers_lock);
8160 seq_printf(seq, "\n");
8161 seq->poll_event = atomic_read(&md_event_count);
8162 return 0;
8163 }
8164 if (v == (void*)2) {
8165 status_unused(seq);
8166 return 0;
8167 }
8168
8169 spin_lock(&mddev->lock);
8170 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8171 seq_printf(seq, "%s : %sactive", mdname(mddev),
8172 mddev->pers ? "" : "in");
8173 if (mddev->pers) {
8174 if (mddev->ro==1)
8175 seq_printf(seq, " (read-only)");
8176 if (mddev->ro==2)
8177 seq_printf(seq, " (auto-read-only)");
8178 seq_printf(seq, " %s", mddev->pers->name);
8179 }
8180
8181 sectors = 0;
8182 rcu_read_lock();
8183 rdev_for_each_rcu(rdev, mddev) {
8184 char b[BDEVNAME_SIZE];
8185 seq_printf(seq, " %s[%d]",
8186 bdevname(rdev->bdev,b), rdev->desc_nr);
8187 if (test_bit(WriteMostly, &rdev->flags))
8188 seq_printf(seq, "(W)");
8189 if (test_bit(Journal, &rdev->flags))
8190 seq_printf(seq, "(J)");
8191 if (test_bit(Faulty, &rdev->flags)) {
8192 seq_printf(seq, "(F)");
8193 continue;
8194 }
8195 if (rdev->raid_disk < 0)
8196 seq_printf(seq, "(S)");
8197 if (test_bit(Replacement, &rdev->flags))
8198 seq_printf(seq, "(R)");
8199 sectors += rdev->sectors;
8200 }
8201 rcu_read_unlock();
8202
8203 if (!list_empty(&mddev->disks)) {
8204 if (mddev->pers)
8205 seq_printf(seq, "\n %llu blocks",
8206 (unsigned long long)
8207 mddev->array_sectors / 2);
8208 else
8209 seq_printf(seq, "\n %llu blocks",
8210 (unsigned long long)sectors / 2);
8211 }
8212 if (mddev->persistent) {
8213 if (mddev->major_version != 0 ||
8214 mddev->minor_version != 90) {
8215 seq_printf(seq," super %d.%d",
8216 mddev->major_version,
8217 mddev->minor_version);
8218 }
8219 } else if (mddev->external)
8220 seq_printf(seq, " super external:%s",
8221 mddev->metadata_type);
8222 else
8223 seq_printf(seq, " super non-persistent");
8224
8225 if (mddev->pers) {
8226 mddev->pers->status(seq, mddev);
8227 seq_printf(seq, "\n ");
8228 if (mddev->pers->sync_request) {
8229 if (status_resync(seq, mddev))
8230 seq_printf(seq, "\n ");
8231 }
8232 } else
8233 seq_printf(seq, "\n ");
8234
8235 md_bitmap_status(seq, mddev->bitmap);
8236
8237 seq_printf(seq, "\n");
8238 }
8239 spin_unlock(&mddev->lock);
8240
8241 return 0;
8242}
8243
8244static const struct seq_operations md_seq_ops = {
8245 .start = md_seq_start,
8246 .next = md_seq_next,
8247 .stop = md_seq_stop,
8248 .show = md_seq_show,
8249};
8250
8251static int md_seq_open(struct inode *inode, struct file *file)
8252{
8253 struct seq_file *seq;
8254 int error;
8255
8256 error = seq_open(file, &md_seq_ops);
8257 if (error)
8258 return error;
8259
8260 seq = file->private_data;
8261 seq->poll_event = atomic_read(&md_event_count);
8262 return error;
8263}
8264
8265static int md_unloading;
8266static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8267{
8268 struct seq_file *seq = filp->private_data;
8269 __poll_t mask;
8270
8271 if (md_unloading)
8272 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8273 poll_wait(filp, &md_event_waiters, wait);
8274
	/* always allow read */
8276 mask = EPOLLIN | EPOLLRDNORM;
8277
8278 if (seq->poll_event != atomic_read(&md_event_count))
8279 mask |= EPOLLERR | EPOLLPRI;
8280 return mask;
8281}
8282
8283static const struct proc_ops mdstat_proc_ops = {
8284 .proc_open = md_seq_open,
8285 .proc_read = seq_read,
8286 .proc_lseek = seq_lseek,
8287 .proc_release = seq_release,
8288 .proc_poll = mdstat_poll,
8289};
8290
8291int register_md_personality(struct md_personality *p)
8292{
8293 pr_debug("md: %s personality registered for level %d\n",
8294 p->name, p->level);
8295 spin_lock(&pers_lock);
8296 list_add_tail(&p->list, &pers_list);
8297 spin_unlock(&pers_lock);
8298 return 0;
8299}
8300EXPORT_SYMBOL(register_md_personality);
8301
8302int unregister_md_personality(struct md_personality *p)
8303{
8304 pr_debug("md: %s personality unregistered\n", p->name);
8305 spin_lock(&pers_lock);
8306 list_del_init(&p->list);
8307 spin_unlock(&pers_lock);
8308 return 0;
8309}
8310EXPORT_SYMBOL(unregister_md_personality);
8311
8312int register_md_cluster_operations(struct md_cluster_operations *ops,
8313 struct module *module)
8314{
8315 int ret = 0;
8316 spin_lock(&pers_lock);
8317 if (md_cluster_ops != NULL)
8318 ret = -EALREADY;
8319 else {
8320 md_cluster_ops = ops;
8321 md_cluster_mod = module;
8322 }
8323 spin_unlock(&pers_lock);
8324 return ret;
8325}
8326EXPORT_SYMBOL(register_md_cluster_operations);
8327
8328int unregister_md_cluster_operations(void)
8329{
8330 spin_lock(&pers_lock);
8331 md_cluster_ops = NULL;
8332 spin_unlock(&pers_lock);
8333 return 0;
8334}
8335EXPORT_SYMBOL(unregister_md_cluster_operations);
8336
8337int md_setup_cluster(struct mddev *mddev, int nodes)
8338{
8339 if (!md_cluster_ops)
8340 request_module("md-cluster");
8341 spin_lock(&pers_lock);
	/* ensure module won't be unloaded */
8343 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8345 spin_unlock(&pers_lock);
8346 return -ENOENT;
8347 }
8348 spin_unlock(&pers_lock);
8349
8350 return md_cluster_ops->join(mddev, nodes);
8351}
8352
8353void md_cluster_stop(struct mddev *mddev)
8354{
8355 if (!md_cluster_ops)
8356 return;
8357 md_cluster_ops->leave(mddev);
8358 module_put(md_cluster_mod);
8359}
8360
8361static int is_mddev_idle(struct mddev *mddev, int init)
8362{
8363 struct md_rdev *rdev;
8364 int idle;
8365 int curr_events;
8366
8367 idle = 1;
8368 rcu_read_lock();
8369 rdev_for_each_rcu(rdev, mddev) {
8370 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8371 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8372 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_event to curr_events, then all that activity
		 * completing might make the array appear non-idle
		 * and resync will be slowed down even though there might
		 * not be any non-sync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * sync IO and 'this many events' (64) is not a problem.
		 */
8395 if (init || curr_events - rdev->last_events > 64) {
8396 rdev->last_events = curr_events;
8397 idle = 0;
8398 }
8399 }
8400 rcu_read_unlock();
8401 return idle;
8402}
8403
8404void md_done_sync(struct mddev *mddev, int blocks, int ok)
8405{
	/* another "blocks" (512byte) blocks have been synced */
8407 atomic_sub(blocks, &mddev->recovery_active);
8408 wake_up(&mddev->recovery_wait);
8409 if (!ok) {
8410 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8411 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8412 md_wakeup_thread(mddev->thread);
		/* stop recovery, signal do_sync */
8414 }
8415}
8416EXPORT_SYMBOL(md_done_sync);
8417
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 * A return value of 'false' means that the write wasn't recorded
 * and cannot proceed as the array is being suspended.
 */
8425bool md_write_start(struct mddev *mddev, struct bio *bi)
8426{
8427 int did_change = 0;
8428
8429 if (bio_data_dir(bi) != WRITE)
8430 return true;
8431
8432 BUG_ON(mddev->ro == 1);
8433 if (mddev->ro == 2) {
		/* need to switch to read/write */
8435 mddev->ro = 0;
8436 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8437 md_wakeup_thread(mddev->thread);
8438 md_wakeup_thread(mddev->sync_thread);
8439 did_change = 1;
8440 }
8441 rcu_read_lock();
8442 percpu_ref_get(&mddev->writes_pending);
	smp_mb(); /* Matches smp_mb() in set_in_sync() */
8444 if (mddev->safemode == 1)
8445 mddev->safemode = 0;
	/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8447 if (mddev->in_sync || mddev->sync_checkers) {
8448 spin_lock(&mddev->lock);
8449 if (mddev->in_sync) {
8450 mddev->in_sync = 0;
8451 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8452 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8453 md_wakeup_thread(mddev->thread);
8454 did_change = 1;
8455 }
8456 spin_unlock(&mddev->lock);
8457 }
8458 rcu_read_unlock();
8459 if (did_change)
8460 sysfs_notify_dirent_safe(mddev->sysfs_state);
8461 if (!mddev->has_superblocks)
8462 return true;
8463 wait_event(mddev->sb_wait,
8464 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8465 mddev->suspended);
8466 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8467 percpu_ref_put(&mddev->writes_pending);
8468 return false;
8469 }
8470 return true;
8471}
8472EXPORT_SYMBOL(md_write_start);
8473
/* md_write_inc can only be called when md_write_start() has
 * already been called at least once of the current request.
 * It increments the counter and is useful when a single request
 * is split into several parts.  Each part causes an increment and
 * so needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call md_write_inc() inside
 * a spinlocked region.
 */
8482void md_write_inc(struct mddev *mddev, struct bio *bi)
8483{
8484 if (bio_data_dir(bi) != WRITE)
8485 return;
8486 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8487 percpu_ref_get(&mddev->writes_pending);
8488}
8489EXPORT_SYMBOL(md_write_inc);
8490
8491void md_write_end(struct mddev *mddev)
8492{
8493 percpu_ref_put(&mddev->writes_pending);
8494
8495 if (mddev->safemode == 2)
8496 md_wakeup_thread(mddev->thread);
8497 else if (mddev->safemode_delay)
		/* The roundup() ensures this only performs locking once
		 * every ->safemode_delay jiffies
		 */
8501 mod_timer(&mddev->safemode_timer,
8502 roundup(jiffies, mddev->safemode_delay) +
8503 mddev->safemode_delay);
8504}
8505
8506EXPORT_SYMBOL(md_write_end);
8507
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 */
8514void md_allow_write(struct mddev *mddev)
8515{
8516 if (!mddev->pers)
8517 return;
8518 if (mddev->ro)
8519 return;
8520 if (!mddev->pers->sync_request)
8521 return;
8522
8523 spin_lock(&mddev->lock);
8524 if (mddev->in_sync) {
8525 mddev->in_sync = 0;
8526 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8527 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8528 if (mddev->safemode_delay &&
8529 mddev->safemode == 0)
8530 mddev->safemode = 1;
8531 spin_unlock(&mddev->lock);
8532 md_update_sb(mddev, 0);
8533 sysfs_notify_dirent_safe(mddev->sysfs_state);
		/* wait for the dirty state to be recorded in the metadata */
8535 wait_event(mddev->sb_wait,
8536 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8537 } else
8538 spin_unlock(&mddev->lock);
8539}
8540EXPORT_SYMBOL_GPL(md_allow_write);
8541
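/* Resync progress is sampled every SYNC_MARK_STEP jiffies across a
 * ring of SYNC_MARKS samples; the oldest mark is used to estimate
 * the current speed and remaining time.
 */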
8542#define SYNC_MARKS 10
8543#define SYNC_MARK_STEP (3*HZ)
8544#define UPDATE_FREQUENCY (5*60*HZ)
8545void md_do_sync(struct md_thread *thread)
8546{
8547 struct mddev *mddev = thread->mddev;
8548 struct mddev *mddev2;
8549 unsigned int currspeed = 0, window;
8550 sector_t max_sectors,j, io_sectors, recovery_done;
8551 unsigned long mark[SYNC_MARKS];
8552 unsigned long update_time;
8553 sector_t mark_cnt[SYNC_MARKS];
8554 int last_mark,m;
8555 struct list_head *tmp;
8556 sector_t last_check;
8557 int skipped = 0;
8558 struct md_rdev *rdev;
8559 char *desc, *action = NULL;
8560 struct blk_plug plug;
8561 int ret;
8562
	/* just in case thread restarts... */
8564 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8565 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8566 return;
8567 if (mddev->ro) {
8568 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8569 return;
8570 }
8571
8572 if (mddev_is_clustered(mddev)) {
8573 ret = md_cluster_ops->resync_start(mddev);
8574 if (ret)
8575 goto skip;
8576
8577 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8578 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8579 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8580 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8581 && ((unsigned long long)mddev->curr_resync_completed
8582 < (unsigned long long)mddev->resync_max_sectors))
8583 goto skip;
8584 }
8585
8586 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8587 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8588 desc = "data-check";
8589 action = "check";
8590 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8591 desc = "requested-resync";
8592 action = "repair";
8593 } else
8594 desc = "resync";
8595 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8596 desc = "reshape";
8597 else
8598 desc = "recovery";
8599
8600 mddev->last_sync_action = action ?: desc;
8601
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *	commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * >= ours.  If ours is 0, then the other guy will be 1 or 2
	 * so we continue softly.  If the other guy is 1, then we
	 * commence ours.  If both are 2, the one with the lower mddev
	 * address arbitrarily yields (sets curr_resync to 1) and waits
	 * for the other to finish.
	 */
8618 do {
8619 int mddev2_minor = -1;
8620 mddev->curr_resync = 2;
8621
8622 try_again:
8623 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8624 goto skip;
8625 for_each_mddev(mddev2, tmp) {
8626 if (mddev2 == mddev)
8627 continue;
8628 if (!mddev->parallel_resync
8629 && mddev2->curr_resync
8630 && match_mddev_units(mddev, mddev2)) {
8631 DEFINE_WAIT(wq);
8632 if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
8634 mddev->curr_resync = 1;
8635 wake_up(&resync_wait);
8636 }
8637 if (mddev > mddev2 && mddev->curr_resync == 1)
					/* no need to wait here, we can wait the next
					 * time 'round when curr_resync == 2
					 */
8641 continue;
8642
				/* We need to wait 'interruptible' so as not to
				 * contribute to the load average, and not to
				 * be caught by 'softlockup'
				 */
8646 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8647 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8648 mddev2->curr_resync >= mddev->curr_resync) {
8649 if (mddev2_minor != mddev2->md_minor) {
8650 mddev2_minor = mddev2->md_minor;
8651 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8652 desc, mdname(mddev),
8653 mdname(mddev2));
8654 }
8655 mddev_put(mddev2);
8656 if (signal_pending(current))
8657 flush_signals(current);
8658 schedule();
8659 finish_wait(&resync_wait, &wq);
8660 goto try_again;
8661 }
8662 finish_wait(&resync_wait, &wq);
8663 }
8664 }
8665 } while (mddev->curr_resync < 2);
8666
8667 j = 0;
8668 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
8672 max_sectors = mddev->resync_max_sectors;
8673 atomic64_set(&mddev->resync_mismatches, 0);
8674
8675 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8676 j = mddev->resync_min;
8677 else if (!mddev->bitmap)
8678 j = mddev->recovery_cp;
8679
8680 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8681 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restarting the reshape
		 * from the beginning
		 */
8687 if (mddev_is_clustered(mddev) &&
8688 mddev->reshape_position != MaxSector)
8689 j = mddev->reshape_position;
8690 } else {
		/* recovery follows the physical size of devices */
8692 max_sectors = mddev->dev_sectors;
8693 j = MaxSector;
8694 rcu_read_lock();
8695 rdev_for_each_rcu(rdev, mddev)
8696 if (rdev->raid_disk >= 0 &&
8697 !test_bit(Journal, &rdev->flags) &&
8698 !test_bit(Faulty, &rdev->flags) &&
8699 !test_bit(In_sync, &rdev->flags) &&
8700 rdev->recovery_offset < j)
8701 j = rdev->recovery_offset;
8702 rcu_read_unlock();
8703
		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
8712 if (mddev->bitmap) {
8713 mddev->pers->quiesce(mddev, 1);
8714 mddev->pers->quiesce(mddev, 0);
8715 }
8716 }
8717
8718 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8719 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8720 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8721 speed_max(mddev), desc);
8722
8723 is_mddev_idle(mddev, 1);
8724
8725 io_sectors = 0;
8726 for (m = 0; m < SYNC_MARKS; m++) {
8727 mark[m] = jiffies;
8728 mark_cnt[m] = io_sectors;
8729 }
8730 last_mark = 0;
8731 mddev->resync_mark = mark[last_mark];
8732 mddev->resync_mark_cnt = mark_cnt[last_mark];
8733
	/*
	 * Tune reconstruction:
	 */
8737 window = 32 * (PAGE_SIZE / 512);
8738 pr_debug("md: using %dk window, over a total of %lluk.\n",
8739 window/2, (unsigned long long)max_sectors/2);
8740
8741 atomic_set(&mddev->recovery_active, 0);
8742 last_check = 0;
8743
8744 if (j>2) {
8745 pr_debug("md: resuming %s of %s from checkpoint.\n",
8746 desc, mdname(mddev));
8747 mddev->curr_resync = j;
8748 } else
8749 mddev->curr_resync = 3;
8750 mddev->curr_resync_completed = j;
8751 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8752 md_new_event(mddev);
8753 update_time = jiffies;
8754
8755 blk_start_plug(&plug);
8756 while (j < max_sectors) {
8757 sector_t sectors;
8758
8759 skipped = 0;
8760
8761 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8762 ((mddev->curr_resync > mddev->curr_resync_completed &&
8763 (mddev->curr_resync - mddev->curr_resync_completed)
8764 > (max_sectors >> 4)) ||
8765 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8766 (j - mddev->curr_resync_completed)*2
8767 >= mddev->resync_max - mddev->curr_resync_completed ||
8768 mddev->curr_resync_completed > mddev->resync_max
8769 )) {
			/* time to update curr_resync_completed */
8771 wait_event(mddev->recovery_wait,
8772 atomic_read(&mddev->recovery_active) == 0);
8773 mddev->curr_resync_completed = j;
8774 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8775 j > mddev->recovery_cp)
8776 mddev->recovery_cp = j;
8777 update_time = jiffies;
8778 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8779 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8780 }
8781
8782 while (j >= mddev->resync_max &&
8783 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
			 */
			flush_signals(current); /* often get a spurious SIGKILL */
8789 wait_event_interruptible(mddev->recovery_wait,
8790 mddev->resync_max > j
8791 || test_bit(MD_RECOVERY_INTR,
8792 &mddev->recovery));
8793 }
8794
8795 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8796 break;
8797
8798 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8799 if (sectors == 0) {
8800 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8801 break;
8802 }
8803
8804 if (!skipped) {
8805 io_sectors += sectors;
8806 atomic_add(sectors, &mddev->recovery_active);
8807 }
8808
8809 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8810 break;
8811
8812 j += sectors;
8813 if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
8815 j = max_sectors;
8816 if (j > 2)
8817 mddev->curr_resync = j;
8818 mddev->curr_mark_cnt = io_sectors;
8819 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
8823 md_new_event(mddev);
8824
8825 if (last_check + window > io_sectors || j == max_sectors)
8826 continue;
8827
8828 last_check = io_sectors;
8829 repeat:
8830 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks */
8832 int next = (last_mark+1) % SYNC_MARKS;
8833
8834 mddev->resync_mark = mark[next];
8835 mddev->resync_mark_cnt = mark_cnt[next];
8836 mark[next] = jiffies;
8837 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8838 last_mark = next;
8839 }
8840
8841 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8842 break;
8843
		/*
		 * this loop exits only if we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8852 cond_resched();
8853
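		/* currspeed: KiB/sec completed since the last mark,
		 * excluding I/O that is still in flight.
		 */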
8854 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8855 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8856 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8857
8858 if (currspeed > speed_min(mddev)) {
8859 if (currspeed > speed_max(mddev)) {
8860 msleep(500);
8861 goto repeat;
8862 }
8863 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8868 wait_event(mddev->recovery_wait,
8869 !atomic_read(&mddev->recovery_active));
8870 }
8871 }
8872 }
8873 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8874 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8875 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8879 blk_finish_plug(&plug);
8880 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8881
8882 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8883 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8884 mddev->curr_resync > 3) {
8885 mddev->curr_resync_completed = mddev->curr_resync;
8886 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8887 }
8888 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8889
8890 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8891 mddev->curr_resync > 3) {
8892 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8893 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8894 if (mddev->curr_resync >= mddev->recovery_cp) {
8895 pr_debug("md: checkpointing %s of %s.\n",
8896 desc, mdname(mddev));
8897 if (test_bit(MD_RECOVERY_ERROR,
8898 &mddev->recovery))
8899 mddev->recovery_cp =
8900 mddev->curr_resync_completed;
8901 else
8902 mddev->recovery_cp =
8903 mddev->curr_resync;
8904 }
8905 } else
8906 mddev->recovery_cp = MaxSector;
8907 } else {
8908 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8909 mddev->curr_resync = MaxSector;
8910 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8911 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8912 rcu_read_lock();
8913 rdev_for_each_rcu(rdev, mddev)
8914 if (rdev->raid_disk >= 0 &&
8915 mddev->delta_disks >= 0 &&
8916 !test_bit(Journal, &rdev->flags) &&
8917 !test_bit(Faulty, &rdev->flags) &&
8918 !test_bit(In_sync, &rdev->flags) &&
8919 rdev->recovery_offset < mddev->curr_resync)
8920 rdev->recovery_offset = mddev->curr_resync;
8921 rcu_read_unlock();
8922 }
8923 }
8924 }
8925 skip:
	/* set CHANGE_PENDING here since maybe another update is needed,
	 * so other nodes are informed. It should be harmless for normal
	 * raid */
8929 set_mask_bits(&mddev->sb_flags, 0,
8930 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8931
8932 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8933 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8934 mddev->delta_disks > 0 &&
8935 mddev->pers->finish_reshape &&
8936 mddev->pers->size &&
8937 mddev->queue) {
8938 mddev_lock_nointr(mddev);
8939 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8940 mddev_unlock(mddev);
8941 if (!mddev_is_clustered(mddev)) {
8942 set_capacity(mddev->gendisk, mddev->array_sectors);
8943 revalidate_disk(mddev->gendisk);
8944 }
8945 }
8946
8947 spin_lock(&mddev->lock);
8948 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
8950 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8951 mddev->resync_min = 0;
8952 mddev->resync_max = MaxSector;
8953 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8954 mddev->resync_min = mddev->curr_resync_completed;
8955 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8956 mddev->curr_resync = 0;
8957 spin_unlock(&mddev->lock);
8958
8959 wake_up(&resync_wait);
8960 md_wakeup_thread(mddev->thread);
8961 return;
8962}
8963EXPORT_SYMBOL_GPL(md_do_sync);
8964
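/* Remove failed devices that are no longer in use and (re-)add any
 * candidate spares; returns the number of spares now being rebuilt.
 */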
8965static int remove_and_add_spares(struct mddev *mddev,
8966 struct md_rdev *this)
8967{
8968 struct md_rdev *rdev;
8969 int spares = 0;
8970 int removed = 0;
8971 bool remove_some = false;
8972
8973 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		/* Mustn't remove devices when resync thread is running */
8975 return 0;
8976
8977 rdev_for_each(rdev, mddev) {
8978 if ((this == NULL || rdev == this) &&
8979 rdev->raid_disk >= 0 &&
8980 !test_bit(Blocked, &rdev->flags) &&
8981 test_bit(Faulty, &rdev->flags) &&
8982 atomic_read(&rdev->nr_pending)==0) {
			/* Faulty non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented.
			 * Faulty devices without nr_pending shouldn't get
			 * blocked either, so it is safe to remove without
			 * waiting.
			 */
8988 remove_some = true;
8989 set_bit(RemoveSynchronized, &rdev->flags);
8990 }
8991 }
8992
8993 if (remove_some)
8994 synchronize_rcu();
8995 rdev_for_each(rdev, mddev) {
8996 if ((this == NULL || rdev == this) &&
8997 rdev->raid_disk >= 0 &&
8998 !test_bit(Blocked, &rdev->flags) &&
8999 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9000 (!test_bit(In_sync, &rdev->flags) &&
9001 !test_bit(Journal, &rdev->flags))) &&
9002 atomic_read(&rdev->nr_pending)==0)) {
9003 if (mddev->pers->hot_remove_disk(
9004 mddev, rdev) == 0) {
9005 sysfs_unlink_rdev(mddev, rdev);
9006 rdev->saved_raid_disk = rdev->raid_disk;
9007 rdev->raid_disk = -1;
9008 removed++;
9009 }
9010 }
9011 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9012 clear_bit(RemoveSynchronized, &rdev->flags);
9013 }
9014
9015 if (removed && mddev->kobj.sd)
9016 sysfs_notify(&mddev->kobj, NULL, "degraded");
9017
9018 if (this && removed)
9019 goto no_add;
9020
9021 rdev_for_each(rdev, mddev) {
9022 if (this && this != rdev)
9023 continue;
9024 if (test_bit(Candidate, &rdev->flags))
9025 continue;
9026 if (rdev->raid_disk >= 0 &&
9027 !test_bit(In_sync, &rdev->flags) &&
9028 !test_bit(Journal, &rdev->flags) &&
9029 !test_bit(Faulty, &rdev->flags))
9030 spares++;
9031 if (rdev->raid_disk >= 0)
9032 continue;
9033 if (test_bit(Faulty, &rdev->flags))
9034 continue;
9035 if (!test_bit(Journal, &rdev->flags)) {
9036 if (mddev->ro &&
9037 ! (rdev->saved_raid_disk >= 0 &&
9038 !test_bit(Bitmap_sync, &rdev->flags)))
9039 continue;
9040
9041 rdev->recovery_offset = 0;
9042 }
9043 if (mddev->pers->
9044 hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
9047 if (!test_bit(Journal, &rdev->flags))
9048 spares++;
9049 md_new_event(mddev);
9050 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9051 }
9052 }
9053no_add:
9054 if (removed)
9055 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9056 return spares;
9057}
9058
9059static void md_start_sync(struct work_struct *ws)
9060{
9061 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9062
9063 mddev->sync_thread = md_register_thread(md_do_sync,
9064 mddev,
9065 "resync");
9066 if (!mddev->sync_thread) {
9067 pr_warn("%s: could not start resync thread...\n",
9068 mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
9070 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9071 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9072 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9073 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9074 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9075 wake_up(&resync_wait);
9076 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9077 &mddev->recovery))
9078 if (mddev->sysfs_action)
9079 sysfs_notify_dirent_safe(mddev->sysfs_action);
9080 } else
9081 md_wakeup_thread(mddev->sync_thread);
9082 sysfs_notify_dirent_safe(mddev->sysfs_action);
9083 md_new_event(mddev);
9084}
9085
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there is no recovery thread running, start one.  This is done
 *     based on whether devices need to be removed or added, and whether
 *     a resync is needed or not.
 */
void md_check_recovery(struct mddev *mddev)
{
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
		smp_mb__after_atomic();
		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
			md_update_sb(mddev, 0);
		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
		wake_up(&mddev->sb_wait);
	}

	if (mddev->suspended)
		return;

	if (mddev->bitmap)
		md_bitmap_daemon_work(mddev);

	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
			pr_debug("md: %s in immediate safe mode\n",
				 mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return;
	if ( ! (
		(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->external == 0 && mddev->safemode == 1) ||
		(mddev->safemode == 2
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		))
		return;

	if (mddev_trylock(mddev)) {
		int spares = 0;
		bool try_set_sync = mddev->safemode != 0;

		if (!mddev->external && mddev->safemode == 1)
			mddev->safemode = 0;

		if (mddev->ro) {
			struct md_rdev *rdev;
			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving 'external' alone as it is conceptually
				 * a pointer to an external metadata.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			goto unlock;
		}

		if (mddev_is_clustered(mddev)) {
			struct md_rdev *rdev;
			/* kick the device if another node issued a
			 * remove disk.
			 */
			rdev_for_each(rdev, mddev) {
				if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
				    rdev->raid_disk < 0)
					md_kick_rdev_from_array(rdev);
			}
		}

		if (try_set_sync && !mddev->external && !mddev->in_sync) {
			spin_lock(&mddev->lock);
			set_in_sync(mddev);
			spin_unlock(&mddev->lock);
		}

		if (mddev->sb_flags)
			md_update_sb(mddev, 0);

		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			md_reap_sync_thread(mddev);
			goto unlock;
		}
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
		mddev->curr_resync_completed = 0;
		spin_lock(&mddev->lock);
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		spin_unlock(&mddev->lock);
		/* Clear some bits that might cause
		 * a transition in the personality:
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto not_running;
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto not_running;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
			goto not_running;

		if (mddev->pers->sync_request) {
			if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written.
				 */
				md_bitmap_write_all(mddev->bitmap);
			}
			INIT_WORK(&mddev->del_work, md_start_sync);
			queue_work(md_misc_wq, &mddev->del_work);
			goto unlock;
		}
	not_running:
		if (!mddev->sync_thread) {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			wake_up(&resync_wait);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}
	unlock:
		wake_up(&mddev->sb_wait);
		mddev_unlock(mddev);
	}
}
EXPORT_SYMBOL(md_check_recovery);

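/* Tidy up after the resync/recovery thread has exited (or been told to
 * stop): unregister it, activate any spares on success, clear the
 * MD_RECOVERY_* state bits and write the superblock out.
 */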
void md_reap_sync_thread(struct mddev *mddev)
{
	struct md_rdev *rdev;
	sector_t old_dev_sectors = mddev->dev_sectors;
	bool is_reshaped = false;

	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    mddev->degraded != mddev->raid_disks) {
		/* success... */
		/* activate any spares */
		if (mddev->pers->spare_active(mddev)) {
			sysfs_notify(&mddev->kobj, NULL,
				     "degraded");
			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		}
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    mddev->pers->finish_reshape) {
		mddev->pers->finish_reshape(mddev);
		if (mddev_is_clustered(mddev))
			is_reshaped = true;
	}

	/* If the array is no longer degraded, then any saved_raid_disk
	 * information must be scrapped.
	 */
	if (!mddev->degraded)
		rdev_for_each(rdev, mddev)
			rdev->saved_raid_disk = -1;

	md_update_sb(mddev, 1);
	/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update the size across the cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped
				      && !test_bit(MD_CLOSING, &mddev->flags))
		md_cluster_ops->update_size(mddev, old_dev_sectors);
	wake_up(&resync_wait);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
	if (mddev->event_work.func)
		queue_work(md_misc_wq, &mddev->event_work);
}
EXPORT_SYMBOL(md_reap_sync_thread);

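/* Wait (up to five seconds) for a blocked rdev to become unblocked,
 * then drop the pending-I/O reference taken by the caller.
 */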
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags) &&
			   !test_bit(BlockedBadBlocks, &rdev->flags),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);

void md_finish_reshape(struct mddev *mddev)
{
	/* called by the personality module when a reshape completes. */
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		if (rdev->data_offset > rdev->new_data_offset)
			rdev->sectors += rdev->data_offset - rdev->new_data_offset;
		else
			rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
		rdev->data_offset = rdev->new_data_offset;
	}
}
EXPORT_SYMBOL(md_finish_reshape);

/* Bad block management */

/* Returns 1 on success, 0 on failure */
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		       int is_new)
{
	struct mddev *mddev = rdev->mddev;
	int rv;

	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
	if (rv == 0) {
		/* Make sure they get written out promptly */
		if (test_bit(ExternalBbl, &rdev->flags))
			sysfs_notify(&rdev->kobj, NULL,
				     "unacknowledged_bad_blocks");
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(rdev->mddev->thread);
		return 1;
	} else
		return 0;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);

int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			 int is_new)
{
	int rv;

	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_clear(&rdev->badblocks, s, sectors);
	if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
		sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
	return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);

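/* Reboot notifier: quiesce every array we can lock, stopping writes and
 * switching persistent arrays into immediate safe mode, so a reboot does
 * not hit a superblock that is still marked dirty.
 */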
static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	struct mddev *mddev;
	int need_delay = 0;

	for_each_mddev(mddev, tmp) {
		if (mddev_trylock(mddev)) {
			if (mddev->pers)
				__md_stop_writes(mddev);
			if (mddev->persistent)
				mddev->safemode = 2;
			mddev_unlock(mddev);
		}
		need_delay = 1;
	}

	/*
	 * Certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
	if (need_delay)
		mdelay(1000*1);

	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
}

static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
		goto err_md;

	if ((ret = register_blkdev(0, "mdp")) < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

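/* Called on a cluster node after another node has updated the superblock:
 * apply any size, device-role or reshape changes recorded there to the
 * local view of the array.
 */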
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If the size was changed on another node, we need to
	 * do the resize here as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each(rdev2, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the roles changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
				pr_info("md: Removing Candidate device %s because add failed\n",
					bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			} else
				clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/*
			 * The device was activated on another node and no
			 * reshape is in progress, so activate it here too.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* wakeup mddev->thread here, so the array
				 * can perform resync with the new activated
				 * disk */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* device faulty
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
		update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/*
	 * Since mddev->delta_disks has already been updated in
	 * update_raid_disks, it is time to check the reshape.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * reshape is happening in the remote node, we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = le64_to_cpu(sb->reshape_position);
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* reshape was just finished on another node. */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

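/* Re-read the superblock of one rdev from disk into a freshly allocated
 * page, restoring the old page (and returning the error) if the reload
 * fails.
 */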
static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
			__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);
	/* Read the offset unconditionally, even if MD_FEATURE_RECOVERY_OFFSET
	 * is not set
	 */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify(&mddev->kobj, NULL, "degraded");

	put_page(swapout);
	return 0;
}

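/* Reload the superblock of the rdev with descriptor number 'nr' and
 * propagate any cluster-driven changes to the other members of the array.
 */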
void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
			break;
	}

	if (!rdev || rdev->desc_nr != nr) {
		pr_warn("%s: %d Could not find rdev with nr %d\n",
			__func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdev's to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

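/* Remember a device detected at boot so that autostart_arrays() can try
 * to assemble it later.
 */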
void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

static void autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					       struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}

#endif

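/* Module unload: tear down the block regions, notifiers and /proc entry,
 * wake anyone still polling /proc/mdstat, then export every array and
 * drain the workqueues.
 */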
static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
	blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);

	unregister_blkdev(MD_MAJOR,"md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);