47#include <linux/kthread.h>
48#include <linux/blkdev.h>
49#include <linux/badblocks.h>
50#include <linux/sysctl.h>
51#include <linux/seq_file.h>
52#include <linux/fs.h>
53#include <linux/poll.h>
54#include <linux/ctype.h>
55#include <linux/string.h>
56#include <linux/hdreg.h>
57#include <linux/proc_fs.h>
58#include <linux/random.h>
59#include <linux/module.h>
60#include <linux/reboot.h>
61#include <linux/file.h>
62#include <linux/compat.h>
63#include <linux/delay.h>
64#include <linux/raid/md_p.h>
65#include <linux/raid/md_u.h>
66#include <linux/slab.h>
67#include "md.h"
68#include "bitmap.h"
69
70#ifndef MODULE
71static void autostart_arrays(int part);
72#endif
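/* Registered RAID personalities, protected by pers_lock. */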
79static LIST_HEAD(pers_list);
80static DEFINE_SPINLOCK(pers_lock);
81
82static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
83static struct workqueue_struct *md_wq;
84static struct workqueue_struct *md_misc_wq;
85
86static int remove_and_add_spares(struct mddev *mddev,
87 struct md_rdev *this);
88static void mddev_detach(struct mddev *mddev);
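/* Default limit on the number of corrected read errors tolerated per
 * member device before it is treated as failed.
 */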
95#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
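/*
 * Resync/recovery speed limits in KiB/sec, exported as
 * /proc/sys/dev/raid/speed_limit_{min,max}.  A per-array
 * sync_speed_min/sync_speed_max overrides these when non-zero
 * (see speed_min()/speed_max() below).
 */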
109static int sysctl_speed_limit_min = 1000;
110static int sysctl_speed_limit_max = 200000;
111static inline int speed_min(struct mddev *mddev)
112{
113 return mddev->sync_speed_min ?
114 mddev->sync_speed_min : sysctl_speed_limit_min;
115}
116
117static inline int speed_max(struct mddev *mddev)
118{
119 return mddev->sync_speed_max ?
120 mddev->sync_speed_max : sysctl_speed_limit_max;
121}
122
123static struct ctl_table_header *raid_table_header;
124
125static struct ctl_table raid_table[] = {
126 {
127 .procname = "speed_limit_min",
128 .data = &sysctl_speed_limit_min,
129 .maxlen = sizeof(int),
130 .mode = S_IRUGO|S_IWUSR,
131 .proc_handler = proc_dointvec,
132 },
133 {
134 .procname = "speed_limit_max",
135 .data = &sysctl_speed_limit_max,
136 .maxlen = sizeof(int),
137 .mode = S_IRUGO|S_IWUSR,
138 .proc_handler = proc_dointvec,
139 },
140 { }
141};
142
143static struct ctl_table raid_dir_table[] = {
144 {
145 .procname = "raid",
146 .maxlen = 0,
147 .mode = S_IRUGO|S_IXUGO,
148 .child = raid_table,
149 },
150 { }
151};
152
153static struct ctl_table raid_root_table[] = {
154 {
155 .procname = "dev",
156 .maxlen = 0,
157 .mode = 0555,
158 .child = raid_dir_table,
159 },
160 { }
161};
162
163static const struct block_device_operations md_fops;
164
165static int start_readonly;
171struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
172 struct mddev *mddev)
173{
174 struct bio *b;
175
176 if (!mddev || !mddev->bio_set)
177 return bio_alloc(gfp_mask, nr_iovecs);
178
179 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
180 if (!b)
181 return NULL;
182 return b;
183}
184EXPORT_SYMBOL_GPL(bio_alloc_mddev);
185
186struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
187 struct mddev *mddev)
188{
189 if (!mddev || !mddev->bio_set)
190 return bio_clone(bio, gfp_mask);
191
192 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
193}
194EXPORT_SYMBOL_GPL(bio_clone_mddev);
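/*
 * Generic event notification: md_new_event() bumps a counter and wakes
 * anyone sleeping on md_event_waiters, e.g. pollers of /proc/mdstat.
 */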
206static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
207static atomic_t md_event_count;
208void md_new_event(struct mddev *mddev)
209{
210 atomic_inc(&md_event_count);
211 wake_up(&md_event_waiters);
212}
213EXPORT_SYMBOL_GPL(md_new_event);
219static LIST_HEAD(all_mddevs);
220static DEFINE_SPINLOCK(all_mddevs_lock);
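/*
 * Iterate over every mddev on all_mddevs.  A reference is taken on the
 * current entry and dropped when stepping to the next, so the loop body
 * may sleep; all_mddevs_lock is only held while walking the list.
 */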
229#define for_each_mddev(_mddev,_tmp) \
230 \
231 for (({ spin_lock(&all_mddevs_lock); \
232 _tmp = all_mddevs.next; \
233 _mddev = NULL;}); \
234 ({ if (_tmp != &all_mddevs) \
235 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
236 spin_unlock(&all_mddevs_lock); \
237 if (_mddev) mddev_put(_mddev); \
238 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
239 _tmp != &all_mddevs;}); \
240 ({ spin_lock(&all_mddevs_lock); \
241 _tmp = _tmp->next;}) \
242 )
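/*
 * Generic make_request entry point for md devices: reject I/O when no
 * personality is active, complete empty flushes but fail other writes
 * with -EROFS on a read-only array, block while the array is suspended,
 * then hand the bio to the personality and account it in the gendisk
 * statistics.
 */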
251static void md_make_request(struct request_queue *q, struct bio *bio)
252{
253 const int rw = bio_data_dir(bio);
254 struct mddev *mddev = q->queuedata;
255 int cpu;
256 unsigned int sectors;
257
258 if (mddev == NULL || mddev->pers == NULL) {
259 bio_io_error(bio);
260 return;
261 }
262 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
263 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
264 return;
265 }
266check_suspended:
267 smp_rmb();
268 rcu_read_lock();
269 if (mddev->suspended) {
270 DEFINE_WAIT(__wait);
271 for (;;) {
272 prepare_to_wait(&mddev->sb_wait, &__wait,
273 TASK_UNINTERRUPTIBLE);
274 if (!mddev->suspended)
275 break;
276 rcu_read_unlock();
277 schedule();
278 rcu_read_lock();
279 }
280 finish_wait(&mddev->sb_wait, &__wait);
281 }
282 atomic_inc(&mddev->active_io);
283 rcu_read_unlock();
289 sectors = bio_sectors(bio);
290 if (!mddev->pers->make_request(mddev, bio)) {
291 atomic_dec(&mddev->active_io);
292 wake_up(&mddev->sb_wait);
293 goto check_suspended;
294 }
295
296 cpu = part_stat_lock();
297 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
298 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
299 part_stat_unlock();
300
301 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
302 wake_up(&mddev->sb_wait);
303}
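/*
 * mddev_suspend() blocks new I/O, waits for active_io to drain and asks
 * the personality to quiesce; mddev_resume() reverses this and kicks the
 * md threads so any pending recovery can continue.
 */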
311void mddev_suspend(struct mddev *mddev)
312{
313 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
314 if (mddev->suspended++)
315 return;
316 synchronize_rcu();
317 wake_up(&mddev->sb_wait);
318 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
319 mddev->pers->quiesce(mddev, 1);
320
321 del_timer_sync(&mddev->safemode_timer);
322}
323EXPORT_SYMBOL_GPL(mddev_suspend);
324
325void mddev_resume(struct mddev *mddev)
326{
327 if (--mddev->suspended)
328 return;
329 wake_up(&mddev->sb_wait);
330 mddev->pers->quiesce(mddev, 0);
331
332 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
333 md_wakeup_thread(mddev->thread);
334 md_wakeup_thread(mddev->sync_thread);
335}
336EXPORT_SYMBOL_GPL(mddev_resume);
337
338int mddev_congested(struct mddev *mddev, int bits)
339{
340 struct md_personality *pers = mddev->pers;
341 int ret = 0;
342
343 rcu_read_lock();
344 if (mddev->suspended)
345 ret = 1;
346 else if (pers && pers->congested)
347 ret = pers->congested(mddev, bits);
348 rcu_read_unlock();
349 return ret;
350}
351EXPORT_SYMBOL_GPL(mddev_congested);
352static int md_congested(void *data, int bits)
353{
354 struct mddev *mddev = data;
355 return mddev_congested(mddev, bits);
356}
357
358static int md_mergeable_bvec(struct request_queue *q,
359 struct bvec_merge_data *bvm,
360 struct bio_vec *biovec)
361{
362 struct mddev *mddev = q->queuedata;
363 int ret;
364 rcu_read_lock();
365 if (mddev->suspended) {
366
367 if (bvm->bi_size == 0)
368 ret = biovec->bv_len;
369 else
370 ret = 0;
371 } else {
372 struct md_personality *pers = mddev->pers;
373 if (pers && pers->mergeable_bvec)
374 ret = pers->mergeable_bvec(mddev, bvm, biovec);
375 else
376 ret = biovec->bv_len;
377 }
378 rcu_read_unlock();
379 return ret;
380}
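/*
 * Flush handling: md_flush_request() stashes the original REQ_FLUSH bio
 * and schedules submit_flushes(), which sends an empty flush to every
 * active member device.  Once all of those complete, md_submit_flush_data()
 * either ends an empty bio or resubmits the data portion with REQ_FLUSH
 * cleared.
 */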
385static void md_end_flush(struct bio *bio, int err)
386{
387 struct md_rdev *rdev = bio->bi_private;
388 struct mddev *mddev = rdev->mddev;
389
390 rdev_dec_pending(rdev, mddev);
391
392 if (atomic_dec_and_test(&mddev->flush_pending)) {
393
394 queue_work(md_wq, &mddev->flush_work);
395 }
396 bio_put(bio);
397}
398
399static void md_submit_flush_data(struct work_struct *ws);
400
401static void submit_flushes(struct work_struct *ws)
402{
403 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
404 struct md_rdev *rdev;
405
406 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
407 atomic_set(&mddev->flush_pending, 1);
408 rcu_read_lock();
409 rdev_for_each_rcu(rdev, mddev)
410 if (rdev->raid_disk >= 0 &&
411 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references on the rdev: one is dropped
			 * when the flush bio completes (md_end_flush), the
			 * other after we re-take rcu_read_lock() below, so
			 * the device cannot go away while we allocate and
			 * submit the bio outside the RCU read section.
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
419 rcu_read_unlock();
420 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
421 bi->bi_end_io = md_end_flush;
422 bi->bi_private = rdev;
423 bi->bi_bdev = rdev->bdev;
424 atomic_inc(&mddev->flush_pending);
425 submit_bio(WRITE_FLUSH, bi);
426 rcu_read_lock();
427 rdev_dec_pending(rdev, mddev);
428 }
429 rcu_read_unlock();
430 if (atomic_dec_and_test(&mddev->flush_pending))
431 queue_work(md_wq, &mddev->flush_work);
432}
433
434static void md_submit_flush_data(struct work_struct *ws)
435{
436 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
437 struct bio *bio = mddev->flush_bio;
438
439 if (bio->bi_size == 0)
440
441 bio_endio(bio, 0);
442 else {
443 bio->bi_rw &= ~REQ_FLUSH;
444 mddev->pers->make_request(mddev, bio);
445 }
446
447 mddev->flush_bio = NULL;
448 wake_up(&mddev->sb_wait);
449}
450
451void md_flush_request(struct mddev *mddev, struct bio *bio)
452{
453 spin_lock_irq(&mddev->lock);
454 wait_event_lock_irq(mddev->sb_wait,
455 !mddev->flush_bio,
456 mddev->lock);
457 mddev->flush_bio = bio;
458 spin_unlock_irq(&mddev->lock);
459
460 INIT_WORK(&mddev->flush_work, submit_flushes);
461 queue_work(md_wq, &mddev->flush_work);
462}
463EXPORT_SYMBOL(md_flush_request);
464
465void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
466{
467 struct mddev *mddev = cb->data;
468 md_wakeup_thread(mddev->thread);
469 kfree(cb);
470}
471EXPORT_SYMBOL(md_unplug);
472
473static inline struct mddev *mddev_get(struct mddev *mddev)
474{
475 atomic_inc(&mddev->active);
476 return mddev;
477}
478
479static void mddev_delayed_delete(struct work_struct *ws);
480
481static void mddev_put(struct mddev *mddev)
482{
483 struct bio_set *bs = NULL;
484
485 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
486 return;
487 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
488 mddev->ctime == 0 && !mddev->hold_active) {
491 list_del_init(&mddev->all_mddevs);
492 bs = mddev->bio_set;
493 mddev->bio_set = NULL;
494 if (mddev->gendisk) {
500 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
501 queue_work(md_misc_wq, &mddev->del_work);
502 } else
503 kfree(mddev);
504 }
505 spin_unlock(&all_mddevs_lock);
506 if (bs)
507 bioset_free(bs);
508}
509
510static void md_safemode_timeout(unsigned long data);
511
512void mddev_init(struct mddev *mddev)
513{
514 mutex_init(&mddev->open_mutex);
515 mutex_init(&mddev->reconfig_mutex);
516 mutex_init(&mddev->bitmap_info.mutex);
517 INIT_LIST_HEAD(&mddev->disks);
518 INIT_LIST_HEAD(&mddev->all_mddevs);
519 setup_timer(&mddev->safemode_timer, md_safemode_timeout,
520 (unsigned long) mddev);
521 atomic_set(&mddev->active, 1);
522 atomic_set(&mddev->openers, 0);
523 atomic_set(&mddev->active_io, 0);
524 spin_lock_init(&mddev->lock);
525 atomic_set(&mddev->flush_pending, 0);
526 init_waitqueue_head(&mddev->sb_wait);
527 init_waitqueue_head(&mddev->recovery_wait);
528 mddev->reshape_position = MaxSector;
529 mddev->reshape_backwards = 0;
530 mddev->last_sync_action = "none";
531 mddev->resync_min = 0;
532 mddev->resync_max = MaxSector;
533 mddev->level = LEVEL_NONE;
534}
535EXPORT_SYMBOL_GPL(mddev_init);
536
537static struct mddev *mddev_find(dev_t unit)
538{
539 struct mddev *mddev, *new = NULL;
540
541 if (unit && MAJOR(unit) != MD_MAJOR)
542 unit &= ~((1<<MdpMinorShift)-1);
543
544 retry:
545 spin_lock(&all_mddevs_lock);
546
547 if (unit) {
548 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
549 if (mddev->unit == unit) {
550 mddev_get(mddev);
551 spin_unlock(&all_mddevs_lock);
552 kfree(new);
553 return mddev;
554 }
555
556 if (new) {
557 list_add(&new->all_mddevs, &all_mddevs);
558 spin_unlock(&all_mddevs_lock);
559 new->hold_active = UNTIL_IOCTL;
560 return new;
561 }
562 } else if (new) {
563
564 static int next_minor = 512;
565 int start = next_minor;
566 int is_free = 0;
567 int dev = 0;
568 while (!is_free) {
569 dev = MKDEV(MD_MAJOR, next_minor);
570 next_minor++;
571 if (next_minor > MINORMASK)
572 next_minor = 0;
573 if (next_minor == start) {
574
575 spin_unlock(&all_mddevs_lock);
576 kfree(new);
577 return NULL;
578 }
579
580 is_free = 1;
581 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
582 if (mddev->unit == dev) {
583 is_free = 0;
584 break;
585 }
586 }
587 new->unit = dev;
588 new->md_minor = MINOR(dev);
589 new->hold_active = UNTIL_STOP;
590 list_add(&new->all_mddevs, &all_mddevs);
591 spin_unlock(&all_mddevs_lock);
592 return new;
593 }
594 spin_unlock(&all_mddevs_lock);
595
596 new = kzalloc(sizeof(*new), GFP_KERNEL);
597 if (!new)
598 return NULL;
599
600 new->unit = unit;
601 if (MAJOR(unit) == MD_MAJOR)
602 new->md_minor = MINOR(unit);
603 else
604 new->md_minor = MINOR(unit) >> MdpMinorShift;
605
606 mddev_init(new);
607
608 goto retry;
609}
610
611static struct attribute_group md_redundancy_group;
612
613void mddev_unlock(struct mddev *mddev)
614{
615 if (mddev->to_remove) {
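		/* Removing sysfs groups while holding reconfig_mutex can
		 * deadlock: an attribute method may be blocked waiting for
		 * the mutex while sysfs waits for that method to finish.
		 * So drop the mutex first and use sysfs_active so that
		 * concurrent reconfiguration can detect and avoid the race.
		 */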
628 struct attribute_group *to_remove = mddev->to_remove;
629 mddev->to_remove = NULL;
630 mddev->sysfs_active = 1;
631 mutex_unlock(&mddev->reconfig_mutex);
632
633 if (mddev->kobj.sd) {
634 if (to_remove != &md_redundancy_group)
635 sysfs_remove_group(&mddev->kobj, to_remove);
636 if (mddev->pers == NULL ||
637 mddev->pers->sync_request == NULL) {
638 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
639 if (mddev->sysfs_action)
640 sysfs_put(mddev->sysfs_action);
641 mddev->sysfs_action = NULL;
642 }
643 }
644 mddev->sysfs_active = 0;
645 } else
646 mutex_unlock(&mddev->reconfig_mutex);
651 spin_lock(&pers_lock);
652 md_wakeup_thread(mddev->thread);
653 spin_unlock(&pers_lock);
654}
655EXPORT_SYMBOL_GPL(mddev_unlock);
656
657struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
658{
659 struct md_rdev *rdev;
660
661 rdev_for_each_rcu(rdev, mddev)
662 if (rdev->desc_nr == nr)
663 return rdev;
664
665 return NULL;
666}
667EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
668
669static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
670{
671 struct md_rdev *rdev;
672
673 rdev_for_each(rdev, mddev)
674 if (rdev->bdev->bd_dev == dev)
675 return rdev;
676
677 return NULL;
678}
679
680static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
681{
682 struct md_rdev *rdev;
683
684 rdev_for_each_rcu(rdev, mddev)
685 if (rdev->bdev->bd_dev == dev)
686 return rdev;
687
688 return NULL;
689}
690
691static struct md_personality *find_pers(int level, char *clevel)
692{
693 struct md_personality *pers;
694 list_for_each_entry(pers, &pers_list, list) {
695 if (level != LEVEL_NONE && pers->level == level)
696 return pers;
697 if (strcmp(pers->name, clevel)==0)
698 return pers;
699 }
700 return NULL;
701}
702
703
704static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
705{
706 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
707 return MD_NEW_SIZE_SECTORS(num_sectors);
708}
709
710static int alloc_disk_sb(struct md_rdev *rdev)
711{
712 rdev->sb_page = alloc_page(GFP_KERNEL);
713 if (!rdev->sb_page)
714 return -ENOMEM;
715 return 0;
716}
717
718void md_rdev_clear(struct md_rdev *rdev)
719{
720 if (rdev->sb_page) {
721 put_page(rdev->sb_page);
722 rdev->sb_loaded = 0;
723 rdev->sb_page = NULL;
724 rdev->sb_start = 0;
725 rdev->sectors = 0;
726 }
727 if (rdev->bb_page) {
728 put_page(rdev->bb_page);
729 rdev->bb_page = NULL;
730 }
731 badblocks_exit(&rdev->badblocks);
732}
733EXPORT_SYMBOL_GPL(md_rdev_clear);
734
735static void super_written(struct bio *bio, int error)
736{
737 struct md_rdev *rdev = bio->bi_private;
738 struct mddev *mddev = rdev->mddev;
739
740 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
741 pr_err("md: super_written gets error=%d, uptodate=%d\n",
742 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
743 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
744 md_error(mddev, rdev);
745 if (!test_bit(Faulty, &rdev->flags)
746 && (bio->bi_rw & MD_FAILFAST)) {
747 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
748 set_bit(LastDev, &rdev->flags);
749 }
750 } else
751 clear_bit(LastDev, &rdev->flags);
752
753 if (atomic_dec_and_test(&mddev->pending_writes))
754 wake_up(&mddev->sb_wait);
755 rdev_dec_pending(rdev, mddev);
756 bio_put(bio);
757}
758
759void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
760 sector_t sector, int size, struct page *page)
761{
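	/* Write the first 'size' bytes of 'page' to 'sector' of rdev's
	 * (meta)device.  pending_writes is incremented here and dropped in
	 * super_written(), which wakes sb_wait; md_super_wait() waits for
	 * all such writes to finish.
	 */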
768 struct bio *bio;
769 int ff = WRITE_FLUSH_FUA;
770
771 if (test_bit(Faulty, &rdev->flags))
772 return;
773
774 bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
775
776 atomic_inc(&rdev->nr_pending);
777
778 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
779 bio->bi_sector = sector;
780 bio_add_page(bio, page, size, 0);
781 bio->bi_private = rdev;
782 bio->bi_end_io = super_written;
783
784 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
785 test_bit(FailFast, &rdev->flags) &&
786 !test_bit(LastDev, &rdev->flags))
787 ff |= MD_FAILFAST;
788
789 atomic_inc(&mddev->pending_writes);
790 submit_bio(ff, bio);
791}
792
793int md_super_wait(struct mddev *mddev)
794{
795
796 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
797 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
798 return -EAGAIN;
799 return 0;
800}
801
802int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
803 struct page *page, int rw, bool metadata_op)
804{
805 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
806 int ret;
807
808 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
809 rdev->meta_bdev : rdev->bdev;
810 if (metadata_op)
811 bio->bi_sector = sector + rdev->sb_start;
812 else if (rdev->mddev->reshape_position != MaxSector &&
813 (rdev->mddev->reshape_backwards ==
814 (sector >= rdev->mddev->reshape_position)))
815 bio->bi_sector = sector + rdev->new_data_offset;
816 else
817 bio->bi_sector = sector + rdev->data_offset;
818 bio_add_page(bio, page, size, 0);
819 submit_bio_wait(rw, bio);
820
821 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
822 bio_put(bio);
823 return ret;
824}
825EXPORT_SYMBOL_GPL(sync_page_io);
826
827static int read_disk_sb(struct md_rdev *rdev, int size)
828{
829 char b[BDEVNAME_SIZE];
830
831 if (rdev->sb_loaded)
832 return 0;
833
834 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
835 goto fail;
836 rdev->sb_loaded = 1;
837 return 0;
838
839fail:
840 pr_err("md: disabled device %s, could not read superblock.\n",
841 bdevname(rdev->bdev,b));
842 return -EINVAL;
843}
844
845static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
846{
847 return sb1->set_uuid0 == sb2->set_uuid0 &&
848 sb1->set_uuid1 == sb2->set_uuid1 &&
849 sb1->set_uuid2 == sb2->set_uuid2 &&
850 sb1->set_uuid3 == sb2->set_uuid3;
851}
852
853static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
854{
855 int ret;
856 mdp_super_t *tmp1, *tmp2;
857
858 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
859 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
860
861 if (!tmp1 || !tmp2) {
862 ret = 0;
863 goto abort;
864 }
865
866 *tmp1 = *sb1;
867 *tmp2 = *sb2;
872 tmp1->nr_disks = 0;
873 tmp2->nr_disks = 0;
874
875 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
876abort:
877 kfree(tmp1);
878 kfree(tmp2);
879 return ret;
880}
881
882static u32 md_csum_fold(u32 csum)
883{
884 csum = (csum & 0xffff) + (csum >> 16);
885 return (csum & 0xffff) + (csum >> 16);
886}
887
888static unsigned int calc_sb_csum(mdp_super_t *sb)
889{
890 u64 newcsum = 0;
891 u32 *sb32 = (u32*)sb;
892 int i;
893 unsigned int disk_csum, csum;
894
895 disk_csum = sb->sb_csum;
896 sb->sb_csum = 0;
897
898 for (i = 0; i < MD_SB_BYTES/4 ; i++)
899 newcsum += sb32[i];
900 csum = (newcsum & 0xffffffff) + (newcsum>>32);
901
902#ifdef CONFIG_ALPHA
911 sb->sb_csum = md_csum_fold(disk_csum);
912#else
913 sb->sb_csum = disk_csum;
914#endif
915 return csum;
916}
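/*
 * Methods implemented by each superblock format: load and validate a
 * superblock from an rdev, write the current state back (sync_super),
 * report how large the device may grow (rdev_size_change), and check a
 * proposed new data offset.
 */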
948struct super_type {
949 char *name;
950 struct module *owner;
951 int (*load_super)(struct md_rdev *rdev,
952 struct md_rdev *refdev,
953 int minor_version);
954 int (*validate_super)(struct mddev *mddev,
955 struct md_rdev *rdev);
956 void (*sync_super)(struct mddev *mddev,
957 struct md_rdev *rdev);
958 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
959 sector_t num_sectors);
960 int (*allow_new_offset)(struct md_rdev *rdev,
961 unsigned long long new_offset);
962};
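/*
 * Helper for personalities that do not support bitmaps: warn and return
 * an error if one is configured.
 */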
972int md_check_no_bitmap(struct mddev *mddev)
973{
974 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
975 return 0;
976 pr_warn("%s: bitmaps are not supported for %s\n",
977 mdname(mddev), mddev->pers->name);
978 return 1;
979}
980EXPORT_SYMBOL(md_check_no_bitmap);
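/*
 * Load a 0.90.0 superblock from the end of the device.  Returns 1 if it
 * is newer than refdev's, 0 if older or equal, and a negative errno if
 * it is invalid.
 */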
985static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
986{
987 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
988 mdp_super_t *sb;
989 int ret;
997 rdev->sb_start = calc_dev_sboffset(rdev);
998
999 ret = read_disk_sb(rdev, MD_SB_BYTES);
1000 if (ret)
1001 return ret;
1002
1003 ret = -EINVAL;
1004
1005 bdevname(rdev->bdev, b);
1006 sb = page_address(rdev->sb_page);
1007
1008 if (sb->md_magic != MD_SB_MAGIC) {
1009 pr_warn("md: invalid raid superblock magic on %s\n", b);
1010 goto abort;
1011 }
1012
1013 if (sb->major_version != 0 ||
1014 sb->minor_version < 90 ||
1015 sb->minor_version > 91) {
1016 pr_warn("Bad version number %d.%d on %s\n",
1017 sb->major_version, sb->minor_version, b);
1018 goto abort;
1019 }
1020
1021 if (sb->raid_disks <= 0)
1022 goto abort;
1023
1024 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1025 pr_warn("md: invalid superblock checksum on %s\n", b);
1026 goto abort;
1027 }
1028
1029 rdev->preferred_minor = sb->md_minor;
1030 rdev->data_offset = 0;
1031 rdev->new_data_offset = 0;
1032 rdev->sb_size = MD_SB_BYTES;
1033 rdev->badblocks.shift = -1;
1034
1035 if (sb->level == LEVEL_MULTIPATH)
1036 rdev->desc_nr = -1;
1037 else
1038 rdev->desc_nr = sb->this_disk.number;
1039
1040 if (!refdev) {
1041 ret = 1;
1042 } else {
1043 __u64 ev1, ev2;
1044 mdp_super_t *refsb = page_address(refdev->sb_page);
1045 if (!uuid_equal(refsb, sb)) {
1046 pr_warn("md: %s has different UUID to %s\n",
1047 b, bdevname(refdev->bdev,b2));
1048 goto abort;
1049 }
1050 if (!sb_equal(refsb, sb)) {
1051 pr_warn("md: %s has same UUID but different superblock to %s\n",
1052 b, bdevname(refdev->bdev, b2));
1053 goto abort;
1054 }
1055 ev1 = md_event(sb);
1056 ev2 = md_event(refsb);
1057 if (ev1 > ev2)
1058 ret = 1;
1059 else
1060 ret = 0;
1061 }
1062 rdev->sectors = rdev->sb_start;
1067 if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
1068 sb->level >= 1)
1069 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1070
1071 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1072
1073 ret = -EINVAL;
1074
1075 abort:
1076 return ret;
1077}
1082static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1083{
1084 mdp_disk_t *desc;
1085 mdp_super_t *sb = page_address(rdev->sb_page);
1086 __u64 ev1 = md_event(sb);
1087
1088 rdev->raid_disk = -1;
1089 clear_bit(Faulty, &rdev->flags);
1090 clear_bit(In_sync, &rdev->flags);
1091 clear_bit(Bitmap_sync, &rdev->flags);
1092 clear_bit(WriteMostly, &rdev->flags);
1093
1094 if (mddev->raid_disks == 0) {
1095 mddev->major_version = 0;
1096 mddev->minor_version = sb->minor_version;
1097 mddev->patch_version = sb->patch_version;
1098 mddev->external = 0;
1099 mddev->chunk_sectors = sb->chunk_size >> 9;
1100 mddev->ctime = sb->ctime;
1101 mddev->utime = sb->utime;
1102 mddev->level = sb->level;
1103 mddev->clevel[0] = 0;
1104 mddev->layout = sb->layout;
1105 mddev->raid_disks = sb->raid_disks;
1106 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1107 mddev->events = ev1;
1108 mddev->bitmap_info.offset = 0;
1109 mddev->bitmap_info.space = 0;
1110
1111 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1112 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1113 mddev->reshape_backwards = 0;
1114
1115 if (mddev->minor_version >= 91) {
1116 mddev->reshape_position = sb->reshape_position;
1117 mddev->delta_disks = sb->delta_disks;
1118 mddev->new_level = sb->new_level;
1119 mddev->new_layout = sb->new_layout;
1120 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1121 if (mddev->delta_disks < 0)
1122 mddev->reshape_backwards = 1;
1123 } else {
1124 mddev->reshape_position = MaxSector;
1125 mddev->delta_disks = 0;
1126 mddev->new_level = mddev->level;
1127 mddev->new_layout = mddev->layout;
1128 mddev->new_chunk_sectors = mddev->chunk_sectors;
1129 }
1130
1131 if (sb->state & (1<<MD_SB_CLEAN))
1132 mddev->recovery_cp = MaxSector;
1133 else {
1134 if (sb->events_hi == sb->cp_events_hi &&
1135 sb->events_lo == sb->cp_events_lo) {
1136 mddev->recovery_cp = sb->recovery_cp;
1137 } else
1138 mddev->recovery_cp = 0;
1139 }
1140
1141 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1142 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1143 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1144 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1145
1146 mddev->max_disks = MD_SB_DISKS;
1147
1148 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1149 mddev->bitmap_info.file == NULL) {
1150 mddev->bitmap_info.offset =
1151 mddev->bitmap_info.default_offset;
1152 mddev->bitmap_info.space =
1153 mddev->bitmap_info.default_space;
1154 }
1155
1156 } else if (mddev->pers == NULL) {
1157
1158
1159 ++ev1;
1160 if (sb->disks[rdev->desc_nr].state & (
1161 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1162 if (ev1 < mddev->events)
1163 return -EINVAL;
1164 } else if (mddev->bitmap) {
1168 if (ev1 < mddev->bitmap->events_cleared)
1169 return 0;
1170 if (ev1 < mddev->events)
1171 set_bit(Bitmap_sync, &rdev->flags);
1172 } else {
1173 if (ev1 < mddev->events)
1174
1175 return 0;
1176 }
1177
1178 if (mddev->level != LEVEL_MULTIPATH) {
1179 desc = sb->disks + rdev->desc_nr;
1180
1181 if (desc->state & (1<<MD_DISK_FAULTY))
1182 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1185 set_bit(In_sync, &rdev->flags);
1186 rdev->raid_disk = desc->raid_disk;
1187 rdev->saved_raid_disk = desc->raid_disk;
1188 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1192 if (mddev->minor_version >= 91) {
1193 rdev->recovery_offset = 0;
1194 rdev->raid_disk = desc->raid_disk;
1195 }
1196 }
1197 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1198 set_bit(WriteMostly, &rdev->flags);
1199 if (desc->state & (1<<MD_DISK_FAILFAST))
1200 set_bit(FailFast, &rdev->flags);
1201 } else
1202 set_bit(In_sync, &rdev->flags);
1203 return 0;
1204}
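/*
 * Fill the 0.90.0 superblock in rdev->sb_page from the current mddev and
 * rdev state, ready to be written out.
 */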
1209static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1210{
1211 mdp_super_t *sb;
1212 struct md_rdev *rdev2;
1213 int next_spare = mddev->raid_disks;
1225 int i;
1226 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1227
1228 rdev->sb_size = MD_SB_BYTES;
1229
1230 sb = page_address(rdev->sb_page);
1231
1232 memset(sb, 0, sizeof(*sb));
1233
1234 sb->md_magic = MD_SB_MAGIC;
1235 sb->major_version = mddev->major_version;
1236 sb->patch_version = mddev->patch_version;
1237 sb->gvalid_words = 0;
1238 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1239 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1240 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1241 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1242
1243 sb->ctime = mddev->ctime;
1244 sb->level = mddev->level;
1245 sb->size = mddev->dev_sectors / 2;
1246 sb->raid_disks = mddev->raid_disks;
1247 sb->md_minor = mddev->md_minor;
1248 sb->not_persistent = 0;
1249 sb->utime = mddev->utime;
1250 sb->state = 0;
1251 sb->events_hi = (mddev->events>>32);
1252 sb->events_lo = (u32)mddev->events;
1253
1254 if (mddev->reshape_position == MaxSector)
1255 sb->minor_version = 90;
1256 else {
1257 sb->minor_version = 91;
1258 sb->reshape_position = mddev->reshape_position;
1259 sb->new_level = mddev->new_level;
1260 sb->delta_disks = mddev->delta_disks;
1261 sb->new_layout = mddev->new_layout;
1262 sb->new_chunk = mddev->new_chunk_sectors << 9;
1263 }
1264 mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
1267 sb->recovery_cp = mddev->recovery_cp;
1268 sb->cp_events_hi = (mddev->events>>32);
1269 sb->cp_events_lo = (u32)mddev->events;
1270 if (mddev->recovery_cp == MaxSector)
1271 sb->state = (1<< MD_SB_CLEAN);
1272 } else
1273 sb->recovery_cp = 0;
1274
1275 sb->layout = mddev->layout;
1276 sb->chunk_size = mddev->chunk_sectors << 9;
1277
1278 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1279 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1280
1281 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1282 rdev_for_each(rdev2, mddev) {
1283 mdp_disk_t *d;
1284 int desc_nr;
1285 int is_active = test_bit(In_sync, &rdev2->flags);
1286
1287 if (rdev2->raid_disk >= 0 &&
1288 sb->minor_version >= 91)
1293 is_active = 1;
1294 if (rdev2->raid_disk < 0 ||
1295 test_bit(Faulty, &rdev2->flags))
1296 is_active = 0;
1297 if (is_active)
1298 desc_nr = rdev2->raid_disk;
1299 else
1300 desc_nr = next_spare++;
1301 rdev2->desc_nr = desc_nr;
1302 d = &sb->disks[rdev2->desc_nr];
1303 nr_disks++;
1304 d->number = rdev2->desc_nr;
1305 d->major = MAJOR(rdev2->bdev->bd_dev);
1306 d->minor = MINOR(rdev2->bdev->bd_dev);
1307 if (is_active)
1308 d->raid_disk = rdev2->raid_disk;
1309 else
1310 d->raid_disk = rdev2->desc_nr;
1311 if (test_bit(Faulty, &rdev2->flags))
1312 d->state = (1<<MD_DISK_FAULTY);
1313 else if (is_active) {
1314 d->state = (1<<MD_DISK_ACTIVE);
1315 if (test_bit(In_sync, &rdev2->flags))
1316 d->state |= (1<<MD_DISK_SYNC);
1317 active++;
1318 working++;
1319 } else {
1320 d->state = 0;
1321 spare++;
1322 working++;
1323 }
1324 if (test_bit(WriteMostly, &rdev2->flags))
1325 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1326 if (test_bit(FailFast, &rdev2->flags))
1327 d->state |= (1<<MD_DISK_FAILFAST);
1328 }
1329
1330 for (i=0 ; i < mddev->raid_disks ; i++) {
1331 mdp_disk_t *d = &sb->disks[i];
1332 if (d->state == 0 && d->number == 0) {
1333 d->number = i;
1334 d->raid_disk = i;
1335 d->state = (1<<MD_DISK_REMOVED);
1336 d->state |= (1<<MD_DISK_FAULTY);
1337 failed++;
1338 }
1339 }
1340 sb->nr_disks = nr_disks;
1341 sb->active_disks = active;
1342 sb->working_disks = working;
1343 sb->failed_disks = failed;
1344 sb->spare_disks = spare;
1345
1346 sb->this_disk = sb->disks[rdev->desc_nr];
1347 sb->sb_csum = calc_sb_csum(sb);
1348}
1353static unsigned long long
1354super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1355{
1356 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1357 return 0;
1358 if (rdev->mddev->bitmap_info.offset)
1359 return 0;
1360 rdev->sb_start = calc_dev_sboffset(rdev);
1361 if (!num_sectors || num_sectors > rdev->sb_start)
1362 num_sectors = rdev->sb_start;
1366 if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
1367 rdev->mddev->level >= 1)
1368 num_sectors = (sector_t)(2ULL << 32) - 2;
1369 do {
1370 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1371 rdev->sb_page);
1372 } while (md_super_wait(rdev->mddev) < 0);
1373 return num_sectors;
1374}
1375
1376static int
1377super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1378{
1379
1380 return new_offset == 0;
1381}
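/*
 * Checksum for a version-1 superblock: covers the 256-byte header plus
 * two bytes per device role, folded to 32 bits.
 */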
1387static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1388{
1389 __le32 disk_csum;
1390 u32 csum;
1391 unsigned long long newcsum;
1392 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1393 __le32 *isuper = (__le32*)sb;
1394
1395 disk_csum = sb->sb_csum;
1396 sb->sb_csum = 0;
1397 newcsum = 0;
1398 for (; size >= 4; size -= 4)
1399 newcsum += le32_to_cpu(*isuper++);
1400
1401 if (size == 2)
1402 newcsum += le16_to_cpu(*(__le16*) isuper);
1403
1404 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1405 sb->sb_csum = disk_csum;
1406 return cpu_to_le32(csum);
1407}
1408
1409static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1410{
1411 struct mdp_superblock_1 *sb;
1412 int ret;
1413 sector_t sb_start;
1414 sector_t sectors;
1415 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1416 int bmask;
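	/*
	 * Superblock position depends on the minor version:
	 *   0: at least 8K, but less than 12K, from the end of the device
	 *   1: at the start of the device
	 *   2: 4K from the start of the device
	 */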
1426 switch(minor_version) {
1427 case 0:
1428 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1429 sb_start -= 8*2;
1430 sb_start &= ~(sector_t)(4*2-1);
1431 break;
1432 case 1:
1433 sb_start = 0;
1434 break;
1435 case 2:
1436 sb_start = 8;
1437 break;
1438 default:
1439 return -EINVAL;
1440 }
1441 rdev->sb_start = sb_start;
1446 ret = read_disk_sb(rdev, 4096);
1447 if (ret) return ret;
1448
1449 sb = page_address(rdev->sb_page);
1450
1451 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1452 sb->major_version != cpu_to_le32(1) ||
1453 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1454 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1455 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1456 return -EINVAL;
1457
1458 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1459 pr_warn("md: invalid superblock checksum on %s\n",
1460 bdevname(rdev->bdev,b));
1461 return -EINVAL;
1462 }
1463 if (le64_to_cpu(sb->data_size) < 10) {
1464 pr_warn("md: data_size too small on %s\n",
1465 bdevname(rdev->bdev,b));
1466 return -EINVAL;
1467 }
1468 if (sb->pad0 ||
1469 sb->pad3[0] ||
1470 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1471
1472 return -EINVAL;
1473
1474 rdev->preferred_minor = 0xffff;
1475 rdev->data_offset = le64_to_cpu(sb->data_offset);
1476 rdev->new_data_offset = rdev->data_offset;
1477 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1478 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1479 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1480 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1481
1482 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1483 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1484 if (rdev->sb_size & bmask)
1485 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1486
1487 if (minor_version
1488 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1489 return -EINVAL;
1490 if (minor_version
1491 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1492 return -EINVAL;
1493
1494 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1495 rdev->desc_nr = -1;
1496 else
1497 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1498
1499 if (!rdev->bb_page) {
1500 rdev->bb_page = alloc_page(GFP_KERNEL);
1501 if (!rdev->bb_page)
1502 return -ENOMEM;
1503 }
1504 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1505 rdev->badblocks.count == 0) {
1509 s32 offset;
1510 sector_t bb_sector;
1511 u64 *bbp;
1512 int i;
1513 int sectors = le16_to_cpu(sb->bblog_size);
1514 if (sectors > (PAGE_SIZE / 512))
1515 return -EINVAL;
1516 offset = le32_to_cpu(sb->bblog_offset);
1517 if (offset == 0)
1518 return -EINVAL;
1519 bb_sector = (long long)offset;
1520 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1521 rdev->bb_page, READ, true))
1522 return -EIO;
1523 bbp = (u64 *)page_address(rdev->bb_page);
1524 rdev->badblocks.shift = sb->bblog_shift;
1525 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1526 u64 bb = le64_to_cpu(*bbp);
1527 int count = bb & (0x3ff);
1528 u64 sector = bb >> 10;
1529 sector <<= sb->bblog_shift;
1530 count <<= sb->bblog_shift;
1531 if (bb + 1 == 0)
1532 break;
1533 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1534 return -EINVAL;
1535 }
1536 } else if (sb->bblog_offset != 0)
1537 rdev->badblocks.shift = 0;
1538
1539 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) {
1540 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1541 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1542 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1543 }
1544
1545 if (!refdev) {
1546 ret = 1;
1547 } else {
1548 __u64 ev1, ev2;
1549 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1550
1551 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1552 sb->level != refsb->level ||
1553 sb->layout != refsb->layout ||
1554 sb->chunksize != refsb->chunksize) {
1555 pr_warn("md: %s has strangely different superblock to %s\n",
1556 bdevname(rdev->bdev,b),
1557 bdevname(refdev->bdev,b2));
1558 return -EINVAL;
1559 }
1560 ev1 = le64_to_cpu(sb->events);
1561 ev2 = le64_to_cpu(refsb->events);
1562
1563 if (ev1 > ev2)
1564 ret = 1;
1565 else
1566 ret = 0;
1567 }
1568 if (minor_version) {
1569 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1570 sectors -= rdev->data_offset;
1571 } else
1572 sectors = rdev->sb_start;
1573 if (sectors < le64_to_cpu(sb->data_size))
1574 return -EINVAL;
1575 rdev->sectors = le64_to_cpu(sb->data_size);
1576 return ret;
1577}
1578
1579static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1580{
1581 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1582 __u64 ev1 = le64_to_cpu(sb->events);
1583
1584 rdev->raid_disk = -1;
1585 clear_bit(Faulty, &rdev->flags);
1586 clear_bit(In_sync, &rdev->flags);
1587 clear_bit(Bitmap_sync, &rdev->flags);
1588 clear_bit(WriteMostly, &rdev->flags);
1589
1590 if (mddev->raid_disks == 0) {
1591 mddev->major_version = 1;
1592 mddev->patch_version = 0;
1593 mddev->external = 0;
1594 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1595 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1596 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1597 mddev->level = le32_to_cpu(sb->level);
1598 mddev->clevel[0] = 0;
1599 mddev->layout = le32_to_cpu(sb->layout);
1600 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1601 mddev->dev_sectors = le64_to_cpu(sb->size);
1602 mddev->events = ev1;
1603 mddev->bitmap_info.offset = 0;
1604 mddev->bitmap_info.space = 0;
1608 mddev->bitmap_info.default_offset = 1024 >> 9;
1609 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1610 mddev->reshape_backwards = 0;
1611
1612 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1613 memcpy(mddev->uuid, sb->set_uuid, 16);
1614
1615 mddev->max_disks = (4096-256)/2;
1616
1617 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1618 mddev->bitmap_info.file == NULL) {
1619 mddev->bitmap_info.offset =
1620 (__s32)le32_to_cpu(sb->bitmap_offset);
1626 if (mddev->minor_version > 0)
1627 mddev->bitmap_info.space = 0;
1628 else if (mddev->bitmap_info.offset > 0)
1629 mddev->bitmap_info.space =
1630 8 - mddev->bitmap_info.offset;
1631 else
1632 mddev->bitmap_info.space =
1633 -mddev->bitmap_info.offset;
1634 }
1635
1636 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1637 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1638 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1639 mddev->new_level = le32_to_cpu(sb->new_level);
1640 mddev->new_layout = le32_to_cpu(sb->new_layout);
1641 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1642 if (mddev->delta_disks < 0 ||
1643 (mddev->delta_disks == 0 &&
1644 (le32_to_cpu(sb->feature_map)
1645 & MD_FEATURE_RESHAPE_BACKWARDS)))
1646 mddev->reshape_backwards = 1;
1647 } else {
1648 mddev->reshape_position = MaxSector;
1649 mddev->delta_disks = 0;
1650 mddev->new_level = mddev->level;
1651 mddev->new_layout = mddev->layout;
1652 mddev->new_chunk_sectors = mddev->chunk_sectors;
1653 }
1654
1655 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1656 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1657
1658 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) {
1659 if (le32_to_cpu(sb->feature_map) &
1660 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1661 return -EINVAL;
1662 set_bit(MD_HAS_PPL, &mddev->flags);
1663 }
1664 } else if (mddev->pers == NULL) {
1665
1666
1667 ++ev1;
1668 if (rdev->desc_nr >= 0 &&
1669 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1670 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1671 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1672 if (ev1 < mddev->events)
1673 return -EINVAL;
1674 } else if (mddev->bitmap) {
1678 if (ev1 < mddev->bitmap->events_cleared)
1679 return 0;
1680 if (ev1 < mddev->events)
1681 set_bit(Bitmap_sync, &rdev->flags);
1682 } else {
1683 if (ev1 < mddev->events)
1684
1685 return 0;
1686 }
1687 if (mddev->level != LEVEL_MULTIPATH) {
1688 int role;
1689 if (rdev->desc_nr < 0 ||
1690 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1691 role = MD_DISK_ROLE_SPARE;
1692 rdev->desc_nr = -1;
1693 } else
1694 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1695 switch(role) {
1696 case MD_DISK_ROLE_SPARE:
1697 break;
1698 case MD_DISK_ROLE_FAULTY:
1699 set_bit(Faulty, &rdev->flags);
1700 break;
1701 case MD_DISK_ROLE_JOURNAL:
1702 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1703
1704 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1705 return -EINVAL;
1706 }
1707 set_bit(Journal, &rdev->flags);
1708 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1709 rdev->raid_disk = 0;
1710 break;
1711 default:
1712 rdev->saved_raid_disk = role;
1713 if ((le32_to_cpu(sb->feature_map) &
1714 MD_FEATURE_RECOVERY_OFFSET)) {
1715 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1716 if (!(le32_to_cpu(sb->feature_map) &
1717 MD_FEATURE_RECOVERY_BITMAP))
1718 rdev->saved_raid_disk = -1;
1719 } else
1720 set_bit(In_sync, &rdev->flags);
1721 rdev->raid_disk = role;
1722 break;
1723 }
1724 if (sb->devflags & WriteMostly1)
1725 set_bit(WriteMostly, &rdev->flags);
1726 if (sb->devflags & FailFast1)
1727 set_bit(FailFast, &rdev->flags);
1728 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1729 set_bit(Replacement, &rdev->flags);
1730 } else
1731 set_bit(In_sync, &rdev->flags);
1732
1733 return 0;
1734}
1735
1736static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1737{
1738 struct mdp_superblock_1 *sb;
1739 struct md_rdev *rdev2;
1740 int max_dev, i;
1741
1742
1743 sb = page_address(rdev->sb_page);
1744
1745 sb->feature_map = 0;
1746 sb->pad0 = 0;
1747 sb->recovery_offset = cpu_to_le64(0);
1748 memset(sb->pad3, 0, sizeof(sb->pad3));
1749
1750 sb->utime = cpu_to_le64((__u64)mddev->utime);
1751 sb->events = cpu_to_le64(mddev->events);
1752 if (mddev->in_sync)
1753 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1754 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1755 sb->resync_offset = cpu_to_le64(MaxSector);
1756 else
1757 sb->resync_offset = cpu_to_le64(0);
1758
1759 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1760
1761 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1762 sb->size = cpu_to_le64(mddev->dev_sectors);
1763 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1764 sb->level = cpu_to_le32(mddev->level);
1765 sb->layout = cpu_to_le32(mddev->layout);
1766 if (test_bit(FailFast, &rdev->flags))
1767 sb->devflags |= FailFast1;
1768 else
1769 sb->devflags &= ~FailFast1;
1770
1771 if (test_bit(WriteMostly, &rdev->flags))
1772 sb->devflags |= WriteMostly1;
1773 else
1774 sb->devflags &= ~WriteMostly1;
1775 sb->data_offset = cpu_to_le64(rdev->data_offset);
1776 sb->data_size = cpu_to_le64(rdev->sectors);
1777
1778 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1779 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1780 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1781 }
1782
1783 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1784 !test_bit(In_sync, &rdev->flags)) {
1785 sb->feature_map |=
1786 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1787 sb->recovery_offset =
1788 cpu_to_le64(rdev->recovery_offset);
1789 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1790 sb->feature_map |=
1791 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1792 }
1793
1794 if (test_bit(Journal, &rdev->flags))
1795 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1796 if (test_bit(Replacement, &rdev->flags))
1797 sb->feature_map |=
1798 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1799
1800 if (mddev->reshape_position != MaxSector) {
1801 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1802 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1803 sb->new_layout = cpu_to_le32(mddev->new_layout);
1804 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1805 sb->new_level = cpu_to_le32(mddev->new_level);
1806 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1807 if (mddev->delta_disks == 0 &&
1808 mddev->reshape_backwards)
1809 sb->feature_map
1810 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1811 if (rdev->new_data_offset != rdev->data_offset) {
1812 sb->feature_map
1813 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1814 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1815 - rdev->data_offset));
1816 }
1817 }
1818
1819 if (rdev->badblocks.count == 0)
1820 ;
1821 else if (sb->bblog_offset == 0)
1822
1823 md_error(mddev, rdev);
1824 else {
1825 struct badblocks *bb = &rdev->badblocks;
1826 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1827 u64 *p = bb->page;
1828 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1829 if (bb->changed) {
1830 unsigned seq;
1831
1832retry:
1833 seq = read_seqbegin(&bb->lock);
1834
1835 memset(bbp, 0xff, PAGE_SIZE);
1836
1837 for (i = 0 ; i < bb->count ; i++) {
1838 u64 internal_bb = p[i];
1839 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1840 | BB_LEN(internal_bb));
1841 bbp[i] = cpu_to_le64(store_bb);
1842 }
1843 bb->changed = 0;
1844 if (read_seqretry(&bb->lock, seq))
1845 goto retry;
1846
1847 bb->sector = (rdev->sb_start +
1848 (int)le32_to_cpu(sb->bblog_offset));
1849 bb->size = le16_to_cpu(sb->bblog_size);
1850 }
1851 }
1852
1853 max_dev = 0;
1854 rdev_for_each(rdev2, mddev)
1855 if (rdev2->desc_nr+1 > max_dev)
1856 max_dev = rdev2->desc_nr+1;
1857
1858 if (max_dev > le32_to_cpu(sb->max_dev)) {
1859 int bmask;
1860 sb->max_dev = cpu_to_le32(max_dev);
1861 rdev->sb_size = max_dev * 2 + 256;
1862 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1863 if (rdev->sb_size & bmask)
1864 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1865 } else
1866 max_dev = le32_to_cpu(sb->max_dev);
1867
1868 for (i=0; i<max_dev;i++)
1869 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1870
1871 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
1872 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
1873
1874 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
1875 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
1876 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
1877 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
1878 }
1879
1880 rdev_for_each(rdev2, mddev) {
1881 i = rdev2->desc_nr;
1882 if (test_bit(Faulty, &rdev2->flags))
1883 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1884 else if (test_bit(In_sync, &rdev2->flags))
1885 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1886 else if (test_bit(Journal, &rdev2->flags))
1887 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
1888 else if (rdev2->raid_disk >= 0)
1889 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1890 else
1891 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
1892 }
1893
1894 sb->sb_csum = calc_sb_1_csum(sb);
1895}
1896
1897static unsigned long long
1898super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1899{
1900 struct mdp_superblock_1 *sb;
1901 sector_t max_sectors;
1902 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1903 return 0;
1904 if (rdev->data_offset != rdev->new_data_offset)
1905 return 0;
1906 if (rdev->sb_start < rdev->data_offset) {
1907
1908 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1909 max_sectors -= rdev->data_offset;
1910 if (!num_sectors || num_sectors > max_sectors)
1911 num_sectors = max_sectors;
1912 } else if (rdev->mddev->bitmap_info.offset) {
1913
1914 return 0;
1915 } else {
1916
1917 sector_t sb_start;
1918 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1919 sb_start &= ~(sector_t)(4*2 - 1);
1920 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1921 if (!num_sectors || num_sectors > max_sectors)
1922 num_sectors = max_sectors;
1923 rdev->sb_start = sb_start;
1924 }
1925 sb = page_address(rdev->sb_page);
1926 sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
1928 sb->sb_csum = calc_sb_1_csum(sb);
1929 do {
1930 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1931 rdev->sb_page);
1932 } while (md_super_wait(rdev->mddev) < 0);
1933 return num_sectors;
1934
1935}
1936
1937static int
1938super_1_allow_new_offset(struct md_rdev *rdev,
1939 unsigned long long new_offset)
1940{
1941
1942 struct bitmap *bitmap;
1943 if (new_offset >= rdev->data_offset)
1944 return 1;
1948 if (rdev->mddev->minor_version == 0)
1949 return 1;
1957 if (rdev->sb_start + (32+4)*2 > new_offset)
1958 return 0;
1959 bitmap = rdev->mddev->bitmap;
1960 if (bitmap && !rdev->mddev->bitmap_info.file &&
1961 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1962 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1963 return 0;
1964 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1965 return 0;
1966
1967 return 1;
1968}
1969
1970static struct super_type super_types[] = {
1971 [0] = {
1972 .name = "0.90.0",
1973 .owner = THIS_MODULE,
1974 .load_super = super_90_load,
1975 .validate_super = super_90_validate,
1976 .sync_super = super_90_sync,
1977 .rdev_size_change = super_90_rdev_size_change,
1978 .allow_new_offset = super_90_allow_new_offset,
1979 },
1980 [1] = {
1981 .name = "md-1",
1982 .owner = THIS_MODULE,
1983 .load_super = super_1_load,
1984 .validate_super = super_1_validate,
1985 .sync_super = super_1_sync,
1986 .rdev_size_change = super_1_rdev_size_change,
1987 .allow_new_offset = super_1_allow_new_offset,
1988 },
1989};
1990
1991static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1992{
1993 if (mddev->sync_super) {
1994 mddev->sync_super(mddev, rdev);
1995 return;
1996 }
1997
1998 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1999
2000 super_types[mddev->major_version].sync_super(mddev, rdev);
2001}
2002
2003static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2004{
2005 struct md_rdev *rdev, *rdev2;
2006
2007 rcu_read_lock();
2008 rdev_for_each_rcu(rdev, mddev1) {
2009 if (test_bit(Faulty, &rdev->flags) ||
2010 test_bit(Journal, &rdev->flags) ||
2011 rdev->raid_disk == -1)
2012 continue;
2013 rdev_for_each_rcu(rdev2, mddev2) {
2014 if (test_bit(Faulty, &rdev2->flags) ||
2015 test_bit(Journal, &rdev2->flags) ||
2016 rdev2->raid_disk == -1)
2017 continue;
2018 if (rdev->bdev->bd_contains ==
2019 rdev2->bdev->bd_contains) {
2020 rcu_read_unlock();
2021 return 1;
2022 }
2023 }
2024 }
2025 rcu_read_unlock();
2026 return 0;
2027}
2028
2029static LIST_HEAD(pending_raid_disks);
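/*
 * Try to register a blk-integrity profile for the array: all in-use
 * member devices must expose a profile compatible with the first
 * ("reference") device, whose profile is then attached to the md gendisk.
 */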
2038int md_integrity_register(struct mddev *mddev)
2039{
2040 struct md_rdev *rdev, *reference = NULL;
2041
2042 if (list_empty(&mddev->disks))
2043 return 0;
2044 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2045 return 0;
2046 rdev_for_each(rdev, mddev) {
2047
2048 if (test_bit(Faulty, &rdev->flags))
2049 continue;
2050 if (rdev->raid_disk < 0)
2051 continue;
2052 if (!reference) {
2053
2054 reference = rdev;
2055 continue;
2056 }
2057
2058 if (blk_integrity_compare(reference->bdev->bd_disk,
2059 rdev->bdev->bd_disk) < 0)
2060 return -EINVAL;
2061 }
2062 if (!reference || !bdev_get_integrity(reference->bdev))
2063 return 0;
2068 if (blk_integrity_register(mddev->gendisk,
2069 bdev_get_integrity(reference->bdev)) != 0) {
2070 pr_err("md: failed to register integrity for %s\n",
2071 mdname(mddev));
2072 return -EINVAL;
2073 }
2074 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2075 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2076 pr_err("md: failed to create integrity pool for %s\n",
2077 mdname(mddev));
2078 return -EINVAL;
2079 }
2080 return 0;
2081}
2082EXPORT_SYMBOL(md_integrity_register);
2088int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2089{
2090 struct blk_integrity *bi_rdev;
2091 struct blk_integrity *bi_mddev;
2092 char name[BDEVNAME_SIZE];
2093
2094 if (!mddev->gendisk)
2095 return 0;
2096
2097 bi_rdev = bdev_get_integrity(rdev->bdev);
2098 bi_mddev = blk_get_integrity(mddev->gendisk);
2099
2100 if (!bi_mddev)
2101 return 0;
2102
2103 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2104 pr_err("%s: incompatible integrity profile for %s\n",
2105 mdname(mddev), bdevname(rdev->bdev, name));
2106 return -ENXIO;
2107 }
2108
2109 return 0;
2110}
2111EXPORT_SYMBOL(md_integrity_add_rdev);
2112
2113static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2114{
2115 char b[BDEVNAME_SIZE];
2116 struct kobject *ko;
2117 char *s;
2118 int err;
2119
2120
2121 if (find_rdev(mddev, rdev->bdev->bd_dev))
2122 return -EEXIST;
2123
2124 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2125 mddev->pers)
2126 return -EROFS;
2127
2128
2129 if (!test_bit(Journal, &rdev->flags) &&
2130 rdev->sectors &&
2131 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2132 if (mddev->pers) {
2137 if (mddev->level > 0)
2138 return -ENOSPC;
2139 } else
2140 mddev->dev_sectors = rdev->sectors;
2141 }
2147 rcu_read_lock();
2148 if (rdev->desc_nr < 0) {
2149 int choice = 0;
2150 if (mddev->pers)
2151 choice = mddev->raid_disks;
2152 while (md_find_rdev_nr_rcu(mddev, choice))
2153 choice++;
2154 rdev->desc_nr = choice;
2155 } else {
2156 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2157 rcu_read_unlock();
2158 return -EBUSY;
2159 }
2160 }
2161 rcu_read_unlock();
2162 if (!test_bit(Journal, &rdev->flags) &&
2163 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2164 pr_warn("md: %s: array is limited to %d devices\n",
2165 mdname(mddev), mddev->max_disks);
2166 return -EBUSY;
2167 }
2168 bdevname(rdev->bdev,b);
2169 while ( (s=strchr(b, '/')) != NULL)
2170 *s = '!';
2171
2172 rdev->mddev = mddev;
2173 pr_debug("md: bind<%s>\n", b);
2174
2175 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2176 goto fail;
2177
2178 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
2181 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2182
2183 list_add_rcu(&rdev->same_set, &mddev->disks);
2184 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2185
2186
2187 mddev->recovery_disabled++;
2188
2189 return 0;
2190
2191 fail:
2192 pr_warn("md: failed to register dev-%s for %s\n",
2193 b, mdname(mddev));
2194 return err;
2195}
2196
2197static void md_delayed_delete(struct work_struct *ws)
2198{
2199 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2200 kobject_del(&rdev->kobj);
2201 kobject_put(&rdev->kobj);
2202}
2203
2204static void unbind_rdev_from_array(struct md_rdev *rdev)
2205{
2206 char b[BDEVNAME_SIZE];
2207
2208 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2209 list_del_rcu(&rdev->same_set);
2210 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2211 rdev->mddev = NULL;
2212 sysfs_remove_link(&rdev->kobj, "block");
2213 sysfs_put(rdev->sysfs_state);
2214 rdev->sysfs_state = NULL;
2215 rdev->badblocks.count = 0;
2220 synchronize_rcu();
2221 INIT_WORK(&rdev->del_work, md_delayed_delete);
2222 kobject_get(&rdev->kobj);
2223 queue_work(md_misc_wq, &rdev->del_work);
2224}
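/*
 * Open the underlying block device exclusively for md.  For a "shared"
 * claim, the address of lock_rdev itself is passed as the exclusive-open
 * holder instead of the rdev, so other md claimants using the same cookie
 * can share the device.
 */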
2231static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2232{
2233 int err = 0;
2234 struct block_device *bdev;
2235 char b[BDEVNAME_SIZE];
2236
2237 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2238 shared ? (struct md_rdev *)lock_rdev : rdev);
2239 if (IS_ERR(bdev)) {
2240 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2241 return PTR_ERR(bdev);
2242 }
2243 rdev->bdev = bdev;
2244 return err;
2245}
2246
2247static void unlock_rdev(struct md_rdev *rdev)
2248{
2249 struct block_device *bdev = rdev->bdev;
2250 rdev->bdev = NULL;
2251 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2252}
2253
2254void md_autodetect_dev(dev_t dev);
2255
2256static void export_rdev(struct md_rdev *rdev)
2257{
2258 char b[BDEVNAME_SIZE];
2259
2260 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2261 md_rdev_clear(rdev);
2262#ifndef MODULE
2263 if (test_bit(AutoDetected, &rdev->flags))
2264 md_autodetect_dev(rdev->bdev->bd_dev);
2265#endif
2266 unlock_rdev(rdev);
2267 kobject_put(&rdev->kobj);
2268}
2269
2270void md_kick_rdev_from_array(struct md_rdev *rdev)
2271{
2272 unbind_rdev_from_array(rdev);
2273 export_rdev(rdev);
2274}
2275EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2276
2277static void export_array(struct mddev *mddev)
2278{
2279 struct md_rdev *rdev;
2280
2281 while (!list_empty(&mddev->disks)) {
2282 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2283 same_set);
2284 md_kick_rdev_from_array(rdev);
2285 }
2286 mddev->raid_disks = 0;
2287 mddev->major_version = 0;
2288}
2289
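/*
 * Refresh the in-memory superblock image of every member.  With
 * 'nospares' set, devices whose event count is already current (or one
 * behind, for devices not active in the array) keep their old image
 * (sb_loaded = 2) so md_update_sb() will not rewrite them.
 */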
2290static void sync_sbs(struct mddev *mddev, int nospares)
2291{
2292
2293
2294
2295
2296
2297
2298 struct md_rdev *rdev;
2299 rdev_for_each(rdev, mddev) {
2300 if (rdev->sb_events == mddev->events ||
2301 (nospares &&
2302 rdev->raid_disk < 0 &&
2303 rdev->sb_events+1 == mddev->events)) {
2304
2305 rdev->sb_loaded = 2;
2306 } else {
2307 sync_super(mddev, rdev);
2308 rdev->sb_loaded = 1;
2309 }
2310 }
2311}
2312
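/*
 * Write the updated superblocks out to every member device.  Read-only
 * arrays only record that a device-level change is pending; arrays
 * without persistent metadata just clear the change flags (acking bad
 * blocks for non-external arrays) and return.  Otherwise the event
 * count is bumped (or, when allowed, decremented back for a pure
 * clean<->dirty transition), sync_sbs() refreshes each superblock
 * image, and the superblock plus any new bad-block log is written to
 * every non-faulty member, retrying on write error and repeating the
 * whole update if further changes arrived in the meantime.
 */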
2313void md_update_sb(struct mddev *mddev, int force_change)
2314{
2315 struct md_rdev *rdev;
2316 int sync_req;
2317 int nospares = 0;
2318 int any_badblocks_changed = 0;
2319
2320 if (mddev->ro) {
2321 if (force_change)
2322 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2323 return;
2324 }
2325repeat:
2326
2327 rdev_for_each(rdev, mddev) {
2328 if (rdev->raid_disk >= 0 &&
2329 mddev->delta_disks >= 0 &&
2330 !test_bit(Journal, &rdev->flags) &&
2331 !test_bit(In_sync, &rdev->flags) &&
2332 mddev->curr_resync_completed > rdev->recovery_offset)
2333 rdev->recovery_offset = mddev->curr_resync_completed;
2334
2335 }
2336 if (!mddev->persistent) {
2337 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2338 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2339 if (!mddev->external) {
2340 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2341 rdev_for_each(rdev, mddev) {
2342 if (rdev->badblocks.changed) {
2343 rdev->badblocks.changed = 0;
2344 ack_all_badblocks(&rdev->badblocks);
2345 md_error(mddev, rdev);
2346 }
2347 clear_bit(Blocked, &rdev->flags);
2348 clear_bit(BlockedBadBlocks, &rdev->flags);
2349 wake_up(&rdev->blocked_wait);
2350 }
2351 }
2352 wake_up(&mddev->sb_wait);
2353 return;
2354 }
2355
2356 spin_lock(&mddev->lock);
2357
2358 mddev->utime = get_seconds();
2359
2360 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2361 force_change = 1;
2362 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2363
2364
2365
2366
2367 nospares = 1;
2368 if (force_change)
2369 nospares = 0;
2370 if (mddev->degraded)
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380 nospares = 0;
2381
2382 sync_req = mddev->in_sync;
2383
2384
2385
2386 if (nospares
2387 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2388 && mddev->can_decrease_events
2389 && mddev->events != 1) {
2390 mddev->events--;
2391 mddev->can_decrease_events = 0;
2392 } else {
2393
2394 mddev->events ++;
2395 mddev->can_decrease_events = nospares;
2396 }
2397
2398
2399
2400
2401
2402
2403 WARN_ON(mddev->events == 0);
2404
2405 rdev_for_each(rdev, mddev) {
2406 if (rdev->badblocks.changed)
2407 any_badblocks_changed++;
2408 if (test_bit(Faulty, &rdev->flags))
2409 set_bit(FaultRecorded, &rdev->flags);
2410 }
2411
2412 sync_sbs(mddev, nospares);
2413 spin_unlock(&mddev->lock);
2414
2415 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2416 mdname(mddev), mddev->in_sync);
2417
2418rewrite:
2419 bitmap_update_sb(mddev->bitmap);
2420 rdev_for_each(rdev, mddev) {
2421 char b[BDEVNAME_SIZE];
2422
2423 if (rdev->sb_loaded != 1)
2424 continue;
2425
2426 if (!test_bit(Faulty, &rdev->flags)) {
2427 md_super_write(mddev,rdev,
2428 rdev->sb_start, rdev->sb_size,
2429 rdev->sb_page);
2430 pr_debug("md: (write) %s's sb offset: %llu\n",
2431 bdevname(rdev->bdev, b),
2432 (unsigned long long)rdev->sb_start);
2433 rdev->sb_events = mddev->events;
2434 if (rdev->badblocks.size) {
2435 md_super_write(mddev, rdev,
2436 rdev->badblocks.sector,
2437 rdev->badblocks.size << 9,
2438 rdev->bb_page);
2439 rdev->badblocks.size = 0;
2440 }
2441
2442 } else
2443 pr_debug("md: %s (skipping faulty)\n",
2444 bdevname(rdev->bdev, b));
2445
2446 if (mddev->level == LEVEL_MULTIPATH)
2447
2448 break;
2449 }
2450 if (md_super_wait(mddev) < 0)
2451 goto rewrite;
2452
2453
2454 if (mddev->in_sync != sync_req ||
2455 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2456 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2457
2458 goto repeat;
2459 wake_up(&mddev->sb_wait);
2460 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2461 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2462
2463 rdev_for_each(rdev, mddev) {
2464 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2465 clear_bit(Blocked, &rdev->flags);
2466
2467 if (any_badblocks_changed)
2468 ack_all_badblocks(&rdev->badblocks);
2469 clear_bit(BlockedBadBlocks, &rdev->flags);
2470 wake_up(&rdev->blocked_wait);
2471 }
2472}
2473EXPORT_SYMBOL(md_update_sb);
2474
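/*
 * Hook up an rdev that is already bound to the array (used by the
 * "re-add" state write).  When the personality has no hot_remove_disk()
 * or the device is a journal, the superblock is re-validated and the
 * device hot-added immediately, suspending the array around a journal
 * add.  In all cases the superblock is marked dirty and recovery is
 * requested.
 */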
2475static int add_bound_rdev(struct md_rdev *rdev)
2476{
2477 struct mddev *mddev = rdev->mddev;
2478 int err = 0;
2479 bool add_journal = test_bit(Journal, &rdev->flags);
2480
2481 if (!mddev->pers->hot_remove_disk || add_journal) {
2482
2483
2484
2485
2486 super_types[mddev->major_version].
2487 validate_super(mddev, rdev);
2488 if (add_journal)
2489 mddev_suspend(mddev);
2490 err = mddev->pers->hot_add_disk(mddev, rdev);
2491 if (add_journal)
2492 mddev_resume(mddev);
2493 if (err) {
2494 md_kick_rdev_from_array(rdev);
2495 return err;
2496 }
2497 }
2498 sysfs_notify_dirent_safe(rdev->sysfs_state);
2499
2500 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2501 if (mddev->degraded)
2502 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2503 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2504 md_new_event(mddev);
2505 md_wakeup_thread(mddev->thread);
2506 return 0;
2507}
2508
2509
2510
2511
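/*
 * cmd_match() returns 1 when a string written to a sysfs file matches
 * 'str' exactly, allowing for a single trailing newline.
 */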
2512static int cmd_match(const char *cmd, const char *str)
2513{
2514
2515
2516
2517
2518 while (*cmd && *str && *cmd == *str) {
2519 cmd++;
2520 str++;
2521 }
2522 if (*cmd == '\n')
2523 cmd++;
2524 if (*str || *cmd)
2525 return 0;
2526 return 1;
2527}
2528
2529struct rdev_sysfs_entry {
2530 struct attribute attr;
2531 ssize_t (*show)(struct md_rdev *, char *);
2532 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2533};
2534
2535static ssize_t
2536state_show(struct md_rdev *rdev, char *page)
2537{
2538 char *sep = ",";
2539 size_t len = 0;
2540 unsigned long flags = ACCESS_ONCE(rdev->flags);
2541
2542 if (test_bit(Faulty, &flags) ||
2543 (!test_bit(ExternalBbl, &flags) &&
2544 rdev->badblocks.unacked_exist))
2545 len += sprintf(page+len, "faulty%s", sep);
2546 if (test_bit(In_sync, &flags))
2547 len += sprintf(page+len, "in_sync%s", sep);
2548 if (test_bit(Journal, &flags))
2549 len += sprintf(page+len, "journal%s", sep);
2550 if (test_bit(WriteMostly, &flags))
2551 len += sprintf(page+len, "write_mostly%s", sep);
2552 if (test_bit(Blocked, &flags) ||
2553 (rdev->badblocks.unacked_exist
2554 && !test_bit(Faulty, &flags)))
2555 len += sprintf(page+len, "blocked%s", sep);
2556 if (!test_bit(Faulty, &flags) &&
2557 !test_bit(Journal, &flags) &&
2558 !test_bit(In_sync, &flags))
2559 len += sprintf(page+len, "spare%s", sep);
2560 if (test_bit(WriteErrorSeen, &flags))
2561 len += sprintf(page+len, "write_error%s", sep);
2562 if (test_bit(WantReplacement, &flags))
2563 len += sprintf(page+len, "want_replacement%s", sep);
2564 if (test_bit(Replacement, &flags))
2565 len += sprintf(page+len, "replacement%s", sep);
2566 if (test_bit(ExternalBbl, &flags))
2567 len += sprintf(page+len, "external_bbl%s", sep);
2568 if (test_bit(FailFast, &flags))
2569 len += sprintf(page+len, "failfast%s", sep);
2570
2571 if (len)
2572 len -= strlen(sep);
2573
2574 return len+sprintf(page+len, "\n");
2575}
2576
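/*
 * state_store() accepts the tokens handled below: "faulty", "remove",
 * "writemostly"/"-writemostly", "blocked"/"-blocked", "insync"/"-insync",
 * "write_error"/"-write_error", "want_replacement"/"-want_replacement",
 * "replacement"/"-replacement", "re-add", "failfast"/"-failfast" and
 * "external_bbl"/"-external_bbl", each setting or clearing the
 * corresponding rdev flag or triggering the matching action.
 */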
2577static ssize_t
2578state_store(struct md_rdev *rdev, const char *buf, size_t len)
2579{
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594 int err = -EINVAL;
2595 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2596 md_error(rdev->mddev, rdev);
2597 if (test_bit(Faulty, &rdev->flags))
2598 err = 0;
2599 else
2600 err = -EBUSY;
2601 } else if (cmd_match(buf, "remove")) {
2602 if (rdev->mddev->pers) {
2603 clear_bit(Blocked, &rdev->flags);
2604 remove_and_add_spares(rdev->mddev, rdev);
2605 }
2606 if (rdev->raid_disk >= 0)
2607 err = -EBUSY;
2608 else {
2609 struct mddev *mddev = rdev->mddev;
2610 md_kick_rdev_from_array(rdev);
2611 if (mddev->pers) {
2612 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2613 md_wakeup_thread(mddev->thread);
2614 }
2615 md_new_event(mddev);
2616 err = 0;
2617 }
2618 } else if (cmd_match(buf, "writemostly")) {
2619 set_bit(WriteMostly, &rdev->flags);
2620 err = 0;
2621 } else if (cmd_match(buf, "-writemostly")) {
2622 clear_bit(WriteMostly, &rdev->flags);
2623 err = 0;
2624 } else if (cmd_match(buf, "blocked")) {
2625 set_bit(Blocked, &rdev->flags);
2626 err = 0;
2627 } else if (cmd_match(buf, "-blocked")) {
2628 if (!test_bit(Faulty, &rdev->flags) &&
2629 !test_bit(ExternalBbl, &rdev->flags) &&
2630 rdev->badblocks.unacked_exist) {
2631
2632
2633
2634 md_error(rdev->mddev, rdev);
2635 }
2636 clear_bit(Blocked, &rdev->flags);
2637 clear_bit(BlockedBadBlocks, &rdev->flags);
2638 wake_up(&rdev->blocked_wait);
2639 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2640 md_wakeup_thread(rdev->mddev->thread);
2641
2642 err = 0;
2643 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2644 set_bit(In_sync, &rdev->flags);
2645 err = 0;
2646 } else if (cmd_match(buf, "failfast")) {
2647 set_bit(FailFast, &rdev->flags);
2648 err = 0;
2649 } else if (cmd_match(buf, "-failfast")) {
2650 clear_bit(FailFast, &rdev->flags);
2651 err = 0;
2652 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2653 !test_bit(Journal, &rdev->flags)) {
2654 if (rdev->mddev->pers == NULL) {
2655 clear_bit(In_sync, &rdev->flags);
2656 rdev->saved_raid_disk = rdev->raid_disk;
2657 rdev->raid_disk = -1;
2658 err = 0;
2659 }
2660 } else if (cmd_match(buf, "write_error")) {
2661 set_bit(WriteErrorSeen, &rdev->flags);
2662 err = 0;
2663 } else if (cmd_match(buf, "-write_error")) {
2664 clear_bit(WriteErrorSeen, &rdev->flags);
2665 err = 0;
2666 } else if (cmd_match(buf, "want_replacement")) {
2667
2668
2669
2670
2671 if (rdev->raid_disk >= 0 &&
2672 !test_bit(Journal, &rdev->flags) &&
2673 !test_bit(Replacement, &rdev->flags))
2674 set_bit(WantReplacement, &rdev->flags);
2675 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2676 md_wakeup_thread(rdev->mddev->thread);
2677 err = 0;
2678 } else if (cmd_match(buf, "-want_replacement")) {
2679
2680
2681
2682 err = 0;
2683 clear_bit(WantReplacement, &rdev->flags);
2684 } else if (cmd_match(buf, "replacement")) {
2685
2686
2687
2688
2689 if (rdev->mddev->pers)
2690 err = -EBUSY;
2691 else {
2692 set_bit(Replacement, &rdev->flags);
2693 err = 0;
2694 }
2695 } else if (cmd_match(buf, "-replacement")) {
2696
2697 if (rdev->mddev->pers)
2698 err = -EBUSY;
2699 else {
2700 clear_bit(Replacement, &rdev->flags);
2701 err = 0;
2702 }
2703 } else if (cmd_match(buf, "re-add")) {
2704 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2705 clear_bit(Faulty, &rdev->flags);
2706 err = add_bound_rdev(rdev);
2707 } else
2708 err = -EBUSY;
2709 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2710 set_bit(ExternalBbl, &rdev->flags);
2711 rdev->badblocks.shift = 0;
2712 err = 0;
2713 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2714 clear_bit(ExternalBbl, &rdev->flags);
2715 err = 0;
2716 }
2717 if (!err)
2718 sysfs_notify_dirent_safe(rdev->sysfs_state);
2719 return err ? err : len;
2720}
2721static struct rdev_sysfs_entry rdev_state =
2722__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2723
2724static ssize_t
2725errors_show(struct md_rdev *rdev, char *page)
2726{
2727 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2728}
2729
2730static ssize_t
2731errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2732{
2733 unsigned int n;
2734 int rv;
2735
2736 rv = kstrtouint(buf, 10, &n);
2737 if (rv < 0)
2738 return rv;
2739 atomic_set(&rdev->corrected_errors, n);
2740 return len;
2741}
2742static struct rdev_sysfs_entry rdev_errors =
2743__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2744
2745static ssize_t
2746slot_show(struct md_rdev *rdev, char *page)
2747{
2748 if (test_bit(Journal, &rdev->flags))
2749 return sprintf(page, "journal\n");
2750 else if (rdev->raid_disk < 0)
2751 return sprintf(page, "none\n");
2752 else
2753 return sprintf(page, "%d\n", rdev->raid_disk);
2754}
2755
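/*
 * 'slot' selects the role of the device: writing "none" detaches it
 * (a hot-remove on an active array), while a number assigns it that
 * raid slot, hot-adding it when the array is running.  Journal devices
 * cannot be moved.
 */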
2756static ssize_t
2757slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2758{
2759 int slot;
2760 int err;
2761
2762 if (test_bit(Journal, &rdev->flags))
2763 return -EBUSY;
2764 if (strncmp(buf, "none", 4)==0)
2765 slot = -1;
2766 else {
2767 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2768 if (err < 0)
2769 return err;
2770 }
2771 if (rdev->mddev->pers && slot == -1) {
2772
2773
2774
2775
2776
2777
2778
2779 if (rdev->raid_disk == -1)
2780 return -EEXIST;
2781
2782 if (rdev->mddev->pers->hot_remove_disk == NULL)
2783 return -EINVAL;
2784 clear_bit(Blocked, &rdev->flags);
2785 remove_and_add_spares(rdev->mddev, rdev);
2786 if (rdev->raid_disk >= 0)
2787 return -EBUSY;
2788 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2789 md_wakeup_thread(rdev->mddev->thread);
2790 } else if (rdev->mddev->pers) {
2791
2792
2793
2794 int err;
2795
2796 if (rdev->raid_disk != -1)
2797 return -EBUSY;
2798
2799 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2800 return -EBUSY;
2801
2802 if (rdev->mddev->pers->hot_add_disk == NULL)
2803 return -EINVAL;
2804
2805 if (slot >= rdev->mddev->raid_disks &&
2806 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2807 return -ENOSPC;
2808
2809 rdev->raid_disk = slot;
2810 if (test_bit(In_sync, &rdev->flags))
2811 rdev->saved_raid_disk = slot;
2812 else
2813 rdev->saved_raid_disk = -1;
2814 clear_bit(In_sync, &rdev->flags);
2815 clear_bit(Bitmap_sync, &rdev->flags);
2816 err = rdev->mddev->pers->
2817 hot_add_disk(rdev->mddev, rdev);
2818 if (err) {
2819 rdev->raid_disk = -1;
2820 return err;
2821 } else
2822 sysfs_notify_dirent_safe(rdev->sysfs_state);
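 /* failure to create the rd%d symlink is not fatal, so the return
  * value of sysfs_link_rdev() is ignored */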
2823 if (sysfs_link_rdev(rdev->mddev, rdev))
2824 ;
2825
2826 } else {
2827 if (slot >= rdev->mddev->raid_disks &&
2828 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2829 return -ENOSPC;
2830 rdev->raid_disk = slot;
2831
2832 clear_bit(Faulty, &rdev->flags);
2833 clear_bit(WriteMostly, &rdev->flags);
2834 set_bit(In_sync, &rdev->flags);
2835 sysfs_notify_dirent_safe(rdev->sysfs_state);
2836 }
2837 return len;
2838}
2839
2840static struct rdev_sysfs_entry rdev_slot =
2841__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2842
2843static ssize_t
2844offset_show(struct md_rdev *rdev, char *page)
2845{
2846 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2847}
2848
2849static ssize_t
2850offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2851{
2852 unsigned long long offset;
2853 if (kstrtoull(buf, 10, &offset) < 0)
2854 return -EINVAL;
2855 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2856 return -EBUSY;
2857 if (rdev->sectors && rdev->mddev->external)
2858
2859
2860 return -EBUSY;
2861 rdev->data_offset = offset;
2862 rdev->new_data_offset = offset;
2863 return len;
2864}
2865
2866static struct rdev_sysfs_entry rdev_offset =
2867__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2868
2869static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2870{
2871 return sprintf(page, "%llu\n",
2872 (unsigned long long)rdev->new_data_offset);
2873}
2874
2875static ssize_t new_offset_store(struct md_rdev *rdev,
2876 const char *buf, size_t len)
2877{
2878 unsigned long long new_offset;
2879 struct mddev *mddev = rdev->mddev;
2880
2881 if (kstrtoull(buf, 10, &new_offset) < 0)
2882 return -EINVAL;
2883
2884 if (mddev->sync_thread ||
2885 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2886 return -EBUSY;
2887 if (new_offset == rdev->data_offset)
2888
2889 ;
2890 else if (new_offset > rdev->data_offset) {
2891
2892 if (new_offset - rdev->data_offset
2893 + mddev->dev_sectors > rdev->sectors)
2894 return -E2BIG;
2895 }
2896
2897
2898
2899
2900
2901 if (new_offset < rdev->data_offset &&
2902 mddev->reshape_backwards)
2903 return -EINVAL;
2904
2905
2906
2907
2908 if (new_offset > rdev->data_offset &&
2909 !mddev->reshape_backwards)
2910 return -EINVAL;
2911
2912 if (mddev->pers && mddev->persistent &&
2913 !super_types[mddev->major_version]
2914 .allow_new_offset(rdev, new_offset))
2915 return -E2BIG;
2916 rdev->new_data_offset = new_offset;
2917 if (new_offset > rdev->data_offset)
2918 mddev->reshape_backwards = 1;
2919 else if (new_offset < rdev->data_offset)
2920 mddev->reshape_backwards = 0;
2921
2922 return len;
2923}
2924static struct rdev_sysfs_entry rdev_new_offset =
2925__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2926
2927static ssize_t
2928rdev_size_show(struct md_rdev *rdev, char *page)
2929{
2930 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2931}
2932
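/* return 1 when the ranges [s1, s1+l1) and [s2, s2+l2) intersect */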
2933static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2934{
2935
2936 if (s1+l1 <= s2)
2937 return 0;
2938 if (s2+l2 <= s1)
2939 return 0;
2940 return 1;
2941}
2942
2943static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2944{
2945 unsigned long long blocks;
2946 sector_t new;
2947
2948 if (kstrtoull(buf, 10, &blocks) < 0)
2949 return -EINVAL;
2950
2951 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2952 return -EINVAL;
2953
2954 new = blocks * 2;
2955 if (new != blocks * 2)
2956 return -EINVAL;
2957
2958 *sectors = new;
2959 return 0;
2960}
2961
2962static ssize_t
2963rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2964{
2965 struct mddev *my_mddev = rdev->mddev;
2966 sector_t oldsectors = rdev->sectors;
2967 sector_t sectors;
2968
2969 if (test_bit(Journal, &rdev->flags))
2970 return -EBUSY;
2971 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2972 return -EINVAL;
2973 if (rdev->data_offset != rdev->new_data_offset)
2974 return -EINVAL;
2975 if (my_mddev->pers && rdev->raid_disk >= 0) {
2976 if (my_mddev->persistent) {
2977 sectors = super_types[my_mddev->major_version].
2978 rdev_size_change(rdev, sectors);
2979 if (!sectors)
2980 return -EBUSY;
2981 } else if (!sectors)
2982 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2983 rdev->data_offset;
2984 if (!my_mddev->pers->resize)
2985
2986 return -EINVAL;
2987 }
2988 if (sectors < my_mddev->dev_sectors)
2989 return -EINVAL;
2990
2991 rdev->sectors = sectors;
2992 if (sectors > oldsectors && my_mddev->external) {
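 /* With externally managed metadata nothing else validates the new
  * size, so scan every array for another rdev on the same block
  * device whose data range would now overlap.  This is a best-effort
  * RCU walk, not a hard guarantee.
  */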
2993
2994
2995
2996
2997
2998
2999 struct mddev *mddev;
3000 int overlap = 0;
3001 struct list_head *tmp;
3002
3003 rcu_read_lock();
3004 for_each_mddev(mddev, tmp) {
3005 struct md_rdev *rdev2;
3006
3007 rdev_for_each(rdev2, mddev)
3008 if (rdev->bdev == rdev2->bdev &&
3009 rdev != rdev2 &&
3010 overlaps(rdev->data_offset, rdev->sectors,
3011 rdev2->data_offset,
3012 rdev2->sectors)) {
3013 overlap = 1;
3014 break;
3015 }
3016 if (overlap) {
3017 mddev_put(mddev);
3018 break;
3019 }
3020 }
3021 rcu_read_unlock();
3022 if (overlap) {
3023
3024
3025
3026
3027
3028
3029 rdev->sectors = oldsectors;
3030 return -EBUSY;
3031 }
3032 }
3033 return len;
3034}
3035
3036static struct rdev_sysfs_entry rdev_size =
3037__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3038
3039static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3040{
3041 unsigned long long recovery_start = rdev->recovery_offset;
3042
3043 if (test_bit(In_sync, &rdev->flags) ||
3044 recovery_start == MaxSector)
3045 return sprintf(page, "none\n");
3046
3047 return sprintf(page, "%llu\n", recovery_start);
3048}
3049
3050static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3051{
3052 unsigned long long recovery_start;
3053
3054 if (cmd_match(buf, "none"))
3055 recovery_start = MaxSector;
3056 else if (kstrtoull(buf, 10, &recovery_start))
3057 return -EINVAL;
3058
3059 if (rdev->mddev->pers &&
3060 rdev->raid_disk >= 0)
3061 return -EBUSY;
3062
3063 rdev->recovery_offset = recovery_start;
3064 if (recovery_start == MaxSector)
3065 set_bit(In_sync, &rdev->flags);
3066 else
3067 clear_bit(In_sync, &rdev->flags);
3068 return len;
3069}
3070
3071static struct rdev_sysfs_entry rdev_recovery_start =
3072__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
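/*
 * Per-device bad block list, exposed via sysfs: 'bad_blocks' operates on
 * acknowledged entries and 'unacknowledged_bad_blocks' on entries not
 * yet recorded in the on-disk metadata; reads and writes are handled by
 * badblocks_show()/badblocks_store().  Storing to 'bad_blocks' also
 * wakes anyone waiting on BlockedBadBlocks.
 */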
3085static ssize_t bb_show(struct md_rdev *rdev, char *page)
3086{
3087 return badblocks_show(&rdev->badblocks, page, 0);
3088}
3089static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3090{
3091 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3092
3093 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3094 wake_up(&rdev->blocked_wait);
3095 return rv;
3096}
3097static struct rdev_sysfs_entry rdev_bad_blocks =
3098__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3099
3100static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3101{
3102 return badblocks_show(&rdev->badblocks, page, 1);
3103}
3104static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3105{
3106 return badblocks_store(&rdev->badblocks, page, len, 1);
3107}
3108static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3109__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3110
3111static ssize_t
3112ppl_sector_show(struct md_rdev *rdev, char *page)
3113{
3114 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3115}
3116
3117static ssize_t
3118ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3119{
3120 unsigned long long sector;
3121
3122 if (kstrtoull(buf, 10, &sector) < 0)
3123 return -EINVAL;
3124 if (sector != (sector_t)sector)
3125 return -EINVAL;
3126
3127 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3128 rdev->raid_disk >= 0)
3129 return -EBUSY;
3130
3131 if (rdev->mddev->persistent) {
3132 if (rdev->mddev->major_version == 0)
3133 return -EINVAL;
3134 if ((sector > rdev->sb_start &&
3135 sector - rdev->sb_start > S16_MAX) ||
3136 (sector < rdev->sb_start &&
3137 rdev->sb_start - sector > -S16_MIN))
3138 return -EINVAL;
3139 rdev->ppl.offset = sector - rdev->sb_start;
3140 } else if (!rdev->mddev->external) {
3141 return -EBUSY;
3142 }
3143 rdev->ppl.sector = sector;
3144 return len;
3145}
3146
3147static struct rdev_sysfs_entry rdev_ppl_sector =
3148__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3149
3150static ssize_t
3151ppl_size_show(struct md_rdev *rdev, char *page)
3152{
3153 return sprintf(page, "%u\n", rdev->ppl.size);
3154}
3155
3156static ssize_t
3157ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3158{
3159 unsigned int size;
3160
3161 if (kstrtouint(buf, 10, &size) < 0)
3162 return -EINVAL;
3163
3164 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3165 rdev->raid_disk >= 0)
3166 return -EBUSY;
3167
3168 if (rdev->mddev->persistent) {
3169 if (rdev->mddev->major_version == 0)
3170 return -EINVAL;
3171 if (size > U16_MAX)
3172 return -EINVAL;
3173 } else if (!rdev->mddev->external) {
3174 return -EBUSY;
3175 }
3176 rdev->ppl.size = size;
3177 return len;
3178}
3179
3180static struct rdev_sysfs_entry rdev_ppl_size =
3181__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3182
3183static struct attribute *rdev_default_attrs[] = {
3184 &rdev_state.attr,
3185 &rdev_errors.attr,
3186 &rdev_slot.attr,
3187 &rdev_offset.attr,
3188 &rdev_new_offset.attr,
3189 &rdev_size.attr,
3190 &rdev_recovery_start.attr,
3191 &rdev_bad_blocks.attr,
3192 &rdev_unack_bad_blocks.attr,
3193 &rdev_ppl_sector.attr,
3194 &rdev_ppl_size.attr,
3195 NULL,
3196};
3197static ssize_t
3198rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3199{
3200 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3201 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3202
3203 if (!entry->show)
3204 return -EIO;
3205 if (!rdev->mddev)
3206 return -EBUSY;
3207 return entry->show(rdev, page);
3208}
3209
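/*
 * Stores take mddev_lock() and re-check rdev->mddev under the lock,
 * because the rdev can be unbound from its array while the sysfs file
 * is held open.
 */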
3210static ssize_t
3211rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3212 const char *page, size_t length)
3213{
3214 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3215 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3216 ssize_t rv;
3217 struct mddev *mddev = rdev->mddev;
3218
3219 if (!entry->store)
3220 return -EIO;
3221 if (!capable(CAP_SYS_ADMIN))
3222 return -EACCES;
3223 rv = mddev ? mddev_lock(mddev): -EBUSY;
3224 if (!rv) {
3225 if (rdev->mddev == NULL)
3226 rv = -EBUSY;
3227 else
3228 rv = entry->store(rdev, page, length);
3229 mddev_unlock(mddev);
3230 }
3231 return rv;
3232}
3233
3234static void rdev_free(struct kobject *ko)
3235{
3236 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3237 kfree(rdev);
3238}
3239static const struct sysfs_ops rdev_sysfs_ops = {
3240 .show = rdev_attr_show,
3241 .store = rdev_attr_store,
3242};
3243static struct kobj_type rdev_ktype = {
3244 .release = rdev_free,
3245 .sysfs_ops = &rdev_sysfs_ops,
3246 .default_attrs = rdev_default_attrs,
3247};
3248
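/*
 * Initialise a freshly allocated md_rdev to the "not part of any array"
 * state (desc_nr/raid_disk of -1, zeroed offsets and counters) and
 * allocate its bad-block table.
 */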
3249int md_rdev_init(struct md_rdev *rdev)
3250{
3251 rdev->desc_nr = -1;
3252 rdev->saved_raid_disk = -1;
3253 rdev->raid_disk = -1;
3254 rdev->flags = 0;
3255 rdev->data_offset = 0;
3256 rdev->new_data_offset = 0;
3257 rdev->sb_events = 0;
3258 rdev->last_read_error.tv_sec = 0;
3259 rdev->last_read_error.tv_nsec = 0;
3260 rdev->sb_loaded = 0;
3261 rdev->bb_page = NULL;
3262 atomic_set(&rdev->nr_pending, 0);
3263 atomic_set(&rdev->read_errors, 0);
3264 atomic_set(&rdev->corrected_errors, 0);
3265
3266 INIT_LIST_HEAD(&rdev->same_set);
3267 init_waitqueue_head(&rdev->blocked_wait);
3268
3269
3270
3271
3272
3273 return badblocks_init(&rdev->badblocks, 0);
3274}
3275EXPORT_SYMBOL_GPL(md_rdev_init);
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
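/*
 * Allocate, initialise and exclusively claim an rdev for 'newdev'.
 * A device with zero or unknown size is rejected.  When super_format is
 * >= 0 the superblock of that format is loaded and sanity-checked;
 * -1 and -2 import the device without reading metadata (the latter as a
 * shared claim, used for externally managed metadata).
 */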
3286static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3287{
3288 char b[BDEVNAME_SIZE];
3289 int err;
3290 struct md_rdev *rdev;
3291 sector_t size;
3292
3293 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3294 if (!rdev)
3295 return ERR_PTR(-ENOMEM);
3296
3297 err = md_rdev_init(rdev);
3298 if (err)
3299 goto abort_free;
3300 err = alloc_disk_sb(rdev);
3301 if (err)
3302 goto abort_free;
3303
3304 err = lock_rdev(rdev, newdev, super_format == -2);
3305 if (err)
3306 goto abort_free;
3307
3308 kobject_init(&rdev->kobj, &rdev_ktype);
3309
3310 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3311 if (!size) {
3312 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3313 bdevname(rdev->bdev,b));
3314 err = -EINVAL;
3315 goto abort_free;
3316 }
3317
3318 if (super_format >= 0) {
3319 err = super_types[super_format].
3320 load_super(rdev, NULL, super_minor);
3321 if (err == -EINVAL) {
3322 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3323 bdevname(rdev->bdev,b),
3324 super_format, super_minor);
3325 goto abort_free;
3326 }
3327 if (err < 0) {
3328 pr_warn("md: could not read %s's sb, not importing!\n",
3329 bdevname(rdev->bdev,b));
3330 goto abort_free;
3331 }
3332 }
3333
3334 return rdev;
3335
3336abort_free:
3337 if (rdev->bdev)
3338 unlock_rdev(rdev);
3339 md_rdev_clear(rdev);
3340 kfree(rdev);
3341 return ERR_PTR(err);
3342}
3343
3344
3345
3346
3347
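/*
 * analyze_sbs() scans every candidate member: the device with the most
 * recent superblock becomes "freshest" and the array is validated
 * against it; devices beyond max_disks or with stale superblocks are
 * kicked.  For MULTIPATH every remaining device is marked in_sync,
 * otherwise devices whose slot falls outside the (possibly shrinking)
 * set of raid_disks are turned back into spares.
 */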
3348static void analyze_sbs(struct mddev *mddev)
3349{
3350 int i;
3351 struct md_rdev *rdev, *freshest, *tmp;
3352 char b[BDEVNAME_SIZE];
3353
3354 freshest = NULL;
3355 rdev_for_each_safe(rdev, tmp, mddev)
3356 switch (super_types[mddev->major_version].
3357 load_super(rdev, freshest, mddev->minor_version)) {
3358 case 1:
3359 freshest = rdev;
3360 break;
3361 case 0:
3362 break;
3363 default:
3364 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3365 bdevname(rdev->bdev,b));
3366 md_kick_rdev_from_array(rdev);
3367 }
3368
3369 super_types[mddev->major_version].
3370 validate_super(mddev, freshest);
3371
3372 i = 0;
3373 rdev_for_each_safe(rdev, tmp, mddev) {
3374 if (mddev->max_disks &&
3375 (rdev->desc_nr >= mddev->max_disks ||
3376 i > mddev->max_disks)) {
3377 pr_warn("md: %s: %s: only %d devices permitted\n",
3378 mdname(mddev), bdevname(rdev->bdev, b),
3379 mddev->max_disks);
3380 md_kick_rdev_from_array(rdev);
3381 continue;
3382 }
3383 if (rdev != freshest) {
3384 if (super_types[mddev->major_version].
3385 validate_super(mddev, rdev)) {
3386 pr_warn("md: kicking non-fresh %s from array!\n",
3387 bdevname(rdev->bdev,b));
3388 md_kick_rdev_from_array(rdev);
3389 continue;
3390 }
3391 }
3392 if (mddev->level == LEVEL_MULTIPATH) {
3393 rdev->desc_nr = i++;
3394 rdev->raid_disk = rdev->desc_nr;
3395 set_bit(In_sync, &rdev->flags);
3396 } else if (rdev->raid_disk >=
3397 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3398 !test_bit(Journal, &rdev->flags)) {
3399 rdev->raid_disk = -1;
3400 clear_bit(In_sync, &rdev->flags);
3401 }
3402 }
3403}
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
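/*
 * Parse a decimal number with up to 'scale' digits after an optional
 * decimal point and return it scaled by 10^scale: for example "3.25"
 * with scale 3 yields 3250.  Extra fractional digits are ignored and a
 * trailing newline is allowed.
 */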
3415int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3416{
3417 unsigned long result = 0;
3418 long decimals = -1;
3419 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3420 if (*cp == '.')
3421 decimals = 0;
3422 else if (decimals < scale) {
3423 unsigned int value;
3424 value = *cp - '0';
3425 result = result * 10 + value;
3426 if (decimals >= 0)
3427 decimals++;
3428 }
3429 cp++;
3430 }
3431 if (*cp == '\n')
3432 cp++;
3433 if (*cp)
3434 return -EINVAL;
3435 if (decimals < 0)
3436 decimals = 0;
3437 while (decimals < scale) {
3438 result *= 10;
3439 decimals ++;
3440 }
3441 *res = result;
3442 return 0;
3443}
3444
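/*
 * safe_mode_delay is presented in seconds with millisecond resolution;
 * the store parses it with strict_strtoul_scaled(..., 3) to get
 * milliseconds, converts to jiffies, and re-arms the safemode timer
 * immediately if the delay became shorter.
 */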
3445static ssize_t
3446safe_delay_show(struct mddev *mddev, char *page)
3447{
3448 int msec = (mddev->safemode_delay*1000)/HZ;
3449 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3450}
3451static ssize_t
3452safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3453{
3454 unsigned long msec;
3455
3456 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3457 return -EINVAL;
3458 if (msec == 0)
3459 mddev->safemode_delay = 0;
3460 else {
3461 unsigned long old_delay = mddev->safemode_delay;
3462 unsigned long new_delay = (msec*HZ)/1000;
3463
3464 if (new_delay == 0)
3465 new_delay = 1;
3466 mddev->safemode_delay = new_delay;
3467 if (new_delay < old_delay || old_delay == 0)
3468 mod_timer(&mddev->safemode_timer, jiffies+1);
3469 }
3470 return len;
3471}
3472static struct md_sysfs_entry md_safe_delay =
3473__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3474
3475static ssize_t
3476level_show(struct mddev *mddev, char *page)
3477{
3478 struct md_personality *p;
3479 int ret;
3480 spin_lock(&mddev->lock);
3481 p = mddev->pers;
3482 if (p)
3483 ret = sprintf(page, "%s\n", p->name);
3484 else if (mddev->clevel[0])
3485 ret = sprintf(page, "%s\n", mddev->clevel);
3486 else if (mddev->level != LEVEL_NONE)
3487 ret = sprintf(page, "%d\n", mddev->level);
3488 else
3489 ret = 0;
3490 spin_unlock(&mddev->lock);
3491 return ret;
3492}
3493
3494static ssize_t
3495level_store(struct mddev *mddev, const char *buf, size_t len)
3496{
3497 char clevel[16];
3498 ssize_t rv;
3499 size_t slen = len;
3500 struct md_personality *pers, *oldpers;
3501 long level;
3502 void *priv, *oldpriv;
3503 struct md_rdev *rdev;
3504
3505 if (slen == 0 || slen >= sizeof(clevel))
3506 return -EINVAL;
3507
3508 rv = mddev_lock(mddev);
3509 if (rv)
3510 return rv;
3511
3512 if (mddev->pers == NULL) {
3513 strncpy(mddev->clevel, buf, slen);
3514 if (mddev->clevel[slen-1] == '\n')
3515 slen--;
3516 mddev->clevel[slen] = 0;
3517 mddev->level = LEVEL_NONE;
3518 rv = len;
3519 goto out_unlock;
3520 }
3521 rv = -EROFS;
3522 if (mddev->ro)
3523 goto out_unlock;
3524
3525
3526
3527
3528
3529
3530
3531 rv = -EBUSY;
3532 if (mddev->sync_thread ||
3533 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3534 mddev->reshape_position != MaxSector ||
3535 mddev->sysfs_active)
3536 goto out_unlock;
3537
3538 rv = -EINVAL;
3539 if (!mddev->pers->quiesce) {
3540 pr_warn("md: %s: %s does not support online personality change\n",
3541 mdname(mddev), mddev->pers->name);
3542 goto out_unlock;
3543 }
3544
3545
3546 strncpy(clevel, buf, slen);
3547 if (clevel[slen-1] == '\n')
3548 slen--;
3549 clevel[slen] = 0;
3550 if (kstrtol(clevel, 10, &level))
3551 level = LEVEL_NONE;
3552
3553 if (request_module("md-%s", clevel) != 0)
3554 request_module("md-level-%s", clevel);
3555 spin_lock(&pers_lock);
3556 pers = find_pers(level, clevel);
3557 if (!pers || !try_module_get(pers->owner)) {
3558 spin_unlock(&pers_lock);
3559 pr_warn("md: personality %s not loaded\n", clevel);
3560 rv = -EINVAL;
3561 goto out_unlock;
3562 }
3563 spin_unlock(&pers_lock);
3564
3565 if (pers == mddev->pers) {
3566
3567 module_put(pers->owner);
3568 rv = len;
3569 goto out_unlock;
3570 }
3571 if (!pers->takeover) {
3572 module_put(pers->owner);
3573 pr_warn("md: %s: %s does not support personality takeover\n",
3574 mdname(mddev), clevel);
3575 rv = -EINVAL;
3576 goto out_unlock;
3577 }
3578
3579 rdev_for_each(rdev, mddev)
3580 rdev->new_raid_disk = rdev->raid_disk;
3581
3582
3583
3584
3585 priv = pers->takeover(mddev);
3586 if (IS_ERR(priv)) {
3587 mddev->new_level = mddev->level;
3588 mddev->new_layout = mddev->layout;
3589 mddev->new_chunk_sectors = mddev->chunk_sectors;
3590 mddev->raid_disks -= mddev->delta_disks;
3591 mddev->delta_disks = 0;
3592 mddev->reshape_backwards = 0;
3593 module_put(pers->owner);
3594 pr_warn("md: %s: %s would not accept array\n",
3595 mdname(mddev), clevel);
3596 rv = PTR_ERR(priv);
3597 goto out_unlock;
3598 }
3599
3600
3601 mddev_suspend(mddev);
3602 mddev_detach(mddev);
3603
3604 spin_lock(&mddev->lock);
3605 oldpers = mddev->pers;
3606 oldpriv = mddev->private;
3607 mddev->pers = pers;
3608 mddev->private = priv;
3609 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3610 mddev->level = mddev->new_level;
3611 mddev->layout = mddev->new_layout;
3612 mddev->chunk_sectors = mddev->new_chunk_sectors;
3613 mddev->delta_disks = 0;
3614 mddev->reshape_backwards = 0;
3615 mddev->degraded = 0;
3616 spin_unlock(&mddev->lock);
3617
3618 if (oldpers->sync_request == NULL &&
3619 mddev->external) {
3620
3621
3622
3623
3624
3625
3626
3627 mddev->in_sync = 0;
3628 mddev->safemode_delay = 0;
3629 mddev->safemode = 0;
3630 }
3631
3632 oldpers->free(mddev, oldpriv);
3633
3634 if (oldpers->sync_request == NULL &&
3635 pers->sync_request != NULL) {
3636
3637 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3638 pr_warn("md: cannot register extra attributes for %s\n",
3639 mdname(mddev));
3640 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
3641 }
3642 if (oldpers->sync_request != NULL &&
3643 pers->sync_request == NULL) {
3644
3645 if (mddev->to_remove == NULL)
3646 mddev->to_remove = &md_redundancy_group;
3647 }
3648
3649 module_put(oldpers->owner);
3650
3651 rdev_for_each(rdev, mddev) {
3652 if (rdev->raid_disk < 0)
3653 continue;
3654 if (rdev->new_raid_disk >= mddev->raid_disks)
3655 rdev->new_raid_disk = -1;
3656 if (rdev->new_raid_disk == rdev->raid_disk)
3657 continue;
3658 sysfs_unlink_rdev(mddev, rdev);
3659 }
3660 rdev_for_each(rdev, mddev) {
3661 if (rdev->raid_disk < 0)
3662 continue;
3663 if (rdev->new_raid_disk == rdev->raid_disk)
3664 continue;
3665 rdev->raid_disk = rdev->new_raid_disk;
3666 if (rdev->raid_disk < 0)
3667 clear_bit(In_sync, &rdev->flags);
3668 else {
3669 if (sysfs_link_rdev(mddev, rdev))
3670 pr_warn("md: cannot register rd%d for %s after level change\n",
3671 rdev->raid_disk, mdname(mddev));
3672 }
3673 }
3674
3675 if (pers->sync_request == NULL) {
3676
3677
3678
3679 mddev->in_sync = 1;
3680 del_timer_sync(&mddev->safemode_timer);
3681 }
3682 blk_set_stacking_limits(&mddev->queue->limits);
3683 pers->run(mddev);
3684 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3685 mddev_resume(mddev);
3686 if (!mddev->thread)
3687 md_update_sb(mddev, 1);
3688 sysfs_notify(&mddev->kobj, NULL, "level");
3689 md_new_event(mddev);
3690 rv = len;
3691out_unlock:
3692 mddev_unlock(mddev);
3693 return rv;
3694}
3695
3696static struct md_sysfs_entry md_level =
3697__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3698
3699static ssize_t
3700layout_show(struct mddev *mddev, char *page)
3701{
3702
3703 if (mddev->reshape_position != MaxSector &&
3704 mddev->layout != mddev->new_layout)
3705 return sprintf(page, "%d (%d)\n",
3706 mddev->new_layout, mddev->layout);
3707 return sprintf(page, "%d\n", mddev->layout);
3708}
3709
3710static ssize_t
3711layout_store(struct mddev *mddev, const char *buf, size_t len)
3712{
3713 unsigned int n;
3714 int err;
3715
3716 err = kstrtouint(buf, 10, &n);
3717 if (err < 0)
3718 return err;
3719 err = mddev_lock(mddev);
3720 if (err)
3721 return err;
3722
3723 if (mddev->pers) {
3724 if (mddev->pers->check_reshape == NULL)
3725 err = -EBUSY;
3726 else if (mddev->ro)
3727 err = -EROFS;
3728 else {
3729 mddev->new_layout = n;
3730 err = mddev->pers->check_reshape(mddev);
3731 if (err)
3732 mddev->new_layout = mddev->layout;
3733 }
3734 } else {
3735 mddev->new_layout = n;
3736 if (mddev->reshape_position == MaxSector)
3737 mddev->layout = n;
3738 }
3739 mddev_unlock(mddev);
3740 return err ?: len;
3741}
3742static struct md_sysfs_entry md_layout =
3743__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3744
3745static ssize_t
3746raid_disks_show(struct mddev *mddev, char *page)
3747{
3748 if (mddev->raid_disks == 0)
3749 return 0;
3750 if (mddev->reshape_position != MaxSector &&
3751 mddev->delta_disks != 0)
3752 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3753 mddev->raid_disks - mddev->delta_disks);
3754 return sprintf(page, "%d\n", mddev->raid_disks);
3755}
3756
3757static int update_raid_disks(struct mddev *mddev, int raid_disks);
3758
3759static ssize_t
3760raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3761{
3762 unsigned int n;
3763 int err;
3764
3765 err = kstrtouint(buf, 10, &n);
3766 if (err < 0)
3767 return err;
3768
3769 err = mddev_lock(mddev);
3770 if (err)
3771 return err;
3772 if (mddev->pers)
3773 err = update_raid_disks(mddev, n);
3774 else if (mddev->reshape_position != MaxSector) {
3775 struct md_rdev *rdev;
3776 int olddisks = mddev->raid_disks - mddev->delta_disks;
3777
3778 err = -EINVAL;
3779 rdev_for_each(rdev, mddev) {
3780 if (olddisks < n &&
3781 rdev->data_offset < rdev->new_data_offset)
3782 goto out_unlock;
3783 if (olddisks > n &&
3784 rdev->data_offset > rdev->new_data_offset)
3785 goto out_unlock;
3786 }
3787 err = 0;
3788 mddev->delta_disks = n - olddisks;
3789 mddev->raid_disks = n;
3790 mddev->reshape_backwards = (mddev->delta_disks < 0);
3791 } else
3792 mddev->raid_disks = n;
3793out_unlock:
3794 mddev_unlock(mddev);
3795 return err ? err : len;
3796}
3797static struct md_sysfs_entry md_raid_disks =
3798__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3799
3800static ssize_t
3801chunk_size_show(struct mddev *mddev, char *page)
3802{
3803 if (mddev->reshape_position != MaxSector &&
3804 mddev->chunk_sectors != mddev->new_chunk_sectors)
3805 return sprintf(page, "%d (%d)\n",
3806 mddev->new_chunk_sectors << 9,
3807 mddev->chunk_sectors << 9);
3808 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3809}
3810
3811static ssize_t
3812chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3813{
3814 unsigned long n;
3815 int err;
3816
3817 err = kstrtoul(buf, 10, &n);
3818 if (err < 0)
3819 return err;
3820
3821 err = mddev_lock(mddev);
3822 if (err)
3823 return err;
3824 if (mddev->pers) {
3825 if (mddev->pers->check_reshape == NULL)
3826 err = -EBUSY;
3827 else if (mddev->ro)
3828 err = -EROFS;
3829 else {
3830 mddev->new_chunk_sectors = n >> 9;
3831 err = mddev->pers->check_reshape(mddev);
3832 if (err)
3833 mddev->new_chunk_sectors = mddev->chunk_sectors;
3834 }
3835 } else {
3836 mddev->new_chunk_sectors = n >> 9;
3837 if (mddev->reshape_position == MaxSector)
3838 mddev->chunk_sectors = n >> 9;
3839 }
3840 mddev_unlock(mddev);
3841 return err ?: len;
3842}
3843static struct md_sysfs_entry md_chunk_size =
3844__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3845
3846static ssize_t
3847resync_start_show(struct mddev *mddev, char *page)
3848{
3849 if (mddev->recovery_cp == MaxSector)
3850 return sprintf(page, "none\n");
3851 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3852}
3853
3854static ssize_t
3855resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3856{
3857 unsigned long long n;
3858 int err;
3859
3860 if (cmd_match(buf, "none"))
3861 n = MaxSector;
3862 else {
3863 err = kstrtoull(buf, 10, &n);
3864 if (err < 0)
3865 return err;
3866 if (n != (sector_t)n)
3867 return -EINVAL;
3868 }
3869
3870 err = mddev_lock(mddev);
3871 if (err)
3872 return err;
3873 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3874 err = -EBUSY;
3875
3876 if (!err) {
3877 mddev->recovery_cp = n;
3878 if (mddev->pers)
3879 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
3880 }
3881 mddev_unlock(mddev);
3882 return err ?: len;
3883}
3884static struct md_sysfs_entry md_resync_start =
3885__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
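/*
 * array_state reports and controls the overall state of the array:
 * "clear", "inactive", "suspended", "readonly", "read-auto", "clean",
 * "active", "write-pending" and "active-idle".  Writes map most of
 * these onto do_md_stop(), md_set_readonly(), restart_array() and
 * do_md_run() as implemented in array_state_store() below.
 */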
3923enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3924 write_pending, active_idle, bad_word};
3925static char *array_states[] = {
3926 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3927 "write-pending", "active-idle", NULL };
3928
3929static int match_word(const char *word, char **list)
3930{
3931 int n;
3932 for (n=0; list[n]; n++)
3933 if (cmd_match(word, list[n]))
3934 break;
3935 return n;
3936}
3937
3938static ssize_t
3939array_state_show(struct mddev *mddev, char *page)
3940{
3941 enum array_state st = inactive;
3942
3943 if (mddev->pers)
3944 switch(mddev->ro) {
3945 case 1:
3946 st = readonly;
3947 break;
3948 case 2:
3949 st = read_auto;
3950 break;
3951 case 0:
3952 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
3953 st = write_pending;
3954 else if (mddev->in_sync)
3955 st = clean;
3956 else if (mddev->safemode)
3957 st = active_idle;
3958 else
3959 st = active;
3960 }
3961 else {
3962 if (list_empty(&mddev->disks) &&
3963 mddev->raid_disks == 0 &&
3964 mddev->dev_sectors == 0)
3965 st = clear;
3966 else
3967 st = inactive;
3968 }
3969 return sprintf(page, "%s\n", array_states[st]);
3970}
3971
3972static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3973static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3974static int do_md_run(struct mddev *mddev);
3975static int restart_array(struct mddev *mddev);
3976
3977static ssize_t
3978array_state_store(struct mddev *mddev, const char *buf, size_t len)
3979{
3980 int err;
3981 enum array_state st = match_word(buf, array_states);
3982
3983 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3984
3985
3986
3987 spin_lock(&mddev->lock);
3988 if (st == active) {
3989 restart_array(mddev);
3990 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
3991 md_wakeup_thread(mddev->thread);
3992 wake_up(&mddev->sb_wait);
3993 err = 0;
3994 } else {
3995 restart_array(mddev);
3996 if (atomic_read(&mddev->writes_pending) == 0) {
3997 if (mddev->in_sync == 0) {
3998 mddev->in_sync = 1;
3999 if (mddev->safemode == 1)
4000 mddev->safemode = 0;
4001 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4002 }
4003 err = 0;
4004 } else
4005 err = -EBUSY;
4006 }
4007 if (!err)
4008 sysfs_notify_dirent_safe(mddev->sysfs_state);
4009 spin_unlock(&mddev->lock);
4010 return err ?: len;
4011 }
4012 err = mddev_lock(mddev);
4013 if (err)
4014 return err;
4015 err = -EINVAL;
4016 switch(st) {
4017 case bad_word:
4018 break;
4019 case clear:
4020
4021 err = do_md_stop(mddev, 0, NULL);
4022 break;
4023 case inactive:
4024
4025 if (mddev->pers)
4026 err = do_md_stop(mddev, 2, NULL);
4027 else
4028 err = 0;
4029 break;
4030 case suspended:
4031 break;
4032 case readonly:
4033 if (mddev->pers)
4034 err = md_set_readonly(mddev, NULL);
4035 else {
4036 mddev->ro = 1;
4037 set_disk_ro(mddev->gendisk, 1);
4038 err = do_md_run(mddev);
4039 }
4040 break;
4041 case read_auto:
4042 if (mddev->pers) {
4043 if (mddev->ro == 0)
4044 err = md_set_readonly(mddev, NULL);
4045 else if (mddev->ro == 1)
4046 err = restart_array(mddev);
4047 if (err == 0) {
4048 mddev->ro = 2;
4049 set_disk_ro(mddev->gendisk, 0);
4050 }
4051 } else {
4052 mddev->ro = 2;
4053 err = do_md_run(mddev);
4054 }
4055 break;
4056 case clean:
4057 if (mddev->pers) {
4058 err = restart_array(mddev);
4059 if (err)
4060 break;
4061 spin_lock(&mddev->lock);
4062 if (atomic_read(&mddev->writes_pending) == 0) {
4063 if (mddev->in_sync == 0) {
4064 mddev->in_sync = 1;
4065 if (mddev->safemode == 1)
4066 mddev->safemode = 0;
4067 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4068 }
4069 err = 0;
4070 } else
4071 err = -EBUSY;
4072 spin_unlock(&mddev->lock);
4073 } else
4074 err = -EINVAL;
4075 break;
4076 case active:
4077 if (mddev->pers) {
4078 err = restart_array(mddev);
4079 if (err)
4080 break;
4081 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4082 wake_up(&mddev->sb_wait);
4083 err = 0;
4084 } else {
4085 mddev->ro = 0;
4086 set_disk_ro(mddev->gendisk, 0);
4087 err = do_md_run(mddev);
4088 }
4089 break;
4090 case write_pending:
4091 case active_idle:
4092
4093 break;
4094 }
4095
4096 if (!err) {
4097 if (mddev->hold_active == UNTIL_IOCTL)
4098 mddev->hold_active = 0;
4099 sysfs_notify_dirent_safe(mddev->sysfs_state);
4100 }
4101 mddev_unlock(mddev);
4102 return err ?: len;
4103}
4104static struct md_sysfs_entry md_array_state =
4105__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4106
4107static ssize_t
4108max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4109 return sprintf(page, "%d\n",
4110 atomic_read(&mddev->max_corr_read_errors));
4111}
4112
4113static ssize_t
4114max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4115{
4116 unsigned int n;
4117 int rv;
4118
4119 rv = kstrtouint(buf, 10, &n);
4120 if (rv < 0)
4121 return rv;
4122 atomic_set(&mddev->max_corr_read_errors, n);
4123 return len;
4124}
4125
4126static struct md_sysfs_entry max_corr_read_errors =
4127__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4128 max_corrected_read_errors_store);
4129
4130static ssize_t
4131null_show(struct mddev *mddev, char *page)
4132{
4133 return -EINVAL;
4134}
4135
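/*
 * new_dev expects "major:minor" of an existing block device.  For
 * arrays with persistent metadata the new device's superblock is loaded
 * and cross-checked against an existing member before it is bound;
 * external arrays import it with super_format -2 and non-persistent
 * arrays with -1.
 */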
4136static ssize_t
4137new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4138{
4139
4140
4141
4142
4143
4144
4145
4146 char *e;
4147 int major = simple_strtoul(buf, &e, 10);
4148 int minor;
4149 dev_t dev;
4150 struct md_rdev *rdev;
4151 int err;
4152
4153 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4154 return -EINVAL;
4155 minor = simple_strtoul(e+1, &e, 10);
4156 if (*e && *e != '\n')
4157 return -EINVAL;
4158 dev = MKDEV(major, minor);
4159 if (major != MAJOR(dev) ||
4160 minor != MINOR(dev))
4161 return -EOVERFLOW;
4162
4163 flush_workqueue(md_misc_wq);
4164
4165 err = mddev_lock(mddev);
4166 if (err)
4167 return err;
4168 if (mddev->persistent) {
4169 rdev = md_import_device(dev, mddev->major_version,
4170 mddev->minor_version);
4171 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4172 struct md_rdev *rdev0
4173 = list_entry(mddev->disks.next,
4174 struct md_rdev, same_set);
4175 err = super_types[mddev->major_version]
4176 .load_super(rdev, rdev0, mddev->minor_version);
4177 if (err < 0)
4178 goto out;
4179 }
4180 } else if (mddev->external)
4181 rdev = md_import_device(dev, -2, -1);
4182 else
4183 rdev = md_import_device(dev, -1, -1);
4184
4185 if (IS_ERR(rdev)) {
4186 mddev_unlock(mddev);
4187 return PTR_ERR(rdev);
4188 }
4189 err = bind_rdev_to_array(rdev, mddev);
4190 out:
4191 if (err)
4192 export_rdev(rdev);
4193 mddev_unlock(mddev);
4194 return err ? err : len;
4195}
4196
4197static struct md_sysfs_entry md_new_device =
4198__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4199
4200static ssize_t
4201bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4202{
4203 char *end;
4204 unsigned long chunk, end_chunk;
4205 int err;
4206
4207 err = mddev_lock(mddev);
4208 if (err)
4209 return err;
4210 if (!mddev->bitmap)
4211 goto out;
4212
4213 while (*buf) {
4214 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4215 if (buf == end) break;
4216 if (*end == '-') {
4217 buf = end + 1;
4218 end_chunk = simple_strtoul(buf, &end, 0);
4219 if (buf == end) break;
4220 }
4221 if (*end && !isspace(*end)) break;
4222 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4223 buf = skip_spaces(end);
4224 }
4225 bitmap_unplug(mddev->bitmap);
4226out:
4227 mddev_unlock(mddev);
4228 return len;
4229}
4230
4231static struct md_sysfs_entry md_bitmap =
4232__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4233
4234static ssize_t
4235size_show(struct mddev *mddev, char *page)
4236{
4237 return sprintf(page, "%llu\n",
4238 (unsigned long long)mddev->dev_sectors / 2);
4239}
4240
4241static int update_size(struct mddev *mddev, sector_t num_sectors);
4242
4243static ssize_t
4244size_store(struct mddev *mddev, const char *buf, size_t len)
4245{
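 /* An active array is resized online via update_size() followed by a
  * superblock update; an inactive array may only have its component
  * size reduced, or set for the first time.
  */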
4246
4247
4248
4249
4250 sector_t sectors;
4251 int err = strict_blocks_to_sectors(buf, &sectors);
4252
4253 if (err < 0)
4254 return err;
4255 err = mddev_lock(mddev);
4256 if (err)
4257 return err;
4258 if (mddev->pers) {
4259 err = update_size(mddev, sectors);
4260 if (err == 0)
4261 md_update_sb(mddev, 1);
4262 } else {
4263 if (mddev->dev_sectors == 0 ||
4264 mddev->dev_sectors > sectors)
4265 mddev->dev_sectors = sectors;
4266 else
4267 err = -ENOSPC;
4268 }
4269 mddev_unlock(mddev);
4270 return err ? err : len;
4271}
4272
4273static struct md_sysfs_entry md_size =
4274__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4275
4276
4277
4278
4279
4280
4281
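/*
 * metadata_version is "none", "external:<name>", or "major.minor" for a
 * native format.  It can only be changed while the array has no member
 * devices, except that the external type may be rewritten on an
 * already-external array.
 */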
4282static ssize_t
4283metadata_show(struct mddev *mddev, char *page)
4284{
4285 if (mddev->persistent)
4286 return sprintf(page, "%d.%d\n",
4287 mddev->major_version, mddev->minor_version);
4288 else if (mddev->external)
4289 return sprintf(page, "external:%s\n", mddev->metadata_type);
4290 else
4291 return sprintf(page, "none\n");
4292}
4293
4294static ssize_t
4295metadata_store(struct mddev *mddev, const char *buf, size_t len)
4296{
4297 int major, minor;
4298 char *e;
4299 int err;
4300
4301
4302
4303
4304
4305 err = mddev_lock(mddev);
4306 if (err)
4307 return err;
4308 err = -EBUSY;
4309 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4310 ;
4311 else if (!list_empty(&mddev->disks))
4312 goto out_unlock;
4313
4314 err = 0;
4315 if (cmd_match(buf, "none")) {
4316 mddev->persistent = 0;
4317 mddev->external = 0;
4318 mddev->major_version = 0;
4319 mddev->minor_version = 90;
4320 goto out_unlock;
4321 }
4322 if (strncmp(buf, "external:", 9) == 0) {
4323 size_t namelen = len-9;
4324 if (namelen >= sizeof(mddev->metadata_type))
4325 namelen = sizeof(mddev->metadata_type)-1;
4326 strncpy(mddev->metadata_type, buf+9, namelen);
4327 mddev->metadata_type[namelen] = 0;
4328 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4329 mddev->metadata_type[--namelen] = 0;
4330 mddev->persistent = 0;
4331 mddev->external = 1;
4332 mddev->major_version = 0;
4333 mddev->minor_version = 90;
4334 goto out_unlock;
4335 }
4336 major = simple_strtoul(buf, &e, 10);
4337 err = -EINVAL;
4338 if (e==buf || *e != '.')
4339 goto out_unlock;
4340 buf = e+1;
4341 minor = simple_strtoul(buf, &e, 10);
4342 if (e==buf || (*e && *e != '\n') )
4343 goto out_unlock;
4344 err = -ENOENT;
4345 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4346 goto out_unlock;
4347 mddev->major_version = major;
4348 mddev->minor_version = minor;
4349 mddev->persistent = 1;
4350 mddev->external = 0;
4351 err = 0;
4352out_unlock:
4353 mddev_unlock(mddev);
4354 return err ?: len;
4355}
4356
4357static struct md_sysfs_entry md_metadata =
4358__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4359
4360static ssize_t
4361action_show(struct mddev *mddev, char *page)
4362{
4363 char *type = "idle";
4364 unsigned long recovery = mddev->recovery;
4365 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4366 type = "frozen";
4367 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4368 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4369 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4370 type = "reshape";
4371 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4372 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4373 type = "resync";
4374 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4375 type = "check";
4376 else
4377 type = "repair";
4378 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4379 type = "recover";
4380 else if (mddev->reshape_position != MaxSector)
4381 type = "reshape";
4382 }
4383 return sprintf(page, "%s\n", type);
4384}
4385
4386static ssize_t
4387action_store(struct mddev *mddev, const char *page, size_t len)
4388{
4389 if (!mddev->pers || !mddev->pers->sync_request)
4390 return -EINVAL;
4391
4392
4393 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4394 if (cmd_match(page, "frozen"))
4395 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4396 else
4397 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4398 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4399 mddev_lock(mddev) == 0) {
4400 flush_workqueue(md_misc_wq);
4401 if (mddev->sync_thread) {
4402 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4403 md_reap_sync_thread(mddev);
4404 }
4405 mddev_unlock(mddev);
4406 }
4407 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4408 return -EBUSY;
4409 else if (cmd_match(page, "resync"))
4410 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4411 else if (cmd_match(page, "recover")) {
4412 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4413 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4414 } else if (cmd_match(page, "reshape")) {
4415 int err;
4416 if (mddev->pers->start_reshape == NULL)
4417 return -EINVAL;
4418 err = mddev_lock(mddev);
4419 if (!err) {
4420 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4421 err = -EBUSY;
4422 else {
4423 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4424 err = mddev->pers->start_reshape(mddev);
4425 }
4426 mddev_unlock(mddev);
4427 }
4428 if (err)
4429 return err;
4430 sysfs_notify(&mddev->kobj, NULL, "degraded");
4431 } else {
4432 if (cmd_match(page, "check"))
4433 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4434 else if (!cmd_match(page, "repair"))
4435 return -EINVAL;
4436 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4437 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4438 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4439 }
4440 if (mddev->ro == 2) {
4441
4442
4443
4444 mddev->ro = 0;
4445 md_wakeup_thread(mddev->sync_thread);
4446 }
4447 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4448 md_wakeup_thread(mddev->thread);
4449 sysfs_notify_dirent_safe(mddev->sysfs_action);
4450 return len;
4451}
4452
4453static struct md_sysfs_entry md_scan_mode =
4454__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4455
4456static ssize_t
4457last_sync_action_show(struct mddev *mddev, char *page)
4458{
4459 return sprintf(page, "%s\n", mddev->last_sync_action);
4460}
4461
4462static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4463
4464static ssize_t
4465mismatch_cnt_show(struct mddev *mddev, char *page)
4466{
4467 return sprintf(page, "%llu\n",
4468 (unsigned long long)
4469 atomic64_read(&mddev->resync_mismatches));
4470}
4471
4472static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4473
4474static ssize_t
4475sync_min_show(struct mddev *mddev, char *page)
4476{
4477 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4478 mddev->sync_speed_min ? "local": "system");
4479}
4480
4481static ssize_t
4482sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4483{
4484 unsigned int min;
4485 int rv;
4486
4487 if (strncmp(buf, "system", 6)==0) {
4488 min = 0;
4489 } else {
4490 rv = kstrtouint(buf, 10, &min);
4491 if (rv < 0)
4492 return rv;
4493 if (min == 0)
4494 return -EINVAL;
4495 }
4496 mddev->sync_speed_min = min;
4497 return len;
4498}
4499
4500static struct md_sysfs_entry md_sync_min =
4501__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4502
4503static ssize_t
4504sync_max_show(struct mddev *mddev, char *page)
4505{
4506 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4507 mddev->sync_speed_max ? "local": "system");
4508}
4509
4510static ssize_t
4511sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4512{
4513 unsigned int max;
4514 int rv;
4515
4516 if (strncmp(buf, "system", 6)==0) {
4517 max = 0;
4518 } else {
4519 rv = kstrtouint(buf, 10, &max);
4520 if (rv < 0)
4521 return rv;
4522 if (max == 0)
4523 return -EINVAL;
4524 }
4525 mddev->sync_speed_max = max;
4526 return len;
4527}
4528
4529static struct md_sysfs_entry md_sync_max =
4530__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
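/*
 * Illustrative usage of the per-array speed limits (device name hypothetical).
 * A non-zero value overrides the system-wide defaults (the
 * dev.raid.speed_limit_{min,max} sysctls) until "system" is written back:
 *
 *   echo 50000  > /sys/block/md0/md/sync_speed_max   # cap resync at ~50 MB/s
 *   echo system > /sys/block/md0/md/sync_speed_max   # revert to the sysctl value
 */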
4531
4532static ssize_t
4533degraded_show(struct mddev *mddev, char *page)
4534{
4535 return sprintf(page, "%d\n", mddev->degraded);
4536}
4537static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4538
4539static ssize_t
4540sync_force_parallel_show(struct mddev *mddev, char *page)
4541{
4542 return sprintf(page, "%d\n", mddev->parallel_resync);
4543}
4544
4545static ssize_t
4546sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4547{
4548 long n;
4549
4550 if (kstrtol(buf, 10, &n))
4551 return -EINVAL;
4552
4553 if (n != 0 && n != 1)
4554 return -EINVAL;
4555
4556 mddev->parallel_resync = n;
4557
4558 if (mddev->sync_thread)
4559 wake_up(&resync_wait);
4560
4561 return len;
4562}
4563
4564
4565static struct md_sysfs_entry md_sync_force_parallel =
4566__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4567 sync_force_parallel_show, sync_force_parallel_store);
4568
4569static ssize_t
4570sync_speed_show(struct mddev *mddev, char *page)
4571{
4572 unsigned long resync, dt, db;
4573 if (mddev->curr_resync == 0)
4574 return sprintf(page, "none\n");
4575 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4576 dt = (jiffies - mddev->resync_mark) / HZ;
4577 if (!dt) dt++;
4578 db = resync - mddev->resync_mark_cnt;
4579 return sprintf(page, "%lu\n", db/dt/2);
4580}
4581
4582static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
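/*
 * Note on units: in sync_speed_show() above, db is a sector count and dt is
 * in seconds, so db/dt/2 reports the recent resync throughput in KiB/s --
 * the same unit used by the sync_speed_min/sync_speed_max limits.
 */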
4583
4584static ssize_t
4585sync_completed_show(struct mddev *mddev, char *page)
4586{
4587 unsigned long long max_sectors, resync;
4588
4589 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4590 return sprintf(page, "none\n");
4591
4592 if (mddev->curr_resync == 1 ||
4593 mddev->curr_resync == 2)
4594 return sprintf(page, "delayed\n");
4595
4596 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4597 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4598 max_sectors = mddev->resync_max_sectors;
4599 else
4600 max_sectors = mddev->dev_sectors;
4601
4602 resync = mddev->curr_resync_completed;
4603 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4604}
4605
4606static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4607
4608static ssize_t
4609min_sync_show(struct mddev *mddev, char *page)
4610{
4611 return sprintf(page, "%llu\n",
4612 (unsigned long long)mddev->resync_min);
4613}
4614static ssize_t
4615min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4616{
4617 unsigned long long min;
4618 int err;
4619
4620 if (kstrtoull(buf, 10, &min))
4621 return -EINVAL;
4622
4623 spin_lock(&mddev->lock);
4624 err = -EINVAL;
4625 if (min > mddev->resync_max)
4626 goto out_unlock;
4627
4628 err = -EBUSY;
4629 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4630 goto out_unlock;
4631
4632
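 /* Round down to a multiple of 8 sectors (4KiB) for safety */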
4633 mddev->resync_min = round_down(min, 8);
4634 err = 0;
4635
4636out_unlock:
4637 spin_unlock(&mddev->lock);
4638 return err ?: len;
4639}
4640
4641static struct md_sysfs_entry md_min_sync =
4642__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4643
4644static ssize_t
4645max_sync_show(struct mddev *mddev, char *page)
4646{
4647 if (mddev->resync_max == MaxSector)
4648 return sprintf(page, "max\n");
4649 else
4650 return sprintf(page, "%llu\n",
4651 (unsigned long long)mddev->resync_max);
4652}
4653static ssize_t
4654max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4655{
4656 int err;
4657 spin_lock(&mddev->lock);
4658 if (strncmp(buf, "max", 3) == 0)
4659 mddev->resync_max = MaxSector;
4660 else {
4661 unsigned long long max;
4662 int chunk;
4663
4664 err = -EINVAL;
4665 if (kstrtoull(buf, 10, &max))
4666 goto out_unlock;
4667 if (max < mddev->resync_min)
4668 goto out_unlock;
4669
4670 err = -EBUSY;
4671 if (max < mddev->resync_max &&
4672 mddev->ro == 0 &&
4673 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4674 goto out_unlock;
4675
4676
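 /* resync_max must fall on a chunk boundary when the array has a chunk size */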
4677 chunk = mddev->chunk_sectors;
4678 if (chunk) {
4679 sector_t temp = max;
4680
4681 err = -EINVAL;
4682 if (sector_div(temp, chunk))
4683 goto out_unlock;
4684 }
4685 mddev->resync_max = max;
4686 }
4687 wake_up(&mddev->recovery_wait);
4688 err = 0;
4689out_unlock:
4690 spin_unlock(&mddev->lock);
4691 return err ?: len;
4692}
4693
4694static struct md_sysfs_entry md_max_sync =
4695__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
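/*
 * Illustrative combined usage (device name and offsets hypothetical):
 * sync_min and sync_max bound the sector range that the next requested sync
 * covers, which allows scrubbing only part of a large array:
 *
 *   echo 0        > /sys/block/md0/md/sync_min
 *   echo 41943040 > /sys/block/md0/md/sync_max     # first ~20 GiB, in 512B sectors
 *   echo check    > /sys/block/md0/md/sync_action
 */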
4696
4697static ssize_t
4698suspend_lo_show(struct mddev *mddev, char *page)
4699{
4700 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4701}
4702
4703static ssize_t
4704suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4705{
4706 unsigned long long old, new;
4707 int err;
4708
4709 err = kstrtoull(buf, 10, &new);
4710 if (err < 0)
4711 return err;
4712 if (new != (sector_t)new)
4713 return -EINVAL;
4714
4715 err = mddev_lock(mddev);
4716 if (err)
4717 return err;
4718 err = -EINVAL;
4719 if (mddev->pers == NULL ||
4720 mddev->pers->quiesce == NULL)
4721 goto unlock;
4722 old = mddev->suspend_lo;
4723 mddev->suspend_lo = new;
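 /*
  * new >= old shrinks the suspended window [suspend_lo, suspend_hi), so the
  * lighter quiesce(..., 2) below suffices; lowering suspend_lo grows the
  * window, so the array is fully quiesced and resumed to let requests
  * already in flight over the newly-covered range drain first.
  */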
4724 if (new >= old)
4725
4726 mddev->pers->quiesce(mddev, 2);
4727 else {
4728
4729 mddev->pers->quiesce(mddev, 1);
4730 mddev->pers->quiesce(mddev, 0);
4731 }
4732 err = 0;
4733unlock:
4734 mddev_unlock(mddev);
4735 return err ?: len;
4736}
4737static struct md_sysfs_entry md_suspend_lo =
4738__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4739
4740static ssize_t
4741suspend_hi_show(struct mddev *mddev, char *page)
4742{
4743 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4744}
4745
4746static ssize_t
4747suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4748{
4749 unsigned long long old, new;
4750 int err;
4751
4752 err = kstrtoull(buf, 10, &new);
4753 if (err < 0)
4754 return err;
4755 if (new != (sector_t)new)
4756 return -EINVAL;
4757
4758 err = mddev_lock(mddev);
4759 if (err)
4760 return err;
4761 err = -EINVAL;
4762 if (mddev->pers == NULL ||
4763 mddev->pers->quiesce == NULL)
4764 goto unlock;
4765 old = mddev->suspend_hi;
4766 mddev->suspend_hi = new;
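 /*
  * Mirror image of suspend_lo_store(): lowering suspend_hi shrinks the
  * suspended window and only needs quiesce(..., 2); raising it grows the
  * window and takes the full quiesce/resume path.
  */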
4767 if (new <= old)
4768
4769 mddev->pers->quiesce(mddev, 2);
4770 else {
4771
4772 mddev->pers->quiesce(mddev, 1);
4773 mddev->pers->quiesce(mddev, 0);
4774 }
4775 err = 0;
4776unlock:
4777 mddev_unlock(mddev);
4778 return err ?: len;
4779}
4780static struct md_sysfs_entry md_suspend_hi =
4781__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4782
4783static ssize_t
4784reshape_position_show(struct mddev *mddev, char *page)
4785{
4786 if (mddev->reshape_position != MaxSector)
4787 return sprintf(page, "%llu\n",
4788 (unsigned long long)mddev->reshape_position);
4789 strcpy(page, "none\n");
4790 return 5;
4791}
4792
4793static ssize_t
4794reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4795{
4796 struct md_rdev *rdev;
4797 unsigned long long new;
4798 int err;
4799
4800 err = kstrtoull(buf, 10, &new);
4801 if (err < 0)
4802 return err;
4803 if (new != (sector_t)new)
4804 return -EINVAL;
4805 err = mddev_lock(mddev);
4806 if (err)
4807 return err;
4808 err = -EBUSY;
4809 if (mddev->pers)
4810 goto unlock;
4811 mddev->reshape_position = new;
4812 mddev->delta_disks = 0;
4813 mddev->reshape_backwards = 0;
4814 mddev->new_level = mddev->level;
4815 mddev->new_layout = mddev->layout;
4816 mddev->new_chunk_sectors = mddev->chunk_sectors;
4817 rdev_for_each(rdev, mddev)
4818 rdev->new_data_offset = rdev->data_offset;
4819 err = 0;
4820unlock:
4821 mddev_unlock(mddev);
4822 return err ?: len;
4823}
4824
4825static struct md_sysfs_entry md_reshape_position =
4826__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4827 reshape_position_store);
4828
4829static ssize_t
4830reshape_direction_show(struct mddev *mddev, char *page)
4831{
4832 return sprintf(page, "%s\n",
4833 mddev->reshape_backwards ? "backwards" : "forwards");
4834}
4835
4836static ssize_t
4837reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4838{
4839 int backwards = 0;
4840 int err;
4841
4842 if (cmd_match(buf, "forwards"))
4843 backwards = 0;
4844 else if (cmd_match(buf, "backwards"))
4845 backwards = 1;
4846 else
4847 return -EINVAL;
4848 if (mddev->reshape_backwards == backwards)
4849 return len;
4850
4851 err = mddev_lock(mddev);
4852 if (err)
4853 return err;
4854
4855 if (mddev->delta_disks)
4856 err = -EBUSY;
4857 else if (mddev->persistent &&
4858 mddev->major_version == 0)
4859 err = -EINVAL;
4860 else
4861 mddev->reshape_backwards = backwards;
4862 mddev_unlock(mddev);
4863 return err ?: len;
4864}
4865
4866static struct md_sysfs_entry md_reshape_direction =
4867__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4868 reshape_direction_store);
4869
4870static ssize_t
4871array_size_show(struct mddev *mddev, char *page)
4872{
4873 if (mddev->external_size)
4874 return sprintf(page, "%llu\n",
4875 (unsigned long long)mddev->array_sectors/2);
4876 else
4877 return sprintf(page, "default\n");
4878}
4879
4880static ssize_t
4881array_size_store(struct mddev *mddev, const char *buf, size_t len)
4882{
4883 sector_t sectors;
4884 int err;
4885
4886 err = mddev_lock(mddev);
4887 if (err)
4888 return err;
4889
4890 if (strncmp(buf, "default", 7) == 0) {
4891 if (mddev->pers)
4892 sectors = mddev->pers->size(mddev, 0, 0);
4893 else
4894 sectors = mddev->array_sectors;
4895
4896 mddev->external_size = 0;
4897 } else {
4898 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4899 err = -EINVAL;
4900 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4901 err = -E2BIG;
4902 else
4903 mddev->external_size = 1;
4904 }
4905
4906 if (!err) {
4907 mddev->array_sectors = sectors;
4908 if (mddev->pers) {
4909 set_capacity(mddev->gendisk, mddev->array_sectors);
4910 revalidate_disk(mddev->gendisk);
4911 }
4912 }
4913 mddev_unlock(mddev);
4914 return err ?: len;
4915}
4916
4917static struct md_sysfs_entry md_array_size =
4918__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4919 array_size_store);
4920
4921static ssize_t
4922consistency_policy_show(struct mddev *mddev, char *page)
4923{
4924 int ret;
4925
4926 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
4927 ret = sprintf(page, "journal\n");
4928 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
4929 ret = sprintf(page, "ppl\n");
4930 } else if (mddev->bitmap) {
4931 ret = sprintf(page, "bitmap\n");
4932 } else if (mddev->pers) {
4933 if (mddev->pers->sync_request)
4934 ret = sprintf(page, "resync\n");
4935 else
4936 ret = sprintf(page, "none\n");
4937 } else {
4938 ret = sprintf(page, "unknown\n");
4939 }
4940
4941 return ret;
4942}
4943
4944static ssize_t
4945consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
4946{
4947 int err = 0;
4948
4949 if (mddev->pers) {
4950 if (mddev->pers->change_consistency_policy)
4951 err = mddev->pers->change_consistency_policy(mddev, buf);
4952 else
4953 err = -EBUSY;
4954 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
4955 set_bit(MD_HAS_PPL, &mddev->flags);
4956 } else {
4957 err = -EINVAL;
4958 }
4959
4960 return err ? err : len;
4961}
4962
4963static struct md_sysfs_entry md_consistency_policy =
4964__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
4965 consistency_policy_store);
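/*
 * consistency_policy reports how the array avoids inconsistencies after an
 * unclean shutdown; per the show method above the possible values are
 * "journal", "ppl", "bitmap", "resync", "none" and "unknown".  Writes are
 * either delegated to the personality's change_consistency_policy method or,
 * for an inactive externally-managed array, limited to enabling "ppl".
 */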
4966
4967static struct attribute *md_default_attrs[] = {
4968 &md_level.attr,
4969 &md_layout.attr,
4970 &md_raid_disks.attr,
4971 &md_chunk_size.attr,
4972 &md_size.attr,
4973 &md_resync_start.attr,
4974 &md_metadata.attr,
4975 &md_new_device.attr,
4976 &md_safe_delay.attr,
4977 &md_array_state.attr,
4978 &md_reshape_position.attr,
4979 &md_reshape_direction.attr,
4980 &md_array_size.attr,
4981 &max_corr_read_errors.attr,
4982 &md_consistency_policy.attr,
4983 NULL,
4984};
4985
4986static struct attribute *md_redundancy_attrs[] = {
4987 &md_scan_mode.attr,
4988 &md_last_scan_mode.attr,
4989 &md_mismatches.attr,
4990 &md_sync_min.attr,
4991 &md_sync_max.attr,
4992 &md_sync_speed.attr,
4993 &md_sync_force_parallel.attr,
4994 &md_sync_completed.attr,
4995 &md_min_sync.attr,
4996 &md_max_sync.attr,
4997 &md_suspend_lo.attr,
4998 &md_suspend_hi.attr,
4999 &md_bitmap.attr,
5000 &md_degraded.attr,
5001 NULL,
5002};
5003static struct attribute_group md_redundancy_group = {
5004 .name = NULL,
5005 .attrs = md_redundancy_attrs,
5006};
5007
5008static ssize_t
5009md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5010{
5011 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5012 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5013 ssize_t rv;
5014
5015 if (!entry->show)
5016 return -EIO;
5017 spin_lock(&all_mddevs_lock);
5018 if (list_empty(&mddev->all_mddevs)) {
5019 spin_unlock(&all_mddevs_lock);
5020 return -EBUSY;
5021 }
5022 mddev_get(mddev);
5023 spin_unlock(&all_mddevs_lock);
5024
5025 rv = entry->show(mddev, page);
5026 mddev_put(mddev);
5027 return rv;
5028}
5029
5030static ssize_t
5031md_attr_store(struct kobject *kobj, struct attribute *attr,
5032 const char *page, size_t length)
5033{
5034 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5035 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5036 ssize_t rv;
5037
5038 if (!entry->store)
5039 return -EIO;
5040 if (!capable(CAP_SYS_ADMIN))
5041 return -EACCES;
5042 spin_lock(&all_mddevs_lock);
5043 if (list_empty(&mddev->all_mddevs)) {
5044 spin_unlock(&all_mddevs_lock);
5045 return -EBUSY;
5046 }
5047 mddev_get(mddev);
5048 spin_unlock(&all_mddevs_lock);
5049 rv = entry->store(mddev, page, length);
5050 mddev_put(mddev);
5051 return rv;
5052}
5053
5054static void md_free(struct kobject *ko)
5055{
5056 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5057
5058 if (mddev->sysfs_state)
5059 sysfs_put(mddev->sysfs_state);
5060
5061 if (mddev->gendisk) {
5062 del_gendisk(mddev->gendisk);
5063 put_disk(mddev->gendisk);
5064 }
5065 if (mddev->queue)
5066 blk_cleanup_queue(mddev->queue);
5067
5068 kfree(mddev);
5069}
5070
5071static const struct sysfs_ops md_sysfs_ops = {
5072 .show = md_attr_show,
5073 .store = md_attr_store,
5074};
5075static struct kobj_type md_ktype = {
5076 .release = md_free,
5077 .sysfs_ops = &md_sysfs_ops,
5078 .default_attrs = md_default_attrs,
5079};
5080
5081int mdp_major = 0;
5082
5083static void mddev_delayed_delete(struct work_struct *ws)
5084{
5085 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5086
5087 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5088 kobject_del(&mddev->kobj);
5089 kobject_put(&mddev->kobj);
5090}
5091
5092static int md_alloc(dev_t dev, char *name)
5093{
5094 static DEFINE_MUTEX(disks_mutex);
5095 struct mddev *mddev = mddev_find(dev);
5096 struct gendisk *disk;
5097 int partitioned;
5098 int shift;
5099 int unit;
5100 int error;
5101
5102 if (!mddev)
5103 return -ENODEV;
5104
5105 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5106 shift = partitioned ? MdpMinorShift : 0;
5107 unit = MINOR(mddev->unit) >> shift;
5108
5109
5110
5111
5112 flush_workqueue(md_misc_wq);
5113
5114 mutex_lock(&disks_mutex);
5115 error = -EEXIST;
5116 if (mddev->gendisk)
5117 goto abort;
5118
5119 if (name) {
5120
5121
5122 struct mddev *mddev2;
5123 spin_lock(&all_mddevs_lock);
5124
5125 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5126 if (mddev2->gendisk &&
5127 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5128 spin_unlock(&all_mddevs_lock);
5129 goto abort;
5130 }
5131 spin_unlock(&all_mddevs_lock);
5132 }
5133
5134 error = -ENOMEM;
5135 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5136 if (!mddev->queue)
5137 goto abort;
5138 mddev->queue->queuedata = mddev;
5139
5140 blk_queue_make_request(mddev->queue, md_make_request);
5141 blk_set_stacking_limits(&mddev->queue->limits);
5142
5143 disk = alloc_disk(1 << shift);
5144 if (!disk) {
5145 blk_cleanup_queue(mddev->queue);
5146 mddev->queue = NULL;
5147 goto abort;
5148 }
5149 disk->major = MAJOR(mddev->unit);
5150 disk->first_minor = unit << shift;
5151 if (name)
5152 strcpy(disk->disk_name, name);
5153 else if (partitioned)
5154 sprintf(disk->disk_name, "md_d%d", unit);
5155 else
5156 sprintf(disk->disk_name, "md%d", unit);
5157 disk->fops = &md_fops;
5158 disk->private_data = mddev;
5159 disk->queue = mddev->queue;
5160 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
5161
5162
5163
5164
5165 disk->flags |= GENHD_FL_EXT_DEVT;
5166 mddev->gendisk = disk;
5167
5168
5169
5170 mutex_lock(&mddev->open_mutex);
5171 add_disk(disk);
5172
5173 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5174 &disk_to_dev(disk)->kobj, "%s", "md");
5175 if (error) {
5176
5177
5178
5179 pr_debug("md: cannot register %s/md - name in use\n",
5180 disk->disk_name);
5181 error = 0;
5182 }
5183 if (mddev->kobj.sd &&
5184 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5185 pr_debug("pointless warning\n");
5186 mutex_unlock(&mddev->open_mutex);
5187 abort:
5188 mutex_unlock(&disks_mutex);
5189 if (!error && mddev->kobj.sd) {
5190 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5191 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5192 }
5193 mddev_put(mddev);
5194 return error;
5195}
5196
5197static struct kobject *md_probe(dev_t dev, int *part, void *data)
5198{
5199 md_alloc(dev, NULL);
5200 return NULL;
5201}
5202
5203static int add_named_array(const char *val, struct kernel_param *kp)
5204{
5205
5206
5207
5208
5209 int len = strlen(val);
5210 char buf[DISK_NAME_LEN];
5211
5212 while (len && val[len-1] == '\n')
5213 len--;
5214 if (len >= DISK_NAME_LEN)
5215 return -E2BIG;
5216 strlcpy(buf, val, len+1);
5217 if (strncmp(buf, "md_", 3) != 0)
5218 return -EINVAL;
5219 return md_alloc(0, buf);
5220}
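/*
 * add_named_array() backs a writable module parameter (new_array on md_mod
 * in mainline kernels -- the binding itself is outside this excerpt), so
 * that, for example,
 *
 *   echo md_home > /sys/module/md_mod/parameters/new_array
 *
 * pre-creates an array named md_home before mdadm assembles it.  Names must
 * start with "md_" and fit in DISK_NAME_LEN.
 */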
5221
5222static void md_safemode_timeout(unsigned long data)
5223{
5224 struct mddev *mddev = (struct mddev *) data;
5225
5226 if (!atomic_read(&mddev->writes_pending)) {
5227 mddev->safemode = 1;
5228 if (mddev->external)
5229 sysfs_notify_dirent_safe(mddev->sysfs_state);
5230 }
5231 md_wakeup_thread(mddev->thread);
5232}
5233
5234static int start_dirty_degraded;
5235
5236int md_run(struct mddev *mddev)
5237{
5238 int err;
5239 struct md_rdev *rdev;
5240 struct md_personality *pers;
5241
5242 if (list_empty(&mddev->disks))
5243
5244 return -EINVAL;
5245
5246 if (mddev->pers)
5247 return -EBUSY;
5248
5249 if (mddev->sysfs_active)
5250 return -EBUSY;
5251
5252
5253
5254
5255 if (!mddev->raid_disks) {
5256 if (!mddev->persistent)
5257 return -EINVAL;
5258 analyze_sbs(mddev);
5259 }
5260
5261 if (mddev->level != LEVEL_NONE)
5262 request_module("md-level-%d", mddev->level);
5263 else if (mddev->clevel[0])
5264 request_module("md-%s", mddev->clevel);
5265
5266
5267
5268
5269
5270
5271 rdev_for_each(rdev, mddev) {
5272 if (test_bit(Faulty, &rdev->flags))
5273 continue;
5274 sync_blockdev(rdev->bdev);
5275 invalidate_bdev(rdev->bdev);
5276 if (mddev->ro != 1 &&
5277 (bdev_read_only(rdev->bdev) ||
5278 bdev_read_only(rdev->meta_bdev))) {
5279 mddev->ro = 1;
5280 if (mddev->gendisk)
5281 set_disk_ro(mddev->gendisk, 1);
5282 }
5283
5284
5285
5286
5287
5288 if (rdev->meta_bdev) {
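 /* metadata lives on a separate device: nothing to check here */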
5289 ;
5290 } else if (rdev->data_offset < rdev->sb_start) {
5291 if (mddev->dev_sectors &&
5292 rdev->data_offset + mddev->dev_sectors
5293 > rdev->sb_start) {
5294 pr_warn("md: %s: data overlaps metadata\n",
5295 mdname(mddev));
5296 return -EINVAL;
5297 }
5298 } else {
5299 if (rdev->sb_start + rdev->sb_size/512
5300 > rdev->data_offset) {
5301 pr_warn("md: %s: metadata overlaps data\n",
5302 mdname(mddev));
5303 return -EINVAL;
5304 }
5305 }
5306 sysfs_notify_dirent_safe(rdev->sysfs_state);
5307 }
5308
5309 if (mddev->bio_set == NULL)
5310 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5311
5312 spin_lock(&pers_lock);
5313 pers = find_pers(mddev->level, mddev->clevel);
5314 if (!pers || !try_module_get(pers->owner)) {
5315 spin_unlock(&pers_lock);
5316 if (mddev->level != LEVEL_NONE)
5317 pr_warn("md: personality for level %d is not loaded!\n",
5318 mddev->level);
5319 else
5320 pr_warn("md: personality for level %s is not loaded!\n",
5321 mddev->clevel);
5322 return -EINVAL;
5323 }
5324 spin_unlock(&pers_lock);
5325 if (mddev->level != pers->level) {
5326 mddev->level = pers->level;
5327 mddev->new_level = pers->level;
5328 }
5329 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5330
5331 if (mddev->reshape_position != MaxSector &&
5332 pers->start_reshape == NULL) {
5333
5334 module_put(pers->owner);
5335 return -EINVAL;
5336 }
5337
5338 if (pers->sync_request) {
5339
5340
5341
5342 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5343 struct md_rdev *rdev2;
5344 int warned = 0;
5345
5346 rdev_for_each(rdev, mddev)
5347 rdev_for_each(rdev2, mddev) {
5348 if (rdev < rdev2 &&
5349 rdev->bdev->bd_contains ==
5350 rdev2->bdev->bd_contains) {
5351 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5352 mdname(mddev),
5353 bdevname(rdev->bdev,b),
5354 bdevname(rdev2->bdev,b2));
5355 warned = 1;
5356 }
5357 }
5358
5359 if (warned)
5360 pr_warn("True protection against single-disk failure might be compromised.\n");
5361 }
5362
5363 mddev->recovery = 0;
5364
5365 mddev->resync_max_sectors = mddev->dev_sectors;
5366
5367 mddev->ok_start_degraded = start_dirty_degraded;
5368
5369 if (start_readonly && mddev->ro == 0)
5370 mddev->ro = 2;
5371
5372 err = pers->run(mddev);
5373 if (err)
5374 pr_warn("md: pers->run() failed ...\n");
5375 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5376 WARN_ONCE(!mddev->external_size,
5377 "%s: default size too small, but 'external_size' not in effect?\n",
5378 __func__);
5379 pr_warn("md: invalid array_size %llu > default size %llu\n",
5380 (unsigned long long)mddev->array_sectors / 2,
5381 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5382 err = -EINVAL;
5383 }
5384 if (err == 0 && pers->sync_request &&
5385 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5386 err = bitmap_create(mddev);
5387 if (err)
5388 pr_warn("%s: failed to create bitmap (%d)\n",
5389 mdname(mddev), err);
5390 }
5391 if (err) {
5392 mddev_detach(mddev);
5393 if (mddev->private)
5394 pers->free(mddev, mddev->private);
5395 mddev->private = NULL;
5396 module_put(pers->owner);
5397 bitmap_destroy(mddev);
5398 return err;
5399 }
5400 if (mddev->queue) {
5401 bool nonrot = true;
5402
5403 rdev_for_each(rdev, mddev) {
5404 if (rdev->raid_disk >= 0 &&
5405 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5406 nonrot = false;
5407 break;
5408 }
5409 }
5410 if (mddev->degraded)
5411 nonrot = false;
5412 if (nonrot)
5413 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5414 else
5415 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5416 mddev->queue->backing_dev_info.congested_data = mddev;
5417 mddev->queue->backing_dev_info.congested_fn = md_congested;
5418 blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
5419 }
5420 if (pers->sync_request) {
5421 if (mddev->kobj.sd &&
5422 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5423 pr_warn("md: cannot register extra attributes for %s\n",
5424 mdname(mddev));
5425 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5426 } else if (mddev->ro == 2)
5427 mddev->ro = 0;
5428
5429 atomic_set(&mddev->writes_pending,0);
5430 atomic_set(&mddev->max_corr_read_errors,
5431 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5432 mddev->safemode = 0;
5433 mddev->safemode_delay = (200 * HZ)/1000 +1;
5434 mddev->in_sync = 1;
5435 smp_wmb();
5436 spin_lock(&mddev->lock);
5437 mddev->pers = pers;
5438 spin_unlock(&mddev->lock);
5439 rdev_for_each(rdev, mddev)
5440 if (rdev->raid_disk >= 0)
5441 if (sysfs_link_rdev(mddev, rdev))
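 /* failure to create the sysfs link is not fatal here */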
5442 ;
5443
5444 if (mddev->degraded && !mddev->ro)
5445
5446
5447
5448 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5449 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5450
5451 if (mddev->sb_flags)
5452 md_update_sb(mddev, 0);
5453
5454 md_new_event(mddev);
5455 sysfs_notify_dirent_safe(mddev->sysfs_state);
5456 sysfs_notify_dirent_safe(mddev->sysfs_action);
5457 sysfs_notify(&mddev->kobj, NULL, "degraded");
5458 return 0;
5459}
5460EXPORT_SYMBOL_GPL(md_run);
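/*
 * md_run() above only starts the personality: it validates the member
 * devices, loads and binds the personality module, creates the write-intent
 * bitmap if one is configured, and wires up the request queue and sysfs
 * attributes.  do_md_run() below additionally loads the bitmap contents,
 * wakes the array and sync threads, and publishes the array capacity to the
 * block layer.
 */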
5461
5462static int do_md_run(struct mddev *mddev)
5463{
5464 int err;
5465
5466 err = md_run(mddev);
5467 if (err)
5468 goto out;
5469 err = bitmap_load(mddev);
5470 if (err) {
5471 bitmap_destroy(mddev);
5472 goto out;
5473 }
5474
5475 md_wakeup_thread(mddev->thread);
5476 md_wakeup_thread(mddev->sync_thread);
5477
5478 set_capacity(mddev->gendisk, mddev->array_sectors);
5479 revalidate_disk(mddev->gendisk);
5480 mddev->changed = 1;
5481 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5482out:
5483 return err;
5484}
5485
5486static int restart_array(struct mddev *mddev)
5487{
5488 struct gendisk *disk = mddev->gendisk;
5489 struct md_rdev *rdev;
5490 bool has_journal = false;
5491 bool has_readonly = false;
5492
5493
5494 if (list_empty(&mddev->disks))
5495 return -ENXIO;
5496 if (!mddev->pers)
5497 return -EINVAL;
5498 if (!mddev->ro)
5499 return -EBUSY;
5500
5501 rcu_read_lock();
5502 rdev_for_each_rcu(rdev, mddev) {
5503 if (test_bit(Journal, &rdev->flags) &&
5504 !test_bit(Faulty, &rdev->flags))
5505 has_journal = true;
5506 if (bdev_read_only(rdev->bdev))
5507 has_readonly = true;
5508 }
5509 rcu_read_unlock();
5510 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
5511
5512 return -EINVAL;
5513 if (has_readonly)
5514 return -EROFS;
5515
5516 mddev->safemode = 0;
5517 mddev->ro = 0;
5518 set_disk_ro(disk, 0);
5519 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5520
5521 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5522 md_wakeup_thread(mddev->thread);
5523 md_wakeup_thread(mddev->sync_thread);
5524 sysfs_notify_dirent_safe(mddev->sysfs_state);
5525 return 0;
5526}
5527
5528static void md_clean(struct mddev *mddev)
5529{
5530 mddev->array_sectors = 0;
5531 mddev->external_size = 0;
5532 mddev->dev_sectors = 0;
5533 mddev->raid_disks = 0;
5534 mddev->recovery_cp = 0;
5535 mddev->resync_min = 0;
5536 mddev->resync_max = MaxSector;
5537 mddev->reshape_position = MaxSector;
5538 mddev->external = 0;
5539 mddev->persistent = 0;
5540 mddev->level = LEVEL_NONE;
5541 mddev->clevel[0] = 0;
5542 mddev->flags = 0;
5543 mddev->sb_flags = 0;
5544 mddev->ro = 0;
5545 mddev->metadata_type[0] = 0;
5546 mddev->chunk_sectors = 0;
5547 mddev->ctime = mddev->utime = 0;
5548 mddev->layout = 0;
5549 mddev->max_disks = 0;
5550 mddev->events = 0;
5551 mddev->can_decrease_events = 0;
5552 mddev->delta_disks = 0;
5553 mddev->reshape_backwards = 0;
5554 mddev->new_level = LEVEL_NONE;
5555 mddev->new_layout = 0;
5556 mddev->new_chunk_sectors = 0;
5557 mddev->curr_resync = 0;
5558 atomic64_set(&mddev->resync_mismatches, 0);
5559 mddev->suspend_lo = mddev->suspend_hi = 0;
5560 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5561 mddev->recovery = 0;
5562 mddev->in_sync = 0;
5563 mddev->changed = 0;
5564 mddev->degraded = 0;
5565 mddev->safemode = 0;
5566 mddev->private = NULL;
5567 mddev->merge_check_needed = 0;
5568 mddev->bitmap_info.offset = 0;
5569 mddev->bitmap_info.default_offset = 0;
5570 mddev->bitmap_info.default_space = 0;
5571 mddev->bitmap_info.chunksize = 0;
5572 mddev->bitmap_info.daemon_sleep = 0;
5573 mddev->bitmap_info.max_write_behind = 0;
5574}
5575
5576static void __md_stop_writes(struct mddev *mddev)
5577{
5578 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5579 flush_workqueue(md_misc_wq);
5580 if (mddev->sync_thread) {
5581 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5582 md_reap_sync_thread(mddev);
5583 }
5584
5585 del_timer_sync(&mddev->safemode_timer);
5586
5587 if (mddev->pers && mddev->pers->quiesce) {
5588 mddev->pers->quiesce(mddev, 1);
5589 mddev->pers->quiesce(mddev, 0);
5590 }
5591 bitmap_flush(mddev);
5592
5593 if (mddev->ro == 0 &&
5594 (!mddev->in_sync ||
5595 mddev->sb_flags)) {
5596
5597 mddev->in_sync = 1;
5598 md_update_sb(mddev, 1);
5599 }
5600}
5601
5602void md_stop_writes(struct mddev *mddev)
5603{
5604 mddev_lock_nointr(mddev);
5605 __md_stop_writes(mddev);
5606 mddev_unlock(mddev);
5607}
5608EXPORT_SYMBOL_GPL(md_stop_writes);
5609
5610static void mddev_detach(struct mddev *mddev)
5611{
5612 struct bitmap *bitmap = mddev->bitmap;
5613
5614 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5615 pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
5616 mdname(mddev));
5617
5618 wait_event(bitmap->behind_wait,
5619 atomic_read(&bitmap->behind_writes) == 0);
5620 }
5621 if (mddev->pers && mddev->pers->quiesce) {
5622 mddev->pers->quiesce(mddev, 1);
5623 mddev->pers->quiesce(mddev, 0);
5624 }
5625 md_unregister_thread(&mddev->thread);
5626 if (mddev->queue)
5627 blk_sync_queue(mddev->queue);
5628}
5629
5630static void __md_stop(struct mddev *mddev)
5631{
5632 struct md_personality *pers = mddev->pers;
5633 mddev_detach(mddev);
5634
5635 flush_workqueue(md_misc_wq);
5636 spin_lock(&mddev->lock);
5637 mddev->pers = NULL;
5638 spin_unlock(&mddev->lock);
5639 pers->free(mddev, mddev->private);
5640 mddev->private = NULL;
5641 if (pers->sync_request && mddev->to_remove == NULL)
5642 mddev->to_remove = &md_redundancy_group;
5643 module_put(pers->owner);
5644 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5645}
5646
5647void md_stop(struct mddev *mddev)
5648{
5649
5650
5651
5652 __md_stop(mddev);
5653 bitmap_destroy(mddev);
5654 if (mddev->bio_set)
5655 bioset_free(mddev->bio_set);
5656}
5657
5658EXPORT_SYMBOL_GPL(md_stop);
5659
5660static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5661{
5662 int err = 0;
5663 int did_freeze = 0;
5664
5665 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5666 did_freeze = 1;
5667 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5668 md_wakeup_thread(mddev->thread);
5669 }
5670 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5671 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5672 if (mddev->sync_thread)
5673
5674
5675 wake_up_process(mddev->sync_thread->tsk);
5676
5677 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5678 return -EBUSY;
5679 mddev_unlock(mddev);
5680 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5681 &mddev->recovery));
5682 wait_event(mddev->sb_wait,
5683 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
5684 mddev_lock_nointr(mddev);
5685
5686 mutex_lock(&mddev->open_mutex);
5687 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5688 mddev->sync_thread ||
5689 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5690 pr_warn("md: %s still in use.\n",mdname(mddev));
5691 if (did_freeze) {
5692 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5693 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5694 md_wakeup_thread(mddev->thread);
5695 }
5696 err = -EBUSY;
5697 goto out;
5698 }
5699 if (mddev->pers) {
5700 __md_stop_writes(mddev);
5701
5702 err = -ENXIO;
5703 if (mddev->ro==1)
5704 goto out;
5705 mddev->ro = 1;
5706 set_disk_ro(mddev->gendisk, 1);
5707 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5708 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5709 md_wakeup_thread(mddev->thread);
5710 sysfs_notify_dirent_safe(mddev->sysfs_state);
5711 err = 0;
5712 }
5713out:
5714 mutex_unlock(&mddev->open_mutex);
5715 return err;
5716}
5717
5718
5719
5720
5721
5722static int do_md_stop(struct mddev *mddev, int mode,
5723 struct block_device *bdev)
5724{
5725 struct gendisk *disk = mddev->gendisk;
5726 struct md_rdev *rdev;
5727 int did_freeze = 0;
5728
5729 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5730 did_freeze = 1;
5731 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5732 md_wakeup_thread(mddev->thread);
5733 }
5734 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5735 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5736 if (mddev->sync_thread)
5737
5738
5739 wake_up_process(mddev->sync_thread->tsk);
5740
5741 mddev_unlock(mddev);
5742 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5743 !test_bit(MD_RECOVERY_RUNNING,
5744 &mddev->recovery)));
5745 mddev_lock_nointr(mddev);
5746
5747 mutex_lock(&mddev->open_mutex);
5748 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5749 mddev->sysfs_active ||
5750 mddev->sync_thread ||
5751 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5752 pr_warn("md: %s still in use.\n",mdname(mddev));
5753 mutex_unlock(&mddev->open_mutex);
5754 if (did_freeze) {
5755 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5756 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5757 md_wakeup_thread(mddev->thread);
5758 }
5759 return -EBUSY;
5760 }
5761 if (mddev->pers) {
5762 if (mddev->ro)
5763 set_disk_ro(disk, 0);
5764
5765 __md_stop_writes(mddev);
5766 __md_stop(mddev);
5767 mddev->queue->merge_bvec_fn = NULL;
5768 mddev->queue->backing_dev_info.congested_fn = NULL;
5769
5770
5771 sysfs_notify_dirent_safe(mddev->sysfs_state);
5772
5773 rdev_for_each(rdev, mddev)
5774 if (rdev->raid_disk >= 0)
5775 sysfs_unlink_rdev(mddev, rdev);
5776
5777 set_capacity(disk, 0);
5778 mutex_unlock(&mddev->open_mutex);
5779 mddev->changed = 1;
5780 revalidate_disk(disk);
5781
5782 if (mddev->ro)
5783 mddev->ro = 0;
5784 } else
5785 mutex_unlock(&mddev->open_mutex);
5786
5787
5788
5789 if (mode == 0) {
5790 pr_info("md: %s stopped.\n", mdname(mddev));
5791
5792 bitmap_destroy(mddev);
5793 if (mddev->bitmap_info.file) {
5794 struct file *f = mddev->bitmap_info.file;
5795 spin_lock(&mddev->lock);
5796 mddev->bitmap_info.file = NULL;
5797 spin_unlock(&mddev->lock);
5798 fput(f);
5799 }
5800 mddev->bitmap_info.offset = 0;
5801
5802 export_array(mddev);
5803
5804 md_clean(mddev);
5805 if (mddev->hold_active == UNTIL_STOP)
5806 mddev->hold_active = 0;
5807 }
5808 blk_integrity_unregister(disk);
5809 md_new_event(mddev);
5810 sysfs_notify_dirent_safe(mddev->sysfs_state);
5811 return 0;
5812}
5813
5814#ifndef MODULE
5815static void autorun_array(struct mddev *mddev)
5816{
5817 struct md_rdev *rdev;
5818 int err;
5819
5820 if (list_empty(&mddev->disks))
5821 return;
5822
5823 pr_info("md: running: ");
5824
5825 rdev_for_each(rdev, mddev) {
5826 char b[BDEVNAME_SIZE];
5827 pr_cont("<%s>", bdevname(rdev->bdev,b));
5828 }
5829 pr_cont("\n");
5830
5831 err = do_md_run(mddev);
5832 if (err) {
5833 pr_warn("md: do_md_run() returned %d\n", err);
5834 do_md_stop(mddev, 0, NULL);
5835 }
5836}
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850static void autorun_devices(int part)
5851{
5852 struct md_rdev *rdev0, *rdev, *tmp;
5853 struct mddev *mddev;
5854 char b[BDEVNAME_SIZE];
5855
5856 pr_info("md: autorun ...\n");
5857 while (!list_empty(&pending_raid_disks)) {
5858 int unit;
5859 dev_t dev;
5860 LIST_HEAD(candidates);
5861 rdev0 = list_entry(pending_raid_disks.next,
5862 struct md_rdev, same_set);
5863
5864 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
5865 INIT_LIST_HEAD(&candidates);
5866 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5867 if (super_90_load(rdev, rdev0, 0) >= 0) {
5868 pr_debug("md: adding %s ...\n",
5869 bdevname(rdev->bdev,b));
5870 list_move(&rdev->same_set, &candidates);
5871 }
5872
5873
5874
5875
5876
5877 if (part) {
5878 dev = MKDEV(mdp_major,
5879 rdev0->preferred_minor << MdpMinorShift);
5880 unit = MINOR(dev) >> MdpMinorShift;
5881 } else {
5882 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5883 unit = MINOR(dev);
5884 }
5885 if (rdev0->preferred_minor != unit) {
5886 pr_warn("md: unit number in %s is bad: %d\n",
5887 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5888 break;
5889 }
5890
5891 md_probe(dev, NULL, NULL);
5892 mddev = mddev_find(dev);
5893 if (!mddev || !mddev->gendisk) {
5894 if (mddev)
5895 mddev_put(mddev);
5896 break;
5897 }
5898 if (mddev_lock(mddev))
5899 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
5900 else if (mddev->raid_disks || mddev->major_version
5901 || !list_empty(&mddev->disks)) {
5902 pr_warn("md: %s already running, cannot run %s\n",
5903 mdname(mddev), bdevname(rdev0->bdev,b));
5904 mddev_unlock(mddev);
5905 } else {
5906 pr_debug("md: created %s\n", mdname(mddev));
5907 mddev->persistent = 1;
5908 rdev_for_each_list(rdev, tmp, &candidates) {
5909 list_del_init(&rdev->same_set);
5910 if (bind_rdev_to_array(rdev, mddev))
5911 export_rdev(rdev);
5912 }
5913 autorun_array(mddev);
5914 mddev_unlock(mddev);
5915 }
5916
5917
5918
5919 rdev_for_each_list(rdev, tmp, &candidates) {
5920 list_del_init(&rdev->same_set);
5921 export_rdev(rdev);
5922 }
5923 mddev_put(mddev);
5924 }
5925 pr_info("md: ... autorun DONE.\n");
5926}
5927#endif
5928
5929static int get_version(void __user *arg)
5930{
5931 mdu_version_t ver;
5932
5933 ver.major = MD_MAJOR_VERSION;
5934 ver.minor = MD_MINOR_VERSION;
5935 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5936
5937 if (copy_to_user(arg, &ver, sizeof(ver)))
5938 return -EFAULT;
5939
5940 return 0;
5941}
5942
5943static int get_array_info(struct mddev *mddev, void __user *arg)
5944{
5945 mdu_array_info_t info;
5946 int nr,working,insync,failed,spare;
5947 struct md_rdev *rdev;
5948
5949 nr = working = insync = failed = spare = 0;
5950 rcu_read_lock();
5951 rdev_for_each_rcu(rdev, mddev) {
5952 nr++;
5953 if (test_bit(Faulty, &rdev->flags))
5954 failed++;
5955 else {
5956 working++;
5957 if (test_bit(In_sync, &rdev->flags))
5958 insync++;
5959 else if (test_bit(Journal, &rdev->flags))
5960
5961 ;
5962 else
5963 spare++;
5964 }
5965 }
5966 rcu_read_unlock();
5967
5968 info.major_version = mddev->major_version;
5969 info.minor_version = mddev->minor_version;
5970 info.patch_version = MD_PATCHLEVEL_VERSION;
5971 info.ctime = mddev->ctime;
5972 info.level = mddev->level;
5973 info.size = mddev->dev_sectors / 2;
5974 if (info.size != mddev->dev_sectors / 2)
5975 info.size = -1;
5976 info.nr_disks = nr;
5977 info.raid_disks = mddev->raid_disks;
5978 info.md_minor = mddev->md_minor;
5979 info.not_persistent= !mddev->persistent;
5980
5981 info.utime = mddev->utime;
5982 info.state = 0;
5983 if (mddev->in_sync)
5984 info.state = (1<<MD_SB_CLEAN);
5985 if (mddev->bitmap && mddev->bitmap_info.offset)
5986 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5987 info.active_disks = insync;
5988 info.working_disks = working;
5989 info.failed_disks = failed;
5990 info.spare_disks = spare;
5991
5992 info.layout = mddev->layout;
5993 info.chunk_size = mddev->chunk_sectors << 9;
5994
5995 if (copy_to_user(arg, &info, sizeof(info)))
5996 return -EFAULT;
5997
5998 return 0;
5999}
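/*
 * Illustrative userspace counterpart of get_array_info() (device path and
 * error handling are hypothetical; needs <stdio.h>, <fcntl.h>,
 * <sys/ioctl.h> and <linux/raid/md_u.h>):
 *
 *   mdu_array_info_t info;
 *   int fd = open("/dev/md0", O_RDONLY);
 *   if (fd >= 0 && ioctl(fd, GET_ARRAY_INFO, &info) == 0)
 *           printf("level %d, %d/%d disks active\n",
 *                  info.level, info.active_disks, info.raid_disks);
 */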
6000
6001static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6002{
6003 mdu_bitmap_file_t *file = NULL;
6004 char *ptr;
6005 int err;
6006
6007 file = kzalloc(sizeof(*file), GFP_NOIO);
6008 if (!file)
6009 return -ENOMEM;
6010
6011 err = 0;
6012 spin_lock(&mddev->lock);
6013
6014 if (mddev->bitmap_info.file) {
6015 ptr = d_path(&mddev->bitmap_info.file->f_path, file->pathname,
6016 sizeof(file->pathname));
6017 if (IS_ERR(ptr))
6018 err = PTR_ERR(ptr);
6019 else
6020 memmove(file->pathname, ptr,
6021 sizeof(file->pathname)-(ptr-file->pathname));
6022 }
6023 spin_unlock(&mddev->lock);
6024
6025 if (err == 0 &&
6026 copy_to_user(arg, file, sizeof(*file)))
6027 err = -EFAULT;
6028
6029 kfree(file);
6030 return err;
6031}
6032
6033static int get_disk_info(struct mddev *mddev, void __user * arg)
6034{
6035 mdu_disk_info_t info;
6036 struct md_rdev *rdev;
6037
6038 if (copy_from_user(&info, arg, sizeof(info)))
6039 return -EFAULT;
6040
6041 rcu_read_lock();
6042 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6043 if (rdev) {
6044 info.major = MAJOR(rdev->bdev->bd_dev);
6045 info.minor = MINOR(rdev->bdev->bd_dev);
6046 info.raid_disk = rdev->raid_disk;
6047 info.state = 0;
6048 if (test_bit(Faulty, &rdev->flags))
6049 info.state |= (1<<MD_DISK_FAULTY);
6050 else if (test_bit(In_sync, &rdev->flags)) {
6051 info.state |= (1<<MD_DISK_ACTIVE);
6052 info.state |= (1<<MD_DISK_SYNC);
6053 }
6054 if (test_bit(Journal, &rdev->flags))
6055 info.state |= (1<<MD_DISK_JOURNAL);
6056 if (test_bit(WriteMostly, &rdev->flags))
6057 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6058 if (test_bit(FailFast, &rdev->flags))
6059 info.state |= (1<<MD_DISK_FAILFAST);
6060 } else {
6061 info.major = info.minor = 0;
6062 info.raid_disk = -1;
6063 info.state = (1<<MD_DISK_REMOVED);
6064 }
6065 rcu_read_unlock();
6066
6067 if (copy_to_user(arg, &info, sizeof(info)))
6068 return -EFAULT;
6069
6070 return 0;
6071}
6072
6073static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6074{
6075 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6076 struct md_rdev *rdev;
6077 dev_t dev = MKDEV(info->major,info->minor);
6078
6079 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6080 return -EOVERFLOW;
6081
6082 if (!mddev->raid_disks) {
6083 int err;
6084
6085 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6086 if (IS_ERR(rdev)) {
6087 pr_warn("md: md_import_device returned %ld\n",
6088 PTR_ERR(rdev));
6089 return PTR_ERR(rdev);
6090 }
6091 if (!list_empty(&mddev->disks)) {
6092 struct md_rdev *rdev0
6093 = list_entry(mddev->disks.next,
6094 struct md_rdev, same_set);
6095 err = super_types[mddev->major_version]
6096 .load_super(rdev, rdev0, mddev->minor_version);
6097 if (err < 0) {
6098 pr_warn("md: %s has different UUID to %s\n",
6099 bdevname(rdev->bdev,b),
6100 bdevname(rdev0->bdev,b2));
6101 export_rdev(rdev);
6102 return -EINVAL;
6103 }
6104 }
6105 err = bind_rdev_to_array(rdev, mddev);
6106 if (err)
6107 export_rdev(rdev);
6108 return err;
6109 }
6110
6111
6112
6113
6114
6115
6116 if (mddev->pers) {
6117 int err;
6118 if (!mddev->pers->hot_add_disk) {
6119 pr_warn("%s: personality does not support diskops!\n",
6120 mdname(mddev));
6121 return -EINVAL;
6122 }
6123 if (mddev->persistent)
6124 rdev = md_import_device(dev, mddev->major_version,
6125 mddev->minor_version);
6126 else
6127 rdev = md_import_device(dev, -1, -1);
6128 if (IS_ERR(rdev)) {
6129 pr_warn("md: md_import_device returned %ld\n",
6130 PTR_ERR(rdev));
6131 return PTR_ERR(rdev);
6132 }
6133
6134 if (!mddev->persistent) {
6135 if (info->state & (1<<MD_DISK_SYNC) &&
6136 info->raid_disk < mddev->raid_disks) {
6137 rdev->raid_disk = info->raid_disk;
6138 set_bit(In_sync, &rdev->flags);
6139 clear_bit(Bitmap_sync, &rdev->flags);
6140 } else
6141 rdev->raid_disk = -1;
6142 rdev->saved_raid_disk = rdev->raid_disk;
6143 } else
6144 super_types[mddev->major_version].
6145 validate_super(mddev, rdev);
6146 if ((info->state & (1<<MD_DISK_SYNC)) &&
6147 rdev->raid_disk != info->raid_disk) {
6148
6149
6150
6151 export_rdev(rdev);
6152 return -EINVAL;
6153 }
6154
6155 clear_bit(In_sync, &rdev->flags);
6156 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6157 set_bit(WriteMostly, &rdev->flags);
6158 else
6159 clear_bit(WriteMostly, &rdev->flags);
6160 if (info->state & (1<<MD_DISK_FAILFAST))
6161 set_bit(FailFast, &rdev->flags);
6162 else
6163 clear_bit(FailFast, &rdev->flags);
6164
6165 if (info->state & (1<<MD_DISK_JOURNAL)) {
6166 struct md_rdev *rdev2;
6167 bool has_journal = false;
6168
6169
6170 rdev_for_each(rdev2, mddev) {
6171 if (test_bit(Journal, &rdev2->flags)) {
6172 has_journal = true;
6173 break;
6174 }
6175 }
6176 if (has_journal) {
6177 export_rdev(rdev);
6178 return -EBUSY;
6179 }
6180 set_bit(Journal, &rdev->flags);
6181 }
6182
6183 rdev->raid_disk = -1;
6184 err = bind_rdev_to_array(rdev, mddev);
6185
6186 if (err)
6187 export_rdev(rdev);
6188 else
6189 err = add_bound_rdev(rdev);
6190
6191 return err;
6192 }
6193
6194
6195
6196
6197 if (mddev->major_version != 0) {
6198 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6199 return -EINVAL;
6200 }
6201
6202 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6203 int err;
6204 rdev = md_import_device(dev, -1, 0);
6205 if (IS_ERR(rdev)) {
6206 pr_warn("md: error, md_import_device() returned %ld\n",
6207 PTR_ERR(rdev));
6208 return PTR_ERR(rdev);
6209 }
6210 rdev->desc_nr = info->number;
6211 if (info->raid_disk < mddev->raid_disks)
6212 rdev->raid_disk = info->raid_disk;
6213 else
6214 rdev->raid_disk = -1;
6215
6216 if (rdev->raid_disk < mddev->raid_disks)
6217 if (info->state & (1<<MD_DISK_SYNC))
6218 set_bit(In_sync, &rdev->flags);
6219
6220 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6221 set_bit(WriteMostly, &rdev->flags);
6222 if (info->state & (1<<MD_DISK_FAILFAST))
6223 set_bit(FailFast, &rdev->flags);
6224
6225 if (!mddev->persistent) {
6226 pr_debug("md: nonpersistent superblock ...\n");
6227 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6228 } else
6229 rdev->sb_start = calc_dev_sboffset(rdev);
6230 rdev->sectors = rdev->sb_start;
6231
6232 err = bind_rdev_to_array(rdev, mddev);
6233 if (err) {
6234 export_rdev(rdev);
6235 return err;
6236 }
6237 }
6238
6239 return 0;
6240}
6241
6242static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6243{
6244 char b[BDEVNAME_SIZE];
6245 struct md_rdev *rdev;
6246
6247 rdev = find_rdev(mddev, dev);
6248 if (!rdev)
6249 return -ENXIO;
6250
6251 if (rdev->raid_disk < 0)
6252 goto kick_rdev;
6253
6254 clear_bit(Blocked, &rdev->flags);
6255 remove_and_add_spares(mddev, rdev);
6256
6257 if (rdev->raid_disk >= 0)
6258 goto busy;
6259
6260kick_rdev:
6261 md_kick_rdev_from_array(rdev);
6262 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6263 if (mddev->thread)
6264 md_wakeup_thread(mddev->thread);
6265 else
6266 md_update_sb(mddev, 1);
6267 md_new_event(mddev);
6268
6269 return 0;
6270busy:
6271 pr_debug("md: cannot remove active disk %s from %s ...\n",
6272 bdevname(rdev->bdev,b), mdname(mddev));
6273 return -EBUSY;
6274}
6275
6276static int hot_add_disk(struct mddev *mddev, dev_t dev)
6277{
6278 char b[BDEVNAME_SIZE];
6279 int err;
6280 struct md_rdev *rdev;
6281
6282 if (!mddev->pers)
6283 return -ENODEV;
6284
6285 if (mddev->major_version != 0) {
6286 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6287 mdname(mddev));
6288 return -EINVAL;
6289 }
6290 if (!mddev->pers->hot_add_disk) {
6291 pr_warn("%s: personality does not support diskops!\n",
6292 mdname(mddev));
6293 return -EINVAL;
6294 }
6295
6296 rdev = md_import_device(dev, -1, 0);
6297 if (IS_ERR(rdev)) {
6298 pr_warn("md: error, md_import_device() returned %ld\n",
6299 PTR_ERR(rdev));
6300 return -EINVAL;
6301 }
6302
6303 if (mddev->persistent)
6304 rdev->sb_start = calc_dev_sboffset(rdev);
6305 else
6306 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6307
6308 rdev->sectors = rdev->sb_start;
6309
6310 if (test_bit(Faulty, &rdev->flags)) {
6311 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6312 bdevname(rdev->bdev,b), mdname(mddev));
6313 err = -EINVAL;
6314 goto abort_export;
6315 }
6316 clear_bit(In_sync, &rdev->flags);
6317 rdev->desc_nr = -1;
6318 rdev->saved_raid_disk = -1;
6319 err = bind_rdev_to_array(rdev, mddev);
6320 if (err)
6321 goto abort_export;
6322
6323
6324
6325
6326
6327
6328 rdev->raid_disk = -1;
6329
6330 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6331 if (!mddev->thread)
6332 md_update_sb(mddev, 1);
6333
6334
6335
6336
6337 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6338 md_wakeup_thread(mddev->thread);
6339 md_new_event(mddev);
6340 return 0;
6341
6342abort_export:
6343 export_rdev(rdev);
6344 return err;
6345}
6346
6347static int set_bitmap_file(struct mddev *mddev, int fd)
6348{
6349 int err = 0;
6350
6351 if (mddev->pers) {
6352 if (!mddev->pers->quiesce || !mddev->thread)
6353 return -EBUSY;
6354 if (mddev->recovery || mddev->sync_thread)
6355 return -EBUSY;
6356
6357 }
6358
6359 if (fd >= 0) {
6360 struct inode *inode;
6361 struct file *f;
6362
6363 if (mddev->bitmap || mddev->bitmap_info.file)
6364 return -EEXIST;
6365 f = fget(fd);
6366
6367 if (f == NULL) {
6368 pr_warn("%s: error: failed to get bitmap file\n",
6369 mdname(mddev));
6370 return -EBADF;
6371 }
6372
6373 inode = f->f_mapping->host;
6374 if (!S_ISREG(inode->i_mode)) {
6375 pr_warn("%s: error: bitmap file must be a regular file\n",
6376 mdname(mddev));
6377 err = -EBADF;
6378 } else if (!(f->f_mode & FMODE_WRITE)) {
6379 pr_warn("%s: error: bitmap file must open for write\n",
6380 mdname(mddev));
6381 err = -EBADF;
6382 } else if (atomic_read(&inode->i_writecount) != 1) {
6383 pr_warn("%s: error: bitmap file is already in use\n",
6384 mdname(mddev));
6385 err = -EBUSY;
6386 }
6387 if (err) {
6388 fput(f);
6389 return err;
6390 }
6391 mddev->bitmap_info.file = f;
6392 mddev->bitmap_info.offset = 0;
6393 } else if (mddev->bitmap == NULL)
6394 return -ENOENT;
6395 err = 0;
6396 if (mddev->pers) {
6397 mddev->pers->quiesce(mddev, 1);
6398 if (fd >= 0) {
6399 err = bitmap_create(mddev);
6400 if (!err)
6401 err = bitmap_load(mddev);
6402 }
6403 if (fd < 0 || err) {
6404 bitmap_destroy(mddev);
6405 fd = -1;
6406 }
6407 mddev->pers->quiesce(mddev, 0);
6408 }
6409 if (fd < 0) {
6410 struct file *f = mddev->bitmap_info.file;
6411 if (f) {
6412 spin_lock(&mddev->lock);
6413 mddev->bitmap_info.file = NULL;
6414 spin_unlock(&mddev->lock);
6415 fput(f);
6416 }
6417 }
6418
6419 return err;
6420}
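/*
 * Summary of set_bitmap_file(): a non-negative fd attaches an external
 * bitmap file, which must be a regular file, opened for write and not
 * otherwise in use; fd == -1 tears a file-backed bitmap down.  Either way,
 * an active array is quiesced around the bitmap create/load/destroy calls.
 */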
6421
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
6435static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6436{
6437
6438 if (info->raid_disks == 0) {
6439
6440 if (info->major_version < 0 ||
6441 info->major_version >= ARRAY_SIZE(super_types) ||
6442 super_types[info->major_version].name == NULL) {
6443
6444 pr_warn("md: superblock version %d not known\n",
6445 info->major_version);
6446 return -EINVAL;
6447 }
6448 mddev->major_version = info->major_version;
6449 mddev->minor_version = info->minor_version;
6450 mddev->patch_version = info->patch_version;
6451 mddev->persistent = !info->not_persistent;
6452
6453
6454
6455 mddev->ctime = get_seconds();
6456 return 0;
6457 }
6458 mddev->major_version = MD_MAJOR_VERSION;
6459 mddev->minor_version = MD_MINOR_VERSION;
6460 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6461 mddev->ctime = get_seconds();
6462
6463 mddev->level = info->level;
6464 mddev->clevel[0] = 0;
6465 mddev->dev_sectors = 2 * (sector_t)info->size;
6466 mddev->raid_disks = info->raid_disks;
6467
6468
6469
6470 if (info->state & (1<<MD_SB_CLEAN))
6471 mddev->recovery_cp = MaxSector;
6472 else
6473 mddev->recovery_cp = 0;
6474 mddev->persistent = ! info->not_persistent;
6475 mddev->external = 0;
6476
6477 mddev->layout = info->layout;
6478 mddev->chunk_sectors = info->chunk_size >> 9;
6479
6480 mddev->max_disks = MD_SB_DISKS;
6481
6482 if (mddev->persistent) {
6483 mddev->flags = 0;
6484 mddev->sb_flags = 0;
6485 }
6486 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6487
6488 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6489 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6490 mddev->bitmap_info.offset = 0;
6491
6492 mddev->reshape_position = MaxSector;
6493
6494
6495
6496
6497 get_random_bytes(mddev->uuid, 16);
6498
6499 mddev->new_level = mddev->level;
6500 mddev->new_chunk_sectors = mddev->chunk_sectors;
6501 mddev->new_layout = mddev->layout;
6502 mddev->delta_disks = 0;
6503 mddev->reshape_backwards = 0;
6504
6505 return 0;
6506}
6507
6508void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6509{
6510 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6511
6512 if (mddev->external_size)
6513 return;
6514
6515 mddev->array_sectors = array_sectors;
6516}
6517EXPORT_SYMBOL(md_set_array_sectors);
6518
6519static int update_size(struct mddev *mddev, sector_t num_sectors)
6520{
6521 struct md_rdev *rdev;
6522 int rv;
6523 int fit = (num_sectors == 0);
6524
6525 if (mddev->pers->resize == NULL)
6526 return -EINVAL;
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6537 mddev->sync_thread)
6538 return -EBUSY;
6539 if (mddev->ro)
6540 return -EROFS;
6541
6542 rdev_for_each(rdev, mddev) {
6543 sector_t avail = rdev->sectors;
6544
6545 if (fit && (num_sectors == 0 || num_sectors > avail))
6546 num_sectors = avail;
6547 if (avail < num_sectors)
6548 return -ENOSPC;
6549 }
6550 rv = mddev->pers->resize(mddev, num_sectors);
6551 if (!rv)
6552 revalidate_disk(mddev->gendisk);
6553 return rv;
6554}
6555
6556static int update_raid_disks(struct mddev *mddev, int raid_disks)
6557{
6558 int rv;
6559 struct md_rdev *rdev;
6560
6561 if (mddev->pers->check_reshape == NULL)
6562 return -EINVAL;
6563 if (mddev->ro)
6564 return -EROFS;
6565 if (raid_disks <= 0 ||
6566 (mddev->max_disks && raid_disks >= mddev->max_disks))
6567 return -EINVAL;
6568 if (mddev->sync_thread ||
6569 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6570 mddev->reshape_position != MaxSector)
6571 return -EBUSY;
6572
6573 rdev_for_each(rdev, mddev) {
6574 if (mddev->raid_disks < raid_disks &&
6575 rdev->data_offset < rdev->new_data_offset)
6576 return -EINVAL;
6577 if (mddev->raid_disks > raid_disks &&
6578 rdev->data_offset > rdev->new_data_offset)
6579 return -EINVAL;
6580 }
6581
6582 mddev->delta_disks = raid_disks - mddev->raid_disks;
6583 if (mddev->delta_disks < 0)
6584 mddev->reshape_backwards = 1;
6585 else if (mddev->delta_disks > 0)
6586 mddev->reshape_backwards = 0;
6587
6588 rv = mddev->pers->check_reshape(mddev);
6589 if (rv < 0) {
6590 mddev->delta_disks = 0;
6591 mddev->reshape_backwards = 0;
6592 }
6593 return rv;
6594}
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6605{
6606 int rv = 0;
6607 int cnt = 0;
6608 int state = 0;
6609
6610
6611 if (mddev->bitmap && mddev->bitmap_info.offset)
6612 state |= (1 << MD_SB_BITMAP_PRESENT);
6613
6614 if (mddev->major_version != info->major_version ||
6615 mddev->minor_version != info->minor_version ||
6616
6617 mddev->ctime != info->ctime ||
6618 mddev->level != info->level ||
6619
6620 mddev->persistent != !info->not_persistent ||
6621 mddev->chunk_sectors != info->chunk_size >> 9 ||
6622
6623 ((state^info->state) & 0xfffffe00)
6624 )
6625 return -EINVAL;
6626
6627 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6628 cnt++;
6629 if (mddev->raid_disks != info->raid_disks)
6630 cnt++;
6631 if (mddev->layout != info->layout)
6632 cnt++;
6633 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6634 cnt++;
6635 if (cnt == 0)
6636 return 0;
6637 if (cnt > 1)
6638 return -EINVAL;
6639
6640 if (mddev->layout != info->layout) {
6641
6642
6643
6644
6645 if (mddev->pers->check_reshape == NULL)
6646 return -EINVAL;
6647 else {
6648 mddev->new_layout = info->layout;
6649 rv = mddev->pers->check_reshape(mddev);
6650 if (rv)
6651 mddev->new_layout = mddev->layout;
6652 return rv;
6653 }
6654 }
6655 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6656 rv = update_size(mddev, (sector_t)info->size * 2);
6657
6658 if (mddev->raid_disks != info->raid_disks)
6659 rv = update_raid_disks(mddev, info->raid_disks);
6660
6661 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6662 if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
6663 return -EINVAL;
6664 if (mddev->recovery || mddev->sync_thread)
6665 return -EBUSY;
6666 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6667
6668 if (mddev->bitmap)
6669 return -EEXIST;
6670 if (mddev->bitmap_info.default_offset == 0)
6671 return -EINVAL;
6672 mddev->bitmap_info.offset =
6673 mddev->bitmap_info.default_offset;
6674 mddev->bitmap_info.space =
6675 mddev->bitmap_info.default_space;
6676 mddev->pers->quiesce(mddev, 1);
6677 rv = bitmap_create(mddev);
6678 if (!rv)
6679 rv = bitmap_load(mddev);
6680 if (rv)
6681 bitmap_destroy(mddev);
6682 mddev->pers->quiesce(mddev, 0);
6683 } else {
6684
6685 if (!mddev->bitmap)
6686 return -ENOENT;
6687 if (mddev->bitmap->storage.file)
6688 return -EINVAL;
6689 mddev->pers->quiesce(mddev, 1);
6690 bitmap_destroy(mddev);
6691 mddev->pers->quiesce(mddev, 0);
6692 mddev->bitmap_info.offset = 0;
6693 }
6694 }
6695 md_update_sb(mddev, 1);
6696 return rv;
6697}
6698
6699static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6700{
6701 struct md_rdev *rdev;
6702 int err = 0;
6703
6704 if (mddev->pers == NULL)
6705 return -ENODEV;
6706
6707 rcu_read_lock();
6708 rdev = find_rdev_rcu(mddev, dev);
6709 if (!rdev)
6710 err = -ENODEV;
6711 else {
6712 md_error(mddev, rdev);
6713 if (!test_bit(Faulty, &rdev->flags))
6714 err = -EBUSY;
6715 }
6716 rcu_read_unlock();
6717 return err;
6718}
6719
6720
6721
6722
6723
6724
6725
6726static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6727{
6728 struct mddev *mddev = bdev->bd_disk->private_data;
6729
6730 geo->heads = 2;
6731 geo->sectors = 4;
6732 geo->cylinders = mddev->array_sectors / 8;
6733 return 0;
6734}
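/*
 * md arrays have no real geometry, so md_getgeo() fabricates 2 heads and
 * 4 sectors per track, leaving cylinders = array_sectors / 8 for legacy
 * HDIO_GETGEO users.
 */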
6735
6736static inline bool md_ioctl_valid(unsigned int cmd)
6737{
6738 switch (cmd) {
6739 case ADD_NEW_DISK:
6740 case BLKROSET:
6741 case GET_ARRAY_INFO:
6742 case GET_BITMAP_FILE:
6743 case GET_DISK_INFO:
6744 case HOT_ADD_DISK:
6745 case HOT_REMOVE_DISK:
6746 case RAID_AUTORUN:
6747 case RAID_VERSION:
6748 case RESTART_ARRAY_RW:
6749 case RUN_ARRAY:
6750 case SET_ARRAY_INFO:
6751 case SET_BITMAP_FILE:
6752 case SET_DISK_FAULTY:
6753 case STOP_ARRAY:
6754 case STOP_ARRAY_RO:
6755 return true;
6756 default:
6757 return false;
6758 }
6759}
6760
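/*
 * Main ioctl entry point for an md block device.  Query-only commands are
 * allowed without CAP_SYS_ADMIN; everything else requires it, and most
 * commands additionally take the mddev lock.
 */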
6761static int md_ioctl(struct block_device *bdev, fmode_t mode,
6762 unsigned int cmd, unsigned long arg)
6763{
6764 int err = 0;
6765 void __user *argp = (void __user *)arg;
6766 struct mddev *mddev = NULL;
6767 int ro;
6768
6769 if (!md_ioctl_valid(cmd))
6770 return -ENOTTY;
6771
6772 switch (cmd) {
6773 case RAID_VERSION:
6774 case GET_ARRAY_INFO:
6775 case GET_DISK_INFO:
6776 break;
6777 default:
6778 if (!capable(CAP_SYS_ADMIN))
6779 return -EACCES;
6780 }
6781
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
6786 switch (cmd) {
6787 case RAID_VERSION:
6788 err = get_version(argp);
6789 goto out;
6790
6791#ifndef MODULE
6792 case RAID_AUTORUN:
6793 err = 0;
6794 autostart_arrays(arg);
6795 goto out;
6796#endif
6797 default:;
6798 }
6799
	/*
	 * Commands creating/starting a new array:
	 */

6804 mddev = bdev->bd_disk->private_data;
6805
6806 if (!mddev) {
6807 BUG();
6808 goto out;
6809 }
6810
	/* Some actions do not require the mutex */
6812 switch (cmd) {
6813 case GET_ARRAY_INFO:
6814 if (!mddev->raid_disks && !mddev->external)
6815 err = -ENODEV;
6816 else
6817 err = get_array_info(mddev, argp);
6818 goto out;
6819
6820 case GET_DISK_INFO:
6821 if (!mddev->raid_disks && !mddev->external)
6822 err = -ENODEV;
6823 else
6824 err = get_disk_info(mddev, argp);
6825 goto out;
6826
6827 case SET_DISK_FAULTY:
6828 err = set_disk_faulty(mddev, new_decode_dev(arg));
6829 goto out;
6830
6831 case GET_BITMAP_FILE:
6832 err = get_bitmap_file(mddev, argp);
6833 goto out;
6834
6835 }
6836
6837 if (cmd == ADD_NEW_DISK)
		/* need to ensure md_delayed_delete() has completed */
6839 flush_workqueue(md_misc_wq);
6840
6841 if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
6843 wait_event_interruptible_timeout(mddev->sb_wait,
6844 !test_bit(MD_RECOVERY_NEEDED,
6845 &mddev->recovery),
6846 msecs_to_jiffies(5000));
6847 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
6851 mutex_lock(&mddev->open_mutex);
6852 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6853 mutex_unlock(&mddev->open_mutex);
6854 err = -EBUSY;
6855 goto out;
6856 }
6857 set_bit(MD_CLOSING, &mddev->flags);
6858 mutex_unlock(&mddev->open_mutex);
6859 sync_blockdev(bdev);
6860 }
6861 err = mddev_lock(mddev);
6862 if (err) {
6863 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
6864 err, cmd);
6865 goto out;
6866 }
6867
6868 if (cmd == SET_ARRAY_INFO) {
6869 mdu_array_info_t info;
6870 if (!arg)
6871 memset(&info, 0, sizeof(info));
6872 else if (copy_from_user(&info, argp, sizeof(info))) {
6873 err = -EFAULT;
6874 goto unlock;
6875 }
6876 if (mddev->pers) {
6877 err = update_array_info(mddev, &info);
6878 if (err) {
6879 pr_warn("md: couldn't update array info. %d\n", err);
6880 goto unlock;
6881 }
6882 goto unlock;
6883 }
6884 if (!list_empty(&mddev->disks)) {
6885 pr_warn("md: array %s already has disks!\n", mdname(mddev));
6886 err = -EBUSY;
6887 goto unlock;
6888 }
6889 if (mddev->raid_disks) {
6890 pr_warn("md: array %s already initialised!\n", mdname(mddev));
6891 err = -EBUSY;
6892 goto unlock;
6893 }
6894 err = set_array_info(mddev, &info);
6895 if (err) {
6896 pr_warn("md: couldn't set array info. %d\n", err);
6897 goto unlock;
6898 }
6899 goto unlock;
6900 }
6901
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
6907 if ((!mddev->raid_disks && !mddev->external)
6908 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6909 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6910 && cmd != GET_BITMAP_FILE) {
6911 err = -ENODEV;
6912 goto unlock;
6913 }
6914
	/*
	 * Commands even a read-only array can execute:
	 */
6918 switch (cmd) {
6919 case RESTART_ARRAY_RW:
6920 err = restart_array(mddev);
6921 goto unlock;
6922
6923 case STOP_ARRAY:
6924 err = do_md_stop(mddev, 0, bdev);
6925 goto unlock;
6926
6927 case STOP_ARRAY_RO:
6928 err = md_set_readonly(mddev, bdev);
6929 goto unlock;
6930
6931 case HOT_REMOVE_DISK:
6932 err = hot_remove_disk(mddev, new_decode_dev(arg));
6933 goto unlock;
6934
6935 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
6940 if (mddev->pers) {
6941 mdu_disk_info_t info;
6942 if (copy_from_user(&info, argp, sizeof(info)))
6943 err = -EFAULT;
6944 else if (!(info.state & (1<<MD_DISK_SYNC)))
			/* Need to clear read-only for this */
6946 break;
6947 else
6948 err = add_new_disk(mddev, &info);
6949 goto unlock;
6950 }
6951 break;
6952
6953 case BLKROSET:
6954 if (get_user(ro, (int __user *)(arg))) {
6955 err = -EFAULT;
6956 goto unlock;
6957 }
6958 err = -EINVAL;

		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
6963 if (ro)
6964 goto unlock;
6965
6966
6967 if (mddev->ro != 1)
6968 goto unlock;
6969
		/* transitioning to readauto need only happen for
		 * arrays that call md_write_start
		 */
6973 if (mddev->pers) {
6974 err = restart_array(mddev);
6975 if (err == 0) {
6976 mddev->ro = 2;
6977 set_disk_ro(mddev->gendisk, 0);
6978 }
6979 }
6980 goto unlock;
6981 }
6982
	/*
	 * The remaining ioctls are changing the state of the
	 * array, so we do not allow them on read-only arrays.
	 */
6987 if (mddev->ro && mddev->pers) {
6988 if (mddev->ro == 2) {
6989 mddev->ro = 0;
6990 sysfs_notify_dirent_safe(mddev->sysfs_state);
6991 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* mddev_unlock will wake thread */
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
6996 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
6997 mddev_unlock(mddev);
6998 wait_event(mddev->sb_wait,
6999 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7000 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7001 mddev_lock_nointr(mddev);
7002 }
7003 } else {
7004 err = -EROFS;
7005 goto unlock;
7006 }
7007 }
7008
7009 switch (cmd) {
7010 case ADD_NEW_DISK:
7011 {
7012 mdu_disk_info_t info;
7013 if (copy_from_user(&info, argp, sizeof(info)))
7014 err = -EFAULT;
7015 else
7016 err = add_new_disk(mddev, &info);
7017 goto unlock;
7018 }
7019
7020 case HOT_ADD_DISK:
7021 err = hot_add_disk(mddev, new_decode_dev(arg));
7022 goto unlock;
7023
7024 case RUN_ARRAY:
7025 err = do_md_run(mddev);
7026 goto unlock;
7027
7028 case SET_BITMAP_FILE:
7029 err = set_bitmap_file(mddev, (int)arg);
7030 goto unlock;
7031
7032 default:
7033 err = -EINVAL;
7034 goto unlock;
7035 }
7036
7037unlock:
7038 if (mddev->hold_active == UNTIL_IOCTL &&
7039 err != -EINVAL)
7040 mddev->hold_active = 0;
7041 mddev_unlock(mddev);
7042out:
7043 return err;
7044}
7045#ifdef CONFIG_COMPAT
7046static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7047 unsigned int cmd, unsigned long arg)
7048{
7049 switch (cmd) {
7050 case HOT_REMOVE_DISK:
7051 case HOT_ADD_DISK:
7052 case SET_DISK_FAULTY:
7053 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
7055 break;
7056 default:
7057 arg = (unsigned long)compat_ptr(arg);
7058 break;
7059 }
7060
7061 return md_ioctl(bdev, mode, cmd, arg);
7062}
7063#endif
7064
7065static int md_open(struct block_device *bdev, fmode_t mode)
7066{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7071 struct mddev *mddev = mddev_find(bdev->bd_dev);
7072 int err;
7073
7074 if (!mddev)
7075 return -ENODEV;
7076
7077 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
7081 mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
7083 flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
7085 return -ERESTARTSYS;
7086 }
7087 BUG_ON(mddev != bdev->bd_disk->private_data);
7088
7089 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7090 goto out;
7091
7092 if (test_bit(MD_CLOSING, &mddev->flags)) {
7093 mutex_unlock(&mddev->open_mutex);
7094 err = -ENODEV;
7095 goto out;
7096 }
7097
7098 err = 0;
7099 atomic_inc(&mddev->openers);
7100 mutex_unlock(&mddev->open_mutex);
7101
7102 check_disk_change(bdev);
7103 out:
7104 if (err)
7105 mddev_put(mddev);
7106 return err;
7107}
7108
7109static void md_release(struct gendisk *disk, fmode_t mode)
7110{
7111 struct mddev *mddev = disk->private_data;
7112
7113 BUG_ON(!mddev);
7114 atomic_dec(&mddev->openers);
7115 mddev_put(mddev);
7116}
7117
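/* mddev->changed is set when an array is stopped or reconfigured; the next
 * open sees it via check_disk_change() and the block layer then calls
 * md_revalidate(), which clears the flag again.
 */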
7118static int md_media_changed(struct gendisk *disk)
7119{
7120 struct mddev *mddev = disk->private_data;
7121
7122 return mddev->changed;
7123}
7124
7125static int md_revalidate(struct gendisk *disk)
7126{
7127 struct mddev *mddev = disk->private_data;
7128
7129 mddev->changed = 0;
7130 return 0;
7131}
7132static const struct block_device_operations md_fops =
7133{
7134 .owner = THIS_MODULE,
7135 .open = md_open,
7136 .release = md_release,
7137 .ioctl = md_ioctl,
7138#ifdef CONFIG_COMPAT
7139 .compat_ioctl = md_compat_ioctl,
7140#endif
7141 .getgeo = md_getgeo,
7142 .media_changed = md_media_changed,
7143 .revalidate_disk= md_revalidate,
7144};
7145
7146static int md_thread(void *arg)
7147{
7148 struct md_thread *thread = arg;
7149
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high. We avoid resource deadlocks individually in each
	 * raid personality. (RAID5 does preallocation) We also use RR and
	 * the very same RT priority as kswapd, thus we will never get
	 * into a priority inversion deadlock.
	 *
	 * we definitely have to have equal or higher priority than
	 * bdflush, otherwise bdflush will deadlock if there are too
	 * many dirty RAID5 blocks.
	 */
7162 allow_signal(SIGKILL);
7163 while (!kthread_should_stop()) {

		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7170 if (signal_pending(current))
7171 flush_signals(current);
7172
7173 wait_event_interruptible_timeout
7174 (thread->wqueue,
7175 test_bit(THREAD_WAKEUP, &thread->flags)
7176 || kthread_should_stop(),
7177 thread->timeout);
7178
7179 clear_bit(THREAD_WAKEUP, &thread->flags);
7180 if (!kthread_should_stop())
7181 thread->run(thread);
7182 }
7183
7184 return 0;
7185}
7186
7187void md_wakeup_thread(struct md_thread *thread)
7188{
7189 if (thread) {
7190 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7191 set_bit(THREAD_WAKEUP, &thread->flags);
7192 wake_up(&thread->wqueue);
7193 }
7194}
7195EXPORT_SYMBOL(md_wakeup_thread);
7196
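/* Create and start a per-array kernel thread named "mdX_<name>"; it calls
 * 'run' each time it is woken via md_wakeup_thread() or when its timeout
 * expires (the timeout defaults to never).
 */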
7197struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7198 struct mddev *mddev, const char *name)
7199{
7200 struct md_thread *thread;
7201
7202 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7203 if (!thread)
7204 return NULL;
7205
7206 init_waitqueue_head(&thread->wqueue);
7207
7208 thread->run = run;
7209 thread->mddev = mddev;
7210 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7211 thread->tsk = kthread_run(md_thread, thread,
7212 "%s_%s",
7213 mdname(thread->mddev),
7214 name);
7215 if (IS_ERR(thread->tsk)) {
7216 kfree(thread);
7217 return NULL;
7218 }
7219 return thread;
7220}
7221EXPORT_SYMBOL(md_register_thread);
7222
7223void md_unregister_thread(struct md_thread **threadp)
7224{
7225 struct md_thread *thread = *threadp;
7226 if (!thread)
7227 return;
7228 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
7232 spin_lock(&pers_lock);
7233 *threadp = NULL;
7234 spin_unlock(&pers_lock);
7235
7236 kthread_stop(thread->tsk);
7237 kfree(thread);
7238}
7239EXPORT_SYMBOL(md_unregister_thread);
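/*
 * Typical pairing of md_register_thread()/md_unregister_thread() from a
 * personality (illustrative sketch only; the 'myraid' names are hypothetical):
 *
 *	conf->thread = md_register_thread(myraid_do_work, mddev, "myraid");
 *	if (!conf->thread)
 *		goto abort;
 *	...
 *	md_unregister_thread(&conf->thread);	// on shutdown; pointer is cleared
 */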
7240
7241void md_error(struct mddev *mddev, struct md_rdev *rdev)
7242{
7243 if (!rdev || test_bit(Faulty, &rdev->flags))
7244 return;
7245
7246 if (!mddev->pers || !mddev->pers->error_handler)
7247 return;
7248 mddev->pers->error_handler(mddev,rdev);
7249 if (mddev->degraded)
7250 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7251 sysfs_notify_dirent_safe(rdev->sysfs_state);
7252 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7253 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7254 md_wakeup_thread(mddev->thread);
7255 if (mddev->event_work.func)
7256 queue_work(md_misc_wq, &mddev->event_work);
7257 md_new_event(mddev);
7258}
7259EXPORT_SYMBOL(md_error);
7260
/* seq_file implementation /proc/mdstat */

7263static void status_unused(struct seq_file *seq)
7264{
7265 int i = 0;
7266 struct md_rdev *rdev;
7267
7268 seq_printf(seq, "unused devices: ");
7269
7270 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7271 char b[BDEVNAME_SIZE];
7272 i++;
7273 seq_printf(seq, "%s ",
7274 bdevname(rdev->bdev,b));
7275 }
7276 if (!i)
7277 seq_printf(seq, "<none>");
7278
7279 seq_printf(seq, "\n");
7280}
7281
7282static int status_resync(struct seq_file *seq, struct mddev *mddev)
7283{
7284 sector_t max_sectors, resync, res;
7285 unsigned long dt, db;
7286 sector_t rt;
7287 int scale;
7288 unsigned int per_milli;
7289
7290 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7291 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7292 max_sectors = mddev->resync_max_sectors;
7293 else
7294 max_sectors = mddev->dev_sectors;
7295
7296 resync = mddev->curr_resync;
7297 if (resync <= 3) {
7298 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7299
7300 resync = max_sectors;
7301 } else
7302 resync -= atomic_read(&mddev->recovery_active);
7303
7304 if (resync == 0) {
7305 if (mddev->recovery_cp < MaxSector) {
7306 seq_printf(seq, "\tresync=PENDING");
7307 return 1;
7308 }
7309 return 0;
7310 }
7311 if (resync < 3) {
7312 seq_printf(seq, "\tresync=DELAYED");
7313 return 1;
7314 }
7315
7316 WARN_ON(max_sectors == 0);

	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 */
7322 scale = 10;
7323 if (sizeof(sector_t) > sizeof(unsigned long)) {
7324 while ( max_sectors/2 > (1ULL<<(scale+32)))
7325 scale++;
7326 }
7327 res = (resync>>scale)*1000;
7328 sector_div(res, (u32)((max_sectors>>scale)+1));
7329
7330 per_milli = res;
7331 {
7332 int i, x = per_milli/50, y = 20-x;
7333 seq_printf(seq, "[");
7334 for (i = 0; i < x; i++)
7335 seq_printf(seq, "=");
7336 seq_printf(seq, ">");
7337 for (i = 0; i < y; i++)
7338 seq_printf(seq, ".");
7339 seq_printf(seq, "] ");
7340 }
7341 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7342 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7343 "reshape" :
7344 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7345 "check" :
7346 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7347 "resync" : "recovery"))),
7348 per_milli/10, per_milli % 10,
7349 (unsigned long long) resync/2,
7350 (unsigned long long) max_sectors/2);
7351
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so could be 32bit or 64bit.
	 * So we divide before multiply in case it is 32bit and close
	 * to the limit.
	 * We scale the divisor (db) by 32 to avoid losing precision
	 * near the end of resync when the number of remaining sectors
	 * is close to the 'window'.
	 * We then shift rt right by 5 after the multiplication to compensate.
	 * The '+1' avoids division by zero if db is very small.
	 */
7366 dt = ((jiffies - mddev->resync_mark) / HZ);
7367 if (!dt) dt++;
7368 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7369 - mddev->resync_mark_cnt;
7370
7371 rt = max_sectors - resync;
7372 sector_div(rt, db/32+1);
7373 rt *= dt;
7374 rt >>= 5;
7375
7376 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7377 ((unsigned long)rt % 60)/6);
7378
7379 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7380 return 1;
7381}
7382
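/* /proc/mdstat iteration: the token (void*)1 stands for the header line
 * ("Personalities : ..."), (void*)2 for the trailer printed by
 * status_unused(); real mddevs are returned in between with a reference held.
 */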
7383static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7384{
7385 struct list_head *tmp;
7386 loff_t l = *pos;
7387 struct mddev *mddev;
7388
7389 if (l >= 0x10000)
7390 return NULL;
7391 if (!l--)
7392
7393 return (void*)1;
7394
7395 spin_lock(&all_mddevs_lock);
7396 list_for_each(tmp,&all_mddevs)
7397 if (!l--) {
7398 mddev = list_entry(tmp, struct mddev, all_mddevs);
7399 mddev_get(mddev);
7400 spin_unlock(&all_mddevs_lock);
7401 return mddev;
7402 }
7403 spin_unlock(&all_mddevs_lock);
7404 if (!l--)
7405 return (void*)2;
7406 return NULL;
7407}
7408
7409static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7410{
7411 struct list_head *tmp;
7412 struct mddev *next_mddev, *mddev = v;
7413
7414 ++*pos;
7415 if (v == (void*)2)
7416 return NULL;
7417
7418 spin_lock(&all_mddevs_lock);
7419 if (v == (void*)1)
7420 tmp = all_mddevs.next;
7421 else
7422 tmp = mddev->all_mddevs.next;
7423 if (tmp != &all_mddevs)
7424 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7425 else {
7426 next_mddev = (void*)2;
7427 *pos = 0x10000;
7428 }
7429 spin_unlock(&all_mddevs_lock);
7430
7431 if (v != (void*)1)
7432 mddev_put(mddev);
7433 return next_mddev;
7434
7435}
7436
7437static void md_seq_stop(struct seq_file *seq, void *v)
7438{
7439 struct mddev *mddev = v;
7440
7441 if (mddev && v != (void*)1 && v != (void*)2)
7442 mddev_put(mddev);
7443}
7444
7445static int md_seq_show(struct seq_file *seq, void *v)
7446{
7447 struct mddev *mddev = v;
7448 sector_t sectors;
7449 struct md_rdev *rdev;
7450
7451 if (v == (void*)1) {
7452 struct md_personality *pers;
7453 seq_printf(seq, "Personalities : ");
7454 spin_lock(&pers_lock);
7455 list_for_each_entry(pers, &pers_list, list)
7456 seq_printf(seq, "[%s] ", pers->name);
7457
7458 spin_unlock(&pers_lock);
7459 seq_printf(seq, "\n");
7460 seq->poll_event = atomic_read(&md_event_count);
7461 return 0;
7462 }
7463 if (v == (void*)2) {
7464 status_unused(seq);
7465 return 0;
7466 }
7467
7468 spin_lock(&mddev->lock);
7469 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7470 seq_printf(seq, "%s : %sactive", mdname(mddev),
7471 mddev->pers ? "" : "in");
7472 if (mddev->pers) {
7473 if (mddev->ro==1)
7474 seq_printf(seq, " (read-only)");
7475 if (mddev->ro==2)
7476 seq_printf(seq, " (auto-read-only)");
7477 seq_printf(seq, " %s", mddev->pers->name);
7478 }
7479
7480 sectors = 0;
7481 rcu_read_lock();
7482 rdev_for_each_rcu(rdev, mddev) {
7483 char b[BDEVNAME_SIZE];
7484 seq_printf(seq, " %s[%d]",
7485 bdevname(rdev->bdev,b), rdev->desc_nr);
7486 if (test_bit(WriteMostly, &rdev->flags))
7487 seq_printf(seq, "(W)");
7488 if (test_bit(Journal, &rdev->flags))
7489 seq_printf(seq, "(J)");
7490 if (test_bit(Faulty, &rdev->flags)) {
7491 seq_printf(seq, "(F)");
7492 continue;
7493 }
7494 if (rdev->raid_disk < 0)
7495 seq_printf(seq, "(S)");
7496 if (test_bit(Replacement, &rdev->flags))
7497 seq_printf(seq, "(R)");
7498 sectors += rdev->sectors;
7499 }
7500 rcu_read_unlock();
7501
7502 if (!list_empty(&mddev->disks)) {
7503 if (mddev->pers)
7504 seq_printf(seq, "\n %llu blocks",
7505 (unsigned long long)
7506 mddev->array_sectors / 2);
7507 else
7508 seq_printf(seq, "\n %llu blocks",
7509 (unsigned long long)sectors / 2);
7510 }
7511 if (mddev->persistent) {
7512 if (mddev->major_version != 0 ||
7513 mddev->minor_version != 90) {
7514 seq_printf(seq," super %d.%d",
7515 mddev->major_version,
7516 mddev->minor_version);
7517 }
7518 } else if (mddev->external)
7519 seq_printf(seq, " super external:%s",
7520 mddev->metadata_type);
7521 else
7522 seq_printf(seq, " super non-persistent");
7523
7524 if (mddev->pers) {
7525 mddev->pers->status(seq, mddev);
7526 seq_printf(seq, "\n ");
7527 if (mddev->pers->sync_request) {
7528 if (status_resync(seq, mddev))
7529 seq_printf(seq, "\n ");
7530 }
7531 } else
7532 seq_printf(seq, "\n ");
7533
7534 bitmap_status(seq, mddev->bitmap);
7535
7536 seq_printf(seq, "\n");
7537 }
7538 spin_unlock(&mddev->lock);
7539
7540 return 0;
7541}
7542
7543static const struct seq_operations md_seq_ops = {
7544 .start = md_seq_start,
7545 .next = md_seq_next,
7546 .stop = md_seq_stop,
7547 .show = md_seq_show,
7548};
7549
7550static int md_seq_open(struct inode *inode, struct file *file)
7551{
7552 struct seq_file *seq;
7553 int error;
7554
7555 error = seq_open(file, &md_seq_ops);
7556 if (error)
7557 return error;
7558
7559 seq = file->private_data;
7560 seq->poll_event = atomic_read(&md_event_count);
7561 return error;
7562}
7563
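/* Set while the module is being unloaded so that pollers of /proc/mdstat
 * are released immediately instead of blocking forever.
 */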
7564static int md_unloading;
7565static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7566{
7567 struct seq_file *seq = filp->private_data;
7568 int mask;
7569
7570 if (md_unloading)
7571 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7572 poll_wait(filp, &md_event_waiters, wait);
7573
7574
7575 mask = POLLIN | POLLRDNORM;
7576
7577 if (seq->poll_event != atomic_read(&md_event_count))
7578 mask |= POLLERR | POLLPRI;
7579 return mask;
7580}
7581
7582static const struct file_operations md_seq_fops = {
7583 .owner = THIS_MODULE,
7584 .open = md_seq_open,
7585 .read = seq_read,
7586 .llseek = seq_lseek,
7587 .release = seq_release_private,
7588 .poll = mdstat_poll,
7589};
7590
7591int register_md_personality(struct md_personality *p)
7592{
7593 pr_debug("md: %s personality registered for level %d\n",
7594 p->name, p->level);
7595 spin_lock(&pers_lock);
7596 list_add_tail(&p->list, &pers_list);
7597 spin_unlock(&pers_lock);
7598 return 0;
7599}
7600EXPORT_SYMBOL(register_md_personality);
7601
7602int unregister_md_personality(struct md_personality *p)
7603{
7604 pr_debug("md: %s personality unregistered\n", p->name);
7605 spin_lock(&pers_lock);
7606 list_del_init(&p->list);
7607 spin_unlock(&pers_lock);
7608 return 0;
7609}
7610EXPORT_SYMBOL(unregister_md_personality);
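/*
 * A personality module registers itself at init time and unregisters on
 * exit.  Illustrative sketch only; 'myraid_personality' is hypothetical:
 *
 *	static struct md_personality myraid_personality = {
 *		.name	= "myraid",
 *		.level	= 42,
 *		.owner	= THIS_MODULE,
 *		...
 *	};
 *
 *	static int __init myraid_init(void)
 *	{
 *		return register_md_personality(&myraid_personality);
 *	}
 */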
7611
7612static int is_mddev_idle(struct mddev *mddev, int init)
7613{
7614 struct md_rdev *rdev;
7615 int idle;
7616 int curr_events;
7617
7618 idle = 1;
7619 rcu_read_lock();
7620 rdev_for_each_rcu(rdev, mddev) {
7621 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7622 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7623 (int)part_stat_read(&disk->part0, sectors[1]) -
7624 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_event to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-sync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync requests, and further resync activity will
		 * always make curr_events less than last_events.
		 *
		 */
7647 if (init || curr_events - rdev->last_events > 64) {
7648 rdev->last_events = curr_events;
7649 idle = 0;
7650 }
7651 }
7652 rcu_read_unlock();
7653 return idle;
7654}
7655
7656void md_done_sync(struct mddev *mddev, int blocks, int ok)
7657{
7658
7659 atomic_sub(blocks, &mddev->recovery_active);
7660 wake_up(&mddev->recovery_wait);
7661 if (!ok) {
7662 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7663 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7664 md_wakeup_thread(mddev->thread);
7665
7666 }
7667}
7668EXPORT_SYMBOL(md_done_sync);
7669
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 * A return value of 'false' means that the write wasn't recorded
 * and cannot proceed as the array is being suspended.
 */
7677bool md_write_start(struct mddev *mddev, struct bio *bi)
7678{
7679 int did_change = 0;
7680
7681 if (bio_data_dir(bi) != WRITE)
7682 return true;
7683
7684 BUG_ON(mddev->ro == 1);
7685 if (mddev->ro == 2) {
7686
7687 mddev->ro = 0;
7688 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7689 md_wakeup_thread(mddev->thread);
7690 md_wakeup_thread(mddev->sync_thread);
7691 did_change = 1;
7692 }
7693 atomic_inc(&mddev->writes_pending);
7694 if (mddev->safemode == 1)
7695 mddev->safemode = 0;
7696 if (mddev->in_sync) {
7697 spin_lock(&mddev->lock);
7698 if (mddev->in_sync) {
7699 mddev->in_sync = 0;
7700 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7701 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7702 md_wakeup_thread(mddev->thread);
7703 did_change = 1;
7704 }
7705 spin_unlock(&mddev->lock);
7706 }
7707 if (did_change)
7708 sysfs_notify_dirent_safe(mddev->sysfs_state);
7709 wait_event(mddev->sb_wait,
7710 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
7711 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
7712 atomic_dec(&mddev->writes_pending);
7713 return false;
7714 }
7715 return true;
7716}
7717EXPORT_SYMBOL(md_write_start);
7718
7719void md_write_end(struct mddev *mddev)
7720{
7721 if (atomic_dec_and_test(&mddev->writes_pending)) {
7722 if (mddev->safemode == 2)
7723 md_wakeup_thread(mddev->thread);
7724 else if (mddev->safemode_delay)
7725 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7726 }
7727}
7728EXPORT_SYMBOL(md_write_end);
7729
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 *
 * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until
 * external attention is given to it.
 */
7739int md_allow_write(struct mddev *mddev)
7740{
7741 if (!mddev->pers)
7742 return 0;
7743 if (mddev->ro)
7744 return 0;
7745 if (!mddev->pers->sync_request)
7746 return 0;
7747
7748 spin_lock(&mddev->lock);
7749 if (mddev->in_sync) {
7750 mddev->in_sync = 0;
7751 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7752 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7753 if (mddev->safemode_delay &&
7754 mddev->safemode == 0)
7755 mddev->safemode = 1;
7756 spin_unlock(&mddev->lock);
7757 md_update_sb(mddev, 0);
7758 sysfs_notify_dirent_safe(mddev->sysfs_state);
7759 } else
7760 spin_unlock(&mddev->lock);
7761
7762 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
7763 return -EAGAIN;
7764 else
7765 return 0;
7766}
7767EXPORT_SYMBOL_GPL(md_allow_write);
7768
7769#define SYNC_MARKS 10
7770#define SYNC_MARK_STEP (3*HZ)
7771#define UPDATE_FREQUENCY (5*60*HZ)
7772void md_do_sync(struct md_thread *thread)
7773{
7774 struct mddev *mddev = thread->mddev;
7775 struct mddev *mddev2;
7776 unsigned int currspeed = 0,
7777 window;
7778 sector_t max_sectors,j, io_sectors, recovery_done;
7779 unsigned long mark[SYNC_MARKS];
7780 unsigned long update_time;
7781 sector_t mark_cnt[SYNC_MARKS];
7782 int last_mark,m;
7783 struct list_head *tmp;
7784 sector_t last_check;
7785 int skipped = 0;
7786 struct md_rdev *rdev;
7787 char *desc, *action = NULL;
7788 struct blk_plug plug;
7789
7790
7791 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7792 return;
7793 if (mddev->ro) {
7794 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7795 return;
7796 }
7797
7798 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7799 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7800 desc = "data-check";
7801 action = "check";
7802 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7803 desc = "requested-resync";
7804 action = "repair";
7805 } else
7806 desc = "resync";
7807 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7808 desc = "reshape";
7809 else
7810 desc = "recovery";
7811
7812 mddev->last_sync_action = action ?: desc;
7813
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 *
	 */
7830 do {
7831 int mddev2_minor = -1;
7832 mddev->curr_resync = 2;
7833
7834 try_again:
7835 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7836 goto skip;
7837 for_each_mddev(mddev2, tmp) {
7838 if (mddev2 == mddev)
7839 continue;
7840 if (!mddev->parallel_resync
7841 && mddev2->curr_resync
7842 && match_mddev_units(mddev, mddev2)) {
7843 DEFINE_WAIT(wq);
7844 if (mddev < mddev2 && mddev->curr_resync == 2) {
7845
7846 mddev->curr_resync = 1;
7847 wake_up(&resync_wait);
7848 }
7849 if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here, we can wait the next
				 * time 'round when curr_resync == 2
				 */
7853 continue;
7854
			/* We need to wait 'interruptible' so as not to
			 * contribute to the load average, and not to
			 * be caught by 'softlockup'
			 */
7858 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7859 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7860 mddev2->curr_resync >= mddev->curr_resync) {
7861 if (mddev2_minor != mddev2->md_minor) {
7862 mddev2_minor = mddev2->md_minor;
7863 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
7864 desc, mdname(mddev),
7865 mdname(mddev2));
7866 }
7867 mddev_put(mddev2);
7868 if (signal_pending(current))
7869 flush_signals(current);
7870 schedule();
7871 finish_wait(&resync_wait, &wq);
7872 goto try_again;
7873 }
7874 finish_wait(&resync_wait, &wq);
7875 }
7876 }
7877 } while (mddev->curr_resync < 2);
7878
7879 j = 0;
7880 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
7884 max_sectors = mddev->resync_max_sectors;
7885 atomic64_set(&mddev->resync_mismatches, 0);
7886
7887 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7888 j = mddev->resync_min;
7889 else if (!mddev->bitmap)
7890 j = mddev->recovery_cp;
7891
7892 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7893 max_sectors = mddev->resync_max_sectors;
7894 else {
7895
7896 max_sectors = mddev->dev_sectors;
7897 j = MaxSector;
7898 rcu_read_lock();
7899 rdev_for_each_rcu(rdev, mddev)
7900 if (rdev->raid_disk >= 0 &&
7901 !test_bit(Journal, &rdev->flags) &&
7902 !test_bit(Faulty, &rdev->flags) &&
7903 !test_bit(In_sync, &rdev->flags) &&
7904 rdev->recovery_offset < j)
7905 j = rdev->recovery_offset;
7906 rcu_read_unlock();

		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
7916 if (mddev->bitmap) {
7917 mddev->pers->quiesce(mddev, 1);
7918 mddev->pers->quiesce(mddev, 0);
7919 }
7920 }
7921
7922 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
7923 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
7924 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
7925 speed_max(mddev), desc);
7926
7927 is_mddev_idle(mddev, 1);
7928
7929 io_sectors = 0;
7930 for (m = 0; m < SYNC_MARKS; m++) {
7931 mark[m] = jiffies;
7932 mark_cnt[m] = io_sectors;
7933 }
7934 last_mark = 0;
7935 mddev->resync_mark = mark[last_mark];
7936 mddev->resync_mark_cnt = mark_cnt[last_mark];
7937
	/*
	 * Tune reconstruction:
	 */
7941 window = 32*(PAGE_SIZE/512);
7942 pr_debug("md: using %dk window, over a total of %lluk.\n",
7943 window/2, (unsigned long long)max_sectors/2);
7944
7945 atomic_set(&mddev->recovery_active, 0);
7946 last_check = 0;
7947
7948 if (j>2) {
7949 pr_debug("md: resuming %s of %s from checkpoint.\n",
7950 desc, mdname(mddev));
7951 mddev->curr_resync = j;
7952 } else
7953 mddev->curr_resync = 3;
7954 mddev->curr_resync_completed = j;
7955 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7956 md_new_event(mddev);
7957 update_time = jiffies;
7958
7959 blk_start_plug(&plug);
7960 while (j < max_sectors) {
7961 sector_t sectors;
7962
7963 skipped = 0;
7964
7965 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7966 ((mddev->curr_resync > mddev->curr_resync_completed &&
7967 (mddev->curr_resync - mddev->curr_resync_completed)
7968 > (max_sectors >> 4)) ||
7969 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7970 (j - mddev->curr_resync_completed)*2
7971 >= mddev->resync_max - mddev->curr_resync_completed ||
7972 mddev->curr_resync_completed > mddev->resync_max
7973 )) {
7974
7975 wait_event(mddev->recovery_wait,
7976 atomic_read(&mddev->recovery_active) == 0);
7977 mddev->curr_resync_completed = j;
7978 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7979 j > mddev->recovery_cp)
7980 mddev->recovery_cp = j;
7981 update_time = jiffies;
7982 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7983 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7984 }
7985
7986 while (j >= mddev->resync_max &&
7987 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7988
7989
7990
7991
7992 flush_signals(current);
7993 wait_event_interruptible(mddev->recovery_wait,
7994 mddev->resync_max > j
7995 || test_bit(MD_RECOVERY_INTR,
7996 &mddev->recovery));
7997 }
7998
7999 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8000 break;
8001
8002 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8003 if (sectors == 0) {
8004 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8005 break;
8006 }
8007
8008 if (!skipped) {
8009 io_sectors += sectors;
8010 atomic_add(sectors, &mddev->recovery_active);
8011 }
8012
8013 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8014 break;
8015
8016 j += sectors;
8017 if (j > max_sectors)
8018
8019 j = max_sectors;
8020 if (j > 2)
8021 mddev->curr_resync = j;
8022 mddev->curr_mark_cnt = io_sectors;
8023 if (last_check == 0)
8024
8025
8026
8027 md_new_event(mddev);
8028
8029 if (last_check + window > io_sectors || j == max_sectors)
8030 continue;
8031
8032 last_check = io_sectors;
8033 repeat:
8034 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8035
8036 int next = (last_mark+1) % SYNC_MARKS;
8037
8038 mddev->resync_mark = mark[next];
8039 mddev->resync_mark_cnt = mark_cnt[next];
8040 mark[next] = jiffies;
8041 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8042 last_mark = next;
8043 }
8044
8045 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8046 break;
8047
		/*
		 * this loop exits only if either when we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8056 cond_resched();
8057
8058 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8059 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8060 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8061
8062 if (currspeed > speed_min(mddev)) {
8063 if (currspeed > speed_max(mddev)) {
8064 msleep(500);
8065 goto repeat;
8066 }
8067 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8072 wait_event(mddev->recovery_wait,
8073 !atomic_read(&mddev->recovery_active));
8074 }
8075 }
8076 }
8077 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8078 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8079 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8083 blk_finish_plug(&plug);
8084 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8085
8086 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8087 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8088 mddev->curr_resync > 3) {
8089 mddev->curr_resync_completed = mddev->curr_resync;
8090 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8091 }
8092
8093 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8094
8095 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8096 mddev->curr_resync > 3) {
8097 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8098 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8099 if (mddev->curr_resync >= mddev->recovery_cp) {
8100 pr_debug("md: checkpointing %s of %s.\n",
8101 desc, mdname(mddev));
8102 if (test_bit(MD_RECOVERY_ERROR,
8103 &mddev->recovery))
8104 mddev->recovery_cp =
8105 mddev->curr_resync_completed;
8106 else
8107 mddev->recovery_cp =
8108 mddev->curr_resync;
8109 }
8110 } else
8111 mddev->recovery_cp = MaxSector;
8112 } else {
8113 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8114 mddev->curr_resync = MaxSector;
8115 rcu_read_lock();
8116 rdev_for_each_rcu(rdev, mddev)
8117 if (rdev->raid_disk >= 0 &&
8118 mddev->delta_disks >= 0 &&
8119 !test_bit(Journal, &rdev->flags) &&
8120 !test_bit(Faulty, &rdev->flags) &&
8121 !test_bit(In_sync, &rdev->flags) &&
8122 rdev->recovery_offset < mddev->curr_resync)
8123 rdev->recovery_offset = mddev->curr_resync;
8124 rcu_read_unlock();
8125 }
8126 }
8127 skip:
8128
8129
8130
8131 set_mask_bits(&mddev->sb_flags, 0,
8132 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8133
8134 spin_lock(&mddev->lock);
8135 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8136
8137 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8138 mddev->resync_min = 0;
8139 mddev->resync_max = MaxSector;
8140 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8141 mddev->resync_min = mddev->curr_resync_completed;
8142 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8143 mddev->curr_resync = 0;
8144 spin_unlock(&mddev->lock);
8145
8146 wake_up(&resync_wait);
8147 md_wakeup_thread(mddev->thread);
8148 return;
8149}
8150EXPORT_SYMBOL_GPL(md_do_sync);
8151
8152static int remove_and_add_spares(struct mddev *mddev,
8153 struct md_rdev *this)
8154{
8155 struct md_rdev *rdev;
8156 int spares = 0;
8157 int removed = 0;
8158 bool remove_some = false;
8159
8160 rdev_for_each(rdev, mddev) {
8161 if ((this == NULL || rdev == this) &&
8162 rdev->raid_disk >= 0 &&
8163 !test_bit(Blocked, &rdev->flags) &&
8164 test_bit(Faulty, &rdev->flags) &&
8165 atomic_read(&rdev->nr_pending)==0) {
			/* Faulty non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented,
			 * never get Faulty cleared, and never get Blocked set.
			 * So we can synchronize_rcu now rather than once per device
			 */
8171 remove_some = true;
8172 set_bit(RemoveSynchronized, &rdev->flags);
8173 }
8174 }
8175
8176 if (remove_some)
8177 synchronize_rcu();
8178 rdev_for_each(rdev, mddev) {
8179 if ((this == NULL || rdev == this) &&
8180 rdev->raid_disk >= 0 &&
8181 !test_bit(Blocked, &rdev->flags) &&
8182 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8183 (!test_bit(In_sync, &rdev->flags) &&
8184 !test_bit(Journal, &rdev->flags))) &&
8185 atomic_read(&rdev->nr_pending)==0)) {
8186 if (mddev->pers->hot_remove_disk(
8187 mddev, rdev) == 0) {
8188 sysfs_unlink_rdev(mddev, rdev);
8189 rdev->raid_disk = -1;
8190 removed++;
8191 }
8192 }
8193 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8194 clear_bit(RemoveSynchronized, &rdev->flags);
8195 }
8196
8197 if (removed && mddev->kobj.sd)
8198 sysfs_notify(&mddev->kobj, NULL, "degraded");
8199
8200 if (this && removed)
8201 goto no_add;
8202
8203 rdev_for_each(rdev, mddev) {
8204 if (this && this != rdev)
8205 continue;
8206 if (rdev->raid_disk >= 0 &&
8207 !test_bit(In_sync, &rdev->flags) &&
8208 !test_bit(Journal, &rdev->flags) &&
8209 !test_bit(Faulty, &rdev->flags))
8210 spares++;
8211 if (rdev->raid_disk >= 0)
8212 continue;
8213 if (test_bit(Faulty, &rdev->flags))
8214 continue;
8215 if (!test_bit(Journal, &rdev->flags)) {
8216 if (mddev->ro &&
8217 ! (rdev->saved_raid_disk >= 0 &&
8218 !test_bit(Bitmap_sync, &rdev->flags)))
8219 continue;
8220
8221 rdev->recovery_offset = 0;
8222 }
8223 if (mddev->pers->
8224 hot_add_disk(mddev, rdev) == 0) {
8225 if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
8227 if (!test_bit(Journal, &rdev->flags))
8228 spares++;
8229 md_new_event(mddev);
8230 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8231 }
8232 }
8233no_add:
8234 if (removed)
8235 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8236 return spares;
8237}
8238
8239static void md_start_sync(struct work_struct *ws)
8240{
8241 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8242
8243 mddev->sync_thread = md_register_thread(md_do_sync,
8244 mddev,
8245 "resync");
8246 if (!mddev->sync_thread) {
8247 pr_warn("%s: could not start resync thread...\n",
8248 mdname(mddev));
8249
8250 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8251 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8252 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8253 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8254 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8255 wake_up(&resync_wait);
8256 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8257 &mddev->recovery))
8258 if (mddev->sysfs_action)
8259 sysfs_notify_dirent_safe(mddev->sysfs_action);
8260 } else
8261 md_wakeup_thread(mddev->sync_thread);
8262 sysfs_notify_dirent_safe(mddev->sysfs_action);
8263 md_new_event(mddev);
8264}
8265
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there are any faulty devices, remove them.
 *  5/ If array is degraded, try to add spare devices.
 *  6/ If array has spares or is not in-sync, start a resync thread.
 */
8288void md_check_recovery(struct mddev *mddev)
8289{
8290 if (mddev->bitmap)
8291 bitmap_daemon_work(mddev);
8292
8293 if (signal_pending(current)) {
8294 if (mddev->pers->sync_request && !mddev->external) {
8295 pr_debug("md: %s in immediate safe mode\n",
8296 mdname(mddev));
8297 mddev->safemode = 2;
8298 }
8299 flush_signals(current);
8300 }
8301
8302 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8303 return;
8304 if ( ! (
8305 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8306 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8307 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8308 (mddev->external == 0 && mddev->safemode == 1) ||
8309 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8310 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8311 ))
8312 return;
8313
8314 if (mddev_trylock(mddev)) {
8315 int spares = 0;
8316
8317 if (mddev->ro) {
8318 struct md_rdev *rdev;
8319 if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
8325 rdev_for_each(rdev, mddev)
8326 clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
8334 remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
8338 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8339 md_reap_sync_thread(mddev);
8340 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8341 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8342 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8343 goto unlock;
8344 }
8345
8346 if (!mddev->external) {
8347 int did_change = 0;
8348 spin_lock(&mddev->lock);
8349 if (mddev->safemode &&
8350 !atomic_read(&mddev->writes_pending) &&
8351 !mddev->in_sync &&
8352 mddev->recovery_cp == MaxSector) {
8353 mddev->in_sync = 1;
8354 did_change = 1;
8355 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8356 }
8357 if (mddev->safemode == 1)
8358 mddev->safemode = 0;
8359 spin_unlock(&mddev->lock);
8360 if (did_change)
8361 sysfs_notify_dirent_safe(mddev->sysfs_state);
8362 }
8363
8364 if (mddev->sb_flags)
8365 md_update_sb(mddev, 0);
8366
8367 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8368 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
8369
8370 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8371 goto unlock;
8372 }
8373 if (mddev->sync_thread) {
8374 md_reap_sync_thread(mddev);
8375 goto unlock;
8376 }
8377
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
8380 mddev->curr_resync_completed = 0;
8381 spin_lock(&mddev->lock);
8382 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8383 spin_unlock(&mddev->lock);
8384
		/* Clear some bits that might be left over from a previous
		 * run.
		 */
8387 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8388 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8389
8390 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8391 mddev->suspended ||
8392 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8393 goto not_running;
8394
		/* No recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */
8401 if (mddev->reshape_position != MaxSector) {
8402 if (mddev->pers->check_reshape == NULL ||
8403 mddev->pers->check_reshape(mddev) != 0)
8404
8405 goto not_running;
8406 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8407 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8408 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8409 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8410 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8411 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8412 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8413 } else if (mddev->recovery_cp < MaxSector) {
8414 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8415 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8416 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
8417
8418 goto not_running;
8419
8420 if (mddev->pers->sync_request) {
8421 if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
8426 bitmap_write_all(mddev->bitmap);
8427 }
8428 INIT_WORK(&mddev->del_work, md_start_sync);
8429 queue_work(md_misc_wq, &mddev->del_work);
8430 goto unlock;
8431 }
8432 not_running:
8433 if (!mddev->sync_thread) {
8434 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8435 wake_up(&resync_wait);
8436 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8437 &mddev->recovery))
8438 if (mddev->sysfs_action)
8439 sysfs_notify_dirent_safe(mddev->sysfs_action);
8440 }
8441 unlock:
8442 wake_up(&mddev->sb_wait);
8443 mddev_unlock(mddev);
8444 }
8445}
8446EXPORT_SYMBOL(md_check_recovery);
8447
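/* Called with the mddev locked once MD_RECOVERY_DONE has been set: reap the
 * sync thread, activate any spares that finished rebuilding, and clear the
 * recovery state bits so that a new action can be scheduled.
 */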
8448void md_reap_sync_thread(struct mddev *mddev)
8449{
8450 struct md_rdev *rdev;

	/* resync has finished, collect result */
8453 md_unregister_thread(&mddev->sync_thread);
8454 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8455 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8456
8457
8458 if (mddev->pers->spare_active(mddev)) {
8459 sysfs_notify(&mddev->kobj, NULL,
8460 "degraded");
8461 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8462 }
8463 }
8464 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8465 mddev->pers->finish_reshape)
8466 mddev->pers->finish_reshape(mddev);

	/* If array is no-longer degraded, then any saved_raid_disk
	 * information must be scrapped.
	 */
8471 if (!mddev->degraded)
8472 rdev_for_each(rdev, mddev)
8473 rdev->saved_raid_disk = -1;
8474
8475 md_update_sb(mddev, 1);
8476 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8477 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8478 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8479 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8480 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8481 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8482 wake_up(&resync_wait);
8483
8484 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8485 sysfs_notify_dirent_safe(mddev->sysfs_action);
8486 md_new_event(mddev);
8487 if (mddev->event_work.func)
8488 queue_work(md_misc_wq, &mddev->event_work);
8489}
8490EXPORT_SYMBOL(md_reap_sync_thread);
8491
8492void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8493{
8494 sysfs_notify_dirent_safe(rdev->sysfs_state);
8495 wait_event_timeout(rdev->blocked_wait,
8496 !test_bit(Blocked, &rdev->flags) &&
8497 !test_bit(BlockedBadBlocks, &rdev->flags),
8498 msecs_to_jiffies(5000));
8499 rdev_dec_pending(rdev, mddev);
8500}
8501EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8502
8503void md_finish_reshape(struct mddev *mddev)
8504{
8505
8506 struct md_rdev *rdev;
8507
8508 rdev_for_each(rdev, mddev) {
8509 if (rdev->data_offset > rdev->new_data_offset)
8510 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8511 else
8512 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8513 rdev->data_offset = rdev->new_data_offset;
8514 }
8515}
8516EXPORT_SYMBOL(md_finish_reshape);
8517
/* Bad block management */

/* Returns 1 on success, 0 on failure */
8521int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8522 int is_new)
8523{
8524 struct mddev *mddev = rdev->mddev;
8525 int rv;
8526 if (is_new)
8527 s += rdev->new_data_offset;
8528 else
8529 s += rdev->data_offset;
8530 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8531 if (rv == 0) {
8532
8533 if (test_bit(ExternalBbl, &rdev->flags))
8534 sysfs_notify(&rdev->kobj, NULL,
8535 "unacknowledged_bad_blocks");
8536 sysfs_notify_dirent_safe(rdev->sysfs_state);
8537 set_mask_bits(&mddev->sb_flags, 0,
8538 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
8539 md_wakeup_thread(rdev->mddev->thread);
8540 return 1;
8541 } else
8542 return 0;
8543}
8544EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8545
8546int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8547 int is_new)
8548{
8549 int rv;
8550 if (is_new)
8551 s += rdev->new_data_offset;
8552 else
8553 s += rdev->data_offset;
8554 rv = badblocks_clear(&rdev->badblocks, s, sectors);
8555 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
8556 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
8557 return rv;
8558}
8559EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
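/*
 * Illustrative sketch (hypothetical caller): a personality that hits a write
 * error covering 'sectors' sectors at device offset 's' of 'rdev' would
 * typically record the bad range, and fail the whole device only if that
 * recording fails:
 *
 *	if (!rdev_set_badblocks(rdev, s, sectors, 0))
 *		md_error(rdev->mddev, rdev);
 */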
8560
8561static int md_notify_reboot(struct notifier_block *this,
8562 unsigned long code, void *x)
8563{
8564 struct list_head *tmp;
8565 struct mddev *mddev;
8566 int need_delay = 0;
8567
8568 for_each_mddev(mddev, tmp) {
8569 if (mddev_trylock(mddev)) {
8570 if (mddev->pers)
8571 __md_stop_writes(mddev);
8572 if (mddev->persistent)
8573 mddev->safemode = 2;
8574 mddev_unlock(mddev);
8575 }
8576 need_delay = 1;
8577 }
8578
	/*
	 * certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
8584 if (need_delay)
8585 mdelay(1000*1);
8586
8587 return NOTIFY_DONE;
8588}
8589
8590static struct notifier_block md_notifier = {
8591 .notifier_call = md_notify_reboot,
8592 .next = NULL,
8593 .priority = INT_MAX,
8594};
8595
8596static void md_geninit(void)
8597{
8598 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8599
8600 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8601}
8602
8603static int __init md_init(void)
8604{
8605 int ret = -ENOMEM;
8606
8607 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8608 if (!md_wq)
8609 goto err_wq;
8610
8611 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8612 if (!md_misc_wq)
8613 goto err_misc_wq;
8614
8615 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8616 goto err_md;
8617
8618 if ((ret = register_blkdev(0, "mdp")) < 0)
8619 goto err_mdp;
8620 mdp_major = ret;
8621
8622 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8623 md_probe, NULL, NULL);
8624 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8625 md_probe, NULL, NULL);
8626
8627 register_reboot_notifier(&md_notifier);
8628 raid_table_header = register_sysctl_table(raid_root_table);
8629
8630 md_geninit();
8631 return 0;
8632
8633err_mdp:
8634 unregister_blkdev(MD_MAJOR, "md");
8635err_md:
8636 destroy_workqueue(md_misc_wq);
8637err_misc_wq:
8638 destroy_workqueue(md_wq);
8639err_wq:
8640 return ret;
8641}
8642
8643#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

8650static DEFINE_MUTEX(detected_devices_mutex);
8651static LIST_HEAD(all_detected_devices);
8652struct detected_devices_node {
8653 struct list_head list;
8654 dev_t dev;
8655};
8656
8657void md_autodetect_dev(dev_t dev)
8658{
8659 struct detected_devices_node *node_detected_dev;
8660
8661 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8662 if (node_detected_dev) {
8663 node_detected_dev->dev = dev;
8664 mutex_lock(&detected_devices_mutex);
8665 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8666 mutex_unlock(&detected_devices_mutex);
8667 }
8668}
8669
8670static void autostart_arrays(int part)
8671{
8672 struct md_rdev *rdev;
8673 struct detected_devices_node *node_detected_dev;
8674 dev_t dev;
8675 int i_scanned, i_passed;
8676
8677 i_scanned = 0;
8678 i_passed = 0;
8679
8680 pr_info("md: Autodetecting RAID arrays.\n");
8681
8682 mutex_lock(&detected_devices_mutex);
8683 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8684 i_scanned++;
8685 node_detected_dev = list_entry(all_detected_devices.next,
8686 struct detected_devices_node, list);
8687 list_del(&node_detected_dev->list);
8688 dev = node_detected_dev->dev;
8689 kfree(node_detected_dev);
8690 mutex_unlock(&detected_devices_mutex);
8691 rdev = md_import_device(dev,0, 90);
8692 mutex_lock(&detected_devices_mutex);
8693 if (IS_ERR(rdev))
8694 continue;
8695
8696 if (test_bit(Faulty, &rdev->flags))
8697 continue;
8698
8699 set_bit(AutoDetected, &rdev->flags);
8700 list_add(&rdev->same_set, &pending_raid_disks);
8701 i_passed++;
8702 }
8703 mutex_unlock(&detected_devices_mutex);
8704
8705 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
8706
8707 autorun_devices(part);
8708}
8709
8710#endif
8711
8712static __exit void md_exit(void)
8713{
8714 struct mddev *mddev;
8715 struct list_head *tmp;
8716 int delay = 1;
8717
8718 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8719 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8720
8721 unregister_blkdev(MD_MAJOR,"md");
8722 unregister_blkdev(mdp_major, "mdp");
8723 unregister_reboot_notifier(&md_notifier);
8724 unregister_sysctl_table(raid_table_header);
8725
	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
8729 md_unloading = 1;
8730 while (waitqueue_active(&md_event_waiters)) {
8731
8732 wake_up(&md_event_waiters);
8733 msleep(delay);
8734 delay += delay;
8735 }
8736 remove_proc_entry("mdstat", NULL);
8737
8738 for_each_mddev(mddev, tmp) {
8739 export_array(mddev);
8740 mddev->hold_active = 0;
8741 }
8742 destroy_workqueue(md_misc_wq);
8743 destroy_workqueue(md_wq);
8744}
8745
8746subsys_initcall(md_init);
8747module_exit(md_exit)
8748
8749static int get_ro(char *buffer, struct kernel_param *kp)
8750{
8751 return sprintf(buffer, "%d", start_readonly);
8752}
8753static int set_ro(const char *val, struct kernel_param *kp)
8754{
8755 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8756}
8757
8758module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8759module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8760module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
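/*
 * Module/boot parameters (values are illustrative):
 *   md_mod.start_ro=1              assemble arrays "auto-read-only" until the
 *                                  first write arrives
 *   md_mod.start_dirty_degraded=1  allow starting dirty degraded arrays
 *   new_array                      write-only; writing a name such as "md127"
 *                                  (or "md_*") creates a new array
 */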
8761
8762MODULE_LICENSE("GPL");
8763MODULE_DESCRIPTION("MD RAID framework");
8764MODULE_ALIAS("md");
8765MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8766