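/*
 * md.c : Multiple Devices driver for Linux
 *
 * Core of the MD (software RAID) subsystem: array lifecycle management,
 * superblock handling for the 0.90 and 1.x metadata formats, member
 * device binding, and the sysctl/sysfs plumbing that goes with them.
 */
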
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif
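
/*
 * pers_list is a list of registered personalities, protected by pers_lock.
 * pers_lock does extra service to protect accesses to mddev->thread
 * when the reconfig mutex cannot be held.
 */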
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
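
/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.  The read error count is
 * halved for every hour that elapses between read errors.
 */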
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
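
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed.  Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle.  There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * The limits can be changed via /proc/sys/dev/raid/speed_limit_{min,max}
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */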
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;
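
/*
 * Like bio_alloc()/bio_clone(), but allocate from the mddev's private
 * bio_set when one exists, so md I/O does not contend with other users
 * of the global bio pools.
 */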
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	struct bio *b;

	if (!mddev || !mddev->bio_set)
		return bio_alloc(gfp_mask, nr_iovecs);

	b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
	if (!b)
		return NULL;
	return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
			    struct mddev *mddev)
{
	if (!mddev || !mddev->bio_set)
		return bio_clone(bio, gfp_mask);

	return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
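
/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */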
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);
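
/*
 * Enables iteration over all existing md arrays.
 * all_mddevs_lock protects this list.
 */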
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
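
/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding a
 * reference to the current mddev must mddev_put() it.
 */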
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
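
/*
 * Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending removal of conflicting devices.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible under rcu_read_lock().
 */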
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;
	int cpu;

	blk_queue_split(q, &bio, q->bio_split);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_error = -EROFS;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}
	smp_rmb(); /* Ensure implications of 'active' are visible */
	rcu_read_lock();
	if (mddev->suspended) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!mddev->suspended)
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	/*
	 * Save the sector count now, since our bio can
	 * go away inside make_request.
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to the underlying layer */
	bio->bi_rw &= ~REQ_NOMERGE;
	mddev->pers->make_request(mddev, bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
	part_stat_unlock();

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);

	return BLK_QC_T_NONE;
}
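
/*
 * mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */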
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(current == mddev->thread->tsk);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}
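
/*
 * Generic flush handling for md
 */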
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when the request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bi->bi_bdev = rdev->bdev;
			atomic_inc(&mddev->flush_pending);
			submit_bio(WRITE_FLUSH, bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	if (bio->bi_iter.bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio);
	else {
		bio->bi_rw &= ~REQ_FLUSH;
		mddev->pers->make_request(mddev, bio);
	}

	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio,
			    mddev->lock);
	mddev->flush_bio = bio;
	spin_unlock_irq(&mddev->lock);

	INIT_WORK(&mddev->flush_work, submit_flushes);
	queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct mddev *mddev = cb->data;
	md_wakeup_thread(mddev->thread);
	kfree(cb);
}
EXPORT_SYMBOL(md_unplug);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	struct bio_set *bs = NULL;

	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);
		bs = mddev->bio_set;
		mddev->bio_set = NULL;
		if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
	spin_unlock(&all_mddevs_lock);
	if (bs)
		bioset_free(bs);
}

static void md_safemode_timeout(unsigned long data);

void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
		    (unsigned long) mddev);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex if
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel) == 0)
			return pers;
	}
	return NULL;
}

static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page) {
		printk(KERN_ALERT "md: out of memory.\n");
		return -ENOMEM;
	}

	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_error) {
		printk("md: super_written gets error=%d\n", bio->bi_error);
		md_error(mddev, rdev);
	}

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* Write the first size bytes of page to sector of rdev.
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error.
	 */
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);

	atomic_inc(&rdev->nr_pending);

	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	atomic_inc(&mddev->pending_writes);
	submit_bio(WRITE_FLUSH_FUA, bio);
}

void md_super_wait(struct mddev *mddev)
{
	/* Wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);
}

int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int rw, bool metadata_op)
{
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
	int ret;

	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
		rdev->meta_bdev : rdev->bdev;
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);
	submit_bio_wait(rw, bio);

	ret = !bio->bi_error;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev, b));
	return -EINVAL;
}

static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1), GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2), GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32 *)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}
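
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * for automatically finding a new version of a superblock depends
 * on user-space.
 *
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *      The first time, mddev->raid_disks will be 0, and data from
 *      dev should be merged in.  Subsequent calls check that dev
 *      is new enough.  Return 0 or -EINVAL.
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *      Update the superblock for rdev with data in mddev.
 *      This does not write to disc.
 */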
struct super_type {
	char		   *name;
	struct module	   *owner;
	int		   (*load_super)(struct md_rdev *rdev,
					 struct md_rdev *refdev,
					 int minor_version);
	int		   (*validate_super)(struct mddev *mddev,
					     struct md_rdev *rdev);
	void		   (*sync_super)(struct mddev *mddev,
					 struct md_rdev *rdev);
	unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
					       sector_t num_sectors);
	int		   (*allow_new_offset)(struct md_rdev *rdev,
					       unsigned long long new_offset);
};
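
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do
 * not support bitmaps.  It prints an error message and returns non-zero if
 * mddev has a bitmap.  Otherwise, it returns 0.
 */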
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
	       mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret) return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
		       b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
		       sb->major_version, sb->minor_version,
		       b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
		       b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!uuid_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has different UUID to %s\n",
			       b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		if (!sb_equal(refsb, sb)) {
			printk(KERN_WARNING "md: %s has same UUID"
			       " but different superblock to %s\n",
			       b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
	    sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on a good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to an array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active = 0, working = 0, failed = 0, spare = 0, nr_disks = 0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
	    rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
		       rdev->sb_page);
	md_super_wait(rdev->mddev);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes are not possible with v0.90 */
	return new_offset == 0;
}
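
/*
 * version 1 superblock
 */
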
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32 *)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16 *) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		printk("md: invalid superblock checksum on %s\n",
		       bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		printk("md: data_size too small on %s\n",
		       bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, READ, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			printk(KERN_WARNING "md: %s has strangely different"
			       " superblock to %s\n",
			       bdevname(rdev->bdev, b),
			       bdevname(refdev->bdev, b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
			set_bit(MD_HAS_JOURNAL, &mddev->flags);
			if (mddev->recovery_cp == MaxSector)
				set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on a good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to an array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE:
			break;
		case MD_DISK_ROLE_FAULTY:
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL:
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				printk(KERN_WARNING
				       "md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */

	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks */ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
		       rdev->sb_page);
	md_super_wait(rdev->mddev);
	return num_sectors;
}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata:
	 * stay at least 36K (superblock plus possible write-intent
	 * bitmap) beyond the superblock, clear of the bitmap data,
	 * and clear of the bad block log.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name			= "0.90.0",
		.owner			= THIS_MODULE,
		.load_super		= super_90_load,
		.validate_super		= super_90_validate,
		.sync_super		= super_90_sync,
		.rdev_size_change	= super_90_rdev_size_change,
		.allow_new_offset	= super_90_allow_new_offset,
	},
	[1] = {
		.name			= "md-1",
		.owner			= THIS_MODULE,
		.load_super		= super_1_load,
		.validate_super		= super_1_validate,
		.sync_super		= super_1_sync,
		.rdev_size_change	= super_1_rdev_size_change,
		.allow_new_offset	= super_1_allow_new_offset,
	},
};

static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);
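
/*
 * Try to register the data integrity profile for an mddev.
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array.  It only succeeds if all working and active component
 * devices are integrity capable with matching profiles.
 */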
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
					  rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
		printk(KERN_ERR "md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);
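
/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile.
 */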
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_rdev;
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_rdev = bdev_get_integrity(rdev->bdev);
	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail.
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check that the number is not in use.
	 */
	rcu_read_lock();
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers)
			choice = mddev->raid_disks;
		while (md_find_rdev_nr_rcu(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
			rcu_read_unlock();
			return -EBUSY;
		}
	}
	rcu_read_unlock();
	if (!test_bit(Journal, &rdev->flags) &&
	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
		       mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev, b);
	strreplace(b, '/', '!');

	rdev->mddev = mddev;
	printk(KERN_INFO "md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled++;

	return 0;

 fail:
	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
	       b, mdname(mddev));
	return err;
}

static void md_delayed_delete(struct work_struct *ws)
{
	struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
	kobject_del(&rdev->kobj);
	kobject_put(&rdev->kobj);
}

static void unbind_rdev_from_array(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev, b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	rdev->badblocks.count = 0;
	/* Deleting the kobject waits for in-flight sysfs accesses to
	 * complete, which could deadlock in this context, so hand the
	 * final kobject_del()/kobject_put() off to a workqueue.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}
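
/*
 * Prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem) by bd_claiming the device.
 */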
static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
	int err = 0;
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 shared ? (struct md_rdev *)lock_rdev : rdev);
	if (IS_ERR(bdev)) {
		printk(KERN_ERR "md: could not open %s.\n",
		       __bdevname(dev, b));
		return PTR_ERR(bdev);
	}
	rdev->bdev = bdev;
	return err;
}

static void unlock_rdev(struct md_rdev *rdev)
{
	struct block_device *bdev = rdev->bdev;
	rdev->bdev = NULL;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void md_autodetect_dev(dev_t dev);

static void export_rdev(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "md: export_rdev(%s)\n",
	       bdevname(rdev->bdev, b));
	md_rdev_clear(rdev);
#ifndef MODULE
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}

void md_kick_rdev_from_array(struct md_rdev *rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);

static void export_array(struct mddev *mddev)
{
	struct md_rdev *rdev;

	while (!list_empty(&mddev->disks)) {
		rdev = list_first_entry(&mddev->disks, struct md_rdev,
					same_set);
		md_kick_rdev_from_array(rdev);
	}
	mddev->raid_disks = 0;
	mddev->major_version = 0;
}

static void sync_sbs(struct mddev *mddev, int nospares)
{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
	struct md_rdev *rdev;
	rdev_for_each(rdev, mddev) {
		if (rdev->sb_events == mddev->events ||
		    (nospares &&
		     rdev->raid_disk < 0 &&
		     rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
			rdev->sb_loaded = 2;
		} else {
			sync_super(mddev, rdev);
			rdev->sb_loaded = 1;
		}
	}
}

static bool does_sb_need_changing(struct mddev *mddev)
{
	struct md_rdev *rdev;
	struct mdp_superblock_1 *sb;
	int role;

	/* Find a good rdev */
	rdev_for_each(rdev, mddev)
		if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
			break;

	/* No good device found. */
	if (!rdev)
		return false;

	sb = page_address(rdev->sb_page);
	/* Check if a device has become faulty or a spare become active */
	rdev_for_each(rdev, mddev) {
		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
		if (role == 0xffff && rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags))
			return true;
		/* Device turned faulty? */
		if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
			return true;
	}

	/* Check if any mddev parameters have changed */
	if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
	    (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
	    (mddev->layout != le32_to_cpu(sb->layout)) ||
	    (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
	    (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
		return true;

	return false;
}
2279
2280void md_update_sb(struct mddev *mddev, int force_change)
2281{
2282 struct md_rdev *rdev;
2283 int sync_req;
2284 int nospares = 0;
2285 int any_badblocks_changed = 0;
2286 int ret = -1;
2287
2288 if (mddev->ro) {
2289 if (force_change)
2290 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2291 return;
2292 }
2293
2294 if (mddev_is_clustered(mddev)) {
2295 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2296 force_change = 1;
2297 ret = md_cluster_ops->metadata_update_start(mddev);
2298
2299 if (!does_sb_need_changing(mddev)) {
2300 if (ret == 0)
2301 md_cluster_ops->metadata_update_cancel(mddev);
2302 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2303 return;
2304 }
2305 }
2306repeat:
2307 /* First make sure individual recovery_offsets are correct */
2308 rdev_for_each(rdev, mddev) {
2309 if (rdev->raid_disk >= 0 &&
2310 mddev->delta_disks >= 0 &&
2311 !test_bit(Journal, &rdev->flags) &&
2312 !test_bit(In_sync, &rdev->flags) &&
2313 mddev->curr_resync_completed > rdev->recovery_offset)
2314 rdev->recovery_offset = mddev->curr_resync_completed;
2315
2316 }
2317 if (!mddev->persistent) {
2318 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2319 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2320 if (!mddev->external) {
2321 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2322 rdev_for_each(rdev, mddev) {
2323 if (rdev->badblocks.changed) {
2324 rdev->badblocks.changed = 0;
2325 ack_all_badblocks(&rdev->badblocks);
2326 md_error(mddev, rdev);
2327 }
2328 clear_bit(Blocked, &rdev->flags);
2329 clear_bit(BlockedBadBlocks, &rdev->flags);
2330 wake_up(&rdev->blocked_wait);
2331 }
2332 }
2333 wake_up(&mddev->sb_wait);
2334 return;
2335 }
2336
2337 spin_lock(&mddev->lock);
2338
2339 mddev->utime = ktime_get_real_seconds();
2340
2341 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2342 force_change = 1;
2343 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2344 /* just a clean<->dirty transition, possibly leave spares alone,
2345  * though if events isn't the right even/odd, we will have to do
2346  * spares after all
2347  */
2348 nospares = 1;
2349 if (force_change)
2350 nospares = 0;
2351 if (mddev->degraded)
2352 /* If the array is degraded, then skipping spares is both
2353  * dangerous and fairly pointless.
2354  * Dangerous because a device that was removed from the array
2355  * might have an event count that still looks up-to-date,
2356  * so it can be re-added without a resync.
2357  * Pointless because if there are any spares to skip,
2358  * then a recovery will happen and soon that array won't
2359  * be degraded any more and the spare can go back to sleep.
2360  */
2361 nospares = 0;
2362
2363 sync_req = mddev->in_sync;
2364
2365 /* If this is just a dirty<->clean transition, and the array is clean
2366  * and 'events' is odd, we can roll back to the previous clean state */
2367 if (nospares
2368 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2369 && mddev->can_decrease_events
2370 && mddev->events != 1) {
2371 mddev->events--;
2372 mddev->can_decrease_events = 0;
2373 } else {
2374 /* otherwise we have to go forward and ... */
2375 mddev->events ++;
2376 mddev->can_decrease_events = nospares;
2377 }
2378
2379 /*
2380  * This 64-bit counter should never wrap.
2381  * Either we are in around ~1 trillion A.C., assuming
2382  * 1 reboot per second, or we have a bug...
2383  */
2384 WARN_ON(mddev->events == 0);
2385
2386 rdev_for_each(rdev, mddev) {
2387 if (rdev->badblocks.changed)
2388 any_badblocks_changed++;
2389 if (test_bit(Faulty, &rdev->flags))
2390 set_bit(FaultRecorded, &rdev->flags);
2391 }
2392
2393 sync_sbs(mddev, nospares);
2394 spin_unlock(&mddev->lock);
2395
2396 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2397 mdname(mddev), mddev->in_sync);
2398
2399 bitmap_update_sb(mddev->bitmap);
2400 rdev_for_each(rdev, mddev) {
2401 char b[BDEVNAME_SIZE];
2402
2403 if (rdev->sb_loaded != 1)
2404 continue;
2405
2406 if (!test_bit(Faulty, &rdev->flags)) {
2407 md_super_write(mddev,rdev,
2408 rdev->sb_start, rdev->sb_size,
2409 rdev->sb_page);
2410 pr_debug("md: (write) %s's sb offset: %llu\n",
2411 bdevname(rdev->bdev, b),
2412 (unsigned long long)rdev->sb_start);
2413 rdev->sb_events = mddev->events;
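/* write out an updated bad-block list alongside the superblock */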
2414 if (rdev->badblocks.size) {
2415 md_super_write(mddev, rdev,
2416 rdev->badblocks.sector,
2417 rdev->badblocks.size << 9,
2418 rdev->bb_page);
2419 rdev->badblocks.size = 0;
2420 }
2421
2422 } else
2423 pr_debug("md: %s (skipping faulty)\n",
2424 bdevname(rdev->bdev, b));
2425
2426 if (mddev->level == LEVEL_MULTIPATH)
2427 /* only need to write one superblock... */
2428 break;
2429 }
2430 md_super_wait(mddev);
2431
2432 /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
2433 spin_lock(&mddev->lock);
2434 if (mddev->in_sync != sync_req ||
2435 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2436 /* have to write it out again */
2437 spin_unlock(&mddev->lock);
2438 goto repeat;
2439 }
2440 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2441 spin_unlock(&mddev->lock);
2442 wake_up(&mddev->sb_wait);
2443 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2444 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2445
2446 rdev_for_each(rdev, mddev) {
2447 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2448 clear_bit(Blocked, &rdev->flags);
2449
2450 if (any_badblocks_changed)
2451 ack_all_badblocks(&rdev->badblocks);
2452 clear_bit(BlockedBadBlocks, &rdev->flags);
2453 wake_up(&rdev->blocked_wait);
2454 }
2455
2456 if (mddev_is_clustered(mddev) && ret == 0)
2457 md_cluster_ops->metadata_update_finish(mddev);
2458}
2459EXPORT_SYMBOL(md_update_sb);
2460
2461static int add_bound_rdev(struct md_rdev *rdev)
2462{
2463 struct mddev *mddev = rdev->mddev;
2464 int err = 0;
2465 bool add_journal = test_bit(Journal, &rdev->flags);
2466
2467 if (!mddev->pers->hot_remove_disk || add_journal) {
2468 /* If there is hot_remove_disk but no hot_add_disk
2469  * then added disks are for geometry changes,
2470  * and should be added immediately.
2471  */
2472 super_types[mddev->major_version].
2473 validate_super(mddev, rdev);
2474 if (add_journal)
2475 mddev_suspend(mddev);
2476 err = mddev->pers->hot_add_disk(mddev, rdev);
2477 if (add_journal)
2478 mddev_resume(mddev);
2479 if (err) {
2480 unbind_rdev_from_array(rdev);
2481 export_rdev(rdev);
2482 return err;
2483 }
2484 }
2485 sysfs_notify_dirent_safe(rdev->sysfs_state);
2486
2487 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2488 if (mddev->degraded)
2489 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2490 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2491 md_new_event(mddev);
2492 md_wakeup_thread(mddev->thread);
2493 return 0;
2494}
2495
2496 /* words written to sysfs files may, or may not, be \n terminated.
2497  * We want to accept both forms.  For this we use cmd_match.
2498  */
2499static int cmd_match(const char *cmd, const char *str)
2500{
2501 /* See if cmd, written into a sysfs file, matches
2502  * str.  They must either be the same, or cmd can
2503  * have a trailing newline
2504  */
2505 while (*cmd && *str && *cmd == *str) {
2506 cmd++;
2507 str++;
2508 }
2509 if (*cmd == '\n')
2510 cmd++;
2511 if (*str || *cmd)
2512 return 0;
2513 return 1;
2514}
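/* e.g. cmd_match("frozen\n", "frozen") returns 1, while
 * cmd_match("fro", "frozen") and cmd_match("frozenx", "frozen")
 * both return 0. */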
2515
2516struct rdev_sysfs_entry {
2517 struct attribute attr;
2518 ssize_t (*show)(struct md_rdev *, char *);
2519 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2520};
2521
2522static ssize_t
2523state_show(struct md_rdev *rdev, char *page)
2524{
2525 char *sep = "";
2526 size_t len = 0;
2527 unsigned long flags = ACCESS_ONCE(rdev->flags);
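/* sample the flags word once so that concurrent updates cannot
 * produce an inconsistent list of state names */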
2528
2529 if (test_bit(Faulty, &flags) ||
2530 rdev->badblocks.unacked_exist) {
2531 len+= sprintf(page+len, "%sfaulty",sep);
2532 sep = ",";
2533 }
2534 if (test_bit(In_sync, &flags)) {
2535 len += sprintf(page+len, "%sin_sync",sep);
2536 sep = ",";
2537 }
2538 if (test_bit(Journal, &flags)) {
2539 len += sprintf(page+len, "%sjournal",sep);
2540 sep = ",";
2541 }
2542 if (test_bit(WriteMostly, &flags)) {
2543 len += sprintf(page+len, "%swrite_mostly",sep);
2544 sep = ",";
2545 }
2546 if (test_bit(Blocked, &flags) ||
2547 (rdev->badblocks.unacked_exist
2548 && !test_bit(Faulty, &flags))) {
2549 len += sprintf(page+len, "%sblocked", sep);
2550 sep = ",";
2551 }
2552 if (!test_bit(Faulty, &flags) &&
2553 !test_bit(Journal, &flags) &&
2554 !test_bit(In_sync, &flags)) {
2555 len += sprintf(page+len, "%sspare", sep);
2556 sep = ",";
2557 }
2558 if (test_bit(WriteErrorSeen, &flags)) {
2559 len += sprintf(page+len, "%swrite_error", sep);
2560 sep = ",";
2561 }
2562 if (test_bit(WantReplacement, &flags)) {
2563 len += sprintf(page+len, "%swant_replacement", sep);
2564 sep = ",";
2565 }
2566 if (test_bit(Replacement, &flags)) {
2567 len += sprintf(page+len, "%sreplacement", sep);
2568 sep = ",";
2569 }
2570
2571 return len+sprintf(page+len, "\n");
2572}
2573
2574static ssize_t
2575state_store(struct md_rdev *rdev, const char *buf, size_t len)
2576{
2577 /* can write
2578  *  faulty  - simulates an error
2579  *  remove  - disconnects the device
2580  *  writemostly - sets write_mostly
2581  *  -writemostly - clears write_mostly
2582  *  blocked - sets the Blocked flags
2583  *  -blocked - clears the Blocked and possibly simulates an error
2584  *  insync - sets Insync providing device isn't active
2585  *  -insync - clear Insync for a device with a slot assigned,
2586  *            so that it gets rebuilt based on bitmap
2587  *  write_error - sets WriteErrorSeen
2588  *  -write_error - clears WriteErrorSeen
2589  */
2590 int err = -EINVAL;
2591 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2592 md_error(rdev->mddev, rdev);
2593 if (test_bit(Faulty, &rdev->flags))
2594 err = 0;
2595 else
2596 err = -EBUSY;
2597 } else if (cmd_match(buf, "remove")) {
2598 if (rdev->raid_disk >= 0)
2599 err = -EBUSY;
2600 else {
2601 struct mddev *mddev = rdev->mddev;
2602 err = 0;
2603 if (mddev_is_clustered(mddev))
2604 err = md_cluster_ops->remove_disk(mddev, rdev);
2605
2606 if (err == 0) {
2607 md_kick_rdev_from_array(rdev);
2608 if (mddev->pers)
2609 md_update_sb(mddev, 1);
2610 md_new_event(mddev);
2611 }
2612 }
2613 } else if (cmd_match(buf, "writemostly")) {
2614 set_bit(WriteMostly, &rdev->flags);
2615 err = 0;
2616 } else if (cmd_match(buf, "-writemostly")) {
2617 clear_bit(WriteMostly, &rdev->flags);
2618 err = 0;
2619 } else if (cmd_match(buf, "blocked")) {
2620 set_bit(Blocked, &rdev->flags);
2621 err = 0;
2622 } else if (cmd_match(buf, "-blocked")) {
2623 if (!test_bit(Faulty, &rdev->flags) &&
2624 rdev->badblocks.unacked_exist) {
2625 /* metadata handler doesn't understand badblocks,
2626  * so we need to fail the device
2627  */
2628 md_error(rdev->mddev, rdev);
2629 }
2630 clear_bit(Blocked, &rdev->flags);
2631 clear_bit(BlockedBadBlocks, &rdev->flags);
2632 wake_up(&rdev->blocked_wait);
2633 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2634 md_wakeup_thread(rdev->mddev->thread);
2635
2636 err = 0;
2637 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2638 set_bit(In_sync, &rdev->flags);
2639 err = 0;
2640 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2641 !test_bit(Journal, &rdev->flags)) {
2642 if (rdev->mddev->pers == NULL) {
2643 clear_bit(In_sync, &rdev->flags);
2644 rdev->saved_raid_disk = rdev->raid_disk;
2645 rdev->raid_disk = -1;
2646 err = 0;
2647 }
2648 } else if (cmd_match(buf, "write_error")) {
2649 set_bit(WriteErrorSeen, &rdev->flags);
2650 err = 0;
2651 } else if (cmd_match(buf, "-write_error")) {
2652 clear_bit(WriteErrorSeen, &rdev->flags);
2653 err = 0;
2654 } else if (cmd_match(buf, "want_replacement")) {
2655 /* Any non-spare device that is not a replacement can
2656  * become want_replacement at any time, but we then need to
2657  * check if recovery is needed.
2658  */
2659 if (rdev->raid_disk >= 0 &&
2660 !test_bit(Journal, &rdev->flags) &&
2661 !test_bit(Replacement, &rdev->flags))
2662 set_bit(WantReplacement, &rdev->flags);
2663 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2664 md_wakeup_thread(rdev->mddev->thread);
2665 err = 0;
2666 } else if (cmd_match(buf, "-want_replacement")) {
2667 /* Clearing 'want_replacement' is always allowed.
2668  * Once replacement starts it is too late though.
2669  */
2670 err = 0;
2671 clear_bit(WantReplacement, &rdev->flags);
2672 } else if (cmd_match(buf, "replacement")) {
2673 /* Can only set a device as a replacement when array has not
2674  * yet been started.  Once running, replacement is automatic
2675  * from spares, or by assigning 'slot'.
2676  */
2677 if (rdev->mddev->pers)
2678 err = -EBUSY;
2679 else {
2680 set_bit(Replacement, &rdev->flags);
2681 err = 0;
2682 }
2683 } else if (cmd_match(buf, "-replacement")) {
2684 /* Similarly, can only clear Replacement before start */
2685 if (rdev->mddev->pers)
2686 err = -EBUSY;
2687 else {
2688 clear_bit(Replacement, &rdev->flags);
2689 err = 0;
2690 }
2691 } else if (cmd_match(buf, "re-add")) {
2692 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2693 /* clear_bit is performed _after_ all the devices
2694  * have their local Faulty bit cleared. If any writes
2695  * happen in the meantime in the local node, they
2696  * will land in the local bitmap, which will be synced
2697  * by this node eventually
2698  */
2699 if (!mddev_is_clustered(rdev->mddev) ||
2700 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2701 clear_bit(Faulty, &rdev->flags);
2702 err = add_bound_rdev(rdev);
2703 }
2704 } else
2705 err = -EBUSY;
2706 }
2707 if (!err)
2708 sysfs_notify_dirent_safe(rdev->sysfs_state);
2709 return err ? err : len;
2710}
2711static struct rdev_sysfs_entry rdev_state =
2712__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2713
2714static ssize_t
2715errors_show(struct md_rdev *rdev, char *page)
2716{
2717 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2718}
2719
2720static ssize_t
2721errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2722{
2723 unsigned int n;
2724 int rv;
2725
2726 rv = kstrtouint(buf, 10, &n);
2727 if (rv < 0)
2728 return rv;
2729 atomic_set(&rdev->corrected_errors, n);
2730 return len;
2731}
2732static struct rdev_sysfs_entry rdev_errors =
2733__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2734
2735static ssize_t
2736slot_show(struct md_rdev *rdev, char *page)
2737{
2738 if (test_bit(Journal, &rdev->flags))
2739 return sprintf(page, "journal\n");
2740 else if (rdev->raid_disk < 0)
2741 return sprintf(page, "none\n");
2742 else
2743 return sprintf(page, "%d\n", rdev->raid_disk);
2744}
2745
2746static ssize_t
2747slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2748{
2749 int slot;
2750 int err;
2751
2752 if (test_bit(Journal, &rdev->flags))
2753 return -EBUSY;
2754 if (strncmp(buf, "none", 4)==0)
2755 slot = -1;
2756 else {
2757 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2758 if (err < 0)
2759 return err;
2760 }
2761 if (rdev->mddev->pers && slot == -1) {
2762 /* Setting 'slot' on an active array requires also
2763  * updating the 'rd%d' link, and communicating
2764  * with the personality with ->hot_*_disk.
2765  * For now we only support removing
2766  * failed/spare devices.  This normally happens automatically,
2767  * but not when the metadata is externally managed.
2768  */
2769 if (rdev->raid_disk == -1)
2770 return -EEXIST;
2771
2772 if (rdev->mddev->pers->hot_remove_disk == NULL)
2773 return -EINVAL;
2774 clear_bit(Blocked, &rdev->flags);
2775 remove_and_add_spares(rdev->mddev, rdev);
2776 if (rdev->raid_disk >= 0)
2777 return -EBUSY;
2778 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2779 md_wakeup_thread(rdev->mddev->thread);
2780 } else if (rdev->mddev->pers) {
2781 /* Activating a spare .. or possibly reactivating
2782  * if we ever get bitmaps working here.
2783  */
2784 int err;
2785
2786 if (rdev->raid_disk != -1)
2787 return -EBUSY;
2788
2789 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2790 return -EBUSY;
2791
2792 if (rdev->mddev->pers->hot_add_disk == NULL)
2793 return -EINVAL;
2794
2795 if (slot >= rdev->mddev->raid_disks &&
2796 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2797 return -ENOSPC;
2798
2799 rdev->raid_disk = slot;
2800 if (test_bit(In_sync, &rdev->flags))
2801 rdev->saved_raid_disk = slot;
2802 else
2803 rdev->saved_raid_disk = -1;
2804 clear_bit(In_sync, &rdev->flags);
2805 clear_bit(Bitmap_sync, &rdev->flags);
2806 err = rdev->mddev->pers->
2807 hot_add_disk(rdev->mddev, rdev);
2808 if (err) {
2809 rdev->raid_disk = -1;
2810 return err;
2811 } else
2812 sysfs_notify_dirent_safe(rdev->sysfs_state);
2813 if (sysfs_link_rdev(rdev->mddev, rdev))
2814 /* failure here is OK */;
2815
2816 } else {
2817 if (slot >= rdev->mddev->raid_disks &&
2818 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2819 return -ENOSPC;
2820 rdev->raid_disk = slot;
2821 /* assume it is working */
2822 clear_bit(Faulty, &rdev->flags);
2823 clear_bit(WriteMostly, &rdev->flags);
2824 set_bit(In_sync, &rdev->flags);
2825 sysfs_notify_dirent_safe(rdev->sysfs_state);
2826 }
2827 return len;
2828}
2829
2830static struct rdev_sysfs_entry rdev_slot =
2831__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2832
2833static ssize_t
2834offset_show(struct md_rdev *rdev, char *page)
2835{
2836 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2837}
2838
2839static ssize_t
2840offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2841{
2842 unsigned long long offset;
2843 if (kstrtoull(buf, 10, &offset) < 0)
2844 return -EINVAL;
2845 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2846 return -EBUSY;
2847 if (rdev->sectors && rdev->mddev->external)
2848 /* Must set offset before size, so overlap checks
2849  * can be sane */
2850 return -EBUSY;
2851 rdev->data_offset = offset;
2852 rdev->new_data_offset = offset;
2853 return len;
2854}
2855
2856static struct rdev_sysfs_entry rdev_offset =
2857__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2858
2859static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2860{
2861 return sprintf(page, "%llu\n",
2862 (unsigned long long)rdev->new_data_offset);
2863}
2864
2865static ssize_t new_offset_store(struct md_rdev *rdev,
2866 const char *buf, size_t len)
2867{
2868 unsigned long long new_offset;
2869 struct mddev *mddev = rdev->mddev;
2870
2871 if (kstrtoull(buf, 10, &new_offset) < 0)
2872 return -EINVAL;
2873
2874 if (mddev->sync_thread ||
2875 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2876 return -EBUSY;
2877 if (new_offset == rdev->data_offset)
2878 /* reset is always permitted */
2879 ;
2880 else if (new_offset > rdev->data_offset) {
2881 /* must not push array size beyond rdev_sectors */
2882 if (new_offset - rdev->data_offset
2883 + mddev->dev_sectors > rdev->sectors)
2884 return -E2BIG;
2885 }
2886 /* Metadata worries about other space details. */
2887
2888 /* decreasing the offset is inconsistent with a backwards
2889  * reshape.
2890  */
2891 if (new_offset < rdev->data_offset &&
2892 mddev->reshape_backwards)
2893 return -EINVAL;
2894 /* Increasing offset is inconsistent with forwards
2895  * reshape.  reshape_direction should be set to
2896  * 'backwards' first.
2897  */
2898 if (new_offset > rdev->data_offset &&
2899 !mddev->reshape_backwards)
2900 return -EINVAL;
2901
2902 if (mddev->pers && mddev->persistent &&
2903 !super_types[mddev->major_version]
2904 .allow_new_offset(rdev, new_offset))
2905 return -E2BIG;
2906 rdev->new_data_offset = new_offset;
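/* moving data to a higher offset must copy from the end of the
 * device, i.e. a "backwards" reshape */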
2907 if (new_offset > rdev->data_offset)
2908 mddev->reshape_backwards = 1;
2909 else if (new_offset < rdev->data_offset)
2910 mddev->reshape_backwards = 0;
2911
2912 return len;
2913}
2914static struct rdev_sysfs_entry rdev_new_offset =
2915__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2916
2917static ssize_t
2918rdev_size_show(struct md_rdev *rdev, char *page)
2919{
2920 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2921}
2922
2923static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2924{
2925 /* check if two start/length pairs overlap */
2926 if (s1+l1 <= s2)
2927 return 0;
2928 if (s2+l2 <= s1)
2929 return 0;
2930 return 1;
2931}
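/* e.g. overlaps(0, 10, 5, 10) returns 1, while overlaps(0, 10, 10, 5)
 * returns 0 because the ranges merely touch. */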
2932
2933static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2934{
2935 unsigned long long blocks;
2936 sector_t new;
2937
2938 if (kstrtoull(buf, 10, &blocks) < 0)
2939 return -EINVAL;
2940
2941 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2942 return -EINVAL; /* sector conversion overflow */
2943
2944 new = blocks * 2;
2945 if (new != blocks * 2)
2946 return -EINVAL; /* unsigned long long to sector_t overflow */
2947
2948 *sectors = new;
2949 return 0;
2950}
2951
2952static ssize_t
2953rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2954{
2955 struct mddev *my_mddev = rdev->mddev;
2956 sector_t oldsectors = rdev->sectors;
2957 sector_t sectors;
2958
2959 if (test_bit(Journal, &rdev->flags))
2960 return -EBUSY;
2961 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2962 return -EINVAL;
2963 if (rdev->data_offset != rdev->new_data_offset)
2964 return -EINVAL;
2965 if (my_mddev->pers && rdev->raid_disk >= 0) {
2966 if (my_mddev->persistent) {
2967 sectors = super_types[my_mddev->major_version].
2968 rdev_size_change(rdev, sectors);
2969 if (!sectors)
2970 return -EBUSY;
2971 } else if (!sectors)
2972 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2973 rdev->data_offset;
2974 if (!my_mddev->pers->resize)
2975 /* Cannot change size for RAID0 or Linear etc */
2976 return -EINVAL;
2977 }
2978 if (sectors < my_mddev->dev_sectors)
2979 return -EINVAL; /* component must fit device */
2980
2981 rdev->sectors = sectors;
2982 if (sectors > oldsectors && my_mddev->external) {
2983 /* Need to check that all other rdevs with the same
2984  * ->bdev do not overlap.  'rcu' is sufficient to walk
2985  * the rdev lists safely.
2986  * This check does not provide a hard guarantee, it
2987  * just helps avoid dangerous mistakes.
2988  */
2989 struct mddev *mddev;
2990 int overlap = 0;
2991 struct list_head *tmp;
2992
2993 rcu_read_lock();
2994 for_each_mddev(mddev, tmp) {
2995 struct md_rdev *rdev2;
2996
2997 rdev_for_each(rdev2, mddev)
2998 if (rdev->bdev == rdev2->bdev &&
2999 rdev != rdev2 &&
3000 overlaps(rdev->data_offset, rdev->sectors,
3001 rdev2->data_offset,
3002 rdev2->sectors)) {
3003 overlap = 1;
3004 break;
3005 }
3006 if (overlap) {
3007 mddev_put(mddev);
3008 break;
3009 }
3010 }
3011 rcu_read_unlock();
3012 if (overlap) {
3013 /* Someone else could have slipped in a size
3014  * change here, but doing so is just silly.
3015  * We put oldsectors back because we *know* it is
3016  * safe, and trust everything else.
3017  */
3018
3019 rdev->sectors = oldsectors;
3020 return -EBUSY;
3021 }
3022 }
3023 return len;
3024}
3025
3026static struct rdev_sysfs_entry rdev_size =
3027__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3028
3029static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3030{
3031 unsigned long long recovery_start = rdev->recovery_offset;
3032
3033 if (test_bit(In_sync, &rdev->flags) ||
3034 recovery_start == MaxSector)
3035 return sprintf(page, "none\n");
3036
3037 return sprintf(page, "%llu\n", recovery_start);
3038}
3039
3040static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3041{
3042 unsigned long long recovery_start;
3043
3044 if (cmd_match(buf, "none"))
3045 recovery_start = MaxSector;
3046 else if (kstrtoull(buf, 10, &recovery_start))
3047 return -EINVAL;
3048
3049 if (rdev->mddev->pers &&
3050 rdev->raid_disk >= 0)
3051 return -EBUSY;
3052
3053 rdev->recovery_offset = recovery_start;
3054 if (recovery_start == MaxSector)
3055 set_bit(In_sync, &rdev->flags);
3056 else
3057 clear_bit(In_sync, &rdev->flags);
3058 return len;
3059}
3060
3061static struct rdev_sysfs_entry rdev_recovery_start =
3062__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3063
3064 /* sysfs access to bad-blocks list.
3065  * We present two files.
3066  * 'bad_blocks' lists sector numbers and lengths of ranges that
3067  *    are recorded as bad.  The list is truncated to fit within
3068  *    the one-page limit of sysfs.
3069  *    Writing "sector length" to this file adds an acknowledged
3070  *    bad block list.
3071  * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
3072  *    been acknowledged.  Writing to this file adds bad blocks
3073  *    without acknowledging them.  This is largely for testing.
3074  */
3075static ssize_t bb_show(struct md_rdev *rdev, char *page)
3076{
3077 return badblocks_show(&rdev->badblocks, page, 0);
3078}
3079static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3080{
3081 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3082
3083 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3084 wake_up(&rdev->blocked_wait);
3085 return rv;
3086}
3087static struct rdev_sysfs_entry rdev_bad_blocks =
3088__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3089
3090static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3091{
3092 return badblocks_show(&rdev->badblocks, page, 1);
3093}
3094static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3095{
3096 return badblocks_store(&rdev->badblocks, page, len, 1);
3097}
3098static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3099__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3100
3101static struct attribute *rdev_default_attrs[] = {
3102 &rdev_state.attr,
3103 &rdev_errors.attr,
3104 &rdev_slot.attr,
3105 &rdev_offset.attr,
3106 &rdev_new_offset.attr,
3107 &rdev_size.attr,
3108 &rdev_recovery_start.attr,
3109 &rdev_bad_blocks.attr,
3110 &rdev_unack_bad_blocks.attr,
3111 NULL,
3112};
3113static ssize_t
3114rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3115{
3116 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3117 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3118
3119 if (!entry->show)
3120 return -EIO;
3121 if (!rdev->mddev)
3122 return -EBUSY;
3123 return entry->show(rdev, page);
3124}
3125
3126static ssize_t
3127rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3128 const char *page, size_t length)
3129{
3130 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3131 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3132 ssize_t rv;
3133 struct mddev *mddev = rdev->mddev;
3134
3135 if (!entry->store)
3136 return -EIO;
3137 if (!capable(CAP_SYS_ADMIN))
3138 return -EACCES;
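/* rdev->mddev can be cleared while the rdev is being torn down, so
 * take the array lock and re-check it before dereferencing */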
3139 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3140 if (!rv) {
3141 if (rdev->mddev == NULL)
3142 rv = -EBUSY;
3143 else
3144 rv = entry->store(rdev, page, length);
3145 mddev_unlock(mddev);
3146 }
3147 return rv;
3148}
3149
3150static void rdev_free(struct kobject *ko)
3151{
3152 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3153 kfree(rdev);
3154}
3155static const struct sysfs_ops rdev_sysfs_ops = {
3156 .show = rdev_attr_show,
3157 .store = rdev_attr_store,
3158};
3159static struct kobj_type rdev_ktype = {
3160 .release = rdev_free,
3161 .sysfs_ops = &rdev_sysfs_ops,
3162 .default_attrs = rdev_default_attrs,
3163};
3164
3165int md_rdev_init(struct md_rdev *rdev)
3166{
3167 rdev->desc_nr = -1;
3168 rdev->saved_raid_disk = -1;
3169 rdev->raid_disk = -1;
3170 rdev->flags = 0;
3171 rdev->data_offset = 0;
3172 rdev->new_data_offset = 0;
3173 rdev->sb_events = 0;
3174 rdev->last_read_error.tv_sec = 0;
3175 rdev->last_read_error.tv_nsec = 0;
3176 rdev->sb_loaded = 0;
3177 rdev->bb_page = NULL;
3178 atomic_set(&rdev->nr_pending, 0);
3179 atomic_set(&rdev->read_errors, 0);
3180 atomic_set(&rdev->corrected_errors, 0);
3181
3182 INIT_LIST_HEAD(&rdev->same_set);
3183 init_waitqueue_head(&rdev->blocked_wait);
3184
3185 /* Add space to store bad block list.
3186  * This reserves the space even on arrays where it cannot
3187  * be used - I wonder if that matters
3188  */
3189 return badblocks_init(&rdev->badblocks, 0);
3190}
3191EXPORT_SYMBOL_GPL(md_rdev_init);
3192
3193 /*
3194  * Import a device. If 'super_format' >= 0, then sanity check the superblock
3195  *
3196  * mark the device faulty if:
3197  *   - the device is nonexistent (zero size)
3198  *   - the device has no valid superblock
3199  *
3200  * a faulty rdev _never_ has rdev->sb set.
3201  */
3202static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3203{
3204 char b[BDEVNAME_SIZE];
3205 int err;
3206 struct md_rdev *rdev;
3207 sector_t size;
3208
3209 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3210 if (!rdev) {
3211 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3212 return ERR_PTR(-ENOMEM);
3213 }
3214
3215 err = md_rdev_init(rdev);
3216 if (err)
3217 goto abort_free;
3218 err = alloc_disk_sb(rdev);
3219 if (err)
3220 goto abort_free;
3221
3222 err = lock_rdev(rdev, newdev, super_format == -2);
3223 if (err)
3224 goto abort_free;
3225
3226 kobject_init(&rdev->kobj, &rdev_ktype);
3227
3228 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3229 if (!size) {
3230 printk(KERN_WARNING
3231 "md: %s has zero or unknown size, marking faulty!\n",
3232 bdevname(rdev->bdev,b));
3233 err = -EINVAL;
3234 goto abort_free;
3235 }
3236
3237 if (super_format >= 0) {
3238 err = super_types[super_format].
3239 load_super(rdev, NULL, super_minor);
3240 if (err == -EINVAL) {
3241 printk(KERN_WARNING
3242 "md: %s does not have a valid v%d.%d "
3243 "superblock, not importing!\n",
3244 bdevname(rdev->bdev,b),
3245 super_format, super_minor);
3246 goto abort_free;
3247 }
3248 if (err < 0) {
3249 printk(KERN_WARNING
3250 "md: could not read %s's sb, not importing!\n",
3251 bdevname(rdev->bdev,b));
3252 goto abort_free;
3253 }
3254 }
3255
3256 return rdev;
3257
3258abort_free:
3259 if (rdev->bdev)
3260 unlock_rdev(rdev);
3261 md_rdev_clear(rdev);
3262 kfree(rdev);
3263 return ERR_PTR(err);
3264}
3265
3266 /*
3267  * Check a full RAID array for plausibility
3268  */
3269
3270static void analyze_sbs(struct mddev *mddev)
3271{
3272 int i;
3273 struct md_rdev *rdev, *freshest, *tmp;
3274 char b[BDEVNAME_SIZE];
3275
3276 freshest = NULL;
3277 rdev_for_each_safe(rdev, tmp, mddev)
3278 switch (super_types[mddev->major_version].
3279 load_super(rdev, freshest, mddev->minor_version)) {
3280 case 1:
3281 freshest = rdev;
3282 break;
3283 case 0:
3284 break;
3285 default:
3286 printk(KERN_ERR
3287 "md: fatal superblock inconsistency in %s"
3288 " -- removing from array\n",
3289 bdevname(rdev->bdev,b));
3290 md_kick_rdev_from_array(rdev);
3291 }
3292 /* validate array-wide info against the freshest superblock */
3293 super_types[mddev->major_version].
3294 validate_super(mddev, freshest);
3295
3296 i = 0;
3297 rdev_for_each_safe(rdev, tmp, mddev) {
3298 if (mddev->max_disks &&
3299 (rdev->desc_nr >= mddev->max_disks ||
3300 i > mddev->max_disks)) {
3301 printk(KERN_WARNING
3302 "md: %s: %s: only %d devices permitted\n",
3303 mdname(mddev), bdevname(rdev->bdev, b),
3304 mddev->max_disks);
3305 md_kick_rdev_from_array(rdev);
3306 continue;
3307 }
3308 if (rdev != freshest) {
3309 if (super_types[mddev->major_version].
3310 validate_super(mddev, rdev)) {
3311 printk(KERN_WARNING "md: kicking non-fresh %s"
3312 " from array!\n",
3313 bdevname(rdev->bdev,b));
3314 md_kick_rdev_from_array(rdev);
3315 continue;
3316 }
3317 }
3318 if (mddev->level == LEVEL_MULTIPATH) {
3319 rdev->desc_nr = i++;
3320 rdev->raid_disk = rdev->desc_nr;
3321 set_bit(In_sync, &rdev->flags);
3322 } else if (rdev->raid_disk >=
3323 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3324 !test_bit(Journal, &rdev->flags)) {
3325 rdev->raid_disk = -1;
3326 clear_bit(In_sync, &rdev->flags);
3327 }
3328 }
3329}
3330
3331 /* Read a fixed-point number.
3332  * Numbers in sysfs attributes should be in "standard" units where
3333  * possible, so time should be in seconds.
3334  * However we internally use a much smaller unit such as
3335  * milliseconds or jiffies.
3336  * This function takes a decimal number with a possible fractional
3337  * component, and produces an integer which is the result of
3338  * multiplying that number by 10^'scale',
3339  * all without any floating-point arithmetic.
3340  */
3341int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3342{
3343 unsigned long result = 0;
3344 long decimals = -1;
3345 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3346 if (*cp == '.')
3347 decimals = 0;
3348 else if (decimals < scale) {
3349 unsigned int value;
3350 value = *cp - '0';
3351 result = result * 10 + value;
3352 if (decimals >= 0)
3353 decimals++;
3354 }
3355 cp++;
3356 }
3357 if (*cp == '\n')
3358 cp++;
3359 if (*cp)
3360 return -EINVAL;
3361 if (decimals < 0)
3362 decimals = 0;
3363 while (decimals < scale) {
3364 result *= 10;
3365 decimals ++;
3366 }
3367 *res = result;
3368 return 0;
3369}
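/* e.g. strict_strtoul_scaled("1.273", &res, 3) yields res == 1273,
 * and strict_strtoul_scaled("5", &res, 3) yields res == 5000. */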
3370
3371static ssize_t
3372safe_delay_show(struct mddev *mddev, char *page)
3373{
3374 int msec = (mddev->safemode_delay*1000)/HZ;
3375 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3376}
3377static ssize_t
3378safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3379{
3380 unsigned long msec;
3381
3382 if (mddev_is_clustered(mddev)) {
3383 pr_info("md: Safemode is disabled for clustered mode\n");
3384 return -EINVAL;
3385 }
3386
3387 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3388 return -EINVAL;
3389 if (msec == 0)
3390 mddev->safemode_delay = 0;
3391 else {
3392 unsigned long old_delay = mddev->safemode_delay;
3393 unsigned long new_delay = (msec*HZ)/1000;
3394 /* round a non-zero delay up to at least one jiffy */
3395 if (new_delay == 0)
3396 new_delay = 1;
3397 mddev->safemode_delay = new_delay;
3398 if (new_delay < old_delay || old_delay == 0)
3399 mod_timer(&mddev->safemode_timer, jiffies+1);
3400 }
3401 return len;
3402}
3403static struct md_sysfs_entry md_safe_delay =
3404__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3405
3406static ssize_t
3407level_show(struct mddev *mddev, char *page)
3408{
3409 struct md_personality *p;
3410 int ret;
3411 spin_lock(&mddev->lock);
3412 p = mddev->pers;
3413 if (p)
3414 ret = sprintf(page, "%s\n", p->name);
3415 else if (mddev->clevel[0])
3416 ret = sprintf(page, "%s\n", mddev->clevel);
3417 else if (mddev->level != LEVEL_NONE)
3418 ret = sprintf(page, "%d\n", mddev->level);
3419 else
3420 ret = 0;
3421 spin_unlock(&mddev->lock);
3422 return ret;
3423}
3424
3425static ssize_t
3426level_store(struct mddev *mddev, const char *buf, size_t len)
3427{
3428 char clevel[16];
3429 ssize_t rv;
3430 size_t slen = len;
3431 struct md_personality *pers, *oldpers;
3432 long level;
3433 void *priv, *oldpriv;
3434 struct md_rdev *rdev;
3435
3436 if (slen == 0 || slen >= sizeof(clevel))
3437 return -EINVAL;
3438
3439 rv = mddev_lock(mddev);
3440 if (rv)
3441 return rv;
3442
3443 if (mddev->pers == NULL) {
3444 strncpy(mddev->clevel, buf, slen);
3445 if (mddev->clevel[slen-1] == '\n')
3446 slen--;
3447 mddev->clevel[slen] = 0;
3448 mddev->level = LEVEL_NONE;
3449 rv = len;
3450 goto out_unlock;
3451 }
3452 rv = -EROFS;
3453 if (mddev->ro)
3454 goto out_unlock;
3455
3456 /* request to change the personality.  Need to ensure:
3457  *  - array is not engaged in resync/recovery/reshape
3458  *  - old personality can be suspended
3459  *  - new personality will access other array.
3460  */
3461
3462 rv = -EBUSY;
3463 if (mddev->sync_thread ||
3464 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3465 mddev->reshape_position != MaxSector ||
3466 mddev->sysfs_active)
3467 goto out_unlock;
3468
3469 rv = -EINVAL;
3470 if (!mddev->pers->quiesce) {
3471 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3472 mdname(mddev), mddev->pers->name);
3473 goto out_unlock;
3474 }
3475
3476 /* Now find the new personality */
3477 strncpy(clevel, buf, slen);
3478 if (clevel[slen-1] == '\n')
3479 slen--;
3480 clevel[slen] = 0;
3481 if (kstrtol(clevel, 10, &level))
3482 level = LEVEL_NONE;
3483
3484 if (request_module("md-%s", clevel) != 0)
3485 request_module("md-level-%s", clevel);
3486 spin_lock(&pers_lock);
3487 pers = find_pers(level, clevel);
3488 if (!pers || !try_module_get(pers->owner)) {
3489 spin_unlock(&pers_lock);
3490 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3491 rv = -EINVAL;
3492 goto out_unlock;
3493 }
3494 spin_unlock(&pers_lock);
3495
3496 if (pers == mddev->pers) {
3497 /* Nothing to do! */
3498 module_put(pers->owner);
3499 rv = len;
3500 goto out_unlock;
3501 }
3502 if (!pers->takeover) {
3503 module_put(pers->owner);
3504 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3505 mdname(mddev), clevel);
3506 rv = -EINVAL;
3507 goto out_unlock;
3508 }
3509
3510 rdev_for_each(rdev, mddev)
3511 rdev->new_raid_disk = rdev->raid_disk;
3512
3513 /* ->takeover must set new_* and/or delta_disks
3514  * if it succeeds, and may set them when it fails.
3515  */
3516 priv = pers->takeover(mddev);
3517 if (IS_ERR(priv)) {
3518 mddev->new_level = mddev->level;
3519 mddev->new_layout = mddev->layout;
3520 mddev->new_chunk_sectors = mddev->chunk_sectors;
3521 mddev->raid_disks -= mddev->delta_disks;
3522 mddev->delta_disks = 0;
3523 mddev->reshape_backwards = 0;
3524 module_put(pers->owner);
3525 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3526 mdname(mddev), clevel);
3527 rv = PTR_ERR(priv);
3528 goto out_unlock;
3529 }
3530
3531 /* Looks like we have a winner */
3532 mddev_suspend(mddev);
3533 mddev_detach(mddev);
3534
3535 spin_lock(&mddev->lock);
3536 oldpers = mddev->pers;
3537 oldpriv = mddev->private;
3538 mddev->pers = pers;
3539 mddev->private = priv;
3540 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3541 mddev->level = mddev->new_level;
3542 mddev->layout = mddev->new_layout;
3543 mddev->chunk_sectors = mddev->new_chunk_sectors;
3544 mddev->delta_disks = 0;
3545 mddev->reshape_backwards = 0;
3546 mddev->degraded = 0;
3547 spin_unlock(&mddev->lock);
3548
3549 if (oldpers->sync_request == NULL &&
3550 mddev->external) {
3551 /* We are converting from a no-redundancy array
3552  * to a redundancy array and metadata is managed
3553  * externally so we need to be sure that writes
3554  * won't block due to a need to update the metadata
3555  * until an external management tool has claimed the
3556  * array, so leave it dirty with safemode disabled.
3557  */
3558 mddev->in_sync = 0;
3559 mddev->safemode_delay = 0;
3560 mddev->safemode = 0;
3561 }
3562
3563 oldpers->free(mddev, oldpriv);
3564
3565 if (oldpers->sync_request == NULL &&
3566 pers->sync_request != NULL) {
3567 /* need to add the md_redundancy_group */
3568 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3569 printk(KERN_WARNING
3570 "md: cannot register extra attributes for %s\n",
3571 mdname(mddev));
3572 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3573 }
3574 if (oldpers->sync_request != NULL &&
3575 pers->sync_request == NULL) {
3576 /* need to remove the md_redundancy_group */
3577 if (mddev->to_remove == NULL)
3578 mddev->to_remove = &md_redundancy_group;
3579 }
3580
3581 rdev_for_each(rdev, mddev) {
3582 if (rdev->raid_disk < 0)
3583 continue;
3584 if (rdev->new_raid_disk >= mddev->raid_disks)
3585 rdev->new_raid_disk = -1;
3586 if (rdev->new_raid_disk == rdev->raid_disk)
3587 continue;
3588 sysfs_unlink_rdev(mddev, rdev);
3589 }
3590 rdev_for_each(rdev, mddev) {
3591 if (rdev->raid_disk < 0)
3592 continue;
3593 if (rdev->new_raid_disk == rdev->raid_disk)
3594 continue;
3595 rdev->raid_disk = rdev->new_raid_disk;
3596 if (rdev->raid_disk < 0)
3597 clear_bit(In_sync, &rdev->flags);
3598 else {
3599 if (sysfs_link_rdev(mddev, rdev))
3600 printk(KERN_WARNING "md: cannot register rd%d"
3601 " for %s after level change\n",
3602 rdev->raid_disk, mdname(mddev));
3603 }
3604 }
3605
3606 if (pers->sync_request == NULL) {
3607 /* this is now an array without redundancy, so
3608  * it must always be in_sync
3609  */
3610 mddev->in_sync = 1;
3611 del_timer_sync(&mddev->safemode_timer);
3612 }
3613 blk_set_stacking_limits(&mddev->queue->limits);
3614 pers->run(mddev);
3615 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3616 mddev_resume(mddev);
3617 if (!mddev->thread)
3618 md_update_sb(mddev, 1);
3619 sysfs_notify(&mddev->kobj, NULL, "level");
3620 md_new_event(mddev);
3621 rv = len;
3622out_unlock:
3623 mddev_unlock(mddev);
3624 return rv;
3625}
3626
3627static struct md_sysfs_entry md_level =
3628__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3629
3630static ssize_t
3631layout_show(struct mddev *mddev, char *page)
3632{
3633
3634 if (mddev->reshape_position != MaxSector &&
3635 mddev->layout != mddev->new_layout)
3636 return sprintf(page, "%d (%d)\n",
3637 mddev->new_layout, mddev->layout);
3638 return sprintf(page, "%d\n", mddev->layout);
3639}
3640
3641static ssize_t
3642layout_store(struct mddev *mddev, const char *buf, size_t len)
3643{
3644 unsigned int n;
3645 int err;
3646
3647 err = kstrtouint(buf, 10, &n);
3648 if (err < 0)
3649 return err;
3650 err = mddev_lock(mddev);
3651 if (err)
3652 return err;
3653
3654 if (mddev->pers) {
3655 if (mddev->pers->check_reshape == NULL)
3656 err = -EBUSY;
3657 else if (mddev->ro)
3658 err = -EROFS;
3659 else {
3660 mddev->new_layout = n;
3661 err = mddev->pers->check_reshape(mddev);
3662 if (err)
3663 mddev->new_layout = mddev->layout;
3664 }
3665 } else {
3666 mddev->new_layout = n;
3667 if (mddev->reshape_position == MaxSector)
3668 mddev->layout = n;
3669 }
3670 mddev_unlock(mddev);
3671 return err ?: len;
3672}
3673static struct md_sysfs_entry md_layout =
3674__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3675
3676static ssize_t
3677raid_disks_show(struct mddev *mddev, char *page)
3678{
3679 if (mddev->raid_disks == 0)
3680 return 0;
3681 if (mddev->reshape_position != MaxSector &&
3682 mddev->delta_disks != 0)
3683 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3684 mddev->raid_disks - mddev->delta_disks);
3685 return sprintf(page, "%d\n", mddev->raid_disks);
3686}
3687
3688static int update_raid_disks(struct mddev *mddev, int raid_disks);
3689
3690static ssize_t
3691raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3692{
3693 unsigned int n;
3694 int err;
3695
3696 err = kstrtouint(buf, 10, &n);
3697 if (err < 0)
3698 return err;
3699
3700 err = mddev_lock(mddev);
3701 if (err)
3702 return err;
3703 if (mddev->pers)
3704 err = update_raid_disks(mddev, n);
3705 else if (mddev->reshape_position != MaxSector) {
3706 struct md_rdev *rdev;
3707 int olddisks = mddev->raid_disks - mddev->delta_disks;
3708
3709 err = -EINVAL;
3710 rdev_for_each(rdev, mddev) {
3711 if (olddisks < n &&
3712 rdev->data_offset < rdev->new_data_offset)
3713 goto out_unlock;
3714 if (olddisks > n &&
3715 rdev->data_offset > rdev->new_data_offset)
3716 goto out_unlock;
3717 }
3718 err = 0;
3719 mddev->delta_disks = n - olddisks;
3720 mddev->raid_disks = n;
3721 mddev->reshape_backwards = (mddev->delta_disks < 0);
3722 } else
3723 mddev->raid_disks = n;
3724out_unlock:
3725 mddev_unlock(mddev);
3726 return err ? err : len;
3727}
3728static struct md_sysfs_entry md_raid_disks =
3729__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3730
3731static ssize_t
3732chunk_size_show(struct mddev *mddev, char *page)
3733{
3734 if (mddev->reshape_position != MaxSector &&
3735 mddev->chunk_sectors != mddev->new_chunk_sectors)
3736 return sprintf(page, "%d (%d)\n",
3737 mddev->new_chunk_sectors << 9,
3738 mddev->chunk_sectors << 9);
3739 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3740}
3741
3742static ssize_t
3743chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3744{
3745 unsigned long n;
3746 int err;
3747
3748 err = kstrtoul(buf, 10, &n);
3749 if (err < 0)
3750 return err;
3751
3752 err = mddev_lock(mddev);
3753 if (err)
3754 return err;
3755 if (mddev->pers) {
3756 if (mddev->pers->check_reshape == NULL)
3757 err = -EBUSY;
3758 else if (mddev->ro)
3759 err = -EROFS;
3760 else {
3761 mddev->new_chunk_sectors = n >> 9;
3762 err = mddev->pers->check_reshape(mddev);
3763 if (err)
3764 mddev->new_chunk_sectors = mddev->chunk_sectors;
3765 }
3766 } else {
3767 mddev->new_chunk_sectors = n >> 9;
3768 if (mddev->reshape_position == MaxSector)
3769 mddev->chunk_sectors = n >> 9;
3770 }
3771 mddev_unlock(mddev);
3772 return err ?: len;
3773}
3774static struct md_sysfs_entry md_chunk_size =
3775__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3776
3777static ssize_t
3778resync_start_show(struct mddev *mddev, char *page)
3779{
3780 if (mddev->recovery_cp == MaxSector)
3781 return sprintf(page, "none\n");
3782 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3783}
3784
3785static ssize_t
3786resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3787{
3788 unsigned long long n;
3789 int err;
3790
3791 if (cmd_match(buf, "none"))
3792 n = MaxSector;
3793 else {
3794 err = kstrtoull(buf, 10, &n);
3795 if (err < 0)
3796 return err;
3797 if (n != (sector_t)n)
3798 return -EINVAL;
3799 }
3800
3801 err = mddev_lock(mddev);
3802 if (err)
3803 return err;
3804 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3805 err = -EBUSY;
3806
3807 if (!err) {
3808 mddev->recovery_cp = n;
3809 if (mddev->pers)
3810 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3811 }
3812 mddev_unlock(mddev);
3813 return err ?: len;
3814}
3815static struct md_sysfs_entry md_resync_start =
3816__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3817 resync_start_show, resync_start_store);
3818
3819 /*
3820  * The array state can be:
3821  *
3822  * clear
3823  *     No devices, no size, no level
3824  *     Equivalent to STOP_ARRAY ioctl
3825  * inactive
3826  *     May have some settings, but array is not active
3827  *        all IO results in error
3828  *     When written, doesn't tear down array, but just stops it
3829  * suspended (not supported yet)
3830  *     All IO requests will block. The array can be reconfigured.
3831  *     Writing this, if accepted, will block until array is quiescent
3832  * readonly
3833  *     no resync can happen.  no superblocks get written.
3834  *     write requests fail
3835  * read-auto
3836  *     like readonly, but behaves like 'clean' on a write request.
3837  *
3838  * clean - no pending writes, but otherwise active.
3839  *     When written to inactive array, starts without resync
3840  *     If a write request arrives then
3841  *       if metadata is known, mark 'dirty' and switch to 'active'.
3842  *       if not known, block and switch to write-pending
3843  *     If written to an active array that has pending writes, then fails.
3844  * active
3845  *     fully active: IO and resync can be happening.
3846  *     When written to inactive array, starts with resync
3847  *
3848  * write-pending
3849  *     clean, but writes are blocked waiting for 'active' to be written.
3850  *
3851  * active-idle
3852  *     like active, but no writes have been seen for a while (100msec).
3853  *
3854  */
3855enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3856 write_pending, active_idle, bad_word};
3857static char *array_states[] = {
3858 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3859 "write-pending", "active-idle", NULL };
3860
3861static int match_word(const char *word, char **list)
3862{
3863 int n;
3864 for (n=0; list[n]; n++)
3865 if (cmd_match(word, list[n]))
3866 break;
3867 return n;
3868}
3869
3870static ssize_t
3871array_state_show(struct mddev *mddev, char *page)
3872{
3873 enum array_state st = inactive;
3874
3875 if (mddev->pers)
3876 switch(mddev->ro) {
3877 case 1:
3878 st = readonly;
3879 break;
3880 case 2:
3881 st = read_auto;
3882 break;
3883 case 0:
3884 if (mddev->in_sync)
3885 st = clean;
3886 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3887 st = write_pending;
3888 else if (mddev->safemode)
3889 st = active_idle;
3890 else
3891 st = active;
3892 }
3893 else {
3894 if (list_empty(&mddev->disks) &&
3895 mddev->raid_disks == 0 &&
3896 mddev->dev_sectors == 0)
3897 st = clear;
3898 else
3899 st = inactive;
3900 }
3901 return sprintf(page, "%s\n", array_states[st]);
3902}
3903
3904static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3905static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3906static int do_md_run(struct mddev *mddev);
3907static int restart_array(struct mddev *mddev);
3908
3909static ssize_t
3910array_state_store(struct mddev *mddev, const char *buf, size_t len)
3911{
3912 int err;
3913 enum array_state st = match_word(buf, array_states);
3914
3915 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3916 /* don't take reconfig_mutex when toggling between
3917  * clean and active
3918  */
3919 spin_lock(&mddev->lock);
3920 if (st == active) {
3921 restart_array(mddev);
3922 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3923 wake_up(&mddev->sb_wait);
3924 err = 0;
3925 } else {
3926 restart_array(mddev);
3927 if (atomic_read(&mddev->writes_pending) == 0) {
3928 if (mddev->in_sync == 0) {
3929 mddev->in_sync = 1;
3930 if (mddev->safemode == 1)
3931 mddev->safemode = 0;
3932 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3933 }
3934 err = 0;
3935 } else
3936 err = -EBUSY;
3937 }
3938 spin_unlock(&mddev->lock);
3939 return err ?: len;
3940 }
3941 err = mddev_lock(mddev);
3942 if (err)
3943 return err;
3944 err = -EINVAL;
3945 switch(st) {
3946 case bad_word:
3947 break;
3948 case clear:
3949 /* stopping an active array */
3950 err = do_md_stop(mddev, 0, NULL);
3951 break;
3952 case inactive:
3953 /* stopping an active array */
3954 if (mddev->pers)
3955 err = do_md_stop(mddev, 2, NULL);
3956 else
3957 err = 0;
3958 break;
3959 case suspended:
3960 break;
3961 case readonly:
3962 if (mddev->pers)
3963 err = md_set_readonly(mddev, NULL);
3964 else {
3965 mddev->ro = 1;
3966 set_disk_ro(mddev->gendisk, 1);
3967 err = do_md_run(mddev);
3968 }
3969 break;
3970 case read_auto:
3971 if (mddev->pers) {
3972 if (mddev->ro == 0)
3973 err = md_set_readonly(mddev, NULL);
3974 else if (mddev->ro == 1)
3975 err = restart_array(mddev);
3976 if (err == 0) {
3977 mddev->ro = 2;
3978 set_disk_ro(mddev->gendisk, 0);
3979 }
3980 } else {
3981 mddev->ro = 2;
3982 err = do_md_run(mddev);
3983 }
3984 break;
3985 case clean:
3986 if (mddev->pers) {
3987 err = restart_array(mddev);
3988 if (err)
3989 break;
3990 spin_lock(&mddev->lock);
3991 if (atomic_read(&mddev->writes_pending) == 0) {
3992 if (mddev->in_sync == 0) {
3993 mddev->in_sync = 1;
3994 if (mddev->safemode == 1)
3995 mddev->safemode = 0;
3996 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3997 }
3998 err = 0;
3999 } else
4000 err = -EBUSY;
4001 spin_unlock(&mddev->lock);
4002 } else
4003 err = -EINVAL;
4004 break;
4005 case active:
4006 if (mddev->pers) {
4007 err = restart_array(mddev);
4008 if (err)
4009 break;
4010 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
4011 wake_up(&mddev->sb_wait);
4012 err = 0;
4013 } else {
4014 mddev->ro = 0;
4015 set_disk_ro(mddev->gendisk, 0);
4016 err = do_md_run(mddev);
4017 }
4018 break;
4019 case write_pending:
4020 case active_idle:
4021 /* these cannot be set directly */
4022 break;
4023 }
4024
4025 if (!err) {
4026 if (mddev->hold_active == UNTIL_IOCTL)
4027 mddev->hold_active = 0;
4028 sysfs_notify_dirent_safe(mddev->sysfs_state);
4029 }
4030 mddev_unlock(mddev);
4031 return err ?: len;
4032}
4033static struct md_sysfs_entry md_array_state =
4034__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4035
4036static ssize_t
4037max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4038 return sprintf(page, "%d\n",
4039 atomic_read(&mddev->max_corr_read_errors));
4040}
4041
4042static ssize_t
4043max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4044{
4045 unsigned int n;
4046 int rv;
4047
4048 rv = kstrtouint(buf, 10, &n);
4049 if (rv < 0)
4050 return rv;
4051 atomic_set(&mddev->max_corr_read_errors, n);
4052 return len;
4053}
4054
4055static struct md_sysfs_entry max_corr_read_errors =
4056__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4057 max_corrected_read_errors_store);
4058
4059static ssize_t
4060null_show(struct mddev *mddev, char *page)
4061{
4062 return -EINVAL;
4063}
4064
4065static ssize_t
4066new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4067{
4068 /* buf must be %d:%d\n? giving major and minor numbers */
4069 /* The new device is added to the array.
4070  * If the array has a persistent superblock, we read the
4071  * superblock to initialise info and check validity.
4072  * Otherwise, only checking done is that in bind_rdev_to_array,
4073  * which mainly checks size.
4074  */
4075 char *e;
4076 int major = simple_strtoul(buf, &e, 10);
4077 int minor;
4078 dev_t dev;
4079 struct md_rdev *rdev;
4080 int err;
4081
4082 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4083 return -EINVAL;
4084 minor = simple_strtoul(e+1, &e, 10);
4085 if (*e && *e != '\n')
4086 return -EINVAL;
4087 dev = MKDEV(major, minor);
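/* reject numbers that do not survive the dev_t encoding */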
4088 if (major != MAJOR(dev) ||
4089 minor != MINOR(dev))
4090 return -EOVERFLOW;
4091
4092 flush_workqueue(md_misc_wq);
4093
4094 err = mddev_lock(mddev);
4095 if (err)
4096 return err;
4097 if (mddev->persistent) {
4098 rdev = md_import_device(dev, mddev->major_version,
4099 mddev->minor_version);
4100 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4101 struct md_rdev *rdev0
4102 = list_entry(mddev->disks.next,
4103 struct md_rdev, same_set);
4104 err = super_types[mddev->major_version]
4105 .load_super(rdev, rdev0, mddev->minor_version);
4106 if (err < 0)
4107 goto out;
4108 }
4109 } else if (mddev->external)
4110 rdev = md_import_device(dev, -2, -1);
4111 else
4112 rdev = md_import_device(dev, -1, -1);
4113
4114 if (IS_ERR(rdev)) {
4115 mddev_unlock(mddev);
4116 return PTR_ERR(rdev);
4117 }
4118 err = bind_rdev_to_array(rdev, mddev);
4119 out:
4120 if (err)
4121 export_rdev(rdev);
4122 mddev_unlock(mddev);
4123 return err ? err : len;
4124}
4125
4126static struct md_sysfs_entry md_new_device =
4127__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4128
4129static ssize_t
4130bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4131{
4132 char *end;
4133 unsigned long chunk, end_chunk;
4134 int err;
4135
4136 err = mddev_lock(mddev);
4137 if (err)
4138 return err;
4139 if (!mddev->bitmap)
4140 goto out;
4141
4142 while (*buf) {
4143 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4144 if (buf == end) break;
4145 if (*end == '-') {
4146 buf = end + 1;
4147 end_chunk = simple_strtoul(buf, &end, 0);
4148 if (buf == end) break;
4149 }
4150 if (*end && !isspace(*end)) break;
4151 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4152 buf = skip_spaces(end);
4153 }
4154 bitmap_unplug(mddev->bitmap);
4155out:
4156 mddev_unlock(mddev);
4157 return len;
4158}
4159
4160static struct md_sysfs_entry md_bitmap =
4161__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4162
4163static ssize_t
4164size_show(struct mddev *mddev, char *page)
4165{
4166 return sprintf(page, "%llu\n",
4167 (unsigned long long)mddev->dev_sectors / 2);
4168}
4169
4170static int update_size(struct mddev *mddev, sector_t num_sectors);
4171
4172static ssize_t
4173size_store(struct mddev *mddev, const char *buf, size_t len)
4174{
4175 /* If array is inactive, we can reduce the component size, but
4176  * not increase it (except from 0).
4177  * If array is active, we can try an on-line resize
4178  */
4179 sector_t sectors;
4180 int err = strict_blocks_to_sectors(buf, &sectors);
4181
4182 if (err < 0)
4183 return err;
4184 err = mddev_lock(mddev);
4185 if (err)
4186 return err;
4187 if (mddev->pers) {
4188 err = update_size(mddev, sectors);
4189 md_update_sb(mddev, 1);
4190 } else {
4191 if (mddev->dev_sectors == 0 ||
4192 mddev->dev_sectors > sectors)
4193 mddev->dev_sectors = sectors;
4194 else
4195 err = -ENOSPC;
4196 }
4197 mddev_unlock(mddev);
4198 return err ? err : len;
4199}
4200
4201static struct md_sysfs_entry md_size =
4202__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4203
4204 /* Metadata version:
4205  * This is one of
4206  *   'none' for arrays with no metadata (good luck...)
4207  *   'external' for arrays with externally managed metadata,
4208  * or N.M for internally known formats
4209  */
4210static ssize_t
4211metadata_show(struct mddev *mddev, char *page)
4212{
4213 if (mddev->persistent)
4214 return sprintf(page, "%d.%d\n",
4215 mddev->major_version, mddev->minor_version);
4216 else if (mddev->external)
4217 return sprintf(page, "external:%s\n", mddev->metadata_type);
4218 else
4219 return sprintf(page, "none\n");
4220}
4221
4222static ssize_t
4223metadata_store(struct mddev *mddev, const char *buf, size_t len)
4224{
4225 int major, minor;
4226 char *e;
4227 int err;
4228
4229 /* Changing the details of 'external' metadata is
4230  * always permitted.  Otherwise there must be
4231  * no devices attached to the array.
4232  */
4233 err = mddev_lock(mddev);
4234 if (err)
4235 return err;
4236 err = -EBUSY;
4237 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4238 ;
4239 else if (!list_empty(&mddev->disks))
4240 goto out_unlock;
4241
4242 err = 0;
4243 if (cmd_match(buf, "none")) {
4244 mddev->persistent = 0;
4245 mddev->external = 0;
4246 mddev->major_version = 0;
4247 mddev->minor_version = 90;
4248 goto out_unlock;
4249 }
4250 if (strncmp(buf, "external:", 9) == 0) {
4251 size_t namelen = len-9;
4252 if (namelen >= sizeof(mddev->metadata_type))
4253 namelen = sizeof(mddev->metadata_type)-1;
4254 strncpy(mddev->metadata_type, buf+9, namelen);
4255 mddev->metadata_type[namelen] = 0;
4256 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4257 mddev->metadata_type[--namelen] = 0;
4258 mddev->persistent = 0;
4259 mddev->external = 1;
4260 mddev->major_version = 0;
4261 mddev->minor_version = 90;
4262 goto out_unlock;
4263 }
4264 major = simple_strtoul(buf, &e, 10);
4265 err = -EINVAL;
4266 if (e==buf || *e != '.')
4267 goto out_unlock;
4268 buf = e+1;
4269 minor = simple_strtoul(buf, &e, 10);
4270 if (e==buf || (*e && *e != '\n') )
4271 goto out_unlock;
4272 err = -ENOENT;
4273 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4274 goto out_unlock;
4275 mddev->major_version = major;
4276 mddev->minor_version = minor;
4277 mddev->persistent = 1;
4278 mddev->external = 0;
4279 err = 0;
4280out_unlock:
4281 mddev_unlock(mddev);
4282 return err ?: len;
4283}
4284
4285static struct md_sysfs_entry md_metadata =
4286__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4287
4288static ssize_t
4289action_show(struct mddev *mddev, char *page)
4290{
4291 char *type = "idle";
4292 unsigned long recovery = mddev->recovery;
4293 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4294 type = "frozen";
4295 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4296 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4297 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4298 type = "reshape";
4299 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4300 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4301 type = "resync";
4302 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4303 type = "check";
4304 else
4305 type = "repair";
4306 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4307 type = "recover";
4308 else if (mddev->reshape_position != MaxSector)
4309 type = "reshape";
4310 }
4311 return sprintf(page, "%s\n", type);
4312}
4313
4314static ssize_t
4315action_store(struct mddev *mddev, const char *page, size_t len)
4316{
4317 if (!mddev->pers || !mddev->pers->sync_request)
4318 return -EINVAL;
4319
4320
4321 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4322 if (cmd_match(page, "frozen"))
4323 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4324 else
4325 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4326 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4327 mddev_lock(mddev) == 0) {
4328 flush_workqueue(md_misc_wq);
4329 if (mddev->sync_thread) {
4330 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4331 md_reap_sync_thread(mddev);
4332 }
4333 mddev_unlock(mddev);
4334 }
4335 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4336 return -EBUSY;
4337 else if (cmd_match(page, "resync"))
4338 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4339 else if (cmd_match(page, "recover")) {
4340 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4341 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4342 } else if (cmd_match(page, "reshape")) {
4343 int err;
4344 if (mddev->pers->start_reshape == NULL)
4345 return -EINVAL;
4346 err = mddev_lock(mddev);
4347 if (!err) {
4348 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4349 err = -EBUSY;
4350 else {
4351 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4352 err = mddev->pers->start_reshape(mddev);
4353 }
4354 mddev_unlock(mddev);
4355 }
4356 if (err)
4357 return err;
4358 sysfs_notify(&mddev->kobj, NULL, "degraded");
4359 } else {
4360 if (cmd_match(page, "check"))
4361 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4362 else if (!cmd_match(page, "repair"))
4363 return -EINVAL;
4364 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4365 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4366 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4367 }
4368 if (mddev->ro == 2) {
4369 /* A write to sync_action is enough to justify
4370  * canceling read-auto mode
4371  */
4372 mddev->ro = 0;
4373 md_wakeup_thread(mddev->sync_thread);
4374 }
4375 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4376 md_wakeup_thread(mddev->thread);
4377 sysfs_notify_dirent_safe(mddev->sysfs_action);
4378 return len;
4379}
4380
4381static struct md_sysfs_entry md_scan_mode =
4382__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4383
4384static ssize_t
4385last_sync_action_show(struct mddev *mddev, char *page)
4386{
4387 return sprintf(page, "%s\n", mddev->last_sync_action);
4388}
4389
4390static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4391
4392static ssize_t
4393mismatch_cnt_show(struct mddev *mddev, char *page)
4394{
4395 return sprintf(page, "%llu\n",
4396 (unsigned long long)
4397 atomic64_read(&mddev->resync_mismatches));
4398}
4399
4400static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4401
4402static ssize_t
4403sync_min_show(struct mddev *mddev, char *page)
4404{
4405 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4406 mddev->sync_speed_min ? "local": "system");
4407}
4408
4409static ssize_t
4410sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4411{
4412 unsigned int min;
4413 int rv;
4414
4415 if (strncmp(buf, "system", 6)==0) {
4416 min = 0;
4417 } else {
4418 rv = kstrtouint(buf, 10, &min);
4419 if (rv < 0)
4420 return rv;
4421 if (min == 0)
4422 return -EINVAL;
4423 }
4424 mddev->sync_speed_min = min;
4425 return len;
4426}
4427
4428static struct md_sysfs_entry md_sync_min =
4429__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4430
4431static ssize_t
4432sync_max_show(struct mddev *mddev, char *page)
4433{
4434 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4435 mddev->sync_speed_max ? "local": "system");
4436}
4437
4438static ssize_t
4439sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4440{
4441 unsigned int max;
4442 int rv;
4443
4444 if (strncmp(buf, "system", 6)==0) {
4445 max = 0;
4446 } else {
4447 rv = kstrtouint(buf, 10, &max);
4448 if (rv < 0)
4449 return rv;
4450 if (max == 0)
4451 return -EINVAL;
4452 }
4453 mddev->sync_speed_max = max;
4454 return len;
4455}
4456
4457static struct md_sysfs_entry md_sync_max =
4458__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4459
4460static ssize_t
4461degraded_show(struct mddev *mddev, char *page)
4462{
4463 return sprintf(page, "%d\n", mddev->degraded);
4464}
4465static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4466
4467static ssize_t
4468sync_force_parallel_show(struct mddev *mddev, char *page)
4469{
4470 return sprintf(page, "%d\n", mddev->parallel_resync);
4471}
4472
4473static ssize_t
4474sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4475{
4476 long n;
4477
4478 if (kstrtol(buf, 10, &n))
4479 return -EINVAL;
4480
4481 if (n != 0 && n != 1)
4482 return -EINVAL;
4483
4484 mddev->parallel_resync = n;
4485
4486 if (mddev->sync_thread)
4487 wake_up(&resync_wait);
4488
4489 return len;
4490}
4491
4492
4493static struct md_sysfs_entry md_sync_force_parallel =
4494__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4495 sync_force_parallel_show, sync_force_parallel_store);
4496
4497static ssize_t
4498sync_speed_show(struct mddev *mddev, char *page)
4499{
4500 unsigned long resync, dt, db;
4501 if (mddev->curr_resync == 0)
4502 return sprintf(page, "none\n");
4503 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4504 dt = (jiffies - mddev->resync_mark) / HZ;
4505 if (!dt) dt++;
4506 db = resync - mddev->resync_mark_cnt;
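/* db sectors were resynced in dt seconds; dividing by 2 converts
 * sectors to KiB, giving a rate in KiB/sec */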
4507 return sprintf(page, "%lu\n", db/dt/2);
4508}
4509
4510static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4511
4512static ssize_t
4513sync_completed_show(struct mddev *mddev, char *page)
4514{
4515 unsigned long long max_sectors, resync;
4516
4517 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4518 return sprintf(page, "none\n");
4519
4520 if (mddev->curr_resync == 1 ||
4521 mddev->curr_resync == 2)
4522 return sprintf(page, "delayed\n");
4523
4524 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4525 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4526 max_sectors = mddev->resync_max_sectors;
4527 else
4528 max_sectors = mddev->dev_sectors;
4529
4530 resync = mddev->curr_resync_completed;
4531 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4532}
4533
4534static struct md_sysfs_entry md_sync_completed =
4535 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4536
4537static ssize_t
4538min_sync_show(struct mddev *mddev, char *page)
4539{
4540 return sprintf(page, "%llu\n",
4541 (unsigned long long)mddev->resync_min);
4542}
4543static ssize_t
4544min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4545{
4546 unsigned long long min;
4547 int err;
4548
4549 if (kstrtoull(buf, 10, &min))
4550 return -EINVAL;
4551
4552 spin_lock(&mddev->lock);
4553 err = -EINVAL;
4554 if (min > mddev->resync_max)
4555 goto out_unlock;
4556
4557 err = -EBUSY;
4558 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4559 goto out_unlock;

	/* Round down to multiple of 4K for safety */
4562 mddev->resync_min = round_down(min, 8);
4563 err = 0;
4564
4565out_unlock:
4566 spin_unlock(&mddev->lock);
4567 return err ?: len;
4568}
4569
4570static struct md_sysfs_entry md_min_sync =
4571__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4572
4573static ssize_t
4574max_sync_show(struct mddev *mddev, char *page)
4575{
4576 if (mddev->resync_max == MaxSector)
4577 return sprintf(page, "max\n");
4578 else
4579 return sprintf(page, "%llu\n",
4580 (unsigned long long)mddev->resync_max);
4581}
4582static ssize_t
4583max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4584{
4585 int err;
4586 spin_lock(&mddev->lock);
4587 if (strncmp(buf, "max", 3) == 0)
4588 mddev->resync_max = MaxSector;
4589 else {
4590 unsigned long long max;
4591 int chunk;
4592
4593 err = -EINVAL;
4594 if (kstrtoull(buf, 10, &max))
4595 goto out_unlock;
4596 if (max < mddev->resync_min)
4597 goto out_unlock;
4598
4599 err = -EBUSY;
4600 if (max < mddev->resync_max &&
4601 mddev->ro == 0 &&
4602 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4603 goto out_unlock;

		/* Must be a multiple of chunk_size */
4606 chunk = mddev->chunk_sectors;
4607 if (chunk) {
4608 sector_t temp = max;
4609
4610 err = -EINVAL;
4611 if (sector_div(temp, chunk))
4612 goto out_unlock;
4613 }
4614 mddev->resync_max = max;
4615 }
4616 wake_up(&mddev->recovery_wait);
4617 err = 0;
4618out_unlock:
4619 spin_unlock(&mddev->lock);
4620 return err ?: len;
4621}
4622
4623static struct md_sysfs_entry md_max_sync =
4624__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4625
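/*
 * suspend_lo/suspend_hi bound a region of the array, in sectors, in which
 * writes are temporarily held off.  Shrinking the region only needs the
 * light-weight quiesce(mddev, 2); growing it needs a full quiesce(1)/
 * quiesce(0) cycle so that in-flight writes drain first (see the stores
 * below).
 */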
4626static ssize_t
4627suspend_lo_show(struct mddev *mddev, char *page)
4628{
4629 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4630}
4631
4632static ssize_t
4633suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4634{
4635 unsigned long long old, new;
4636 int err;
4637
4638 err = kstrtoull(buf, 10, &new);
4639 if (err < 0)
4640 return err;
4641 if (new != (sector_t)new)
4642 return -EINVAL;
4643
4644 err = mddev_lock(mddev);
4645 if (err)
4646 return err;
4647 err = -EINVAL;
4648 if (mddev->pers == NULL ||
4649 mddev->pers->quiesce == NULL)
4650 goto unlock;
4651 old = mddev->suspend_lo;
4652 mddev->suspend_lo = new;
	if (new >= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4661 err = 0;
4662unlock:
4663 mddev_unlock(mddev);
4664 return err ?: len;
4665}
4666static struct md_sysfs_entry md_suspend_lo =
4667__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4668
4669static ssize_t
4670suspend_hi_show(struct mddev *mddev, char *page)
4671{
4672 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4673}
4674
4675static ssize_t
4676suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4677{
4678 unsigned long long old, new;
4679 int err;
4680
4681 err = kstrtoull(buf, 10, &new);
4682 if (err < 0)
4683 return err;
4684 if (new != (sector_t)new)
4685 return -EINVAL;
4686
4687 err = mddev_lock(mddev);
4688 if (err)
4689 return err;
4690 err = -EINVAL;
4691 if (mddev->pers == NULL ||
4692 mddev->pers->quiesce == NULL)
4693 goto unlock;
4694 old = mddev->suspend_hi;
4695 mddev->suspend_hi = new;
	if (new <= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4704 err = 0;
4705unlock:
4706 mddev_unlock(mddev);
4707 return err ?: len;
4708}
4709static struct md_sysfs_entry md_suspend_hi =
4710__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4711
4712static ssize_t
4713reshape_position_show(struct mddev *mddev, char *page)
4714{
4715 if (mddev->reshape_position != MaxSector)
4716 return sprintf(page, "%llu\n",
4717 (unsigned long long)mddev->reshape_position);
4718 strcpy(page, "none\n");
4719 return 5;
4720}
4721
4722static ssize_t
4723reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4724{
4725 struct md_rdev *rdev;
4726 unsigned long long new;
4727 int err;
4728
4729 err = kstrtoull(buf, 10, &new);
4730 if (err < 0)
4731 return err;
4732 if (new != (sector_t)new)
4733 return -EINVAL;
4734 err = mddev_lock(mddev);
4735 if (err)
4736 return err;
4737 err = -EBUSY;
4738 if (mddev->pers)
4739 goto unlock;
4740 mddev->reshape_position = new;
4741 mddev->delta_disks = 0;
4742 mddev->reshape_backwards = 0;
4743 mddev->new_level = mddev->level;
4744 mddev->new_layout = mddev->layout;
4745 mddev->new_chunk_sectors = mddev->chunk_sectors;
4746 rdev_for_each(rdev, mddev)
4747 rdev->new_data_offset = rdev->data_offset;
4748 err = 0;
4749unlock:
4750 mddev_unlock(mddev);
4751 return err ?: len;
4752}
4753
4754static struct md_sysfs_entry md_reshape_position =
4755__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4756 reshape_position_store);
4757
4758static ssize_t
4759reshape_direction_show(struct mddev *mddev, char *page)
4760{
4761 return sprintf(page, "%s\n",
4762 mddev->reshape_backwards ? "backwards" : "forwards");
4763}
4764
4765static ssize_t
4766reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4767{
4768 int backwards = 0;
4769 int err;
4770
4771 if (cmd_match(buf, "forwards"))
4772 backwards = 0;
4773 else if (cmd_match(buf, "backwards"))
4774 backwards = 1;
4775 else
4776 return -EINVAL;
4777 if (mddev->reshape_backwards == backwards)
4778 return len;
4779
4780 err = mddev_lock(mddev);
4781 if (err)
4782 return err;
4783
4784 if (mddev->delta_disks)
4785 err = -EBUSY;
4786 else if (mddev->persistent &&
4787 mddev->major_version == 0)
4788 err = -EINVAL;
4789 else
4790 mddev->reshape_backwards = backwards;
4791 mddev_unlock(mddev);
4792 return err ?: len;
4793}
4794
4795static struct md_sysfs_entry md_reshape_direction =
4796__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4797 reshape_direction_store);
4798
4799static ssize_t
4800array_size_show(struct mddev *mddev, char *page)
4801{
4802 if (mddev->external_size)
4803 return sprintf(page, "%llu\n",
4804 (unsigned long long)mddev->array_sectors/2);
4805 else
4806 return sprintf(page, "default\n");
4807}
4808
4809static ssize_t
4810array_size_store(struct mddev *mddev, const char *buf, size_t len)
4811{
4812 sector_t sectors;
4813 int err;
4814
4815 err = mddev_lock(mddev);
4816 if (err)
4817 return err;
4818
4819 if (strncmp(buf, "default", 7) == 0) {
4820 if (mddev->pers)
4821 sectors = mddev->pers->size(mddev, 0, 0);
4822 else
4823 sectors = mddev->array_sectors;
4824
4825 mddev->external_size = 0;
4826 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4828 err = -EINVAL;
4829 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4830 err = -E2BIG;
4831 else
4832 mddev->external_size = 1;
4833 }
4834
4835 if (!err) {
4836 mddev->array_sectors = sectors;
4837 if (mddev->pers) {
4838 set_capacity(mddev->gendisk, mddev->array_sectors);
4839 revalidate_disk(mddev->gendisk);
4840 }
4841 }
4842 mddev_unlock(mddev);
4843 return err ?: len;
4844}
4845
4846static struct md_sysfs_entry md_array_size =
4847__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4848 array_size_store);
4849
4850static struct attribute *md_default_attrs[] = {
4851 &md_level.attr,
4852 &md_layout.attr,
4853 &md_raid_disks.attr,
4854 &md_chunk_size.attr,
4855 &md_size.attr,
4856 &md_resync_start.attr,
4857 &md_metadata.attr,
4858 &md_new_device.attr,
4859 &md_safe_delay.attr,
4860 &md_array_state.attr,
4861 &md_reshape_position.attr,
4862 &md_reshape_direction.attr,
4863 &md_array_size.attr,
4864 &max_corr_read_errors.attr,
4865 NULL,
4866};
4867
4868static struct attribute *md_redundancy_attrs[] = {
4869 &md_scan_mode.attr,
4870 &md_last_scan_mode.attr,
4871 &md_mismatches.attr,
4872 &md_sync_min.attr,
4873 &md_sync_max.attr,
4874 &md_sync_speed.attr,
4875 &md_sync_force_parallel.attr,
4876 &md_sync_completed.attr,
4877 &md_min_sync.attr,
4878 &md_max_sync.attr,
4879 &md_suspend_lo.attr,
4880 &md_suspend_hi.attr,
4881 &md_bitmap.attr,
4882 &md_degraded.attr,
4883 NULL,
4884};
4885static struct attribute_group md_redundancy_group = {
4886 .name = NULL,
4887 .attrs = md_redundancy_attrs,
4888};
4889
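/*
 * The sysfs show/store entry points below take a reference on the mddev
 * (unless it has already been removed from all_mddevs) so that it cannot
 * be freed while an attribute handler is running.
 */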
4890static ssize_t
4891md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4892{
4893 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4894 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4895 ssize_t rv;
4896
4897 if (!entry->show)
4898 return -EIO;
4899 spin_lock(&all_mddevs_lock);
4900 if (list_empty(&mddev->all_mddevs)) {
4901 spin_unlock(&all_mddevs_lock);
4902 return -EBUSY;
4903 }
4904 mddev_get(mddev);
4905 spin_unlock(&all_mddevs_lock);
4906
4907 rv = entry->show(mddev, page);
4908 mddev_put(mddev);
4909 return rv;
4910}
4911
4912static ssize_t
4913md_attr_store(struct kobject *kobj, struct attribute *attr,
4914 const char *page, size_t length)
4915{
4916 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4917 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4918 ssize_t rv;
4919
4920 if (!entry->store)
4921 return -EIO;
4922 if (!capable(CAP_SYS_ADMIN))
4923 return -EACCES;
4924 spin_lock(&all_mddevs_lock);
4925 if (list_empty(&mddev->all_mddevs)) {
4926 spin_unlock(&all_mddevs_lock);
4927 return -EBUSY;
4928 }
4929 mddev_get(mddev);
4930 spin_unlock(&all_mddevs_lock);
4931 rv = entry->store(mddev, page, length);
4932 mddev_put(mddev);
4933 return rv;
4934}
4935
4936static void md_free(struct kobject *ko)
4937{
4938 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4939
4940 if (mddev->sysfs_state)
4941 sysfs_put(mddev->sysfs_state);
4942
4943 if (mddev->queue)
4944 blk_cleanup_queue(mddev->queue);
4945 if (mddev->gendisk) {
4946 del_gendisk(mddev->gendisk);
4947 put_disk(mddev->gendisk);
4948 }
4949
4950 kfree(mddev);
4951}
4952
4953static const struct sysfs_ops md_sysfs_ops = {
4954 .show = md_attr_show,
4955 .store = md_attr_store,
4956};
4957static struct kobj_type md_ktype = {
4958 .release = md_free,
4959 .sysfs_ops = &md_sysfs_ops,
4960 .default_attrs = md_default_attrs,
4961};
4962
4963int mdp_major = 0;
4964
4965static void mddev_delayed_delete(struct work_struct *ws)
4966{
4967 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4968
4969 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4970 kobject_del(&mddev->kobj);
4971 kobject_put(&mddev->kobj);
4972}
4973
4974static int md_alloc(dev_t dev, char *name)
4975{
4976 static DEFINE_MUTEX(disks_mutex);
4977 struct mddev *mddev = mddev_find(dev);
4978 struct gendisk *disk;
4979 int partitioned;
4980 int shift;
4981 int unit;
4982 int error;
4983
4984 if (!mddev)
4985 return -ENODEV;
4986
4987 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4988 shift = partitioned ? MdpMinorShift : 0;
4989 unit = MINOR(mddev->unit) >> shift;
4990
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
4995
4996 mutex_lock(&disks_mutex);
4997 error = -EEXIST;
4998 if (mddev->gendisk)
4999 goto abort;
5000
5001 if (name) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5004 struct mddev *mddev2;
5005 spin_lock(&all_mddevs_lock);
5006
5007 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5008 if (mddev2->gendisk &&
5009 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5010 spin_unlock(&all_mddevs_lock);
5011 goto abort;
5012 }
5013 spin_unlock(&all_mddevs_lock);
5014 }
5015
5016 error = -ENOMEM;
5017 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5018 if (!mddev->queue)
5019 goto abort;
5020 mddev->queue->queuedata = mddev;
5021
5022 blk_queue_make_request(mddev->queue, md_make_request);
5023 blk_set_stacking_limits(&mddev->queue->limits);
5024
5025 disk = alloc_disk(1 << shift);
5026 if (!disk) {
5027 blk_cleanup_queue(mddev->queue);
5028 mddev->queue = NULL;
5029 goto abort;
5030 }
5031 disk->major = MAJOR(mddev->unit);
5032 disk->first_minor = unit << shift;
5033 if (name)
5034 strcpy(disk->disk_name, name);
5035 else if (partitioned)
5036 sprintf(disk->disk_name, "md_d%d", unit);
5037 else
5038 sprintf(disk->disk_name, "md%d", unit);
5039 disk->fops = &md_fops;
5040 disk->private_data = mddev;
5041 disk->queue = mddev->queue;
5042 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
5043
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
5047 disk->flags |= GENHD_FL_EXT_DEVT;
5048 mddev->gendisk = disk;

	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
5052 mutex_lock(&mddev->open_mutex);
5053 add_disk(disk);
5054
5055 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5056 &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/* This isn't possible, but as kobject_init_and_add is marked
		 * __must_check, we must do something with the result
		 */
		printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
		       disk->disk_name);
		error = 0;
	}
5065 if (mddev->kobj.sd &&
5066 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5067 printk(KERN_DEBUG "pointless warning\n");
5068 mutex_unlock(&mddev->open_mutex);
5069 abort:
5070 mutex_unlock(&disks_mutex);
5071 if (!error && mddev->kobj.sd) {
5072 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5073 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5074 }
5075 mddev_put(mddev);
5076 return error;
5077}
5078
5079static struct kobject *md_probe(dev_t dev, int *part, void *data)
5080{
5081 md_alloc(dev, NULL);
5082 return NULL;
5083}
5084
5085static int add_named_array(const char *val, struct kernel_param *kp)
5086{
	/* val must be "md_*".  We allocate an array with a large free minor
	 * number and set the name to val; val must not already be an active
	 * array name.
	 */
5091 int len = strlen(val);
5092 char buf[DISK_NAME_LEN];
5093
5094 while (len && val[len-1] == '\n')
5095 len--;
5096 if (len >= DISK_NAME_LEN)
5097 return -E2BIG;
5098 strlcpy(buf, val, len+1);
5099 if (strncmp(buf, "md_", 3) != 0)
5100 return -EINVAL;
5101 return md_alloc(0, buf);
5102}
5103
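/* Safemode timer expired: if no writes are pending, enter safemode so the
 * array can be marked clean.  Arrays with externally-managed metadata are
 * notified through sysfs; the md thread does the rest.
 */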
5104static void md_safemode_timeout(unsigned long data)
5105{
5106 struct mddev *mddev = (struct mddev *) data;
5107
5108 if (!atomic_read(&mddev->writes_pending)) {
5109 mddev->safemode = 1;
5110 if (mddev->external)
5111 sysfs_notify_dirent_safe(mddev->sysfs_state);
5112 }
5113 md_wakeup_thread(mddev->thread);
5114}
5115
5116static int start_dirty_degraded;
5117
5118int md_run(struct mddev *mddev)
5119{
5120 int err;
5121 struct md_rdev *rdev;
5122 struct md_personality *pers;
5123
	if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
		return -EINVAL;
5127
5128 if (mddev->pers)
5129 return -EBUSY;
5130
5131 if (mddev->sysfs_active)
5132 return -EBUSY;
5133
	/*
	 * Analyze all RAID superblock(s)
	 */
5137 if (!mddev->raid_disks) {
5138 if (!mddev->persistent)
5139 return -EINVAL;
5140 analyze_sbs(mddev);
5141 }
5142
5143 if (mddev->level != LEVEL_NONE)
5144 request_module("md-level-%d", mddev->level);
5145 else if (mddev->clevel[0])
5146 request_module("md-%s", mddev->clevel);
5147
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5153 rdev_for_each(rdev, mddev) {
5154 if (test_bit(Faulty, &rdev->flags))
5155 continue;
5156 sync_blockdev(rdev->bdev);
5157 invalidate_bdev(rdev->bdev);

		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues were handled elsewhere.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to do */;
5165 } else if (rdev->data_offset < rdev->sb_start) {
5166 if (mddev->dev_sectors &&
5167 rdev->data_offset + mddev->dev_sectors
5168 > rdev->sb_start) {
5169 printk("md: %s: data overlaps metadata\n",
5170 mdname(mddev));
5171 return -EINVAL;
5172 }
5173 } else {
5174 if (rdev->sb_start + rdev->sb_size/512
5175 > rdev->data_offset) {
5176 printk("md: %s: metadata overlaps data\n",
5177 mdname(mddev));
5178 return -EINVAL;
5179 }
5180 }
5181 sysfs_notify_dirent_safe(rdev->sysfs_state);
5182 }
5183
5184 if (mddev->bio_set == NULL)
5185 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5186
5187 spin_lock(&pers_lock);
5188 pers = find_pers(mddev->level, mddev->clevel);
5189 if (!pers || !try_module_get(pers->owner)) {
5190 spin_unlock(&pers_lock);
5191 if (mddev->level != LEVEL_NONE)
5192 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5193 mddev->level);
5194 else
5195 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5196 mddev->clevel);
5197 return -EINVAL;
5198 }
5199 spin_unlock(&pers_lock);
5200 if (mddev->level != pers->level) {
5201 mddev->level = pers->level;
5202 mddev->new_level = pers->level;
5203 }
5204 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5205
5206 if (mddev->reshape_position != MaxSector &&
5207 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5209 module_put(pers->owner);
5210 return -EINVAL;
5211 }
5212
5213 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
5217 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5218 struct md_rdev *rdev2;
5219 int warned = 0;
5220
5221 rdev_for_each(rdev, mddev)
5222 rdev_for_each(rdev2, mddev) {
5223 if (rdev < rdev2 &&
5224 rdev->bdev->bd_contains ==
5225 rdev2->bdev->bd_contains) {
5226 printk(KERN_WARNING
5227 "%s: WARNING: %s appears to be"
5228 " on the same physical disk as"
5229 " %s.\n",
5230 mdname(mddev),
5231 bdevname(rdev->bdev,b),
5232 bdevname(rdev2->bdev,b2));
5233 warned = 1;
5234 }
5235 }
5236
5237 if (warned)
5238 printk(KERN_WARNING
5239 "True protection against single-disk"
5240 " failure might be compromised.\n");
5241 }
5242
5243 mddev->recovery = 0;
5244
5245 mddev->resync_max_sectors = mddev->dev_sectors;
5246
5247 mddev->ok_start_degraded = start_dirty_degraded;
5248
5249 if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */
5251
5252 err = pers->run(mddev);
5253 if (err)
5254 printk(KERN_ERR "md: pers->run() failed ...\n");
5255 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5256 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5257 " but 'external_size' not in effect?\n", __func__);
5258 printk(KERN_ERR
5259 "md: invalid array_size %llu > default size %llu\n",
5260 (unsigned long long)mddev->array_sectors / 2,
5261 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5262 err = -EINVAL;
5263 }
5264 if (err == 0 && pers->sync_request &&
5265 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5266 struct bitmap *bitmap;
5267
5268 bitmap = bitmap_create(mddev, -1);
5269 if (IS_ERR(bitmap)) {
5270 err = PTR_ERR(bitmap);
5271 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5272 mdname(mddev), err);
5273 } else
5274 mddev->bitmap = bitmap;
5275
5276 }
5277 if (err) {
5278 mddev_detach(mddev);
5279 if (mddev->private)
5280 pers->free(mddev, mddev->private);
5281 mddev->private = NULL;
5282 module_put(pers->owner);
5283 bitmap_destroy(mddev);
5284 return err;
5285 }
5286 if (mddev->queue) {
5287 mddev->queue->backing_dev_info.congested_data = mddev;
5288 mddev->queue->backing_dev_info.congested_fn = md_congested;
5289 }
5290 if (pers->sync_request) {
5291 if (mddev->kobj.sd &&
5292 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5293 printk(KERN_WARNING
5294 "md: cannot register extra attributes for %s\n",
5295 mdname(mddev));
5296 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
5298 mddev->ro = 0;
5299
5300 atomic_set(&mddev->writes_pending,0);
5301 atomic_set(&mddev->max_corr_read_errors,
5302 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5303 mddev->safemode = 0;
5304 if (mddev_is_clustered(mddev))
5305 mddev->safemode_delay = 0;
5306 else
		mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
5308 mddev->in_sync = 1;
5309 smp_wmb();
5310 spin_lock(&mddev->lock);
5311 mddev->pers = pers;
5312 spin_unlock(&mddev->lock);
5313 rdev_for_each(rdev, mddev)
5314 if (rdev->raid_disk >= 0)
5315 if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
5317
5318 if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
5322 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5323 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5324
5325 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5326 md_update_sb(mddev, 0);
5327
5328 md_new_event(mddev);
5329 sysfs_notify_dirent_safe(mddev->sysfs_state);
5330 sysfs_notify_dirent_safe(mddev->sysfs_action);
5331 sysfs_notify(&mddev->kobj, NULL, "degraded");
5332 return 0;
5333}
5334EXPORT_SYMBOL_GPL(md_run);
5335
5336static int do_md_run(struct mddev *mddev)
5337{
5338 int err;
5339
5340 err = md_run(mddev);
5341 if (err)
5342 goto out;
5343 err = bitmap_load(mddev);
5344 if (err) {
5345 bitmap_destroy(mddev);
5346 goto out;
5347 }
5348
5349 if (mddev_is_clustered(mddev))
5350 md_allow_write(mddev);
5351
5352 md_wakeup_thread(mddev->thread);
5353 md_wakeup_thread(mddev->sync_thread);
5354
5355 set_capacity(mddev->gendisk, mddev->array_sectors);
5356 revalidate_disk(mddev->gendisk);
5357 mddev->changed = 1;
5358 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5359out:
5360 return err;
5361}
5362
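/* restart_array: switch an assembled, currently read-only array back to
 * read-write, refusing if a required journal device is missing or faulty.
 */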
5363static int restart_array(struct mddev *mddev)
5364{
5365 struct gendisk *disk = mddev->gendisk;
5366
	/* Complain if it has no devices */
5368 if (list_empty(&mddev->disks))
5369 return -ENXIO;
5370 if (!mddev->pers)
5371 return -EINVAL;
5372 if (!mddev->ro)
5373 return -EBUSY;
5374 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5375 struct md_rdev *rdev;
5376 bool has_journal = false;
5377
5378 rcu_read_lock();
5379 rdev_for_each_rcu(rdev, mddev) {
5380 if (test_bit(Journal, &rdev->flags) &&
5381 !test_bit(Faulty, &rdev->flags)) {
5382 has_journal = true;
5383 break;
5384 }
5385 }
5386 rcu_read_unlock();
5387
		/* Don't restart rw with journal missing/faulty */
5389 if (!has_journal)
5390 return -EINVAL;
5391 }
5392
5393 mddev->safemode = 0;
5394 mddev->ro = 0;
5395 set_disk_ro(disk, 0);
5396 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5397 mdname(mddev));
5398
5399 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5400 md_wakeup_thread(mddev->thread);
5401 md_wakeup_thread(mddev->sync_thread);
5402 sysfs_notify_dirent_safe(mddev->sysfs_state);
5403 return 0;
5404}
5405
5406static void md_clean(struct mddev *mddev)
5407{
5408 mddev->array_sectors = 0;
5409 mddev->external_size = 0;
5410 mddev->dev_sectors = 0;
5411 mddev->raid_disks = 0;
5412 mddev->recovery_cp = 0;
5413 mddev->resync_min = 0;
5414 mddev->resync_max = MaxSector;
5415 mddev->reshape_position = MaxSector;
5416 mddev->external = 0;
5417 mddev->persistent = 0;
5418 mddev->level = LEVEL_NONE;
5419 mddev->clevel[0] = 0;
5420 mddev->flags = 0;
5421 mddev->ro = 0;
5422 mddev->metadata_type[0] = 0;
5423 mddev->chunk_sectors = 0;
5424 mddev->ctime = mddev->utime = 0;
5425 mddev->layout = 0;
5426 mddev->max_disks = 0;
5427 mddev->events = 0;
5428 mddev->can_decrease_events = 0;
5429 mddev->delta_disks = 0;
5430 mddev->reshape_backwards = 0;
5431 mddev->new_level = LEVEL_NONE;
5432 mddev->new_layout = 0;
5433 mddev->new_chunk_sectors = 0;
5434 mddev->curr_resync = 0;
5435 atomic64_set(&mddev->resync_mismatches, 0);
5436 mddev->suspend_lo = mddev->suspend_hi = 0;
5437 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5438 mddev->recovery = 0;
5439 mddev->in_sync = 0;
5440 mddev->changed = 0;
5441 mddev->degraded = 0;
5442 mddev->safemode = 0;
5443 mddev->private = NULL;
5444 mddev->bitmap_info.offset = 0;
5445 mddev->bitmap_info.default_offset = 0;
5446 mddev->bitmap_info.default_space = 0;
5447 mddev->bitmap_info.chunksize = 0;
5448 mddev->bitmap_info.daemon_sleep = 0;
5449 mddev->bitmap_info.max_write_behind = 0;
5450}
5451
5452static void __md_stop_writes(struct mddev *mddev)
5453{
5454 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5455 flush_workqueue(md_misc_wq);
5456 if (mddev->sync_thread) {
5457 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5458 md_reap_sync_thread(mddev);
5459 }
5460
5461 del_timer_sync(&mddev->safemode_timer);
5462
5463 bitmap_flush(mddev);
5464 md_super_wait(mddev);
5465
5466 if (mddev->ro == 0 &&
5467 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5468 (mddev->flags & MD_UPDATE_SB_FLAGS))) {
		/* mark array as shutdown cleanly */
5470 if (!mddev_is_clustered(mddev))
5471 mddev->in_sync = 1;
5472 md_update_sb(mddev, 1);
5473 }
5474}
5475
5476void md_stop_writes(struct mddev *mddev)
5477{
5478 mddev_lock_nointr(mddev);
5479 __md_stop_writes(mddev);
5480 mddev_unlock(mddev);
5481}
5482EXPORT_SYMBOL_GPL(md_stop_writes);
5483
5484static void mddev_detach(struct mddev *mddev)
5485{
5486 struct bitmap *bitmap = mddev->bitmap;

	/* wait for behind writes to complete */
	if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
		printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n",
		       mdname(mddev));
		/* need to kick something here to make sure I/O goes? */
		wait_event(bitmap->behind_wait,
			   atomic_read(&bitmap->behind_writes) == 0);
	}
5494 }
5495 if (mddev->pers && mddev->pers->quiesce) {
5496 mddev->pers->quiesce(mddev, 1);
5497 mddev->pers->quiesce(mddev, 0);
5498 }
5499 md_unregister_thread(&mddev->thread);
5500 if (mddev->queue)
5501 blk_sync_queue(mddev->queue);
5502}
5503
5504static void __md_stop(struct mddev *mddev)
5505{
5506 struct md_personality *pers = mddev->pers;
5507 mddev_detach(mddev);
5508
5509 flush_workqueue(md_misc_wq);
5510 spin_lock(&mddev->lock);
5511 mddev->pers = NULL;
5512 spin_unlock(&mddev->lock);
5513 pers->free(mddev, mddev->private);
5514 mddev->private = NULL;
5515 if (pers->sync_request && mddev->to_remove == NULL)
5516 mddev->to_remove = &md_redundancy_group;
5517 module_put(pers->owner);
5518 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5519}
5520
5521void md_stop(struct mddev *mddev)
5522{
	/* stop the array and free any attached data structures.
	 * This is called from dm-raid
	 */
5526 __md_stop(mddev);
5527 bitmap_destroy(mddev);
5528 if (mddev->bio_set)
5529 bioset_free(mddev->bio_set);
5530}
5531
5532EXPORT_SYMBOL_GPL(md_stop);
5533
5534static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5535{
5536 int err = 0;
5537 int did_freeze = 0;
5538
5539 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5540 did_freeze = 1;
5541 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5542 md_wakeup_thread(mddev->thread);
5543 }
5544 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5545 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
		wake_up_process(mddev->sync_thread->tsk);
5550
5551 if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
5552 return -EBUSY;
5553 mddev_unlock(mddev);
5554 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5555 &mddev->recovery));
5556 wait_event(mddev->sb_wait,
5557 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5558 mddev_lock_nointr(mddev);
5559
5560 mutex_lock(&mddev->open_mutex);
5561 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5562 mddev->sync_thread ||
5563 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5564 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5565 printk("md: %s still in use.\n",mdname(mddev));
5566 if (did_freeze) {
5567 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5568 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5569 md_wakeup_thread(mddev->thread);
5570 }
5571 err = -EBUSY;
5572 goto out;
5573 }
5574 if (mddev->pers) {
5575 __md_stop_writes(mddev);
5576
5577 err = -ENXIO;
5578 if (mddev->ro==1)
5579 goto out;
5580 mddev->ro = 1;
5581 set_disk_ro(mddev->gendisk, 1);
5582 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5583 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5584 md_wakeup_thread(mddev->thread);
5585 sysfs_notify_dirent_safe(mddev->sysfs_state);
5586 err = 0;
5587 }
5588out:
5589 mutex_unlock(&mddev->open_mutex);
5590 return err;
5591}
5592
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
5597static int do_md_stop(struct mddev *mddev, int mode,
5598 struct block_device *bdev)
5599{
5600 struct gendisk *disk = mddev->gendisk;
5601 struct md_rdev *rdev;
5602 int did_freeze = 0;
5603
5604 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5605 did_freeze = 1;
5606 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5607 md_wakeup_thread(mddev->thread);
5608 }
5609 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5610 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
		wake_up_process(mddev->sync_thread->tsk);
5615
5616 mddev_unlock(mddev);
5617 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5618 !test_bit(MD_RECOVERY_RUNNING,
5619 &mddev->recovery)));
5620 mddev_lock_nointr(mddev);
5621
5622 mutex_lock(&mddev->open_mutex);
5623 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5624 mddev->sysfs_active ||
5625 mddev->sync_thread ||
5626 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5627 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5628 printk("md: %s still in use.\n",mdname(mddev));
5629 mutex_unlock(&mddev->open_mutex);
5630 if (did_freeze) {
5631 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5632 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5633 md_wakeup_thread(mddev->thread);
5634 }
5635 return -EBUSY;
5636 }
5637 if (mddev->pers) {
5638 if (mddev->ro)
5639 set_disk_ro(disk, 0);
5640
5641 __md_stop_writes(mddev);
5642 __md_stop(mddev);
5643 mddev->queue->backing_dev_info.congested_fn = NULL;

		/* tell userspace to handle 'inactive' */
5646 sysfs_notify_dirent_safe(mddev->sysfs_state);
5647
5648 rdev_for_each(rdev, mddev)
5649 if (rdev->raid_disk >= 0)
5650 sysfs_unlink_rdev(mddev, rdev);
5651
5652 set_capacity(disk, 0);
5653 mutex_unlock(&mddev->open_mutex);
5654 mddev->changed = 1;
5655 revalidate_disk(disk);
5656
5657 if (mddev->ro)
5658 mddev->ro = 0;
5659 } else
5660 mutex_unlock(&mddev->open_mutex);
5661
	/*
	 * Free resources if final stop
	 */
5664 if (mode == 0) {
5665 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5666
5667 bitmap_destroy(mddev);
5668 if (mddev->bitmap_info.file) {
5669 struct file *f = mddev->bitmap_info.file;
5670 spin_lock(&mddev->lock);
5671 mddev->bitmap_info.file = NULL;
5672 spin_unlock(&mddev->lock);
5673 fput(f);
5674 }
5675 mddev->bitmap_info.offset = 0;
5676
5677 export_array(mddev);
5678
5679 md_clean(mddev);
5680 if (mddev->hold_active == UNTIL_STOP)
5681 mddev->hold_active = 0;
5682 }
5683 md_new_event(mddev);
5684 sysfs_notify_dirent_safe(mddev->sysfs_state);
5685 return 0;
5686}
5687
5688#ifndef MODULE
5689static void autorun_array(struct mddev *mddev)
5690{
5691 struct md_rdev *rdev;
5692 int err;
5693
5694 if (list_empty(&mddev->disks))
5695 return;
5696
5697 printk(KERN_INFO "md: running: ");
5698
5699 rdev_for_each(rdev, mddev) {
5700 char b[BDEVNAME_SIZE];
5701 printk("<%s>", bdevname(rdev->bdev,b));
5702 }
5703 printk("\n");
5704
5705 err = do_md_run(mddev);
5706 if (err) {
5707 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5708 do_md_stop(mddev, 0, NULL);
5709 }
5710}
5711
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks list)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshness), and then ultimately run it if everything
 * checks out.
 *
 * If "unit" is allocated, then bump its reference count
 */
5724static void autorun_devices(int part)
5725{
5726 struct md_rdev *rdev0, *rdev, *tmp;
5727 struct mddev *mddev;
5728 char b[BDEVNAME_SIZE];
5729
5730 printk(KERN_INFO "md: autorun ...\n");
5731 while (!list_empty(&pending_raid_disks)) {
5732 int unit;
5733 dev_t dev;
5734 LIST_HEAD(candidates);
5735 rdev0 = list_entry(pending_raid_disks.next,
5736 struct md_rdev, same_set);
5737
5738 printk(KERN_INFO "md: considering %s ...\n",
5739 bdevname(rdev0->bdev,b));
5740 INIT_LIST_HEAD(&candidates);
5741 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5742 if (super_90_load(rdev, rdev0, 0) >= 0) {
5743 printk(KERN_INFO "md: adding %s ...\n",
5744 bdevname(rdev->bdev,b));
5745 list_move(&rdev->same_set, &candidates);
5746 }
5747
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
5752 if (part) {
5753 dev = MKDEV(mdp_major,
5754 rdev0->preferred_minor << MdpMinorShift);
5755 unit = MINOR(dev) >> MdpMinorShift;
5756 } else {
5757 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5758 unit = MINOR(dev);
5759 }
5760 if (rdev0->preferred_minor != unit) {
5761 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5762 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5763 break;
5764 }
5765
5766 md_probe(dev, NULL, NULL);
5767 mddev = mddev_find(dev);
5768 if (!mddev || !mddev->gendisk) {
5769 if (mddev)
5770 mddev_put(mddev);
5771 printk(KERN_ERR
5772 "md: cannot allocate memory for md drive.\n");
5773 break;
5774 }
5775 if (mddev_lock(mddev))
5776 printk(KERN_WARNING "md: %s locked, cannot run\n",
5777 mdname(mddev));
5778 else if (mddev->raid_disks || mddev->major_version
5779 || !list_empty(&mddev->disks)) {
5780 printk(KERN_WARNING
5781 "md: %s already running, cannot run %s\n",
5782 mdname(mddev), bdevname(rdev0->bdev,b));
5783 mddev_unlock(mddev);
5784 } else {
5785 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5786 mddev->persistent = 1;
5787 rdev_for_each_list(rdev, tmp, &candidates) {
5788 list_del_init(&rdev->same_set);
5789 if (bind_rdev_to_array(rdev, mddev))
5790 export_rdev(rdev);
5791 }
5792 autorun_array(mddev);
5793 mddev_unlock(mddev);
5794 }

		/* on success, candidates will be empty, on error
		 * it won't...
		 */
5798 rdev_for_each_list(rdev, tmp, &candidates) {
5799 list_del_init(&rdev->same_set);
5800 export_rdev(rdev);
5801 }
5802 mddev_put(mddev);
5803 }
5804 printk(KERN_INFO "md: ... autorun DONE.\n");
5805}
5806#endif
5807
5808static int get_version(void __user *arg)
5809{
5810 mdu_version_t ver;
5811
5812 ver.major = MD_MAJOR_VERSION;
5813 ver.minor = MD_MINOR_VERSION;
5814 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5815
5816 if (copy_to_user(arg, &ver, sizeof(ver)))
5817 return -EFAULT;
5818
5819 return 0;
5820}
5821
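/* GET_ARRAY_INFO: snapshot the array state into the user's
 * mdu_array_info_t.  Disk counts are gathered under rcu_read_lock(), so
 * this never needs to take the mddev lock.
 */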
5822static int get_array_info(struct mddev *mddev, void __user *arg)
5823{
5824 mdu_array_info_t info;
5825 int nr,working,insync,failed,spare;
5826 struct md_rdev *rdev;
5827
5828 nr = working = insync = failed = spare = 0;
5829 rcu_read_lock();
5830 rdev_for_each_rcu(rdev, mddev) {
5831 nr++;
5832 if (test_bit(Faulty, &rdev->flags))
5833 failed++;
5834 else {
5835 working++;
5836 if (test_bit(In_sync, &rdev->flags))
5837 insync++;
5838 else
5839 spare++;
5840 }
5841 }
5842 rcu_read_unlock();
5843
5844 info.major_version = mddev->major_version;
5845 info.minor_version = mddev->minor_version;
5846 info.patch_version = MD_PATCHLEVEL_VERSION;
5847 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5848 info.level = mddev->level;
5849 info.size = mddev->dev_sectors / 2;
5850 if (info.size != mddev->dev_sectors / 2)
5851 info.size = -1;
5852 info.nr_disks = nr;
5853 info.raid_disks = mddev->raid_disks;
5854 info.md_minor = mddev->md_minor;
5855 info.not_persistent= !mddev->persistent;
5856
5857 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5858 info.state = 0;
5859 if (mddev->in_sync)
5860 info.state = (1<<MD_SB_CLEAN);
5861 if (mddev->bitmap && mddev->bitmap_info.offset)
5862 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5863 if (mddev_is_clustered(mddev))
5864 info.state |= (1<<MD_SB_CLUSTERED);
5865 info.active_disks = insync;
5866 info.working_disks = working;
5867 info.failed_disks = failed;
5868 info.spare_disks = spare;
5869
5870 info.layout = mddev->layout;
5871 info.chunk_size = mddev->chunk_sectors << 9;
5872
5873 if (copy_to_user(arg, &info, sizeof(info)))
5874 return -EFAULT;
5875
5876 return 0;
5877}
5878
5879static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5880{
5881 mdu_bitmap_file_t *file = NULL;
5882 char *ptr;
5883 int err;
5884
5885 file = kzalloc(sizeof(*file), GFP_NOIO);
5886 if (!file)
5887 return -ENOMEM;
5888
5889 err = 0;
5890 spin_lock(&mddev->lock);
	/* bitmap enabled */
5892 if (mddev->bitmap_info.file) {
5893 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5894 sizeof(file->pathname));
5895 if (IS_ERR(ptr))
5896 err = PTR_ERR(ptr);
5897 else
5898 memmove(file->pathname, ptr,
5899 sizeof(file->pathname)-(ptr-file->pathname));
5900 }
5901 spin_unlock(&mddev->lock);
5902
5903 if (err == 0 &&
5904 copy_to_user(arg, file, sizeof(*file)))
5905 err = -EFAULT;
5906
5907 kfree(file);
5908 return err;
5909}
5910
5911static int get_disk_info(struct mddev *mddev, void __user * arg)
5912{
5913 mdu_disk_info_t info;
5914 struct md_rdev *rdev;
5915
5916 if (copy_from_user(&info, arg, sizeof(info)))
5917 return -EFAULT;
5918
5919 rcu_read_lock();
5920 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5921 if (rdev) {
5922 info.major = MAJOR(rdev->bdev->bd_dev);
5923 info.minor = MINOR(rdev->bdev->bd_dev);
5924 info.raid_disk = rdev->raid_disk;
5925 info.state = 0;
5926 if (test_bit(Faulty, &rdev->flags))
5927 info.state |= (1<<MD_DISK_FAULTY);
5928 else if (test_bit(In_sync, &rdev->flags)) {
5929 info.state |= (1<<MD_DISK_ACTIVE);
5930 info.state |= (1<<MD_DISK_SYNC);
5931 }
5932 if (test_bit(Journal, &rdev->flags))
5933 info.state |= (1<<MD_DISK_JOURNAL);
5934 if (test_bit(WriteMostly, &rdev->flags))
5935 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5936 } else {
5937 info.major = info.minor = 0;
5938 info.raid_disk = -1;
5939 info.state = (1<<MD_DISK_REMOVED);
5940 }
5941 rcu_read_unlock();
5942
5943 if (copy_to_user(arg, &info, sizeof(info)))
5944 return -EFAULT;
5945
5946 return 0;
5947}
5948
5949static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5950{
5951 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5952 struct md_rdev *rdev;
5953 dev_t dev = MKDEV(info->major,info->minor);
5954
5955 if (mddev_is_clustered(mddev) &&
5956 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
5957 pr_err("%s: Cannot add to clustered mddev.\n",
5958 mdname(mddev));
5959 return -EINVAL;
5960 }
5961
5962 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5963 return -EOVERFLOW;
5964
5965 if (!mddev->raid_disks) {
5966 int err;
5967
5968 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5969 if (IS_ERR(rdev)) {
5970 printk(KERN_WARNING
5971 "md: md_import_device returned %ld\n",
5972 PTR_ERR(rdev));
5973 return PTR_ERR(rdev);
5974 }
5975 if (!list_empty(&mddev->disks)) {
5976 struct md_rdev *rdev0
5977 = list_entry(mddev->disks.next,
5978 struct md_rdev, same_set);
5979 err = super_types[mddev->major_version]
5980 .load_super(rdev, rdev0, mddev->minor_version);
5981 if (err < 0) {
5982 printk(KERN_WARNING
5983 "md: %s has different UUID to %s\n",
5984 bdevname(rdev->bdev,b),
5985 bdevname(rdev0->bdev,b2));
5986 export_rdev(rdev);
5987 return -EINVAL;
5988 }
5989 }
5990 err = bind_rdev_to_array(rdev, mddev);
5991 if (err)
5992 export_rdev(rdev);
5993 return err;
5994 }
5995
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
6001 if (mddev->pers) {
6002 int err;
6003 if (!mddev->pers->hot_add_disk) {
6004 printk(KERN_WARNING
6005 "%s: personality does not support diskops!\n",
6006 mdname(mddev));
6007 return -EINVAL;
6008 }
6009 if (mddev->persistent)
6010 rdev = md_import_device(dev, mddev->major_version,
6011 mddev->minor_version);
6012 else
6013 rdev = md_import_device(dev, -1, -1);
6014 if (IS_ERR(rdev)) {
6015 printk(KERN_WARNING
6016 "md: md_import_device returned %ld\n",
6017 PTR_ERR(rdev));
6018 return PTR_ERR(rdev);
6019 }
6020
6021 if (!mddev->persistent) {
6022 if (info->state & (1<<MD_DISK_SYNC) &&
6023 info->raid_disk < mddev->raid_disks) {
6024 rdev->raid_disk = info->raid_disk;
6025 set_bit(In_sync, &rdev->flags);
6026 clear_bit(Bitmap_sync, &rdev->flags);
6027 } else
6028 rdev->raid_disk = -1;
6029 rdev->saved_raid_disk = rdev->raid_disk;
6030 } else
6031 super_types[mddev->major_version].
6032 validate_super(mddev, rdev);
6033 if ((info->state & (1<<MD_DISK_SYNC)) &&
6034 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events don't
			 * match, so reject it.
			 */
6038 export_rdev(rdev);
6039 return -EINVAL;
6040 }
6041
		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6043 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6044 set_bit(WriteMostly, &rdev->flags);
6045 else
6046 clear_bit(WriteMostly, &rdev->flags);
6047
6048 if (info->state & (1<<MD_DISK_JOURNAL)) {
6049 struct md_rdev *rdev2;
6050 bool has_journal = false;
6051
			/* make sure no existing journal disk */
6053 rdev_for_each(rdev2, mddev) {
6054 if (test_bit(Journal, &rdev2->flags)) {
6055 has_journal = true;
6056 break;
6057 }
6058 }
6059 if (has_journal) {
6060 export_rdev(rdev);
6061 return -EBUSY;
6062 }
6063 set_bit(Journal, &rdev->flags);
6064 }
6065
		/*
		 * check whether the device shows up in other nodes
		 */
6068 if (mddev_is_clustered(mddev)) {
6069 if (info->state & (1 << MD_DISK_CANDIDATE))
6070 set_bit(Candidate, &rdev->flags);
6071 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6073 err = md_cluster_ops->add_new_disk(mddev, rdev);
6074 if (err) {
6075 export_rdev(rdev);
6076 return err;
6077 }
6078 }
6079 }
6080
6081 rdev->raid_disk = -1;
6082 err = bind_rdev_to_array(rdev, mddev);
6083
6084 if (err)
6085 export_rdev(rdev);
6086
6087 if (mddev_is_clustered(mddev)) {
6088 if (info->state & (1 << MD_DISK_CANDIDATE))
6089 md_cluster_ops->new_disk_ack(mddev, (err == 0));
6090 else {
6091 if (err)
6092 md_cluster_ops->add_new_disk_cancel(mddev);
6093 else
6094 err = add_bound_rdev(rdev);
6095 }
6096
6097 } else if (!err)
6098 err = add_bound_rdev(rdev);
6099
6100 return err;
6101 }
6102
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6106 if (mddev->major_version != 0) {
6107 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
6108 mdname(mddev));
6109 return -EINVAL;
6110 }
6111
6112 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6113 int err;
6114 rdev = md_import_device(dev, -1, 0);
6115 if (IS_ERR(rdev)) {
6116 printk(KERN_WARNING
6117 "md: error, md_import_device() returned %ld\n",
6118 PTR_ERR(rdev));
6119 return PTR_ERR(rdev);
6120 }
6121 rdev->desc_nr = info->number;
6122 if (info->raid_disk < mddev->raid_disks)
6123 rdev->raid_disk = info->raid_disk;
6124 else
6125 rdev->raid_disk = -1;
6126
6127 if (rdev->raid_disk < mddev->raid_disks)
6128 if (info->state & (1<<MD_DISK_SYNC))
6129 set_bit(In_sync, &rdev->flags);
6130
6131 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6132 set_bit(WriteMostly, &rdev->flags);
6133
6134 if (!mddev->persistent) {
6135 printk(KERN_INFO "md: nonpersistent superblock ...\n");
6136 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6137 } else
6138 rdev->sb_start = calc_dev_sboffset(rdev);
6139 rdev->sectors = rdev->sb_start;
6140
6141 err = bind_rdev_to_array(rdev, mddev);
6142 if (err) {
6143 export_rdev(rdev);
6144 return err;
6145 }
6146 }
6147
6148 return 0;
6149}
6150
6151static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6152{
6153 char b[BDEVNAME_SIZE];
6154 struct md_rdev *rdev;
6155
6156 rdev = find_rdev(mddev, dev);
6157 if (!rdev)
6158 return -ENXIO;
6159
6160 if (rdev->raid_disk < 0)
6161 goto kick_rdev;
6162
6163 clear_bit(Blocked, &rdev->flags);
6164 remove_and_add_spares(mddev, rdev);
6165
6166 if (rdev->raid_disk >= 0)
6167 goto busy;
6168
6169kick_rdev:
6170 if (mddev_is_clustered(mddev))
6171 md_cluster_ops->remove_disk(mddev, rdev);
6172
6173 md_kick_rdev_from_array(rdev);
6174 md_update_sb(mddev, 1);
6175 md_new_event(mddev);
6176
6177 return 0;
6178busy:
6179 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
6180 bdevname(rdev->bdev,b), mdname(mddev));
6181 return -EBUSY;
6182}
6183
6184static int hot_add_disk(struct mddev *mddev, dev_t dev)
6185{
6186 char b[BDEVNAME_SIZE];
6187 int err;
6188 struct md_rdev *rdev;
6189
6190 if (!mddev->pers)
6191 return -ENODEV;
6192
6193 if (mddev->major_version != 0) {
6194 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
6195 " version-0 superblocks.\n",
6196 mdname(mddev));
6197 return -EINVAL;
6198 }
6199 if (!mddev->pers->hot_add_disk) {
6200 printk(KERN_WARNING
6201 "%s: personality does not support diskops!\n",
6202 mdname(mddev));
6203 return -EINVAL;
6204 }
6205
6206 rdev = md_import_device(dev, -1, 0);
6207 if (IS_ERR(rdev)) {
6208 printk(KERN_WARNING
6209 "md: error, md_import_device() returned %ld\n",
6210 PTR_ERR(rdev));
6211 return -EINVAL;
6212 }
6213
6214 if (mddev->persistent)
6215 rdev->sb_start = calc_dev_sboffset(rdev);
6216 else
6217 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6218
6219 rdev->sectors = rdev->sb_start;
6220
6221 if (test_bit(Faulty, &rdev->flags)) {
6222 printk(KERN_WARNING
6223 "md: can not hot-add faulty %s disk to %s!\n",
6224 bdevname(rdev->bdev,b), mdname(mddev));
6225 err = -EINVAL;
6226 goto abort_export;
6227 }
6228
6229 clear_bit(In_sync, &rdev->flags);
6230 rdev->desc_nr = -1;
6231 rdev->saved_raid_disk = -1;
6232 err = bind_rdev_to_array(rdev, mddev);
6233 if (err)
6234 goto abort_export;
6235
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6241 rdev->raid_disk = -1;
6242
6243 md_update_sb(mddev, 1);
6244
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
6248 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6249 md_wakeup_thread(mddev->thread);
6250 md_new_event(mddev);
6251 return 0;
6252
6253abort_export:
6254 export_rdev(rdev);
6255 return err;
6256}
6257
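/* SET_BITMAP_FILE: attach (fd >= 0) or detach (fd < 0) a file-backed
 * write-intent bitmap.  The file must be a regular file opened for write
 * and not otherwise in use; the personality is quiesced around the bitmap
 * create/load or destroy.
 */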
6258static int set_bitmap_file(struct mddev *mddev, int fd)
6259{
6260 int err = 0;
6261
6262 if (mddev->pers) {
6263 if (!mddev->pers->quiesce || !mddev->thread)
6264 return -EBUSY;
6265 if (mddev->recovery || mddev->sync_thread)
6266 return -EBUSY;
		/* we should be able to change the bitmap.. */
6268 }
6269
6270 if (fd >= 0) {
6271 struct inode *inode;
6272 struct file *f;
6273
6274 if (mddev->bitmap || mddev->bitmap_info.file)
6275 return -EEXIST;
6276 f = fget(fd);
6277
6278 if (f == NULL) {
6279 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
6280 mdname(mddev));
6281 return -EBADF;
6282 }
6283
6284 inode = f->f_mapping->host;
6285 if (!S_ISREG(inode->i_mode)) {
6286 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
6287 mdname(mddev));
6288 err = -EBADF;
6289 } else if (!(f->f_mode & FMODE_WRITE)) {
6290 printk(KERN_ERR "%s: error: bitmap file must open for write\n",
6291 mdname(mddev));
6292 err = -EBADF;
6293 } else if (atomic_read(&inode->i_writecount) != 1) {
6294 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6295 mdname(mddev));
6296 err = -EBUSY;
6297 }
6298 if (err) {
6299 fput(f);
6300 return err;
6301 }
6302 mddev->bitmap_info.file = f;
6303 mddev->bitmap_info.offset = 0;
6304 } else if (mddev->bitmap == NULL)
		return -ENOENT; /* cannot remove what isn't there */
6306 err = 0;
6307 if (mddev->pers) {
6308 mddev->pers->quiesce(mddev, 1);
6309 if (fd >= 0) {
6310 struct bitmap *bitmap;
6311
6312 bitmap = bitmap_create(mddev, -1);
6313 if (!IS_ERR(bitmap)) {
6314 mddev->bitmap = bitmap;
6315 err = bitmap_load(mddev);
6316 } else
6317 err = PTR_ERR(bitmap);
6318 }
6319 if (fd < 0 || err) {
6320 bitmap_destroy(mddev);
6321 fd = -1;
6322 }
6323 mddev->pers->quiesce(mddev, 0);
6324 }
6325 if (fd < 0) {
6326 struct file *f = mddev->bitmap_info.file;
6327 if (f) {
6328 spin_lock(&mddev->lock);
6329 mddev->bitmap_info.file = NULL;
6330 spin_unlock(&mddev->lock);
6331 fput(f);
6332 }
6333 }
6334
6335 return err;
6336}
6337
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the
 *  devices.  The minor and patch _version numbers are also kept in case
 *  the super_block handler wishes to interpret them.
 */
6351static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6352{
6353
6354 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
6356 if (info->major_version < 0 ||
6357 info->major_version >= ARRAY_SIZE(super_types) ||
6358 super_types[info->major_version].name == NULL) {
6359
6360 printk(KERN_INFO
6361 "md: superblock version %d not known\n",
6362 info->major_version);
6363 return -EINVAL;
6364 }
6365 mddev->major_version = info->major_version;
6366 mddev->minor_version = info->minor_version;
6367 mddev->patch_version = info->patch_version;
6368 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
6372 mddev->ctime = ktime_get_real_seconds();
6373 return 0;
6374 }
6375 mddev->major_version = MD_MAJOR_VERSION;
6376 mddev->minor_version = MD_MINOR_VERSION;
6377 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6378 mddev->ctime = ktime_get_real_seconds();
6379
6380 mddev->level = info->level;
6381 mddev->clevel[0] = 0;
6382 mddev->dev_sectors = 2 * (sector_t)info->size;
6383 mddev->raid_disks = info->raid_disks;
6384
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
6387 if (info->state & (1<<MD_SB_CLEAN))
6388 mddev->recovery_cp = MaxSector;
6389 else
6390 mddev->recovery_cp = 0;
6391 mddev->persistent = ! info->not_persistent;
6392 mddev->external = 0;
6393
6394 mddev->layout = info->layout;
6395 mddev->chunk_sectors = info->chunk_size >> 9;
6396
6397 mddev->max_disks = MD_SB_DISKS;
6398
6399 if (mddev->persistent)
6400 mddev->flags = 0;
6401 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6402
6403 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6404 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6405 mddev->bitmap_info.offset = 0;
6406
6407 mddev->reshape_position = MaxSector;
6408
	/*
	 * Generate a 128 bit UUID
	 */
6412 get_random_bytes(mddev->uuid, 16);
6413
6414 mddev->new_level = mddev->level;
6415 mddev->new_chunk_sectors = mddev->chunk_sectors;
6416 mddev->new_layout = mddev->layout;
6417 mddev->delta_disks = 0;
6418 mddev->reshape_backwards = 0;
6419
6420 return 0;
6421}
6422
6423void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6424{
6425 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6426
6427 if (mddev->external_size)
6428 return;
6429
6430 mddev->array_sectors = array_sectors;
6431}
6432EXPORT_SYMBOL(md_set_array_sectors);
6433
6434static int update_size(struct mddev *mddev, sector_t num_sectors)
6435{
6436 struct md_rdev *rdev;
6437 int rv;
6438 int fit = (num_sectors == 0);
6439
6440 if (mddev->pers->resize == NULL)
6441 return -EINVAL;
6442
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
6451 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6452 mddev->sync_thread)
6453 return -EBUSY;
6454 if (mddev->ro)
6455 return -EROFS;
6456
6457 rdev_for_each(rdev, mddev) {
6458 sector_t avail = rdev->sectors;
6459
6460 if (fit && (num_sectors == 0 || num_sectors > avail))
6461 num_sectors = avail;
6462 if (avail < num_sectors)
6463 return -ENOSPC;
6464 }
6465 rv = mddev->pers->resize(mddev, num_sectors);
6466 if (!rv)
6467 revalidate_disk(mddev->gendisk);
6468 return rv;
6469}
6470
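/* Change the number of active devices.  This only records the request and
 * asks the personality's check_reshape() whether it can be done; the data
 * offsets must leave room for the reshape to proceed in one direction.
 */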
6471static int update_raid_disks(struct mddev *mddev, int raid_disks)
6472{
6473 int rv;
6474 struct md_rdev *rdev;
6475
6476 if (mddev->pers->check_reshape == NULL)
6477 return -EINVAL;
6478 if (mddev->ro)
6479 return -EROFS;
6480 if (raid_disks <= 0 ||
6481 (mddev->max_disks && raid_disks >= mddev->max_disks))
6482 return -EINVAL;
6483 if (mddev->sync_thread ||
6484 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6485 mddev->reshape_position != MaxSector)
6486 return -EBUSY;
6487
6488 rdev_for_each(rdev, mddev) {
6489 if (mddev->raid_disks < raid_disks &&
6490 rdev->data_offset < rdev->new_data_offset)
6491 return -EINVAL;
6492 if (mddev->raid_disks > raid_disks &&
6493 rdev->data_offset > rdev->new_data_offset)
6494 return -EINVAL;
6495 }
6496
6497 mddev->delta_disks = raid_disks - mddev->raid_disks;
6498 if (mddev->delta_disks < 0)
6499 mddev->reshape_backwards = 1;
6500 else if (mddev->delta_disks > 0)
6501 mddev->reshape_backwards = 0;
6502
6503 rv = mddev->pers->check_reshape(mddev);
6504 if (rv < 0) {
6505 mddev->delta_disks = 0;
6506 mddev->reshape_backwards = 0;
6507 }
6508 return rv;
6509}
6510
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, size, raid_disks, layout, chunk_size
 * fields in the info structure must match what the array already has,
 * except that exactly one of size, raid_disks, layout or the bitmap
 * flag may differ; that single change is then applied.
 */
6519static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6520{
6521 int rv = 0;
6522 int cnt = 0;
6523 int state = 0;
6524
	/* calculate expected state, in case it needs to be changed */
6526 if (mddev->bitmap && mddev->bitmap_info.offset)
6527 state |= (1 << MD_SB_BITMAP_PRESENT);
6528
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state^info->state) & 0xfffffe00))
		return -EINVAL;

	/* Check there is only one change */
6542 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6543 cnt++;
6544 if (mddev->raid_disks != info->raid_disks)
6545 cnt++;
6546 if (mddev->layout != info->layout)
6547 cnt++;
6548 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6549 cnt++;
6550 if (cnt == 0)
6551 return 0;
6552 if (cnt > 1)
6553 return -EINVAL;
6554
6555 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
6560 if (mddev->pers->check_reshape == NULL)
6561 return -EINVAL;
6562 else {
6563 mddev->new_layout = info->layout;
6564 rv = mddev->pers->check_reshape(mddev);
6565 if (rv)
6566 mddev->new_layout = mddev->layout;
6567 return rv;
6568 }
6569 }
6570 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6571 rv = update_size(mddev, (sector_t)info->size * 2);
6572
6573 if (mddev->raid_disks != info->raid_disks)
6574 rv = update_raid_disks(mddev, info->raid_disks);
6575
6576 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6577 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6578 rv = -EINVAL;
6579 goto err;
6580 }
6581 if (mddev->recovery || mddev->sync_thread) {
6582 rv = -EBUSY;
6583 goto err;
6584 }
6585 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6586 struct bitmap *bitmap;
6587
6588 if (mddev->bitmap) {
6589 rv = -EEXIST;
6590 goto err;
6591 }
6592 if (mddev->bitmap_info.default_offset == 0) {
6593 rv = -EINVAL;
6594 goto err;
6595 }
6596 mddev->bitmap_info.offset =
6597 mddev->bitmap_info.default_offset;
6598 mddev->bitmap_info.space =
6599 mddev->bitmap_info.default_space;
6600 mddev->pers->quiesce(mddev, 1);
6601 bitmap = bitmap_create(mddev, -1);
6602 if (!IS_ERR(bitmap)) {
6603 mddev->bitmap = bitmap;
6604 rv = bitmap_load(mddev);
6605 } else
6606 rv = PTR_ERR(bitmap);
6607 if (rv)
6608 bitmap_destroy(mddev);
6609 mddev->pers->quiesce(mddev, 0);
6610 } else {
			/* remove the bitmap */
6612 if (!mddev->bitmap) {
6613 rv = -ENOENT;
6614 goto err;
6615 }
6616 if (mddev->bitmap->storage.file) {
6617 rv = -EINVAL;
6618 goto err;
6619 }
6620 if (mddev->bitmap_info.nodes) {
				/* lock the bitmaps on all nodes first */
6622 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6623 printk("md: can't change bitmap to none since the"
6624 " array is in use by more than one node\n");
6625 rv = -EPERM;
6626 md_cluster_ops->unlock_all_bitmaps(mddev);
6627 goto err;
6628 }
6629
6630 mddev->bitmap_info.nodes = 0;
6631 md_cluster_ops->leave(mddev);
6632 }
6633 mddev->pers->quiesce(mddev, 1);
6634 bitmap_destroy(mddev);
6635 mddev->pers->quiesce(mddev, 0);
6636 mddev->bitmap_info.offset = 0;
6637 }
6638 }
6639 md_update_sb(mddev, 1);
6640 return rv;
6641err:
6642 return rv;
6643}
6644
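/* SET_DISK_FAULTY: simulate a media error on one component device by
 * calling md_error().  If the personality refuses to fail the device
 * (e.g. it is the last working one), report -EBUSY.
 */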
6645static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6646{
6647 struct md_rdev *rdev;
6648 int err = 0;
6649
6650 if (mddev->pers == NULL)
6651 return -ENODEV;
6652
6653 rcu_read_lock();
6654 rdev = find_rdev_rcu(mddev, dev);
6655 if (!rdev)
6656 err = -ENODEV;
6657 else {
6658 md_error(mddev, rdev);
6659 if (!test_bit(Faulty, &rdev->flags))
6660 err = -EBUSY;
6661 }
6662 rcu_read_unlock();
6663 return err;
6664}
6665
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dmraid as well as EZD and lilo happy ;-)
 */
6672static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6673{
6674 struct mddev *mddev = bdev->bd_disk->private_data;
6675
6676 geo->heads = 2;
6677 geo->sectors = 4;
6678 geo->cylinders = mddev->array_sectors / 8;
6679 return 0;
6680}
6681
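/* Only the fixed set of md ioctls below is accepted; everything else is
 * rejected with -ENOTTY before any locking is attempted.
 */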
6682static inline bool md_ioctl_valid(unsigned int cmd)
6683{
6684 switch (cmd) {
6685 case ADD_NEW_DISK:
6686 case BLKROSET:
6687 case GET_ARRAY_INFO:
6688 case GET_BITMAP_FILE:
6689 case GET_DISK_INFO:
6690 case HOT_ADD_DISK:
6691 case HOT_REMOVE_DISK:
6692 case RAID_AUTORUN:
6693 case RAID_VERSION:
6694 case RESTART_ARRAY_RW:
6695 case RUN_ARRAY:
6696 case SET_ARRAY_INFO:
6697 case SET_BITMAP_FILE:
6698 case SET_DISK_FAULTY:
6699 case STOP_ARRAY:
6700 case STOP_ARRAY_RO:
6701 case CLUSTERED_DISK_NACK:
6702 return true;
6703 default:
6704 return false;
6705 }
6706}
6707
6708static int md_ioctl(struct block_device *bdev, fmode_t mode,
6709 unsigned int cmd, unsigned long arg)
6710{
6711 int err = 0;
6712 void __user *argp = (void __user *)arg;
6713 struct mddev *mddev = NULL;
6714 int ro;
6715
6716 if (!md_ioctl_valid(cmd))
6717 return -ENOTTY;
6718
6719 switch (cmd) {
6720 case RAID_VERSION:
6721 case GET_ARRAY_INFO:
6722 case GET_DISK_INFO:
6723 break;
6724 default:
6725 if (!capable(CAP_SYS_ADMIN))
6726 return -EACCES;
6727 }
6728
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
6733 switch (cmd) {
6734 case RAID_VERSION:
6735 err = get_version(argp);
6736 goto out;
6737
6738#ifndef MODULE
6739 case RAID_AUTORUN:
6740 err = 0;
6741 autostart_arrays(arg);
6742 goto out;
6743#endif
6744 default:;
6745 }
6746
	/*
	 * Commands creating/starting a new array:
	 */

6751 mddev = bdev->bd_disk->private_data;
6752
6753 if (!mddev) {
6754 BUG();
6755 goto out;
6756 }
6757
	/* Some actions do not require the mutex */
6759 switch (cmd) {
6760 case GET_ARRAY_INFO:
6761 if (!mddev->raid_disks && !mddev->external)
6762 err = -ENODEV;
6763 else
6764 err = get_array_info(mddev, argp);
6765 goto out;
6766
6767 case GET_DISK_INFO:
6768 if (!mddev->raid_disks && !mddev->external)
6769 err = -ENODEV;
6770 else
6771 err = get_disk_info(mddev, argp);
6772 goto out;
6773
6774 case SET_DISK_FAULTY:
6775 err = set_disk_faulty(mddev, new_decode_dev(arg));
6776 goto out;
6777
6778 case GET_BITMAP_FILE:
6779 err = get_bitmap_file(mddev, argp);
6780 goto out;
6781
6782 }
6783
6784 if (cmd == ADD_NEW_DISK)
		/* need to ensure mddev_delayed_delete() has completed */
6786 flush_workqueue(md_misc_wq);
6787
	if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
		wait_event_interruptible_timeout(mddev->sb_wait,
						 !test_bit(MD_RECOVERY_NEEDED,
							   &mddev->recovery),
						 msecs_to_jiffies(5000));
6794 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
6798 mutex_lock(&mddev->open_mutex);
6799 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6800 mutex_unlock(&mddev->open_mutex);
6801 err = -EBUSY;
6802 goto out;
6803 }
6804 set_bit(MD_STILL_CLOSED, &mddev->flags);
6805 mutex_unlock(&mddev->open_mutex);
6806 sync_blockdev(bdev);
6807 }
6808 err = mddev_lock(mddev);
6809 if (err) {
6810 printk(KERN_INFO
6811 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6812 err, cmd);
6813 goto out;
6814 }
6815
6816 if (cmd == SET_ARRAY_INFO) {
6817 mdu_array_info_t info;
6818 if (!arg)
6819 memset(&info, 0, sizeof(info));
6820 else if (copy_from_user(&info, argp, sizeof(info))) {
6821 err = -EFAULT;
6822 goto unlock;
6823 }
6824 if (mddev->pers) {
6825 err = update_array_info(mddev, &info);
6826 if (err) {
6827 printk(KERN_WARNING "md: couldn't update"
6828 " array info. %d\n", err);
6829 goto unlock;
6830 }
6831 goto unlock;
6832 }
6833 if (!list_empty(&mddev->disks)) {
6834 printk(KERN_WARNING
6835 "md: array %s already has disks!\n",
6836 mdname(mddev));
6837 err = -EBUSY;
6838 goto unlock;
6839 }
6840 if (mddev->raid_disks) {
6841 printk(KERN_WARNING
6842 "md: array %s already initialised!\n",
6843 mdname(mddev));
6844 err = -EBUSY;
6845 goto unlock;
6846 }
6847 err = set_array_info(mddev, &info);
6848 if (err) {
6849 printk(KERN_WARNING "md: couldn't set"
6850 " array info. %d\n", err);
6851 goto unlock;
6852 }
6853 goto unlock;
6854 }
6855
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE are allowed */
6861 if ((!mddev->raid_disks && !mddev->external)
6862 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6863 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6864 && cmd != GET_BITMAP_FILE) {
6865 err = -ENODEV;
6866 goto unlock;
6867 }
6868
	/*
	 * Commands even a read-only array can execute:
	 */
6872 switch (cmd) {
6873 case RESTART_ARRAY_RW:
6874 err = restart_array(mddev);
6875 goto unlock;
6876
6877 case STOP_ARRAY:
6878 err = do_md_stop(mddev, 0, bdev);
6879 goto unlock;
6880
6881 case STOP_ARRAY_RO:
6882 err = md_set_readonly(mddev, bdev);
6883 goto unlock;
6884
6885 case HOT_REMOVE_DISK:
6886 err = hot_remove_disk(mddev, new_decode_dev(arg));
6887 goto unlock;
6888
6889 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
6894 if (mddev->pers) {
6895 mdu_disk_info_t info;
6896 if (copy_from_user(&info, argp, sizeof(info)))
6897 err = -EFAULT;
6898 else if (!(info.state & (1<<MD_DISK_SYNC)))
				/* Need to clear read-only for this */
6900 break;
6901 else
6902 err = add_new_disk(mddev, &info);
6903 goto unlock;
6904 }
6905 break;
6906
6907 case BLKROSET:
6908 if (get_user(ro, (int __user *)(arg))) {
6909 err = -EFAULT;
6910 goto unlock;
6911 }
6912 err = -EINVAL;
		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
6917 if (ro)
6918 goto unlock;
		/* are we already prepared for writes? */
6921 if (mddev->ro != 1)
6922 goto unlock;
6923
		/* transitioning to read-auto need only happen for
		 * arrays that call md_write_start
		 */
6927 if (mddev->pers) {
6928 err = restart_array(mddev);
6929 if (err == 0) {
6930 mddev->ro = 2;
6931 set_disk_ro(mddev->gendisk, 0);
6932 }
6933 }
6934 goto unlock;
6935 }
6936
	/*
	 * The remaining ioctls are changing the state of the
	 * superblock, so we do not allow them on read-only arrays.
	 */
6941 if (mddev->ro && mddev->pers) {
6942 if (mddev->ro == 2) {
6943 mddev->ro = 0;
6944 sysfs_notify_dirent_safe(mddev->sysfs_state);
6945 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
6950 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6951 mddev_unlock(mddev);
6952 wait_event(mddev->sb_wait,
6953 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6954 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6955 mddev_lock_nointr(mddev);
6956 }
6957 } else {
6958 err = -EROFS;
6959 goto unlock;
6960 }
6961 }
6962
6963 switch (cmd) {
6964 case ADD_NEW_DISK:
6965 {
6966 mdu_disk_info_t info;
6967 if (copy_from_user(&info, argp, sizeof(info)))
6968 err = -EFAULT;
6969 else
6970 err = add_new_disk(mddev, &info);
6971 goto unlock;
6972 }
6973
6974 case CLUSTERED_DISK_NACK:
6975 if (mddev_is_clustered(mddev))
6976 md_cluster_ops->new_disk_ack(mddev, false);
6977 else
6978 err = -EINVAL;
6979 goto unlock;
6980
6981 case HOT_ADD_DISK:
6982 err = hot_add_disk(mddev, new_decode_dev(arg));
6983 goto unlock;
6984
6985 case RUN_ARRAY:
6986 err = do_md_run(mddev);
6987 goto unlock;
6988
6989 case SET_BITMAP_FILE:
6990 err = set_bitmap_file(mddev, (int)arg);
6991 goto unlock;
6992
6993 default:
6994 err = -EINVAL;
6995 goto unlock;
6996 }
6997
6998unlock:
6999 if (mddev->hold_active == UNTIL_IOCTL &&
7000 err != -EINVAL)
7001 mddev->hold_active = 0;
7002 mddev_unlock(mddev);
7003out:
7004 return err;
7005}
7006#ifdef CONFIG_COMPAT
7007static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7008 unsigned int cmd, unsigned long arg)
7009{
7010 switch (cmd) {
7011 case HOT_REMOVE_DISK:
7012 case HOT_ADD_DISK:
7013 case SET_DISK_FAULTY:
7014 case SET_BITMAP_FILE:
7015
7016 break;
7017 default:
7018 arg = (unsigned long)compat_ptr(arg);
7019 break;
7020 }
7021
7022 return md_ioctl(bdev, mode, cmd, arg);
7023}
7024#endif
7025
7026static int md_open(struct block_device *bdev, fmode_t mode)
7027{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * this device isn't being stopped right now.
	 */
7032 struct mddev *mddev = mddev_find(bdev->bd_dev);
7033 int err;
7034
7035 if (!mddev)
7036 return -ENODEV;
7037
	if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
		return -ERESTARTSYS;
	}
7048 BUG_ON(mddev != bdev->bd_disk->private_data);
7049
7050 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7051 goto out;
7052
7053 err = 0;
7054 atomic_inc(&mddev->openers);
7055 clear_bit(MD_STILL_CLOSED, &mddev->flags);
7056 mutex_unlock(&mddev->open_mutex);
7057
7058 check_disk_change(bdev);
7059 out:
7060 return err;
7061}
7062
7063static void md_release(struct gendisk *disk, fmode_t mode)
7064{
7065 struct mddev *mddev = disk->private_data;
7066
7067 BUG_ON(!mddev);
7068 atomic_dec(&mddev->openers);
7069 mddev_put(mddev);
7070}
7071
7072static int md_media_changed(struct gendisk *disk)
7073{
7074 struct mddev *mddev = disk->private_data;
7075
7076 return mddev->changed;
7077}
7078
7079static int md_revalidate(struct gendisk *disk)
7080{
7081 struct mddev *mddev = disk->private_data;
7082
7083 mddev->changed = 0;
7084 return 0;
7085}
7086static const struct block_device_operations md_fops =
7087{
7088 .owner = THIS_MODULE,
7089 .open = md_open,
7090 .release = md_release,
7091 .ioctl = md_ioctl,
7092#ifdef CONFIG_COMPAT
7093 .compat_ioctl = md_compat_ioctl,
7094#endif
7095 .getgeo = md_getgeo,
7096 .media_changed = md_media_changed,
7097 .revalidate_disk= md_revalidate,
7098};
7099
7100static int md_thread(void *arg)
7101{
7102 struct md_thread *thread = arg;
7103
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high. We avoid resource deadlocks individually in each
	 * raid personality. (RAID5 does preallocation) We also use RR
	 * and the very same RT priority as kswapd, thus we will never
	 * get into a priority inversion deadlock.
	 *
	 * we definitely have to have equal or higher priority than
	 * bdflush, otherwise bdflush will deadlock if there are too
	 * many dirty RAID5 blocks.
	 */
7116 allow_signal(SIGKILL);
7117 while (!kthread_should_stop()) {
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7124 if (signal_pending(current))
7125 flush_signals(current);
7126
7127 wait_event_interruptible_timeout
7128 (thread->wqueue,
7129 test_bit(THREAD_WAKEUP, &thread->flags)
7130 || kthread_should_stop(),
7131 thread->timeout);
7132
7133 clear_bit(THREAD_WAKEUP, &thread->flags);
7134 if (!kthread_should_stop())
7135 thread->run(thread);
7136 }
7137
7138 return 0;
7139}
7140
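/*
 * Wake the given thread: THREAD_WAKEUP is set before the wake_up() so
 * that md_thread()'s wait condition sees it and cannot miss the event.
 */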
7141void md_wakeup_thread(struct md_thread *thread)
7142{
7143 if (thread) {
7144 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7145 set_bit(THREAD_WAKEUP, &thread->flags);
7146 wake_up(&thread->wqueue);
7147 }
7148}
7149EXPORT_SYMBOL(md_wakeup_thread);
7150
7151struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7152 struct mddev *mddev, const char *name)
7153{
7154 struct md_thread *thread;
7155
7156 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7157 if (!thread)
7158 return NULL;
7159
7160 init_waitqueue_head(&thread->wqueue);
7161
7162 thread->run = run;
7163 thread->mddev = mddev;
7164 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7165 thread->tsk = kthread_run(md_thread, thread,
7166 "%s_%s",
7167 mdname(thread->mddev),
7168 name);
7169 if (IS_ERR(thread->tsk)) {
7170 kfree(thread);
7171 return NULL;
7172 }
7173 return thread;
7174}
7175EXPORT_SYMBOL(md_register_thread);
7176
7177void md_unregister_thread(struct md_thread **threadp)
7178{
7179 struct md_thread *thread = *threadp;
7180 if (!thread)
7181 return;
7182 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
7186 spin_lock(&pers_lock);
7187 *threadp = NULL;
7188 spin_unlock(&pers_lock);
7189
7190 kthread_stop(thread->tsk);
7191 kfree(thread);
7192}
7193EXPORT_SYMBOL(md_unregister_thread);
7194
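/*
 * Mark a device as Faulty via the personality's error_handler and kick
 * the recovery machinery.  This may be called from the IO completion
 * path, so it must not sleep.
 */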
7195void md_error(struct mddev *mddev, struct md_rdev *rdev)
7196{
7197 if (!rdev || test_bit(Faulty, &rdev->flags))
7198 return;
7199
7200 if (!mddev->pers || !mddev->pers->error_handler)
7201 return;
	mddev->pers->error_handler(mddev, rdev);
7203 if (mddev->degraded)
7204 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7205 sysfs_notify_dirent_safe(rdev->sysfs_state);
7206 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7207 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7208 md_wakeup_thread(mddev->thread);
7209 if (mddev->event_work.func)
7210 queue_work(md_misc_wq, &mddev->event_work);
7211 md_new_event(mddev);
7212}
7213EXPORT_SYMBOL(md_error);
7214
/* seq_file implementation /proc/mdstat */

7217static void status_unused(struct seq_file *seq)
7218{
7219 int i = 0;
7220 struct md_rdev *rdev;
7221
7222 seq_printf(seq, "unused devices: ");
7223
7224 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7225 char b[BDEVNAME_SIZE];
7226 i++;
7227 seq_printf(seq, "%s ",
7228 bdevname(rdev->bdev,b));
7229 }
7230 if (!i)
7231 seq_printf(seq, "<none>");
7232
7233 seq_printf(seq, "\n");
7234}
7235
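/*
 * Emit the resync/recovery progress line for /proc/mdstat.
 * Returns 1 if a status line was printed, 0 if the array is idle.
 */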
7236static int status_resync(struct seq_file *seq, struct mddev *mddev)
7237{
7238 sector_t max_sectors, resync, res;
7239 unsigned long dt, db;
7240 sector_t rt;
7241 int scale;
7242 unsigned int per_milli;
7243
7244 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7245 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7246 max_sectors = mddev->resync_max_sectors;
7247 else
7248 max_sectors = mddev->dev_sectors;
7249
7250 resync = mddev->curr_resync;
7251 if (resync <= 3) {
7252 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
7254 resync = max_sectors;
7255 } else
7256 resync -= atomic_read(&mddev->recovery_active);
7257
7258 if (resync == 0) {
7259 if (mddev->recovery_cp < MaxSector) {
7260 seq_printf(seq, "\tresync=PENDING");
7261 return 1;
7262 }
7263 return 0;
7264 }
7265 if (resync < 3) {
7266 seq_printf(seq, "\tresync=DELAYED");
7267 return 1;
7268 }
7269
7270 WARN_ON(max_sectors == 0);
7271
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 */
7276 scale = 10;
7277 if (sizeof(sector_t) > sizeof(unsigned long)) {
7278 while ( max_sectors/2 > (1ULL<<(scale+32)))
7279 scale++;
7280 }
7281 res = (resync>>scale)*1000;
7282 sector_div(res, (u32)((max_sectors>>scale)+1));
7283
7284 per_milli = res;
7285 {
7286 int i, x = per_milli/50, y = 20-x;
7287 seq_printf(seq, "[");
7288 for (i = 0; i < x; i++)
7289 seq_printf(seq, "=");
7290 seq_printf(seq, ">");
7291 for (i = 0; i < y; i++)
7292 seq_printf(seq, ".");
7293 seq_printf(seq, "] ");
7294 }
7295 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7296 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7297 "reshape" :
7298 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7299 "check" :
7300 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7301 "resync" : "recovery"))),
7302 per_milli/10, per_milli % 10,
7303 (unsigned long long) resync/2,
7304 (unsigned long long) max_sectors/2);
7305
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so could be 32bit or 64bit.
	 * So we divide before multiply in case it is
	 * 32bit and close to the limit.
	 * We scale the divisor (db) by 32 to avoid losing precision
	 * near the end of resync when the number of remaining sectors
	 * is close to the 'mark' count.
	 */
7320 dt = ((jiffies - mddev->resync_mark) / HZ);
7321 if (!dt) dt++;
7322 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7323 - mddev->resync_mark_cnt;
7324
7325 rt = max_sectors - resync;
7326 sector_div(rt, db/32+1);
7327 rt *= dt;
7328 rt >>= 5;
7329
7330 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7331 ((unsigned long)rt % 60)/6);
7332
7333 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7334 return 1;
7335}
7336
7337static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7338{
7339 struct list_head *tmp;
7340 loff_t l = *pos;
7341 struct mddev *mddev;
7342
7343 if (l >= 0x10000)
7344 return NULL;
7345 if (!l--)
		/* header */
7347 return (void*)1;
7348
7349 spin_lock(&all_mddevs_lock);
7350 list_for_each(tmp,&all_mddevs)
7351 if (!l--) {
7352 mddev = list_entry(tmp, struct mddev, all_mddevs);
7353 mddev_get(mddev);
7354 spin_unlock(&all_mddevs_lock);
7355 return mddev;
7356 }
7357 spin_unlock(&all_mddevs_lock);
7358 if (!l--)
7359 return (void*)2;
7360 return NULL;
7361}
7362
7363static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7364{
7365 struct list_head *tmp;
7366 struct mddev *next_mddev, *mddev = v;
7367
7368 ++*pos;
7369 if (v == (void*)2)
7370 return NULL;
7371
7372 spin_lock(&all_mddevs_lock);
7373 if (v == (void*)1)
7374 tmp = all_mddevs.next;
7375 else
7376 tmp = mddev->all_mddevs.next;
7377 if (tmp != &all_mddevs)
7378 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7379 else {
7380 next_mddev = (void*)2;
7381 *pos = 0x10000;
7382 }
7383 spin_unlock(&all_mddevs_lock);
7384
7385 if (v != (void*)1)
7386 mddev_put(mddev);
7387 return next_mddev;
7388
7389}
7390
7391static void md_seq_stop(struct seq_file *seq, void *v)
7392{
7393 struct mddev *mddev = v;
7394
7395 if (mddev && v != (void*)1 && v != (void*)2)
7396 mddev_put(mddev);
7397}
7398
7399static int md_seq_show(struct seq_file *seq, void *v)
7400{
7401 struct mddev *mddev = v;
7402 sector_t sectors;
7403 struct md_rdev *rdev;
7404
7405 if (v == (void*)1) {
7406 struct md_personality *pers;
7407 seq_printf(seq, "Personalities : ");
7408 spin_lock(&pers_lock);
7409 list_for_each_entry(pers, &pers_list, list)
7410 seq_printf(seq, "[%s] ", pers->name);
7411
7412 spin_unlock(&pers_lock);
7413 seq_printf(seq, "\n");
7414 seq->poll_event = atomic_read(&md_event_count);
7415 return 0;
7416 }
7417 if (v == (void*)2) {
7418 status_unused(seq);
7419 return 0;
7420 }
7421
7422 spin_lock(&mddev->lock);
7423 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7424 seq_printf(seq, "%s : %sactive", mdname(mddev),
7425 mddev->pers ? "" : "in");
7426 if (mddev->pers) {
7427 if (mddev->ro==1)
7428 seq_printf(seq, " (read-only)");
7429 if (mddev->ro==2)
7430 seq_printf(seq, " (auto-read-only)");
7431 seq_printf(seq, " %s", mddev->pers->name);
7432 }
7433
7434 sectors = 0;
7435 rcu_read_lock();
7436 rdev_for_each_rcu(rdev, mddev) {
7437 char b[BDEVNAME_SIZE];
7438 seq_printf(seq, " %s[%d]",
7439 bdevname(rdev->bdev,b), rdev->desc_nr);
7440 if (test_bit(WriteMostly, &rdev->flags))
7441 seq_printf(seq, "(W)");
7442 if (test_bit(Journal, &rdev->flags))
7443 seq_printf(seq, "(J)");
7444 if (test_bit(Faulty, &rdev->flags)) {
7445 seq_printf(seq, "(F)");
7446 continue;
7447 }
7448 if (rdev->raid_disk < 0)
7449 seq_printf(seq, "(S)");
7450 if (test_bit(Replacement, &rdev->flags))
7451 seq_printf(seq, "(R)");
7452 sectors += rdev->sectors;
7453 }
7454 rcu_read_unlock();
7455
7456 if (!list_empty(&mddev->disks)) {
7457 if (mddev->pers)
7458 seq_printf(seq, "\n %llu blocks",
7459 (unsigned long long)
7460 mddev->array_sectors / 2);
7461 else
7462 seq_printf(seq, "\n %llu blocks",
7463 (unsigned long long)sectors / 2);
7464 }
7465 if (mddev->persistent) {
7466 if (mddev->major_version != 0 ||
7467 mddev->minor_version != 90) {
7468 seq_printf(seq," super %d.%d",
7469 mddev->major_version,
7470 mddev->minor_version);
7471 }
7472 } else if (mddev->external)
7473 seq_printf(seq, " super external:%s",
7474 mddev->metadata_type);
7475 else
7476 seq_printf(seq, " super non-persistent");
7477
7478 if (mddev->pers) {
7479 mddev->pers->status(seq, mddev);
7480 seq_printf(seq, "\n ");
7481 if (mddev->pers->sync_request) {
7482 if (status_resync(seq, mddev))
7483 seq_printf(seq, "\n ");
7484 }
7485 } else
7486 seq_printf(seq, "\n ");
7487
7488 bitmap_status(seq, mddev->bitmap);
7489
7490 seq_printf(seq, "\n");
7491 }
7492 spin_unlock(&mddev->lock);
7493
7494 return 0;
7495}
7496
7497static const struct seq_operations md_seq_ops = {
7498 .start = md_seq_start,
7499 .next = md_seq_next,
7500 .stop = md_seq_stop,
7501 .show = md_seq_show,
7502};
7503
7504static int md_seq_open(struct inode *inode, struct file *file)
7505{
7506 struct seq_file *seq;
7507 int error;
7508
7509 error = seq_open(file, &md_seq_ops);
7510 if (error)
7511 return error;
7512
7513 seq = file->private_data;
7514 seq->poll_event = atomic_read(&md_event_count);
7515 return error;
7516}
7517
7518static int md_unloading;
7519static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7520{
7521 struct seq_file *seq = filp->private_data;
7522 int mask;
7523
7524 if (md_unloading)
7525 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7526 poll_wait(filp, &md_event_waiters, wait);
7527
	/* always allow read */
	mask = POLLIN | POLLRDNORM;
7530
7531 if (seq->poll_event != atomic_read(&md_event_count))
7532 mask |= POLLERR | POLLPRI;
7533 return mask;
7534}
7535
7536static const struct file_operations md_seq_fops = {
7537 .owner = THIS_MODULE,
7538 .open = md_seq_open,
7539 .read = seq_read,
7540 .llseek = seq_lseek,
	.release	= seq_release,
7542 .poll = mdstat_poll,
7543};
7544
7545int register_md_personality(struct md_personality *p)
7546{
7547 printk(KERN_INFO "md: %s personality registered for level %d\n",
7548 p->name, p->level);
7549 spin_lock(&pers_lock);
7550 list_add_tail(&p->list, &pers_list);
7551 spin_unlock(&pers_lock);
7552 return 0;
7553}
7554EXPORT_SYMBOL(register_md_personality);
7555
7556int unregister_md_personality(struct md_personality *p)
7557{
7558 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7559 spin_lock(&pers_lock);
7560 list_del_init(&p->list);
7561 spin_unlock(&pers_lock);
7562 return 0;
7563}
7564EXPORT_SYMBOL(unregister_md_personality);
7565
7566int register_md_cluster_operations(struct md_cluster_operations *ops,
7567 struct module *module)
7568{
7569 int ret = 0;
7570 spin_lock(&pers_lock);
7571 if (md_cluster_ops != NULL)
7572 ret = -EALREADY;
7573 else {
7574 md_cluster_ops = ops;
7575 md_cluster_mod = module;
7576 }
7577 spin_unlock(&pers_lock);
7578 return ret;
7579}
7580EXPORT_SYMBOL(register_md_cluster_operations);
7581
7582int unregister_md_cluster_operations(void)
7583{
7584 spin_lock(&pers_lock);
7585 md_cluster_ops = NULL;
7586 spin_unlock(&pers_lock);
7587 return 0;
7588}
7589EXPORT_SYMBOL(unregister_md_cluster_operations);
7590
7591int md_setup_cluster(struct mddev *mddev, int nodes)
7592{
7593 int err;
7594
7595 err = request_module("md-cluster");
7596 if (err) {
7597 pr_err("md-cluster module not found.\n");
7598 return -ENOENT;
7599 }
7600
7601 spin_lock(&pers_lock);
7602 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7603 spin_unlock(&pers_lock);
7604 return -ENOENT;
7605 }
7606 spin_unlock(&pers_lock);
7607
7608 return md_cluster_ops->join(mddev, nodes);
7609}
7610
7611void md_cluster_stop(struct mddev *mddev)
7612{
7613 if (!md_cluster_ops)
7614 return;
7615 md_cluster_ops->leave(mddev);
7616 module_put(md_cluster_mod);
7617}
7618
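/*
 * Heuristic used to throttle resync: compare each member disk's total
 * sector count against its sync_io counter to detect non-resync IO.
 * Returns 0 (not idle) if any device saw significant other activity.
 */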
7619static int is_mddev_idle(struct mddev *mddev, int init)
7620{
7621 struct md_rdev *rdev;
7622 int idle;
7623 int curr_events;
7624
7625 idle = 1;
7626 rcu_read_lock();
7627 rdev_for_each_rcu(rdev, mddev) {
7628 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7629 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7630 (int)part_stat_read(&disk->part0, sectors[1]) -
7631 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_event to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-sync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync requests, and further resync activity will
		 * always make curr_events less than last_events.
		 */
7654 if (init || curr_events - rdev->last_events > 64) {
7655 rdev->last_events = curr_events;
7656 idle = 0;
7657 }
7658 }
7659 rcu_read_unlock();
7660 return idle;
7661}
7662
7663void md_done_sync(struct mddev *mddev, int blocks, int ok)
7664{
	/* another "blocks" (512 byte) blocks have been synced */
7666 atomic_sub(blocks, &mddev->recovery_active);
7667 wake_up(&mddev->recovery_wait);
7668 if (!ok) {
7669 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7670 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7671 md_wakeup_thread(mddev->thread);
7672
7673 }
7674}
7675EXPORT_SYMBOL(md_done_sync);
7676
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 */
7682void md_write_start(struct mddev *mddev, struct bio *bi)
7683{
7684 int did_change = 0;
7685 if (bio_data_dir(bi) != WRITE)
7686 return;
7687
7688 BUG_ON(mddev->ro == 1);
7689 if (mddev->ro == 2) {
		/* need to switch to read/write */
7691 mddev->ro = 0;
7692 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7693 md_wakeup_thread(mddev->thread);
7694 md_wakeup_thread(mddev->sync_thread);
7695 did_change = 1;
7696 }
7697 atomic_inc(&mddev->writes_pending);
7698 if (mddev->safemode == 1)
7699 mddev->safemode = 0;
7700 if (mddev->in_sync) {
7701 spin_lock(&mddev->lock);
7702 if (mddev->in_sync) {
7703 mddev->in_sync = 0;
7704 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7705 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7706 md_wakeup_thread(mddev->thread);
7707 did_change = 1;
7708 }
7709 spin_unlock(&mddev->lock);
7710 }
7711 if (did_change)
7712 sysfs_notify_dirent_safe(mddev->sysfs_state);
7713 wait_event(mddev->sb_wait,
7714 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7715}
7716EXPORT_SYMBOL(md_write_start);
7717
7718void md_write_end(struct mddev *mddev)
7719{
7720 if (atomic_dec_and_test(&mddev->writes_pending)) {
7721 if (mddev->safemode == 2)
7722 md_wakeup_thread(mddev->thread);
7723 else if (mddev->safemode_delay)
7724 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7725 }
7726}
7727EXPORT_SYMBOL(md_write_end);
7728
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 *
 * In the ->external case MD_CHANGE_PENDING can not be cleared until
 * mddev->lock is dropped, so return -EAGAIN after notifying userspace.
 */
7738int md_allow_write(struct mddev *mddev)
7739{
7740 if (!mddev->pers)
7741 return 0;
7742 if (mddev->ro)
7743 return 0;
7744 if (!mddev->pers->sync_request)
7745 return 0;
7746
7747 spin_lock(&mddev->lock);
7748 if (mddev->in_sync) {
7749 mddev->in_sync = 0;
7750 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7751 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7752 if (mddev->safemode_delay &&
7753 mddev->safemode == 0)
7754 mddev->safemode = 1;
7755 spin_unlock(&mddev->lock);
7756 md_update_sb(mddev, 0);
7757 sysfs_notify_dirent_safe(mddev->sysfs_state);
7758 } else
7759 spin_unlock(&mddev->lock);
7760
7761 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7762 return -EAGAIN;
7763 else
7764 return 0;
7765}
7766EXPORT_SYMBOL_GPL(md_allow_write);
7767
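/*
 * Resync bookkeeping: speed is averaged over a ring of SYNC_MARKS
 * samples taken every SYNC_MARK_STEP jiffies, and the checkpoint in
 * the superblock is refreshed at least every UPDATE_FREQUENCY jiffies.
 */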
7768#define SYNC_MARKS 10
7769#define SYNC_MARK_STEP (3*HZ)
7770#define UPDATE_FREQUENCY (5*60*HZ)
7771void md_do_sync(struct md_thread *thread)
7772{
7773 struct mddev *mddev = thread->mddev;
7774 struct mddev *mddev2;
7775 unsigned int currspeed = 0,
7776 window;
7777 sector_t max_sectors,j, io_sectors, recovery_done;
7778 unsigned long mark[SYNC_MARKS];
7779 unsigned long update_time;
7780 sector_t mark_cnt[SYNC_MARKS];
7781 int last_mark,m;
7782 struct list_head *tmp;
7783 sector_t last_check;
7784 int skipped = 0;
7785 struct md_rdev *rdev;
7786 char *desc, *action = NULL;
7787 struct blk_plug plug;
7788 bool cluster_resync_finished = false;
7789
	/* just in case the thread restarts... */
7791 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7792 return;
7793 if (mddev->ro) {
7794 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7795 return;
7796 }
7797
7798 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7799 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7800 desc = "data-check";
7801 action = "check";
7802 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7803 desc = "requested-resync";
7804 action = "repair";
7805 } else
7806 desc = "resync";
7807 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7808 desc = "reshape";
7809 else
7810 desc = "recovery";
7811
7812 mddev->last_sync_action = action ?: desc;
7813
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 */
7829
7830 do {
7831 mddev->curr_resync = 2;
7832
7833 try_again:
7834 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7835 goto skip;
7836 for_each_mddev(mddev2, tmp) {
7837 if (mddev2 == mddev)
7838 continue;
7839 if (!mddev->parallel_resync
7840 && mddev2->curr_resync
7841 && match_mddev_units(mddev, mddev2)) {
7842 DEFINE_WAIT(wq);
7843 if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
7845 mddev->curr_resync = 1;
7846 wake_up(&resync_wait);
7847 }
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/* no need to wait here, we can wait the
					 * next time 'round when curr_resync == 2
					 */
					continue;
7853
				/* We need to wait 'interruptible' so as not to
				 * contribute to the load average, and not to
				 * be caught by 'softlockup'
				 */
7857 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7858 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7859 mddev2->curr_resync >= mddev->curr_resync) {
7860 printk(KERN_INFO "md: delaying %s of %s"
7861 " until %s has finished (they"
7862 " share one or more physical units)\n",
7863 desc, mdname(mddev), mdname(mddev2));
7864 mddev_put(mddev2);
7865 if (signal_pending(current))
7866 flush_signals(current);
7867 schedule();
7868 finish_wait(&resync_wait, &wq);
7869 goto try_again;
7870 }
7871 finish_wait(&resync_wait, &wq);
7872 }
7873 }
7874 } while (mddev->curr_resync < 2);
7875
7876 j = 0;
7877 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
7881 max_sectors = mddev->resync_max_sectors;
7882 atomic64_set(&mddev->resync_mismatches, 0);
7883
7884 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7885 j = mddev->resync_min;
7886 else if (!mddev->bitmap)
7887 j = mddev->recovery_cp;
7888
7889 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7890 max_sectors = mddev->resync_max_sectors;
7891 else {
		/* recovery follows the physical size of devices */
7893 max_sectors = mddev->dev_sectors;
7894 j = MaxSector;
7895 rcu_read_lock();
7896 rdev_for_each_rcu(rdev, mddev)
7897 if (rdev->raid_disk >= 0 &&
7898 !test_bit(Journal, &rdev->flags) &&
7899 !test_bit(Faulty, &rdev->flags) &&
7900 !test_bit(In_sync, &rdev->flags) &&
7901 rdev->recovery_offset < j)
7902 j = rdev->recovery_offset;
7903 rcu_read_unlock();
7904
		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
7913 if (mddev->bitmap) {
7914 mddev->pers->quiesce(mddev, 1);
7915 mddev->pers->quiesce(mddev, 0);
7916 }
7917 }
7918
7919 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7920 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7921 " %d KB/sec/disk.\n", speed_min(mddev));
7922 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7923 "(but not more than %d KB/sec) for %s.\n",
7924 speed_max(mddev), desc);
7925
7926 is_mddev_idle(mddev, 1);
7927
7928 io_sectors = 0;
7929 for (m = 0; m < SYNC_MARKS; m++) {
7930 mark[m] = jiffies;
7931 mark_cnt[m] = io_sectors;
7932 }
7933 last_mark = 0;
7934 mddev->resync_mark = mark[last_mark];
7935 mddev->resync_mark_cnt = mark_cnt[last_mark];
7936
	/*
	 * Tune reconstruction:
	 */
7940 window = 32*(PAGE_SIZE/512);
7941 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7942 window/2, (unsigned long long)max_sectors/2);
7943
7944 atomic_set(&mddev->recovery_active, 0);
7945 last_check = 0;
7946
7947 if (j>2) {
7948 printk(KERN_INFO
7949 "md: resuming %s of %s from checkpoint.\n",
7950 desc, mdname(mddev));
7951 mddev->curr_resync = j;
7952 } else
7953 mddev->curr_resync = 3;
7954 mddev->curr_resync_completed = j;
7955 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7956 md_new_event(mddev);
7957 update_time = jiffies;
7958
7959 blk_start_plug(&plug);
7960 while (j < max_sectors) {
7961 sector_t sectors;
7962
7963 skipped = 0;
7964
7965 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7966 ((mddev->curr_resync > mddev->curr_resync_completed &&
7967 (mddev->curr_resync - mddev->curr_resync_completed)
7968 > (max_sectors >> 4)) ||
7969 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7970 (j - mddev->curr_resync_completed)*2
7971 >= mddev->resync_max - mddev->curr_resync_completed ||
7972 mddev->curr_resync_completed > mddev->resync_max
7973 )) {
			/* time to update curr_resync_completed */
7975 wait_event(mddev->recovery_wait,
7976 atomic_read(&mddev->recovery_active) == 0);
7977 mddev->curr_resync_completed = j;
7978 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7979 j > mddev->recovery_cp)
7980 mddev->recovery_cp = j;
7981 update_time = jiffies;
7982 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7983 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7984 }
7985
7986 while (j >= mddev->resync_max &&
7987 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by userspace,
			 * we can block indefinitely, so flush any pending
			 * signal just in case and sleep interruptibly.
			 */
7992 flush_signals(current);
7993 wait_event_interruptible(mddev->recovery_wait,
7994 mddev->resync_max > j
7995 || test_bit(MD_RECOVERY_INTR,
7996 &mddev->recovery));
7997 }
7998
7999 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8000 break;
8001
8002 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8003 if (sectors == 0) {
8004 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8005 break;
8006 }
8007
8008 if (!skipped) {
8009 io_sectors += sectors;
8010 atomic_add(sectors, &mddev->recovery_active);
8011 }
8012
8013 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8014 break;
8015
8016 j += sectors;
8017 if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
8019 j = max_sectors;
8020 if (j > 2)
8021 mddev->curr_resync = j;
8022 mddev->curr_mark_cnt = io_sectors;
8023 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
8027 md_new_event(mddev);
8028
8029 if (last_check + window > io_sectors || j == max_sectors)
8030 continue;
8031
8032 last_check = io_sectors;
8033 repeat:
8034 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks */
8036 int next = (last_mark+1) % SYNC_MARKS;
8037
8038 mddev->resync_mark = mark[next];
8039 mddev->resync_mark_cnt = mark_cnt[next];
8040 mark[next] = jiffies;
8041 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8042 last_mark = next;
8043 }
8044
8045 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8046 break;
8047
		/*
		 * this loop exits only when we are either slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8056 cond_resched();
8057
8058 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8059 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8060 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8061
8062 if (currspeed > speed_min(mddev)) {
8063 if (currspeed > speed_max(mddev)) {
8064 msleep(500);
8065 goto repeat;
8066 }
8067 if (!is_mddev_idle(mddev, 0)) {
			/*
			 * Give other IO more of a chance.
			 * The faster the devices, the less we wait.
			 */
8072 wait_event(mddev->recovery_wait,
8073 !atomic_read(&mddev->recovery_active));
8074 }
8075 }
8076 }
8077 printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
8078 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8079 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8083 blk_finish_plug(&plug);
8084 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8085
8086 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8087 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8088 mddev->curr_resync > 2) {
8089 mddev->curr_resync_completed = mddev->curr_resync;
8090 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8091 }
8092
8093 if (mddev_is_clustered(mddev)) {
8094 md_cluster_ops->resync_finish(mddev);
8095 cluster_resync_finished = true;
8096 }
8097 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8098
8099 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8100 mddev->curr_resync > 2) {
8101 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8102 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8103 if (mddev->curr_resync >= mddev->recovery_cp) {
8104 printk(KERN_INFO
8105 "md: checkpointing %s of %s.\n",
8106 desc, mdname(mddev));
8107 if (test_bit(MD_RECOVERY_ERROR,
8108 &mddev->recovery))
8109 mddev->recovery_cp =
8110 mddev->curr_resync_completed;
8111 else
8112 mddev->recovery_cp =
8113 mddev->curr_resync;
8114 }
8115 } else
8116 mddev->recovery_cp = MaxSector;
8117 } else {
8118 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8119 mddev->curr_resync = MaxSector;
8120 rcu_read_lock();
8121 rdev_for_each_rcu(rdev, mddev)
8122 if (rdev->raid_disk >= 0 &&
8123 mddev->delta_disks >= 0 &&
8124 !test_bit(Journal, &rdev->flags) &&
8125 !test_bit(Faulty, &rdev->flags) &&
8126 !test_bit(In_sync, &rdev->flags) &&
8127 rdev->recovery_offset < mddev->curr_resync)
8128 rdev->recovery_offset = mddev->curr_resync;
8129 rcu_read_unlock();
8130 }
8131 }
8132 skip:
8133 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8134
8135 if (mddev_is_clustered(mddev) &&
8136 test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8137 !cluster_resync_finished)
8138 md_cluster_ops->resync_finish(mddev);
8139
8140 spin_lock(&mddev->lock);
8141 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
8143 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8144 mddev->resync_min = 0;
8145 mddev->resync_max = MaxSector;
8146 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8147 mddev->resync_min = mddev->curr_resync_completed;
8148 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8149 mddev->curr_resync = 0;
8150 spin_unlock(&mddev->lock);
8151
8152 wake_up(&resync_wait);
8153 md_wakeup_thread(mddev->thread);
8154 return;
8155}
8156EXPORT_SYMBOL_GPL(md_do_sync);
8157
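/*
 * Try to remove any failed or surplus device, then register working
 * spares with the personality.  Returns the number of spares that are
 * now being recovered into the array; if 'this' is non-NULL only that
 * one device is considered.
 */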
8158static int remove_and_add_spares(struct mddev *mddev,
8159 struct md_rdev *this)
8160{
8161 struct md_rdev *rdev;
8162 int spares = 0;
8163 int removed = 0;
8164
8165 rdev_for_each(rdev, mddev)
8166 if ((this == NULL || rdev == this) &&
8167 rdev->raid_disk >= 0 &&
8168 !test_bit(Blocked, &rdev->flags) &&
8169 (test_bit(Faulty, &rdev->flags) ||
8170 (!test_bit(In_sync, &rdev->flags) &&
8171 !test_bit(Journal, &rdev->flags))) &&
8172 atomic_read(&rdev->nr_pending)==0) {
8173 if (mddev->pers->hot_remove_disk(
8174 mddev, rdev) == 0) {
8175 sysfs_unlink_rdev(mddev, rdev);
8176 rdev->raid_disk = -1;
8177 removed++;
8178 }
8179 }
8180 if (removed && mddev->kobj.sd)
8181 sysfs_notify(&mddev->kobj, NULL, "degraded");
8182
8183 if (this && removed)
8184 goto no_add;
8185
8186 rdev_for_each(rdev, mddev) {
8187 if (this && this != rdev)
8188 continue;
8189 if (test_bit(Candidate, &rdev->flags))
8190 continue;
8191 if (rdev->raid_disk >= 0 &&
8192 !test_bit(In_sync, &rdev->flags) &&
8193 !test_bit(Journal, &rdev->flags) &&
8194 !test_bit(Faulty, &rdev->flags))
8195 spares++;
8196 if (rdev->raid_disk >= 0)
8197 continue;
8198 if (test_bit(Faulty, &rdev->flags))
8199 continue;
8200 if (!test_bit(Journal, &rdev->flags)) {
8201 if (mddev->ro &&
8202 ! (rdev->saved_raid_disk >= 0 &&
8203 !test_bit(Bitmap_sync, &rdev->flags)))
8204 continue;
8205
8206 rdev->recovery_offset = 0;
8207 }
		if (mddev->pers->
		    hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
8212 if (!test_bit(Journal, &rdev->flags))
8213 spares++;
8214 md_new_event(mddev);
8215 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8216 }
8217 }
8218no_add:
8219 if (removed)
8220 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8221 return spares;
8222}
8223
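/*
 * Runs from md_misc_wq (queued via mddev->del_work): for clustered
 * arrays the resync must first be negotiated with resync_start(), then
 * the "resync" thread is registered and woken; on failure all the
 * recovery bits are cleared again.
 */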
8224static void md_start_sync(struct work_struct *ws)
8225{
8226 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8227 int ret = 0;
8228
8229 if (mddev_is_clustered(mddev)) {
8230 ret = md_cluster_ops->resync_start(mddev);
8231 if (ret) {
8232 mddev->sync_thread = NULL;
8233 goto out;
8234 }
8235 }
8236
8237 mddev->sync_thread = md_register_thread(md_do_sync,
8238 mddev,
8239 "resync");
8240out:
8241 if (!mddev->sync_thread) {
8242 if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
8243 printk(KERN_ERR "%s: could not start resync"
8244 " thread...\n",
8245 mdname(mddev));
8246
8247 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8248 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8249 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8250 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8251 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8252 wake_up(&resync_wait);
8253 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8254 &mddev->recovery))
8255 if (mddev->sysfs_action)
8256 sysfs_notify_dirent_safe(mddev->sysfs_action);
8257 } else
8258 md_wakeup_thread(mddev->sync_thread);
8259 sysfs_notify_dirent_safe(mddev->sysfs_action);
8260 md_new_event(mddev);
8261}
8262
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't disturb it
 *	(ie. don't start or stop it)
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there are any faulty devices, remove them.
 *  5/ If array is degraded, try to add spare devices.
 *  6/ If array has spares or is not in-sync, start a resync thread.
 */
8285void md_check_recovery(struct mddev *mddev)
8286{
8287 if (mddev->suspended)
8288 return;
8289
8290 if (mddev->bitmap)
8291 bitmap_daemon_work(mddev);
8292
8293 if (signal_pending(current)) {
8294 if (mddev->pers->sync_request && !mddev->external) {
8295 printk(KERN_INFO "md: %s in immediate safe mode\n",
8296 mdname(mddev));
8297 mddev->safemode = 2;
8298 }
8299 flush_signals(current);
8300 }
8301
8302 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8303 return;
8304 if ( ! (
8305 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
8306 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8307 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8308 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8309 (mddev->external == 0 && mddev->safemode == 1) ||
8310 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8311 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8312 ))
8313 return;
8314
8315 if (mddev_trylock(mddev)) {
8316 int spares = 0;
8317
8318 if (mddev->ro) {
8319 struct md_rdev *rdev;
			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);

			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
8339 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8340 md_reap_sync_thread(mddev);
8341 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8342 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8343 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
8344 goto unlock;
8345 }
8346
8347 if (mddev_is_clustered(mddev)) {
8348 struct md_rdev *rdev;
			/* kick the device if another node issued a
			 * remove disk
			 */
8352 rdev_for_each(rdev, mddev) {
8353 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8354 rdev->raid_disk < 0)
8355 md_kick_rdev_from_array(rdev);
8356 }
8357
8358 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8359 md_reload_sb(mddev, mddev->good_device_nr);
8360 }
8361
8362 if (!mddev->external) {
8363 int did_change = 0;
8364 spin_lock(&mddev->lock);
8365 if (mddev->safemode &&
8366 !atomic_read(&mddev->writes_pending) &&
8367 !mddev->in_sync &&
8368 mddev->recovery_cp == MaxSector) {
8369 mddev->in_sync = 1;
8370 did_change = 1;
8371 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8372 }
8373 if (mddev->safemode == 1)
8374 mddev->safemode = 0;
8375 spin_unlock(&mddev->lock);
8376 if (did_change)
8377 sysfs_notify_dirent_safe(mddev->sysfs_state);
8378 }
8379
8380 if (mddev->flags & MD_UPDATE_SB_FLAGS)
8381 md_update_sb(mddev, 0);
8382
8383 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8384 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
8386 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8387 goto unlock;
8388 }
8389 if (mddev->sync_thread) {
8390 md_reap_sync_thread(mddev);
8391 goto unlock;
8392 }
8393
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
8396 mddev->curr_resync_completed = 0;
8397 spin_lock(&mddev->lock);
8398 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8399 spin_unlock(&mddev->lock);
8400
		/* Clear some bits that don't mean anything, but
		 * might be left set
		 */
8403 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8404 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8405
8406 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8407 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8408 goto not_running;
8409
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */
8416 if (mddev->reshape_position != MaxSector) {
8417 if (mddev->pers->check_reshape == NULL ||
8418 mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
8420 goto not_running;
8421 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8422 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8423 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8424 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8425 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8426 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8427 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8428 } else if (mddev->recovery_cp < MaxSector) {
8429 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8430 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8431 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
8433 goto not_running;
8434
8435 if (mddev->pers->sync_request) {
8436 if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
8441 bitmap_write_all(mddev->bitmap);
8442 }
8443 INIT_WORK(&mddev->del_work, md_start_sync);
8444 queue_work(md_misc_wq, &mddev->del_work);
8445 goto unlock;
8446 }
8447 not_running:
8448 if (!mddev->sync_thread) {
8449 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8450 wake_up(&resync_wait);
8451 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8452 &mddev->recovery))
8453 if (mddev->sysfs_action)
8454 sysfs_notify_dirent_safe(mddev->sysfs_action);
8455 }
8456 unlock:
8457 wake_up(&mddev->sb_wait);
8458 mddev_unlock(mddev);
8459 }
8460}
8461EXPORT_SYMBOL(md_check_recovery);
8462
8463void md_reap_sync_thread(struct mddev *mddev)
8464{
8465 struct md_rdev *rdev;

	/* resync has finished, collect result */
8468 md_unregister_thread(&mddev->sync_thread);
8469 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8470 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success... */
		/* activate any spares */
8473 if (mddev->pers->spare_active(mddev)) {
8474 sysfs_notify(&mddev->kobj, NULL,
8475 "degraded");
8476 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8477 }
8478 }
8479 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8480 mddev->pers->finish_reshape)
8481 mddev->pers->finish_reshape(mddev);

	/* Only forget the saved recovery positions when the array is
	 * fully in-sync; on a degraded array the saved_raid_disk hints
	 * may still be needed.
	 */
8486 if (!mddev->degraded)
8487 rdev_for_each(rdev, mddev)
8488 rdev->saved_raid_disk = -1;
8489
8490 md_update_sb(mddev, 1);
8491 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8492 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8493 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8494 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8495 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8496 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8497 wake_up(&resync_wait);
	/* flag recovery needed just to double check */
8499 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8500 sysfs_notify_dirent_safe(mddev->sysfs_action);
8501 md_new_event(mddev);
8502 if (mddev->event_work.func)
8503 queue_work(md_misc_wq, &mddev->event_work);
8504}
8505EXPORT_SYMBOL(md_reap_sync_thread);
8506
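/*
 * Wait (bounded to 5 seconds) for the device to leave the Blocked
 * state, then drop the pending reference the caller holds on it.
 */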
8507void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8508{
8509 sysfs_notify_dirent_safe(rdev->sysfs_state);
8510 wait_event_timeout(rdev->blocked_wait,
8511 !test_bit(Blocked, &rdev->flags) &&
8512 !test_bit(BlockedBadBlocks, &rdev->flags),
8513 msecs_to_jiffies(5000));
8514 rdev_dec_pending(rdev, mddev);
8515}
8516EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8517
8518void md_finish_reshape(struct mddev *mddev)
8519{
	/* called by the personality module when a reshape completes. */
8521 struct md_rdev *rdev;
8522
8523 rdev_for_each(rdev, mddev) {
8524 if (rdev->data_offset > rdev->new_data_offset)
8525 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8526 else
8527 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8528 rdev->data_offset = rdev->new_data_offset;
8529 }
8530}
8531EXPORT_SYMBOL(md_finish_reshape);
8532
/* Bad block management */

/* Returns 1 on success, 0 on failure */
8536int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8537 int is_new)
8538{
8539 int rv;
8540 if (is_new)
8541 s += rdev->new_data_offset;
8542 else
8543 s += rdev->data_offset;
8544 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8545 if (rv == 0) {
		/* Make sure they get written out promptly */
8547 sysfs_notify_dirent_safe(rdev->sysfs_state);
8548 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8549 set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
8550 md_wakeup_thread(rdev->mddev->thread);
8551 return 1;
8552 } else
8553 return 0;
8554}
8555EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8556
8557int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8558 int is_new)
8559{
8560 if (is_new)
8561 s += rdev->new_data_offset;
8562 else
8563 s += rdev->data_offset;
8564 return badblocks_clear(&rdev->badblocks,
8565 s, sectors);
8566}
8567EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8568
8569static int md_notify_reboot(struct notifier_block *this,
8570 unsigned long code, void *x)
8571{
8572 struct list_head *tmp;
8573 struct mddev *mddev;
8574 int need_delay = 0;
8575
8576 for_each_mddev(mddev, tmp) {
8577 if (mddev_trylock(mddev)) {
8578 if (mddev->pers)
8579 __md_stop_writes(mddev);
8580 if (mddev->persistent)
8581 mddev->safemode = 2;
8582 mddev_unlock(mddev);
8583 }
8584 need_delay = 1;
8585 }
	/*
	 * certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
8591
8592 if (need_delay)
8593 mdelay(1000*1);
8594
8595 return NOTIFY_DONE;
8596}
8597
8598static struct notifier_block md_notifier = {
8599 .notifier_call = md_notify_reboot,
8600 .next = NULL,
8601 .priority = INT_MAX,
8602};
8603
8604static void md_geninit(void)
8605{
8606 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8607
8608 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8609}
8610
8611static int __init md_init(void)
8612{
8613 int ret = -ENOMEM;
8614
8615 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8616 if (!md_wq)
8617 goto err_wq;
8618
8619 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8620 if (!md_misc_wq)
8621 goto err_misc_wq;
8622
8623 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8624 goto err_md;
8625
8626 if ((ret = register_blkdev(0, "mdp")) < 0)
8627 goto err_mdp;
8628 mdp_major = ret;
8629
8630 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8631 md_probe, NULL, NULL);
8632 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8633 md_probe, NULL, NULL);
8634
8635 register_reboot_notifier(&md_notifier);
8636 raid_table_header = register_sysctl_table(raid_root_table);
8637
8638 md_geninit();
8639 return 0;
8640
8641err_mdp:
8642 unregister_blkdev(MD_MAJOR, "md");
8643err_md:
8644 destroy_workqueue(md_misc_wq);
8645err_misc_wq:
8646 destroy_workqueue(md_wq);
8647err_wq:
8648 return ret;
8649}
8650
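/*
 * Clustered MD: after another node updated the superblock, fold any
 * role changes recorded there into our view of the member devices.
 */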
8651static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8652{
8653 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8654 struct md_rdev *rdev2;
8655 int role, ret;
8656 char b[BDEVNAME_SIZE];
8657
	/* Check for change of roles in the active devices */
8659 rdev_for_each(rdev2, mddev) {
8660 if (test_bit(Faulty, &rdev2->flags))
8661 continue;
8662
		/* Check if the roles changed */
8664 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8665
8666 if (test_bit(Candidate, &rdev2->flags)) {
8667 if (role == 0xfffe) {
8668 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8669 md_kick_rdev_from_array(rdev2);
8670 continue;
8671 }
8672 else
8673 clear_bit(Candidate, &rdev2->flags);
8674 }
8675
8676 if (role != rdev2->raid_disk) {
			/* got activated */
8678 if (rdev2->raid_disk == -1 && role != 0xffff) {
8679 rdev2->saved_raid_disk = role;
8680 ret = remove_and_add_spares(mddev, rdev2);
8681 pr_info("Activated spare: %s\n",
8682 bdevname(rdev2->bdev,b));
8683 }
			/*
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
8689 if ((role == 0xfffe) || (role == 0xfffd)) {
8690 md_error(mddev, rdev2);
8691 clear_bit(Blocked, &rdev2->flags);
8692 }
8693 }
8694 }
8695
8696 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8697 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8698
	/* Finally set the event to be up to date */
8700 mddev->events = le64_to_cpu(sb->events);
8701}
8702
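/*
 * Re-read the superblock of one member device.  The old sb page is
 * kept aside and restored on failure so in-memory state stays valid.
 */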
8703static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8704{
8705 int err;
8706 struct page *swapout = rdev->sb_page;
8707 struct mdp_superblock_1 *sb;
8708
	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
8712 rdev->sb_page = NULL;
8713 alloc_disk_sb(rdev);
8714 ClearPageUptodate(rdev->sb_page);
8715 rdev->sb_loaded = 0;
8716 err = super_types[mddev->major_version].load_super(rdev, NULL, mddev->minor_version);
8717
8718 if (err < 0) {
8719 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8720 __func__, __LINE__, rdev->desc_nr, err);
8721 put_page(rdev->sb_page);
8722 rdev->sb_page = swapout;
8723 rdev->sb_loaded = 1;
8724 return err;
8725 }
8726
8727 sb = page_address(rdev->sb_page);

	/* Read the offset unconditionally, even if MD_FEATURE_RECOVERY_OFFSET
	 * is not set
	 */
8732 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8733 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8734
	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
8738 if (rdev->recovery_offset == MaxSector &&
8739 !test_bit(In_sync, &rdev->flags) &&
8740 mddev->pers->spare_active(mddev))
8741 sysfs_notify(&mddev->kobj, NULL, "degraded");
8742
8743 put_page(swapout);
8744 return 0;
8745}
8746
8747void md_reload_sb(struct mddev *mddev, int nr)
8748{
8749 struct md_rdev *rdev;
8750 int err;
8751
	/* Find the rdev */
8753 rdev_for_each_rcu(rdev, mddev) {
8754 if (rdev->desc_nr == nr)
8755 break;
8756 }
8757
8758 if (!rdev || rdev->desc_nr != nr) {
8759 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8760 return;
8761 }
8762
8763 err = read_rdev(mddev, rdev);
8764 if (err < 0)
8765 return;
8766
8767 check_sb_changes(mddev, rdev);
8768
	/* Read all rdev's to update recovery_offset */
8770 rdev_for_each_rcu(rdev, mddev)
8771 read_rdev(mddev, rdev);
8772}
8773EXPORT_SYMBOL(md_reload_sb);
8774
8775#ifndef MODULE
/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */
8782static LIST_HEAD(all_detected_devices);
8783struct detected_devices_node {
8784 struct list_head list;
8785 dev_t dev;
8786};
8787
8788void md_autodetect_dev(dev_t dev)
8789{
8790 struct detected_devices_node *node_detected_dev;
8791
8792 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8793 if (node_detected_dev) {
8794 node_detected_dev->dev = dev;
8795 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8796 } else {
8797 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8798 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8799 }
8800}
8801
8802static void autostart_arrays(int part)
8803{
8804 struct md_rdev *rdev;
8805 struct detected_devices_node *node_detected_dev;
8806 dev_t dev;
8807 int i_scanned, i_passed;
8808
8809 i_scanned = 0;
8810 i_passed = 0;
8811
8812 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8813
8814 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8815 i_scanned++;
8816 node_detected_dev = list_entry(all_detected_devices.next,
8817 struct detected_devices_node, list);
8818 list_del(&node_detected_dev->list);
8819 dev = node_detected_dev->dev;
8820 kfree(node_detected_dev);
8821 rdev = md_import_device(dev,0, 90);
8822 if (IS_ERR(rdev))
8823 continue;
8824
8825 if (test_bit(Faulty, &rdev->flags))
8826 continue;
8827
8828 set_bit(AutoDetected, &rdev->flags);
8829 list_add(&rdev->same_set, &pending_raid_disks);
8830 i_passed++;
8831 }
8832
8833 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8834 i_scanned, i_passed);
8835
8836 autorun_devices(part);
8837}
8838
8839#endif
8840
8841static __exit void md_exit(void)
8842{
8843 struct mddev *mddev;
8844 struct list_head *tmp;
8845 int delay = 1;
8846
8847 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8848 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8849
8850 unregister_blkdev(MD_MAJOR,"md");
8851 unregister_blkdev(mdp_major, "mdp");
8852 unregister_reboot_notifier(&md_notifier);
8853 unregister_sysctl_table(raid_table_header);
8854

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
8858 md_unloading = 1;
8859 while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
8861 wake_up(&md_event_waiters);
8862 msleep(delay);
8863 delay += delay;
8864 }
8865 remove_proc_entry("mdstat", NULL);
8866
8867 for_each_mddev(mddev, tmp) {
8868 export_array(mddev);
8869 mddev->hold_active = 0;
8870 }
8871 destroy_workqueue(md_misc_wq);
8872 destroy_workqueue(md_wq);
8873}
8874
8875subsys_initcall(md_init);
8876module_exit(md_exit)
8877
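/*
 * Module parameters: 'start_ro' makes newly assembled arrays start in
 * auto-read-only mode until the first write, 'start_dirty_degraded'
 * permits starting dirty degraded arrays, and writing to 'new_array'
 * creates a named array.
 */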
8878static int get_ro(char *buffer, struct kernel_param *kp)
8879{
8880 return sprintf(buffer, "%d", start_readonly);
8881}
8882static int set_ro(const char *val, struct kernel_param *kp)
8883{
8884 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8885}
8886
8887module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8888module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8889module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8890
8891MODULE_LICENSE("GPL");
8892MODULE_DESCRIPTION("MD RAID framework");
8893MODULE_ALIAS("md");
8894MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8895