/*
 * md.c : Multiple Devices driver for Linux
 */
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is the list of registered personalities (i.e. RAID levels);
 * pers_lock protects it.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Resync/reconstruction speed limits, in KB/sec: the minimum is a
 * guaranteed floor, the maximum an absolute ceiling.  Both can be set
 * via /proc/sys/dev/raid/speed_limit_{min,max} (see raid_table below)
 * and overridden per array through sync_speed_min/sync_speed_max.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}
116
117static struct ctl_table_header *raid_table_header;
118
119static struct ctl_table raid_table[] = {
120 {
121 .procname = "speed_limit_min",
122 .data = &sysctl_speed_limit_min,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 {
128 .procname = "speed_limit_max",
129 .data = &sysctl_speed_limit_max,
130 .maxlen = sizeof(int),
131 .mode = S_IRUGO|S_IWUSR,
132 .proc_handler = proc_dointvec,
133 },
134 { }
135};
136
137static struct ctl_table raid_dir_table[] = {
138 {
139 .procname = "raid",
140 .maxlen = 0,
141 .mode = S_IRUGO|S_IXUGO,
142 .child = raid_table,
143 },
144 { }
145};
146
147static struct ctl_table raid_root_table[] = {
148 {
149 .procname = "dev",
150 .maxlen = 0,
151 .mode = 0555,
152 .child = raid_dir_table,
153 },
154 { }
155};
156
157static const struct block_device_operations md_fops;
158
159static int start_readonly;

/* bio_alloc_mddev / bio_clone_mddev:
 * like bio_alloc / bio_clone, but allocate from the array's private
 * bio_set when one exists.
 */
165struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
166 struct mddev *mddev)
167{
168 struct bio *b;
169
170 if (!mddev || !mddev->bio_set)
171 return bio_alloc(gfp_mask, nr_iovecs);
172
173 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
174 if (!b)
175 return NULL;
176 return b;
177}
178EXPORT_SYMBOL_GPL(bio_alloc_mddev);
179
180struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
181 struct mddev *mddev)
182{
183 if (!mddev || !mddev->bio_set)
184 return bio_clone(bio, gfp_mask);
185
186 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
187}
188EXPORT_SYMBOL_GPL(bio_clone_mddev);
189
/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
200static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
201static atomic_t md_event_count;
202void md_new_event(struct mddev *mddev)
203{
204 atomic_inc(&md_event_count);
205 wake_up(&md_event_waiters);
206}
207EXPORT_SYMBOL_GPL(md_new_event);
208
/*
 * Enables to iterate over all existing md arrays
 * all_mddevs_lock protects this list.
 */
213static LIST_HEAD(all_mddevs);
214static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding a
 * reference to the current mddev must mddev_put() it.
 */
223#define for_each_mddev(_mddev,_tmp) \
224 \
225 for (({ spin_lock(&all_mddevs_lock); \
226 _tmp = all_mddevs.next; \
227 _mddev = NULL;}); \
228 ({ if (_tmp != &all_mddevs) \
229 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
230 spin_unlock(&all_mddevs_lock); \
231 if (_mddev) mddev_put(_mddev); \
232 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
233 _tmp != &all_mddevs;}); \
234 ({ spin_lock(&all_mddevs_lock); \
235 _tmp = _tmp->next;}) \
236 )
237
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device
 * is being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible under reconfiguration.
 */
245static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
246{
247 const int rw = bio_data_dir(bio);
248 struct mddev *mddev = q->queuedata;
249 unsigned int sectors;
250 int cpu;
251
252 blk_queue_split(q, &bio, q->bio_split);
253
254 if (mddev == NULL || mddev->pers == NULL) {
255 bio_io_error(bio);
256 return BLK_QC_T_NONE;
257 }
258 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
259 if (bio_sectors(bio) != 0)
260 bio->bi_error = -EROFS;
261 bio_endio(bio);
262 return BLK_QC_T_NONE;
263 }
264 smp_rmb();
265 rcu_read_lock();
266 if (mddev->suspended) {
267 DEFINE_WAIT(__wait);
268 for (;;) {
269 prepare_to_wait(&mddev->sb_wait, &__wait,
270 TASK_UNINTERRUPTIBLE);
271 if (!mddev->suspended)
272 break;
273 rcu_read_unlock();
274 schedule();
275 rcu_read_lock();
276 }
277 finish_wait(&mddev->sb_wait, &__wait);
278 }
279 atomic_inc(&mddev->active_io);
280 rcu_read_unlock();
281
	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
287 mddev->pers->make_request(mddev, bio);
288
289 cpu = part_stat_lock();
290 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
291 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
292 part_stat_unlock();
293
294 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
295 wake_up(&mddev->sb_wait);
296
297 return BLK_QC_T_NONE;
298}
299
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 */
306void mddev_suspend(struct mddev *mddev)
307{
308 if (mddev->suspended++)
309 return;
310 synchronize_rcu();
311 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
312 mddev->pers->quiesce(mddev, 1);
313
314 del_timer_sync(&mddev->safemode_timer);
315}
316EXPORT_SYMBOL_GPL(mddev_suspend);
317
318void mddev_resume(struct mddev *mddev)
319{
320 if (--mddev->suspended)
321 return;
322 wake_up(&mddev->sb_wait);
323 mddev->pers->quiesce(mddev, 0);
324
325 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
326 md_wakeup_thread(mddev->thread);
327 md_wakeup_thread(mddev->sync_thread);
328}
329EXPORT_SYMBOL_GPL(mddev_resume);
330
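/*
 * Congestion reporting: a suspended array always reports congested;
 * otherwise defer to the personality's ->congested method when it
 * provides one.
 */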
331int mddev_congested(struct mddev *mddev, int bits)
332{
333 struct md_personality *pers = mddev->pers;
334 int ret = 0;
335
336 rcu_read_lock();
337 if (mddev->suspended)
338 ret = 1;
339 else if (pers && pers->congested)
340 ret = pers->congested(mddev, bits);
341 rcu_read_unlock();
342 return ret;
343}
344EXPORT_SYMBOL_GPL(mddev_congested);
345static int md_congested(void *data, int bits)
346{
347 struct mddev *mddev = data;
348 return mddev_congested(mddev, bits);
349}
350
/*
 * Generic flush handling for md
 */
355static void md_end_flush(struct bio *bio)
356{
357 struct md_rdev *rdev = bio->bi_private;
358 struct mddev *mddev = rdev->mddev;
359
360 rdev_dec_pending(rdev, mddev);
361
	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
365 }
366 bio_put(bio);
367}
368
369static void md_submit_flush_data(struct work_struct *ws);
370
371static void submit_flushes(struct work_struct *ws)
372{
373 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
374 struct md_rdev *rdev;
375
376 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
377 atomic_set(&mddev->flush_pending, 1);
378 rcu_read_lock();
379 rdev_for_each_rcu(rdev, mddev)
380 if (rdev->raid_disk >= 0 &&
381 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
389 rcu_read_unlock();
390 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
391 bi->bi_end_io = md_end_flush;
392 bi->bi_private = rdev;
393 bi->bi_bdev = rdev->bdev;
394 atomic_inc(&mddev->flush_pending);
395 submit_bio(WRITE_FLUSH, bi);
396 rcu_read_lock();
397 rdev_dec_pending(rdev, mddev);
398 }
399 rcu_read_unlock();
400 if (atomic_dec_and_test(&mddev->flush_pending))
401 queue_work(md_wq, &mddev->flush_work);
402}
403
404static void md_submit_flush_data(struct work_struct *ws)
405{
406 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
407 struct bio *bio = mddev->flush_bio;
408
	if (bio->bi_iter.bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio);
412 else {
413 bio->bi_rw &= ~REQ_FLUSH;
414 mddev->pers->make_request(mddev, bio);
415 }
416
417 mddev->flush_bio = NULL;
418 wake_up(&mddev->sb_wait);
419}
420
421void md_flush_request(struct mddev *mddev, struct bio *bio)
422{
423 spin_lock_irq(&mddev->lock);
424 wait_event_lock_irq(mddev->sb_wait,
425 !mddev->flush_bio,
426 mddev->lock);
427 mddev->flush_bio = bio;
428 spin_unlock_irq(&mddev->lock);
429
430 INIT_WORK(&mddev->flush_work, submit_flushes);
431 queue_work(md_wq, &mddev->flush_work);
432}
433EXPORT_SYMBOL(md_flush_request);
434
435void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
436{
437 struct mddev *mddev = cb->data;
438 md_wakeup_thread(mddev->thread);
439 kfree(cb);
440}
441EXPORT_SYMBOL(md_unplug);
442
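/*
 * Reference counting for mddev structures.  mddev_get() takes a
 * reference; mddev_put() drops it and, once the count reaches zero and
 * the array is no longer configured or held active, tears it down.
 */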
443static inline struct mddev *mddev_get(struct mddev *mddev)
444{
445 atomic_inc(&mddev->active);
446 return mddev;
447}
448
449static void mddev_delayed_delete(struct work_struct *ws);
450
451static void mddev_put(struct mddev *mddev)
452{
453 struct bio_set *bs = NULL;
454
455 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
456 return;
457 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
458 mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
461 list_del_init(&mddev->all_mddevs);
462 bs = mddev->bio_set;
463 mddev->bio_set = NULL;
464 if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
470 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
471 queue_work(md_misc_wq, &mddev->del_work);
472 } else
473 kfree(mddev);
474 }
475 spin_unlock(&all_mddevs_lock);
476 if (bs)
477 bioset_free(bs);
478}
479
480static void md_safemode_timeout(unsigned long data);
481
482void mddev_init(struct mddev *mddev)
483{
484 mutex_init(&mddev->open_mutex);
485 mutex_init(&mddev->reconfig_mutex);
486 mutex_init(&mddev->bitmap_info.mutex);
487 INIT_LIST_HEAD(&mddev->disks);
488 INIT_LIST_HEAD(&mddev->all_mddevs);
489 setup_timer(&mddev->safemode_timer, md_safemode_timeout,
490 (unsigned long) mddev);
491 atomic_set(&mddev->active, 1);
492 atomic_set(&mddev->openers, 0);
493 atomic_set(&mddev->active_io, 0);
494 spin_lock_init(&mddev->lock);
495 atomic_set(&mddev->flush_pending, 0);
496 init_waitqueue_head(&mddev->sb_wait);
497 init_waitqueue_head(&mddev->recovery_wait);
498 mddev->reshape_position = MaxSector;
499 mddev->reshape_backwards = 0;
500 mddev->last_sync_action = "none";
501 mddev->resync_min = 0;
502 mddev->resync_max = MaxSector;
503 mddev->level = LEVEL_NONE;
504}
505EXPORT_SYMBOL_GPL(mddev_init);
506
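/*
 * Find the mddev for a given unit number, allocating and registering a
 * new one (or, when unit == 0, picking an unused minor) if none exists
 * yet.  Returns the mddev with a reference held, or NULL on failure.
 */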
507static struct mddev *mddev_find(dev_t unit)
508{
509 struct mddev *mddev, *new = NULL;
510
511 if (unit && MAJOR(unit) != MD_MAJOR)
512 unit &= ~((1<<MdpMinorShift)-1);
513
514 retry:
515 spin_lock(&all_mddevs_lock);
516
517 if (unit) {
518 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
519 if (mddev->unit == unit) {
520 mddev_get(mddev);
521 spin_unlock(&all_mddevs_lock);
522 kfree(new);
523 return mddev;
524 }
525
526 if (new) {
527 list_add(&new->all_mddevs, &all_mddevs);
528 spin_unlock(&all_mddevs_lock);
529 new->hold_active = UNTIL_IOCTL;
530 return new;
531 }
532 } else if (new) {
533
534 static int next_minor = 512;
535 int start = next_minor;
536 int is_free = 0;
537 int dev = 0;
538 while (!is_free) {
539 dev = MKDEV(MD_MAJOR, next_minor);
540 next_minor++;
541 if (next_minor > MINORMASK)
542 next_minor = 0;
543 if (next_minor == start) {
544
545 spin_unlock(&all_mddevs_lock);
546 kfree(new);
547 return NULL;
548 }
549
550 is_free = 1;
551 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
552 if (mddev->unit == dev) {
553 is_free = 0;
554 break;
555 }
556 }
557 new->unit = dev;
558 new->md_minor = MINOR(dev);
559 new->hold_active = UNTIL_STOP;
560 list_add(&new->all_mddevs, &all_mddevs);
561 spin_unlock(&all_mddevs_lock);
562 return new;
563 }
564 spin_unlock(&all_mddevs_lock);
565
566 new = kzalloc(sizeof(*new), GFP_KERNEL);
567 if (!new)
568 return NULL;
569
570 new->unit = unit;
571 if (MAJOR(unit) == MD_MAJOR)
572 new->md_minor = MINOR(unit);
573 else
574 new->md_minor = MINOR(unit) >> MdpMinorShift;
575
576 mddev_init(new);
577
578 goto retry;
579}
580
581static struct attribute_group md_redundancy_group;
582
583void mddev_unlock(struct mddev *mddev)
584{
585 if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active high while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
598 struct attribute_group *to_remove = mddev->to_remove;
599 mddev->to_remove = NULL;
600 mddev->sysfs_active = 1;
601 mutex_unlock(&mddev->reconfig_mutex);
602
603 if (mddev->kobj.sd) {
604 if (to_remove != &md_redundancy_group)
605 sysfs_remove_group(&mddev->kobj, to_remove);
606 if (mddev->pers == NULL ||
607 mddev->pers->sync_request == NULL) {
608 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
609 if (mddev->sysfs_action)
610 sysfs_put(mddev->sysfs_action);
611 mddev->sysfs_action = NULL;
612 }
613 }
614 mddev->sysfs_active = 0;
615 } else
616 mutex_unlock(&mddev->reconfig_mutex);
617
	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
621 spin_lock(&pers_lock);
622 md_wakeup_thread(mddev->thread);
623 spin_unlock(&pers_lock);
624}
625EXPORT_SYMBOL_GPL(mddev_unlock);
626
627struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
628{
629 struct md_rdev *rdev;
630
631 rdev_for_each_rcu(rdev, mddev)
632 if (rdev->desc_nr == nr)
633 return rdev;
634
635 return NULL;
636}
637EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
638
639static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
640{
641 struct md_rdev *rdev;
642
643 rdev_for_each(rdev, mddev)
644 if (rdev->bdev->bd_dev == dev)
645 return rdev;
646
647 return NULL;
648}
649
650static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
651{
652 struct md_rdev *rdev;
653
654 rdev_for_each_rcu(rdev, mddev)
655 if (rdev->bdev->bd_dev == dev)
656 return rdev;
657
658 return NULL;
659}
660
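/* Find a registered personality, matching by level number or by name. */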
661static struct md_personality *find_pers(int level, char *clevel)
662{
663 struct md_personality *pers;
664 list_for_each_entry(pers, &pers_list, list) {
665 if (level != LEVEL_NONE && pers->level == level)
666 return pers;
667 if (strcmp(pers->name, clevel)==0)
668 return pers;
669 }
670 return NULL;
671}
672
673
674static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
675{
676 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
677 return MD_NEW_SIZE_SECTORS(num_sectors);
678}
679
680static int alloc_disk_sb(struct md_rdev *rdev)
681{
682 rdev->sb_page = alloc_page(GFP_KERNEL);
683 if (!rdev->sb_page) {
684 printk(KERN_ALERT "md: out of memory.\n");
685 return -ENOMEM;
686 }
687
688 return 0;
689}
690
691void md_rdev_clear(struct md_rdev *rdev)
692{
693 if (rdev->sb_page) {
694 put_page(rdev->sb_page);
695 rdev->sb_loaded = 0;
696 rdev->sb_page = NULL;
697 rdev->sb_start = 0;
698 rdev->sectors = 0;
699 }
700 if (rdev->bb_page) {
701 put_page(rdev->bb_page);
702 rdev->bb_page = NULL;
703 }
704 badblocks_exit(&rdev->badblocks);
705}
706EXPORT_SYMBOL_GPL(md_rdev_clear);
707
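/*
 * Completion handler for superblock writes issued by md_super_write():
 * record a write error against the device via md_error() and wake up
 * any md_super_wait() waiters once all pending writes have finished.
 */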
708static void super_written(struct bio *bio)
709{
710 struct md_rdev *rdev = bio->bi_private;
711 struct mddev *mddev = rdev->mddev;
712
713 if (bio->bi_error) {
714 printk("md: super_written gets error=%d\n", bio->bi_error);
715 md_error(mddev, rdev);
716 }
717
718 if (atomic_dec_and_test(&mddev->pending_writes))
719 wake_up(&mddev->sb_wait);
720 bio_put(bio);
721}
722
723void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
724 sector_t sector, int size, struct page *page)
725{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
732 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
733
734 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
735 bio->bi_iter.bi_sector = sector;
736 bio_add_page(bio, page, size, 0);
737 bio->bi_private = rdev;
738 bio->bi_end_io = super_written;
739
740 atomic_inc(&mddev->pending_writes);
741 submit_bio(WRITE_FLUSH_FUA, bio);
742}
743
744void md_super_wait(struct mddev *mddev)
745{
746
747 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
748}
749
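/*
 * Synchronously read or write one page of data at 'sector' of the rdev;
 * metadata_op selects the metadata device (if any) and superblock-relative
 * addressing.  Returns 1 on success and 0 on failure.
 */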
750int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
751 struct page *page, int rw, bool metadata_op)
752{
753 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
754 int ret;
755
756 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
757 rdev->meta_bdev : rdev->bdev;
758 if (metadata_op)
759 bio->bi_iter.bi_sector = sector + rdev->sb_start;
760 else if (rdev->mddev->reshape_position != MaxSector &&
761 (rdev->mddev->reshape_backwards ==
762 (sector >= rdev->mddev->reshape_position)))
763 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
764 else
765 bio->bi_iter.bi_sector = sector + rdev->data_offset;
766 bio_add_page(bio, page, size, 0);
767 submit_bio_wait(rw, bio);
768
769 ret = !bio->bi_error;
770 bio_put(bio);
771 return ret;
772}
773EXPORT_SYMBOL_GPL(sync_page_io);
774
775static int read_disk_sb(struct md_rdev *rdev, int size)
776{
777 char b[BDEVNAME_SIZE];
778
779 if (rdev->sb_loaded)
780 return 0;
781
782 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
783 goto fail;
784 rdev->sb_loaded = 1;
785 return 0;
786
787fail:
788 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
789 bdevname(rdev->bdev,b));
790 return -EINVAL;
791}
792
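/* Compare the array UUIDs recorded in two 0.90 superblocks. */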
793static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
794{
795 return sb1->set_uuid0 == sb2->set_uuid0 &&
796 sb1->set_uuid1 == sb2->set_uuid1 &&
797 sb1->set_uuid2 == sb2->set_uuid2 &&
798 sb1->set_uuid3 == sb2->set_uuid3;
799}
800
801static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
802{
803 int ret;
804 mdp_super_t *tmp1, *tmp2;
805
806 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
807 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
808
809 if (!tmp1 || !tmp2) {
810 ret = 0;
811 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
812 goto abort;
813 }
814
815 *tmp1 = *sb1;
816 *tmp2 = *sb2;
817
818
819
820
821 tmp1->nr_disks = 0;
822 tmp2->nr_disks = 0;
823
824 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
825abort:
826 kfree(tmp1);
827 kfree(tmp2);
828 return ret;
829}
830
831static u32 md_csum_fold(u32 csum)
832{
833 csum = (csum & 0xffff) + (csum >> 16);
834 return (csum & 0xffff) + (csum >> 16);
835}
836
837static unsigned int calc_sb_csum(mdp_super_t *sb)
838{
839 u64 newcsum = 0;
840 u32 *sb32 = (u32*)sb;
841 int i;
842 unsigned int disk_csum, csum;
843
844 disk_csum = sb->sb_csum;
845 sb->sb_csum = 0;
846
847 for (i = 0; i < MD_SB_BYTES/4 ; i++)
848 newcsum += sb32[i];
849 csum = (newcsum & 0xffffffff) + (newcsum>>32);
850
851#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
860 sb->sb_csum = md_csum_fold(disk_csum);
861#else
862 sb->sb_csum = disk_csum;
863#endif
864 return csum;
865}
866
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 *
 */
897struct super_type {
898 char *name;
899 struct module *owner;
900 int (*load_super)(struct md_rdev *rdev,
901 struct md_rdev *refdev,
902 int minor_version);
903 int (*validate_super)(struct mddev *mddev,
904 struct md_rdev *rdev);
905 void (*sync_super)(struct mddev *mddev,
906 struct md_rdev *rdev);
907 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
908 sector_t num_sectors);
909 int (*allow_new_offset)(struct md_rdev *rdev,
910 unsigned long long new_offset);
911};
912
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 *
 */
921int md_check_no_bitmap(struct mddev *mddev)
922{
923 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
924 return 0;
925 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
926 mdname(mddev), mddev->pers->name);
927 return 1;
928}
929EXPORT_SYMBOL(md_check_no_bitmap);
930
/*
 * load_super for 0.90.0
 */
934static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
935{
936 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
937 mdp_super_t *sb;
938 int ret;
939
	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
946 rdev->sb_start = calc_dev_sboffset(rdev);
947
948 ret = read_disk_sb(rdev, MD_SB_BYTES);
949 if (ret) return ret;
950
951 ret = -EINVAL;
952
953 bdevname(rdev->bdev, b);
954 sb = page_address(rdev->sb_page);
955
956 if (sb->md_magic != MD_SB_MAGIC) {
957 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
958 b);
959 goto abort;
960 }
961
962 if (sb->major_version != 0 ||
963 sb->minor_version < 90 ||
964 sb->minor_version > 91) {
965 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
966 sb->major_version, sb->minor_version,
967 b);
968 goto abort;
969 }
970
971 if (sb->raid_disks <= 0)
972 goto abort;
973
974 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
975 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
976 b);
977 goto abort;
978 }
979
980 rdev->preferred_minor = sb->md_minor;
981 rdev->data_offset = 0;
982 rdev->new_data_offset = 0;
983 rdev->sb_size = MD_SB_BYTES;
984 rdev->badblocks.shift = -1;
985
986 if (sb->level == LEVEL_MULTIPATH)
987 rdev->desc_nr = -1;
988 else
989 rdev->desc_nr = sb->this_disk.number;
990
991 if (!refdev) {
992 ret = 1;
993 } else {
994 __u64 ev1, ev2;
995 mdp_super_t *refsb = page_address(refdev->sb_page);
996 if (!uuid_equal(refsb, sb)) {
997 printk(KERN_WARNING "md: %s has different UUID to %s\n",
998 b, bdevname(refdev->bdev,b2));
999 goto abort;
1000 }
1001 if (!sb_equal(refsb, sb)) {
1002 printk(KERN_WARNING "md: %s has same UUID"
1003 " but different superblock to %s\n",
1004 b, bdevname(refdev->bdev, b2));
1005 goto abort;
1006 }
1007 ev1 = md_event(sb);
1008 ev2 = md_event(refsb);
1009 if (ev1 > ev2)
1010 ret = 1;
1011 else
1012 ret = 0;
1013 }
1014 rdev->sectors = rdev->sb_start;

	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
1019 if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
1020 sb->level >= 1)
1021 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1022
1023 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1024
1025 ret = -EINVAL;
1026
1027 abort:
1028 return ret;
1029}
1030
/*
 * validate_super for 0.90.0
 */
1034static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1035{
1036 mdp_disk_t *desc;
1037 mdp_super_t *sb = page_address(rdev->sb_page);
1038 __u64 ev1 = md_event(sb);
1039
1040 rdev->raid_disk = -1;
1041 clear_bit(Faulty, &rdev->flags);
1042 clear_bit(In_sync, &rdev->flags);
1043 clear_bit(Bitmap_sync, &rdev->flags);
1044 clear_bit(WriteMostly, &rdev->flags);
1045
1046 if (mddev->raid_disks == 0) {
1047 mddev->major_version = 0;
1048 mddev->minor_version = sb->minor_version;
1049 mddev->patch_version = sb->patch_version;
1050 mddev->external = 0;
1051 mddev->chunk_sectors = sb->chunk_size >> 9;
1052 mddev->ctime = sb->ctime;
1053 mddev->utime = sb->utime;
1054 mddev->level = sb->level;
1055 mddev->clevel[0] = 0;
1056 mddev->layout = sb->layout;
1057 mddev->raid_disks = sb->raid_disks;
1058 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1059 mddev->events = ev1;
1060 mddev->bitmap_info.offset = 0;
1061 mddev->bitmap_info.space = 0;
1062
1063 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1064 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1065 mddev->reshape_backwards = 0;
1066
1067 if (mddev->minor_version >= 91) {
1068 mddev->reshape_position = sb->reshape_position;
1069 mddev->delta_disks = sb->delta_disks;
1070 mddev->new_level = sb->new_level;
1071 mddev->new_layout = sb->new_layout;
1072 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1073 if (mddev->delta_disks < 0)
1074 mddev->reshape_backwards = 1;
1075 } else {
1076 mddev->reshape_position = MaxSector;
1077 mddev->delta_disks = 0;
1078 mddev->new_level = mddev->level;
1079 mddev->new_layout = mddev->layout;
1080 mddev->new_chunk_sectors = mddev->chunk_sectors;
1081 }
1082
1083 if (sb->state & (1<<MD_SB_CLEAN))
1084 mddev->recovery_cp = MaxSector;
1085 else {
1086 if (sb->events_hi == sb->cp_events_hi &&
1087 sb->events_lo == sb->cp_events_lo) {
1088 mddev->recovery_cp = sb->recovery_cp;
1089 } else
1090 mddev->recovery_cp = 0;
1091 }
1092
1093 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1094 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1095 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1096 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1097
1098 mddev->max_disks = MD_SB_DISKS;
1099
1100 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1101 mddev->bitmap_info.file == NULL) {
1102 mddev->bitmap_info.offset =
1103 mddev->bitmap_info.default_offset;
1104 mddev->bitmap_info.space =
1105 mddev->bitmap_info.default_space;
1106 }
1107
1108 } else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
1112 if (sb->disks[rdev->desc_nr].state & (
1113 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1114 if (ev1 < mddev->events)
1115 return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
1129
1130 if (mddev->level != LEVEL_MULTIPATH) {
1131 desc = sb->disks + rdev->desc_nr;
1132
1133 if (desc->state & (1<<MD_DISK_FAULTY))
1134 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1137 set_bit(In_sync, &rdev->flags);
1138 rdev->raid_disk = desc->raid_disk;
1139 rdev->saved_raid_disk = desc->raid_disk;
1140 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
1144 if (mddev->minor_version >= 91) {
1145 rdev->recovery_offset = 0;
1146 rdev->raid_disk = desc->raid_disk;
1147 }
1148 }
1149 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1150 set_bit(WriteMostly, &rdev->flags);
1151 } else
1152 set_bit(In_sync, &rdev->flags);
1153 return 0;
1154}
1155
/*
 * sync_super for 0.90.0
 */
1159static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1160{
1161 mdp_super_t *sb;
1162 struct md_rdev *rdev2;
1163 int next_spare = mddev->raid_disks;
	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
1175 int i;
1176 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1177
1178 rdev->sb_size = MD_SB_BYTES;
1179
1180 sb = page_address(rdev->sb_page);
1181
1182 memset(sb, 0, sizeof(*sb));
1183
1184 sb->md_magic = MD_SB_MAGIC;
1185 sb->major_version = mddev->major_version;
1186 sb->patch_version = mddev->patch_version;
1187 sb->gvalid_words = 0;
1188 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1189 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1190 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1191 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1192
1193 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1194 sb->level = mddev->level;
1195 sb->size = mddev->dev_sectors / 2;
1196 sb->raid_disks = mddev->raid_disks;
1197 sb->md_minor = mddev->md_minor;
1198 sb->not_persistent = 0;
1199 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1200 sb->state = 0;
1201 sb->events_hi = (mddev->events>>32);
1202 sb->events_lo = (u32)mddev->events;
1203
1204 if (mddev->reshape_position == MaxSector)
1205 sb->minor_version = 90;
1206 else {
1207 sb->minor_version = 91;
1208 sb->reshape_position = mddev->reshape_position;
1209 sb->new_level = mddev->new_level;
1210 sb->delta_disks = mddev->delta_disks;
1211 sb->new_layout = mddev->new_layout;
1212 sb->new_chunk = mddev->new_chunk_sectors << 9;
1213 }
1214 mddev->minor_version = sb->minor_version;
1215 if (mddev->in_sync)
1216 {
1217 sb->recovery_cp = mddev->recovery_cp;
1218 sb->cp_events_hi = (mddev->events>>32);
1219 sb->cp_events_lo = (u32)mddev->events;
1220 if (mddev->recovery_cp == MaxSector)
1221 sb->state = (1<< MD_SB_CLEAN);
1222 } else
1223 sb->recovery_cp = 0;
1224
1225 sb->layout = mddev->layout;
1226 sb->chunk_size = mddev->chunk_sectors << 9;
1227
1228 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1229 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1230
1231 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1232 rdev_for_each(rdev2, mddev) {
1233 mdp_disk_t *d;
1234 int desc_nr;
1235 int is_active = test_bit(In_sync, &rdev2->flags);
1236
1237 if (rdev2->raid_disk >= 0 &&
1238 sb->minor_version >= 91)
1239
1240
1241
1242
1243 is_active = 1;
1244 if (rdev2->raid_disk < 0 ||
1245 test_bit(Faulty, &rdev2->flags))
1246 is_active = 0;
1247 if (is_active)
1248 desc_nr = rdev2->raid_disk;
1249 else
1250 desc_nr = next_spare++;
1251 rdev2->desc_nr = desc_nr;
1252 d = &sb->disks[rdev2->desc_nr];
1253 nr_disks++;
1254 d->number = rdev2->desc_nr;
1255 d->major = MAJOR(rdev2->bdev->bd_dev);
1256 d->minor = MINOR(rdev2->bdev->bd_dev);
1257 if (is_active)
1258 d->raid_disk = rdev2->raid_disk;
1259 else
1260 d->raid_disk = rdev2->desc_nr;
1261 if (test_bit(Faulty, &rdev2->flags))
1262 d->state = (1<<MD_DISK_FAULTY);
1263 else if (is_active) {
1264 d->state = (1<<MD_DISK_ACTIVE);
1265 if (test_bit(In_sync, &rdev2->flags))
1266 d->state |= (1<<MD_DISK_SYNC);
1267 active++;
1268 working++;
1269 } else {
1270 d->state = 0;
1271 spare++;
1272 working++;
1273 }
1274 if (test_bit(WriteMostly, &rdev2->flags))
1275 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1276 }
1277
1278 for (i=0 ; i < mddev->raid_disks ; i++) {
1279 mdp_disk_t *d = &sb->disks[i];
1280 if (d->state == 0 && d->number == 0) {
1281 d->number = i;
1282 d->raid_disk = i;
1283 d->state = (1<<MD_DISK_REMOVED);
1284 d->state |= (1<<MD_DISK_FAULTY);
1285 failed++;
1286 }
1287 }
1288 sb->nr_disks = nr_disks;
1289 sb->active_disks = active;
1290 sb->working_disks = working;
1291 sb->failed_disks = failed;
1292 sb->spare_disks = spare;
1293
1294 sb->this_disk = sb->disks[rdev->desc_nr];
1295 sb->sb_csum = calc_sb_csum(sb);
1296}
1297
/*
 * rdev_size_change for 0.90.0
 */
1301static unsigned long long
1302super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1303{
1304 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1305 return 0;
1306 if (rdev->mddev->bitmap_info.offset)
1307 return 0;
1308 rdev->sb_start = calc_dev_sboffset(rdev);
1309 if (!num_sectors || num_sectors > rdev->sb_start)
1310 num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
1314 if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
1315 rdev->mddev->level >= 1)
1316 num_sectors = (sector_t)(2ULL << 32) - 2;
1317 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1318 rdev->sb_page);
1319 md_super_wait(rdev->mddev);
1320 return num_sectors;
1321}
1322
1323static int
1324super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1325{
1326
1327 return new_offset == 0;
1328}
1329
/*
 * version 1 superblock
 */
1334static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1335{
1336 __le32 disk_csum;
1337 u32 csum;
1338 unsigned long long newcsum;
1339 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1340 __le32 *isuper = (__le32*)sb;
1341
1342 disk_csum = sb->sb_csum;
1343 sb->sb_csum = 0;
1344 newcsum = 0;
1345 for (; size >= 4; size -= 4)
1346 newcsum += le32_to_cpu(*isuper++);
1347
1348 if (size == 2)
1349 newcsum += le16_to_cpu(*(__le16*) isuper);
1350
1351 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1352 sb->sb_csum = disk_csum;
1353 return cpu_to_le32(csum);
1354}
1355
1356static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1357{
1358 struct mdp_superblock_1 *sb;
1359 int ret;
1360 sector_t sb_start;
1361 sector_t sectors;
1362 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1363 int bmask;
1364
	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
1373 switch(minor_version) {
1374 case 0:
1375 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1376 sb_start -= 8*2;
1377 sb_start &= ~(sector_t)(4*2-1);
1378 break;
1379 case 1:
1380 sb_start = 0;
1381 break;
1382 case 2:
1383 sb_start = 8;
1384 break;
1385 default:
1386 return -EINVAL;
1387 }
1388 rdev->sb_start = sb_start;
1389
1390
1391
1392
1393 ret = read_disk_sb(rdev, 4096);
1394 if (ret) return ret;
1395
1396 sb = page_address(rdev->sb_page);
1397
1398 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1399 sb->major_version != cpu_to_le32(1) ||
1400 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1401 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1402 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1403 return -EINVAL;
1404
1405 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1406 printk("md: invalid superblock checksum on %s\n",
1407 bdevname(rdev->bdev,b));
1408 return -EINVAL;
1409 }
1410 if (le64_to_cpu(sb->data_size) < 10) {
1411 printk("md: data_size too small on %s\n",
1412 bdevname(rdev->bdev,b));
1413 return -EINVAL;
1414 }
1415 if (sb->pad0 ||
1416 sb->pad3[0] ||
1417 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1418
1419 return -EINVAL;
1420
1421 rdev->preferred_minor = 0xffff;
1422 rdev->data_offset = le64_to_cpu(sb->data_offset);
1423 rdev->new_data_offset = rdev->data_offset;
1424 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1425 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1426 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1427 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1428
1429 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1430 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1431 if (rdev->sb_size & bmask)
1432 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1433
1434 if (minor_version
1435 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1436 return -EINVAL;
1437 if (minor_version
1438 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1439 return -EINVAL;
1440
1441 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1442 rdev->desc_nr = -1;
1443 else
1444 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1445
1446 if (!rdev->bb_page) {
1447 rdev->bb_page = alloc_page(GFP_KERNEL);
1448 if (!rdev->bb_page)
1449 return -ENOMEM;
1450 }
1451 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1452 rdev->badblocks.count == 0) {
1453
1454
1455
1456 s32 offset;
1457 sector_t bb_sector;
1458 u64 *bbp;
1459 int i;
1460 int sectors = le16_to_cpu(sb->bblog_size);
1461 if (sectors > (PAGE_SIZE / 512))
1462 return -EINVAL;
1463 offset = le32_to_cpu(sb->bblog_offset);
1464 if (offset == 0)
1465 return -EINVAL;
1466 bb_sector = (long long)offset;
1467 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1468 rdev->bb_page, READ, true))
1469 return -EIO;
1470 bbp = (u64 *)page_address(rdev->bb_page);
1471 rdev->badblocks.shift = sb->bblog_shift;
1472 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1473 u64 bb = le64_to_cpu(*bbp);
1474 int count = bb & (0x3ff);
1475 u64 sector = bb >> 10;
1476 sector <<= sb->bblog_shift;
1477 count <<= sb->bblog_shift;
1478 if (bb + 1 == 0)
1479 break;
1480 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1481 return -EINVAL;
1482 }
1483 } else if (sb->bblog_offset != 0)
1484 rdev->badblocks.shift = 0;
1485
1486 if (!refdev) {
1487 ret = 1;
1488 } else {
1489 __u64 ev1, ev2;
1490 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1491
1492 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1493 sb->level != refsb->level ||
1494 sb->layout != refsb->layout ||
1495 sb->chunksize != refsb->chunksize) {
1496 printk(KERN_WARNING "md: %s has strangely different"
1497 " superblock to %s\n",
1498 bdevname(rdev->bdev,b),
1499 bdevname(refdev->bdev,b2));
1500 return -EINVAL;
1501 }
1502 ev1 = le64_to_cpu(sb->events);
1503 ev2 = le64_to_cpu(refsb->events);
1504
1505 if (ev1 > ev2)
1506 ret = 1;
1507 else
1508 ret = 0;
1509 }
1510 if (minor_version) {
1511 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1512 sectors -= rdev->data_offset;
1513 } else
1514 sectors = rdev->sb_start;
1515 if (sectors < le64_to_cpu(sb->data_size))
1516 return -EINVAL;
1517 rdev->sectors = le64_to_cpu(sb->data_size);
1518 return ret;
1519}
1520
1521static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1522{
1523 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1524 __u64 ev1 = le64_to_cpu(sb->events);
1525
1526 rdev->raid_disk = -1;
1527 clear_bit(Faulty, &rdev->flags);
1528 clear_bit(In_sync, &rdev->flags);
1529 clear_bit(Bitmap_sync, &rdev->flags);
1530 clear_bit(WriteMostly, &rdev->flags);
1531
1532 if (mddev->raid_disks == 0) {
1533 mddev->major_version = 1;
1534 mddev->patch_version = 0;
1535 mddev->external = 0;
1536 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1537 mddev->ctime = le64_to_cpu(sb->ctime);
1538 mddev->utime = le64_to_cpu(sb->utime);
1539 mddev->level = le32_to_cpu(sb->level);
1540 mddev->clevel[0] = 0;
1541 mddev->layout = le32_to_cpu(sb->layout);
1542 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1543 mddev->dev_sectors = le64_to_cpu(sb->size);
1544 mddev->events = ev1;
1545 mddev->bitmap_info.offset = 0;
1546 mddev->bitmap_info.space = 0;
1547
1548
1549
1550 mddev->bitmap_info.default_offset = 1024 >> 9;
1551 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1552 mddev->reshape_backwards = 0;
1553
1554 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1555 memcpy(mddev->uuid, sb->set_uuid, 16);
1556
1557 mddev->max_disks = (4096-256)/2;
1558
1559 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1560 mddev->bitmap_info.file == NULL) {
1561 mddev->bitmap_info.offset =
1562 (__s32)le32_to_cpu(sb->bitmap_offset);
1563
1564
1565
1566
1567
1568 if (mddev->minor_version > 0)
1569 mddev->bitmap_info.space = 0;
1570 else if (mddev->bitmap_info.offset > 0)
1571 mddev->bitmap_info.space =
1572 8 - mddev->bitmap_info.offset;
1573 else
1574 mddev->bitmap_info.space =
1575 -mddev->bitmap_info.offset;
1576 }
1577
1578 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1579 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1580 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1581 mddev->new_level = le32_to_cpu(sb->new_level);
1582 mddev->new_layout = le32_to_cpu(sb->new_layout);
1583 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1584 if (mddev->delta_disks < 0 ||
1585 (mddev->delta_disks == 0 &&
1586 (le32_to_cpu(sb->feature_map)
1587 & MD_FEATURE_RESHAPE_BACKWARDS)))
1588 mddev->reshape_backwards = 1;
1589 } else {
1590 mddev->reshape_position = MaxSector;
1591 mddev->delta_disks = 0;
1592 mddev->new_level = mddev->level;
1593 mddev->new_layout = mddev->layout;
1594 mddev->new_chunk_sectors = mddev->chunk_sectors;
1595 }
1596
1597 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
1598 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1599 if (mddev->recovery_cp == MaxSector)
1600 set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
1601 }
1602 } else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
1606 if (rdev->desc_nr >= 0 &&
1607 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1608 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1609 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1610 if (ev1 < mddev->events)
1611 return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
1625 if (mddev->level != LEVEL_MULTIPATH) {
1626 int role;
1627 if (rdev->desc_nr < 0 ||
1628 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1629 role = MD_DISK_ROLE_SPARE;
1630 rdev->desc_nr = -1;
1631 } else
1632 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1633 switch(role) {
1634 case MD_DISK_ROLE_SPARE:
1635 break;
1636 case MD_DISK_ROLE_FAULTY:
1637 set_bit(Faulty, &rdev->flags);
1638 break;
1639 case MD_DISK_ROLE_JOURNAL:
1640 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1641
1642 printk(KERN_WARNING
1643 "md: journal device provided without journal feature, ignoring the device\n");
1644 return -EINVAL;
1645 }
1646 set_bit(Journal, &rdev->flags);
1647 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1648 rdev->raid_disk = 0;
1649 break;
1650 default:
1651 rdev->saved_raid_disk = role;
1652 if ((le32_to_cpu(sb->feature_map) &
1653 MD_FEATURE_RECOVERY_OFFSET)) {
1654 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1655 if (!(le32_to_cpu(sb->feature_map) &
1656 MD_FEATURE_RECOVERY_BITMAP))
1657 rdev->saved_raid_disk = -1;
1658 } else
1659 set_bit(In_sync, &rdev->flags);
1660 rdev->raid_disk = role;
1661 break;
1662 }
1663 if (sb->devflags & WriteMostly1)
1664 set_bit(WriteMostly, &rdev->flags);
1665 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1666 set_bit(Replacement, &rdev->flags);
1667 } else
1668 set_bit(In_sync, &rdev->flags);
1669
1670 return 0;
1671}
1672
1673static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1674{
1675 struct mdp_superblock_1 *sb;
1676 struct md_rdev *rdev2;
1677 int max_dev, i;
1678
1679
1680 sb = page_address(rdev->sb_page);
1681
1682 sb->feature_map = 0;
1683 sb->pad0 = 0;
1684 sb->recovery_offset = cpu_to_le64(0);
1685 memset(sb->pad3, 0, sizeof(sb->pad3));
1686
1687 sb->utime = cpu_to_le64((__u64)mddev->utime);
1688 sb->events = cpu_to_le64(mddev->events);
1689 if (mddev->in_sync)
1690 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1691 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1692 sb->resync_offset = cpu_to_le64(MaxSector);
1693 else
1694 sb->resync_offset = cpu_to_le64(0);
1695
1696 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1697
1698 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1699 sb->size = cpu_to_le64(mddev->dev_sectors);
1700 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1701 sb->level = cpu_to_le32(mddev->level);
1702 sb->layout = cpu_to_le32(mddev->layout);
1703
1704 if (test_bit(WriteMostly, &rdev->flags))
1705 sb->devflags |= WriteMostly1;
1706 else
1707 sb->devflags &= ~WriteMostly1;
1708 sb->data_offset = cpu_to_le64(rdev->data_offset);
1709 sb->data_size = cpu_to_le64(rdev->sectors);
1710
1711 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1712 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1713 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1714 }
1715
1716 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1717 !test_bit(In_sync, &rdev->flags)) {
1718 sb->feature_map |=
1719 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1720 sb->recovery_offset =
1721 cpu_to_le64(rdev->recovery_offset);
1722 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1723 sb->feature_map |=
1724 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1725 }
1726
1727 if (test_bit(Journal, &rdev->flags))
1728 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1729 if (test_bit(Replacement, &rdev->flags))
1730 sb->feature_map |=
1731 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1732
1733 if (mddev->reshape_position != MaxSector) {
1734 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1735 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1736 sb->new_layout = cpu_to_le32(mddev->new_layout);
1737 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1738 sb->new_level = cpu_to_le32(mddev->new_level);
1739 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1740 if (mddev->delta_disks == 0 &&
1741 mddev->reshape_backwards)
1742 sb->feature_map
1743 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1744 if (rdev->new_data_offset != rdev->data_offset) {
1745 sb->feature_map
1746 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1747 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1748 - rdev->data_offset));
1749 }
1750 }
1751
1752 if (mddev_is_clustered(mddev))
1753 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
1754
1755 if (rdev->badblocks.count == 0)
1756 ;
1757 else if (sb->bblog_offset == 0)
1758
1759 md_error(mddev, rdev);
1760 else {
1761 struct badblocks *bb = &rdev->badblocks;
1762 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1763 u64 *p = bb->page;
1764 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1765 if (bb->changed) {
1766 unsigned seq;
1767
1768retry:
1769 seq = read_seqbegin(&bb->lock);
1770
1771 memset(bbp, 0xff, PAGE_SIZE);
1772
1773 for (i = 0 ; i < bb->count ; i++) {
1774 u64 internal_bb = p[i];
1775 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1776 | BB_LEN(internal_bb));
1777 bbp[i] = cpu_to_le64(store_bb);
1778 }
1779 bb->changed = 0;
1780 if (read_seqretry(&bb->lock, seq))
1781 goto retry;
1782
1783 bb->sector = (rdev->sb_start +
1784 (int)le32_to_cpu(sb->bblog_offset));
1785 bb->size = le16_to_cpu(sb->bblog_size);
1786 }
1787 }
1788
1789 max_dev = 0;
1790 rdev_for_each(rdev2, mddev)
1791 if (rdev2->desc_nr+1 > max_dev)
1792 max_dev = rdev2->desc_nr+1;
1793
1794 if (max_dev > le32_to_cpu(sb->max_dev)) {
1795 int bmask;
1796 sb->max_dev = cpu_to_le32(max_dev);
1797 rdev->sb_size = max_dev * 2 + 256;
1798 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1799 if (rdev->sb_size & bmask)
1800 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1801 } else
1802 max_dev = le32_to_cpu(sb->max_dev);
1803
1804 for (i=0; i<max_dev;i++)
1805 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1806
1807 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
1808 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
1809
1810 rdev_for_each(rdev2, mddev) {
1811 i = rdev2->desc_nr;
1812 if (test_bit(Faulty, &rdev2->flags))
1813 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1814 else if (test_bit(In_sync, &rdev2->flags))
1815 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1816 else if (test_bit(Journal, &rdev2->flags))
1817 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
1818 else if (rdev2->raid_disk >= 0)
1819 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1820 else
1821 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
1822 }
1823
1824 sb->sb_csum = calc_sb_1_csum(sb);
1825}
1826
1827static unsigned long long
1828super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1829{
1830 struct mdp_superblock_1 *sb;
1831 sector_t max_sectors;
1832 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1833 return 0;
1834 if (rdev->data_offset != rdev->new_data_offset)
1835 return 0;
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
1848 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1849 sb_start &= ~(sector_t)(4*2 - 1);
1850 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1851 if (!num_sectors || num_sectors > max_sectors)
1852 num_sectors = max_sectors;
1853 rdev->sb_start = sb_start;
1854 }
1855 sb = page_address(rdev->sb_page);
1856 sb->data_size = cpu_to_le64(num_sectors);
1857 sb->super_offset = rdev->sb_start;
1858 sb->sb_csum = calc_sb_1_csum(sb);
1859 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1860 rdev->sb_page);
1861 md_super_wait(rdev->mddev);
1862 return num_sectors;
1863
1864}
1865
1866static int
1867super_1_allow_new_offset(struct md_rdev *rdev,
1868 unsigned long long new_offset)
1869{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata,
	 * so stay:
	 *   36K beyond start of superblock
	 *   beyond end of badblocks
	 *   beyond write-intent bitmap
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
1887 return 0;
1888 bitmap = rdev->mddev->bitmap;
1889 if (bitmap && !rdev->mddev->bitmap_info.file &&
1890 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1891 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1892 return 0;
1893 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1894 return 0;
1895
1896 return 1;
1897}
1898
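/*
 * Table of superblock format handlers, indexed by major version:
 * 0.90.0 and the 1.x family.
 */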
1899static struct super_type super_types[] = {
1900 [0] = {
1901 .name = "0.90.0",
1902 .owner = THIS_MODULE,
1903 .load_super = super_90_load,
1904 .validate_super = super_90_validate,
1905 .sync_super = super_90_sync,
1906 .rdev_size_change = super_90_rdev_size_change,
1907 .allow_new_offset = super_90_allow_new_offset,
1908 },
1909 [1] = {
1910 .name = "md-1",
1911 .owner = THIS_MODULE,
1912 .load_super = super_1_load,
1913 .validate_super = super_1_validate,
1914 .sync_super = super_1_sync,
1915 .rdev_size_change = super_1_rdev_size_change,
1916 .allow_new_offset = super_1_allow_new_offset,
1917 },
1918};
1919
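/*
 * Update rdev's superblock from mddev, using the per-array sync_super
 * override when one is set, otherwise the handler for the array's
 * major version.
 */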
1920static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1921{
1922 if (mddev->sync_super) {
1923 mddev->sync_super(mddev, rdev);
1924 return;
1925 }
1926
1927 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1928
1929 super_types[mddev->major_version].sync_super(mddev, rdev);
1930}
1931
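/*
 * Return 1 if the two arrays share any underlying whole device
 * (bd_contains), ignoring faulty and journal devices.
 */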
1932static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1933{
1934 struct md_rdev *rdev, *rdev2;
1935
1936 rcu_read_lock();
1937 rdev_for_each_rcu(rdev, mddev1) {
1938 if (test_bit(Faulty, &rdev->flags) ||
1939 test_bit(Journal, &rdev->flags) ||
1940 rdev->raid_disk == -1)
1941 continue;
1942 rdev_for_each_rcu(rdev2, mddev2) {
1943 if (test_bit(Faulty, &rdev2->flags) ||
1944 test_bit(Journal, &rdev2->flags) ||
1945 rdev2->raid_disk == -1)
1946 continue;
1947 if (rdev->bdev->bd_contains ==
1948 rdev2->bdev->bd_contains) {
1949 rcu_read_unlock();
1950 return 1;
1951 }
1952 }
1953 }
1954 rcu_read_unlock();
1955 return 0;
1956}
1957
1958static LIST_HEAD(pending_raid_disks);
1959
/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */
1967int md_integrity_register(struct mddev *mddev)
1968{
1969 struct md_rdev *rdev, *reference = NULL;
1970
1971 if (list_empty(&mddev->disks))
1972 return 0;
1973 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1974 return 0;
1975 rdev_for_each(rdev, mddev) {
1976
1977 if (test_bit(Faulty, &rdev->flags))
1978 continue;
1979 if (rdev->raid_disk < 0)
1980 continue;
1981 if (!reference) {
1982
1983 reference = rdev;
1984 continue;
1985 }
1986
1987 if (blk_integrity_compare(reference->bdev->bd_disk,
1988 rdev->bdev->bd_disk) < 0)
1989 return -EINVAL;
1990 }
1991 if (!reference || !bdev_get_integrity(reference->bdev))
1992 return 0;
1993
1994
1995
1996
1997 blk_integrity_register(mddev->gendisk,
1998 bdev_get_integrity(reference->bdev));
1999
2000 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2001 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2002 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2003 mdname(mddev));
2004 return -EINVAL;
2005 }
2006 return 0;
2007}
2008EXPORT_SYMBOL(md_integrity_register);
2009
/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
2014int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2015{
2016 struct blk_integrity *bi_rdev;
2017 struct blk_integrity *bi_mddev;
2018 char name[BDEVNAME_SIZE];
2019
2020 if (!mddev->gendisk)
2021 return 0;
2022
2023 bi_rdev = bdev_get_integrity(rdev->bdev);
2024 bi_mddev = blk_get_integrity(mddev->gendisk);
2025
2026 if (!bi_mddev)
2027 return 0;
2028
2029 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2030 printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
2031 mdname(mddev), bdevname(rdev->bdev, name));
2032 return -ENXIO;
2033 }
2034
2035 return 0;
2036}
2037EXPORT_SYMBOL(md_integrity_add_rdev);
2038
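/*
 * Attach an rdev to an array: check for duplicates and size limits,
 * assign a unique desc_nr, create the sysfs objects and link the device
 * into mddev->disks.
 */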
2039static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2040{
2041 char b[BDEVNAME_SIZE];
2042 struct kobject *ko;
2043 int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors if both are set */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check that the number is not in use.
	 * If we are holding a refcount on an active array then it must
	 * be safe to use rcu_read_lock to look at them.
	 */
2068 rcu_read_lock();
2069 if (rdev->desc_nr < 0) {
2070 int choice = 0;
2071 if (mddev->pers)
2072 choice = mddev->raid_disks;
2073 while (md_find_rdev_nr_rcu(mddev, choice))
2074 choice++;
2075 rdev->desc_nr = choice;
2076 } else {
2077 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2078 rcu_read_unlock();
2079 return -EBUSY;
2080 }
2081 }
2082 rcu_read_unlock();
2083 if (!test_bit(Journal, &rdev->flags) &&
2084 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2085 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2086 mdname(mddev), mddev->max_disks);
2087 return -EBUSY;
2088 }
2089 bdevname(rdev->bdev,b);
2090 strreplace(b, '/', '!');
2091
2092 rdev->mddev = mddev;
2093 printk(KERN_INFO "md: bind<%s>\n", b);
2094
2095 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2096 goto fail;
2097
2098 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2099 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2100 ;
2101 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2102
2103 list_add_rcu(&rdev->same_set, &mddev->disks);
2104 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2105
2106
2107 mddev->recovery_disabled++;
2108
2109 return 0;
2110
2111 fail:
2112 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2113 b, mdname(mddev));
2114 return err;
2115}
2116
2117static void md_delayed_delete(struct work_struct *ws)
2118{
2119 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2120 kobject_del(&rdev->kobj);
2121 kobject_put(&rdev->kobj);
2122}
2123
2124static void unbind_rdev_from_array(struct md_rdev *rdev)
2125{
2126 char b[BDEVNAME_SIZE];
2127
2128 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2129 list_del_rcu(&rdev->same_set);
2130 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2131 rdev->mddev = NULL;
2132 sysfs_remove_link(&rdev->kobj, "block");
2133 sysfs_put(rdev->sysfs_state);
2134 rdev->sysfs_state = NULL;
2135 rdev->badblocks.count = 0;

	/* We need to delay this, otherwise we can deadlock when
	 * writing to 'remove' to "dev/state".  We also need
	 * to delay it due to rcu usage.
	 */
	synchronize_rcu();
2141 INIT_WORK(&rdev->del_work, md_delayed_delete);
2142 kobject_get(&rdev->kobj);
2143 queue_work(md_misc_wq, &rdev->del_work);
2144}
2145
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2151static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2152{
2153 int err = 0;
2154 struct block_device *bdev;
2155 char b[BDEVNAME_SIZE];
2156
2157 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2158 shared ? (struct md_rdev *)lock_rdev : rdev);
2159 if (IS_ERR(bdev)) {
2160 printk(KERN_ERR "md: could not open %s.\n",
2161 __bdevname(dev, b));
2162 return PTR_ERR(bdev);
2163 }
2164 rdev->bdev = bdev;
2165 return err;
2166}
2167
2168static void unlock_rdev(struct md_rdev *rdev)
2169{
2170 struct block_device *bdev = rdev->bdev;
2171 rdev->bdev = NULL;
2172 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2173}
2174
2175void md_autodetect_dev(dev_t dev);
2176
2177static void export_rdev(struct md_rdev *rdev)
2178{
2179 char b[BDEVNAME_SIZE];
2180
2181 printk(KERN_INFO "md: export_rdev(%s)\n",
2182 bdevname(rdev->bdev,b));
2183 md_rdev_clear(rdev);
2184#ifndef MODULE
2185 if (test_bit(AutoDetected, &rdev->flags))
2186 md_autodetect_dev(rdev->bdev->bd_dev);
2187#endif
2188 unlock_rdev(rdev);
2189 kobject_put(&rdev->kobj);
2190}
2191
2192void md_kick_rdev_from_array(struct md_rdev *rdev)
2193{
2194 unbind_rdev_from_array(rdev);
2195 export_rdev(rdev);
2196}
2197EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2198
2199static void export_array(struct mddev *mddev)
2200{
2201 struct md_rdev *rdev;
2202
2203 while (!list_empty(&mddev->disks)) {
2204 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2205 same_set);
2206 md_kick_rdev_from_array(rdev);
2207 }
2208 mddev->raid_disks = 0;
2209 mddev->major_version = 0;
2210}
2211
2212static void sync_sbs(struct mddev *mddev, int nospares)
2213{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2220 struct md_rdev *rdev;
2221 rdev_for_each(rdev, mddev) {
2222 if (rdev->sb_events == mddev->events ||
2223 (nospares &&
2224 rdev->raid_disk < 0 &&
2225 rdev->sb_events+1 == mddev->events)) {
2226
2227 rdev->sb_loaded = 2;
2228 } else {
2229 sync_super(mddev, rdev);
2230 rdev->sb_loaded = 1;
2231 }
2232 }
2233}
2234
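/*
 * Used on the clustered-metadata path to decide whether an on-disk
 * superblock update is really needed: a device changing role (activated
 * or turned faulty) or a change in array geometry requires one.
 */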
2235static bool does_sb_need_changing(struct mddev *mddev)
2236{
2237 struct md_rdev *rdev;
2238 struct mdp_superblock_1 *sb;
2239 int role;
2240
2241
2242 rdev_for_each(rdev, mddev)
2243 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2244 break;
2245
2246
2247 if (!rdev)
2248 return false;
2249
2250 sb = page_address(rdev->sb_page);
2251
2252 rdev_for_each(rdev, mddev) {
2253 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2254
2255 if (role == 0xffff && rdev->raid_disk >=0 &&
2256 !test_bit(Faulty, &rdev->flags))
2257 return true;
2258
2259 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2260 return true;
2261 }
2262
2263
2264 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2265 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2266 (mddev->layout != le64_to_cpu(sb->layout)) ||
2267 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2268 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2269 return true;
2270
2271 return false;
2272}
2273
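/*
 * Write the in-memory superblock out to every non-faulty member device,
 * bumping the event count first; repeat if the array state changed while
 * the writes were in flight.
 */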
2274void md_update_sb(struct mddev *mddev, int force_change)
2275{
2276 struct md_rdev *rdev;
2277 int sync_req;
2278 int nospares = 0;
2279 int any_badblocks_changed = 0;
2280 int ret = -1;
2281
2282 if (mddev->ro) {
2283 if (force_change)
2284 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2285 return;
2286 }
2287
2288 if (mddev_is_clustered(mddev)) {
2289 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2290 force_change = 1;
2291 ret = md_cluster_ops->metadata_update_start(mddev);
2292
2293 if (!does_sb_need_changing(mddev)) {
2294 if (ret == 0)
2295 md_cluster_ops->metadata_update_cancel(mddev);
2296 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2297 return;
2298 }
2299 }
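	/*
	 * Superblocks are written below; if the array state changes while the
	 * writes are in flight we jump back here and write them out again.
	 */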
2300repeat:
	/* First make sure individual recovery_offsets are correct */
2302 rdev_for_each(rdev, mddev) {
2303 if (rdev->raid_disk >= 0 &&
2304 mddev->delta_disks >= 0 &&
2305 !test_bit(Journal, &rdev->flags) &&
2306 !test_bit(In_sync, &rdev->flags) &&
2307 mddev->curr_resync_completed > rdev->recovery_offset)
2308 rdev->recovery_offset = mddev->curr_resync_completed;
2309
2310 }
2311 if (!mddev->persistent) {
2312 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2313 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2314 if (!mddev->external) {
2315 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2316 rdev_for_each(rdev, mddev) {
2317 if (rdev->badblocks.changed) {
2318 rdev->badblocks.changed = 0;
2319 ack_all_badblocks(&rdev->badblocks);
2320 md_error(mddev, rdev);
2321 }
2322 clear_bit(Blocked, &rdev->flags);
2323 clear_bit(BlockedBadBlocks, &rdev->flags);
2324 wake_up(&rdev->blocked_wait);
2325 }
2326 }
2327 wake_up(&mddev->sb_wait);
2328 return;
2329 }
2330
2331 spin_lock(&mddev->lock);
2332
2333 mddev->utime = ktime_get_real_seconds();
2334
2335 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2336 force_change = 1;
2337 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/* just a clean <-> dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
2342 nospares = 1;
2343 if (force_change)
2344 nospares = 0;
2345 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event count that looks up-to-date and so could
		 * be re-added without a resync.
		 * Pointless because a recovery will happen soon anyway, after
		 * which the array is no longer degraded and the spares would
		 * be written out with everything else.
		 */
2355 nospares = 0;
2356
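	/* Remember the current clean/dirty state; if it changes while we are
	 * writing superblocks we must write them out again (see 'repeat').
	 */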
2357 sync_req = mddev->in_sync;
	/* If this is just a dirty <-> clean transition, and the array is clean
	 * and 'events' allows it, we can roll the event count back to the
	 * previous clean state instead of bumping it.
	 */
2361 if (nospares
2362 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2363 && mddev->can_decrease_events
2364 && mddev->events != 1) {
2365 mddev->events--;
2366 mddev->can_decrease_events = 0;
2367 } else {
		/* otherwise we have to be forceful */
		mddev->events++;
2370 mddev->can_decrease_events = nospares;
2371 }
2372
	/*
	 * The 64-bit event counter should never wrap or reach zero in
	 * practice; warn if it somehow does.
	 */
2378 WARN_ON(mddev->events == 0);
2379
2380 rdev_for_each(rdev, mddev) {
2381 if (rdev->badblocks.changed)
2382 any_badblocks_changed++;
2383 if (test_bit(Faulty, &rdev->flags))
2384 set_bit(FaultRecorded, &rdev->flags);
2385 }
2386
2387 sync_sbs(mddev, nospares);
2388 spin_unlock(&mddev->lock);
2389
2390 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2391 mdname(mddev), mddev->in_sync);
2392
2393 bitmap_update_sb(mddev->bitmap);
2394 rdev_for_each(rdev, mddev) {
2395 char b[BDEVNAME_SIZE];
2396
2397 if (rdev->sb_loaded != 1)
2398 continue;
2399
2400 if (!test_bit(Faulty, &rdev->flags)) {
2401 md_super_write(mddev,rdev,
2402 rdev->sb_start, rdev->sb_size,
2403 rdev->sb_page);
2404 pr_debug("md: (write) %s's sb offset: %llu\n",
2405 bdevname(rdev->bdev, b),
2406 (unsigned long long)rdev->sb_start);
2407 rdev->sb_events = mddev->events;
2408 if (rdev->badblocks.size) {
2409 md_super_write(mddev, rdev,
2410 rdev->badblocks.sector,
2411 rdev->badblocks.size << 9,
2412 rdev->bb_page);
2413 rdev->badblocks.size = 0;
2414 }
2415
2416 } else
2417 pr_debug("md: %s (skipping faulty)\n",
2418 bdevname(rdev->bdev, b));
2419
2420 if (mddev->level == LEVEL_MULTIPATH)
2421
2422 break;
2423 }
2424 md_super_wait(mddev);
2425
2426
2427 spin_lock(&mddev->lock);
2428 if (mddev->in_sync != sync_req ||
2429 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2430
2431 spin_unlock(&mddev->lock);
2432 goto repeat;
2433 }
2434 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2435 spin_unlock(&mddev->lock);
2436 wake_up(&mddev->sb_wait);
2437 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2438 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2439
2440 rdev_for_each(rdev, mddev) {
2441 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2442 clear_bit(Blocked, &rdev->flags);
2443
2444 if (any_badblocks_changed)
2445 ack_all_badblocks(&rdev->badblocks);
2446 clear_bit(BlockedBadBlocks, &rdev->flags);
2447 wake_up(&rdev->blocked_wait);
2448 }
2449
2450 if (mddev_is_clustered(mddev) && ret == 0)
2451 md_cluster_ops->metadata_update_finish(mddev);
2452}
2453EXPORT_SYMBOL(md_update_sb);
2454
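/*
 * Activate an rdev that has just been bound to a running array: validate its
 * superblock against the array and, where the personality handles additions
 * directly (no hot_remove_disk) or the device is a journal, hot-add it right
 * away; then mark the superblock dirty and kick off recovery.
 */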
2455static int add_bound_rdev(struct md_rdev *rdev)
2456{
2457 struct mddev *mddev = rdev->mddev;
2458 int err = 0;
2459 bool add_journal = test_bit(Journal, &rdev->flags);
2460
2461 if (!mddev->pers->hot_remove_disk || add_journal) {
		/* Personalities without hot_remove_disk handle newly bound
		 * devices directly, as do journal devices, so validate and
		 * activate the rdev immediately.
		 */
2466 super_types[mddev->major_version].
2467 validate_super(mddev, rdev);
2468 if (add_journal)
2469 mddev_suspend(mddev);
2470 err = mddev->pers->hot_add_disk(mddev, rdev);
2471 if (add_journal)
2472 mddev_resume(mddev);
2473 if (err) {
2474 unbind_rdev_from_array(rdev);
2475 export_rdev(rdev);
2476 return err;
2477 }
2478 }
2479 sysfs_notify_dirent_safe(rdev->sysfs_state);
2480
2481 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2482 if (mddev->degraded)
2483 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2484 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2485 md_new_event(mddev);
2486 md_wakeup_thread(mddev->thread);
2487 return 0;
2488}
2489
/* Words written to sysfs files may, or may not, be \n terminated.
 * We want to accept either, so we use cmd_match() below.
 */
2493static int cmd_match(const char *cmd, const char *str)
2494{
2495
2496
2497
2498
2499 while (*cmd && *str && *cmd == *str) {
2500 cmd++;
2501 str++;
2502 }
2503 if (*cmd == '\n')
2504 cmd++;
2505 if (*str || *cmd)
2506 return 0;
2507 return 1;
2508}
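/*
 * For example, cmd_match("idle\n", "idle") and cmd_match("idle", "idle")
 * both return 1, while cmd_match("idle2", "idle") returns 0.
 */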
2509
2510struct rdev_sysfs_entry {
2511 struct attribute attr;
2512 ssize_t (*show)(struct md_rdev *, char *);
2513 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2514};
2515
2516static ssize_t
2517state_show(struct md_rdev *rdev, char *page)
2518{
2519 char *sep = "";
2520 size_t len = 0;
2521 unsigned long flags = ACCESS_ONCE(rdev->flags);
2522
2523 if (test_bit(Faulty, &flags) ||
2524 rdev->badblocks.unacked_exist) {
2525 len+= sprintf(page+len, "%sfaulty",sep);
2526 sep = ",";
2527 }
2528 if (test_bit(In_sync, &flags)) {
2529 len += sprintf(page+len, "%sin_sync",sep);
2530 sep = ",";
2531 }
2532 if (test_bit(Journal, &flags)) {
2533 len += sprintf(page+len, "%sjournal",sep);
2534 sep = ",";
2535 }
2536 if (test_bit(WriteMostly, &flags)) {
2537 len += sprintf(page+len, "%swrite_mostly",sep);
2538 sep = ",";
2539 }
2540 if (test_bit(Blocked, &flags) ||
2541 (rdev->badblocks.unacked_exist
2542 && !test_bit(Faulty, &flags))) {
2543 len += sprintf(page+len, "%sblocked", sep);
2544 sep = ",";
2545 }
2546 if (!test_bit(Faulty, &flags) &&
2547 !test_bit(Journal, &flags) &&
2548 !test_bit(In_sync, &flags)) {
2549 len += sprintf(page+len, "%sspare", sep);
2550 sep = ",";
2551 }
2552 if (test_bit(WriteErrorSeen, &flags)) {
2553 len += sprintf(page+len, "%swrite_error", sep);
2554 sep = ",";
2555 }
2556 if (test_bit(WantReplacement, &flags)) {
2557 len += sprintf(page+len, "%swant_replacement", sep);
2558 sep = ",";
2559 }
2560 if (test_bit(Replacement, &flags)) {
2561 len += sprintf(page+len, "%sreplacement", sep);
2562 sep = ",";
2563 }
2564
2565 return len+sprintf(page+len, "\n");
2566}
2567
2568static ssize_t
2569state_store(struct md_rdev *rdev, const char *buf, size_t len)
2570{
	/* can be signalled by writing one of these words to the file:
	 *  faulty  - simulate a failure of the device
	 *  remove  - disconnect the device from the array (must not hold an
	 *            active slot)
	 *  writemostly / -writemostly - set or clear the WriteMostly flag
	 *  blocked / -blocked - set or clear the Blocked flag
	 *  insync / -insync   - set or clear the In_sync flag
	 *  write_error / -write_error - set or clear WriteErrorSeen
	 *  want_replacement / -want_replacement - request, or cancel a request
	 *            for, a replacement device
	 *  replacement / -replacement - mark or unmark the device as a
	 *            replacement (only before the array is started)
	 *  re-add  - add a previously failed device back to the array
	 */
2584 int err = -EINVAL;
2585 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2586 md_error(rdev->mddev, rdev);
2587 if (test_bit(Faulty, &rdev->flags))
2588 err = 0;
2589 else
2590 err = -EBUSY;
2591 } else if (cmd_match(buf, "remove")) {
2592 if (rdev->raid_disk >= 0)
2593 err = -EBUSY;
2594 else {
2595 struct mddev *mddev = rdev->mddev;
2596 err = 0;
2597 if (mddev_is_clustered(mddev))
2598 err = md_cluster_ops->remove_disk(mddev, rdev);
2599
2600 if (err == 0) {
2601 md_kick_rdev_from_array(rdev);
2602 if (mddev->pers)
2603 md_update_sb(mddev, 1);
2604 md_new_event(mddev);
2605 }
2606 }
2607 } else if (cmd_match(buf, "writemostly")) {
2608 set_bit(WriteMostly, &rdev->flags);
2609 err = 0;
2610 } else if (cmd_match(buf, "-writemostly")) {
2611 clear_bit(WriteMostly, &rdev->flags);
2612 err = 0;
2613 } else if (cmd_match(buf, "blocked")) {
2614 set_bit(Blocked, &rdev->flags);
2615 err = 0;
2616 } else if (cmd_match(buf, "-blocked")) {
2617 if (!test_bit(Faulty, &rdev->flags) &&
2618 rdev->badblocks.unacked_exist) {
			/* The metadata handler doesn't understand bad blocks,
			 * so we need to fail the device.
			 */
2622 md_error(rdev->mddev, rdev);
2623 }
2624 clear_bit(Blocked, &rdev->flags);
2625 clear_bit(BlockedBadBlocks, &rdev->flags);
2626 wake_up(&rdev->blocked_wait);
2627 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2628 md_wakeup_thread(rdev->mddev->thread);
2629
2630 err = 0;
2631 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2632 set_bit(In_sync, &rdev->flags);
2633 err = 0;
2634 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2635 !test_bit(Journal, &rdev->flags)) {
2636 if (rdev->mddev->pers == NULL) {
2637 clear_bit(In_sync, &rdev->flags);
2638 rdev->saved_raid_disk = rdev->raid_disk;
2639 rdev->raid_disk = -1;
2640 err = 0;
2641 }
2642 } else if (cmd_match(buf, "write_error")) {
2643 set_bit(WriteErrorSeen, &rdev->flags);
2644 err = 0;
2645 } else if (cmd_match(buf, "-write_error")) {
2646 clear_bit(WriteErrorSeen, &rdev->flags);
2647 err = 0;
2648 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
2653 if (rdev->raid_disk >= 0 &&
2654 !test_bit(Journal, &rdev->flags) &&
2655 !test_bit(Replacement, &rdev->flags))
2656 set_bit(WantReplacement, &rdev->flags);
2657 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2658 md_wakeup_thread(rdev->mddev->thread);
2659 err = 0;
2660 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed. */
2664 err = 0;
2665 clear_bit(WantReplacement, &rdev->flags);
2666 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when the array has
		 * not yet been started.  Once running, replacement is
		 * automatic from spares, or by assigning 'slot'.
		 */
2671 if (rdev->mddev->pers)
2672 err = -EBUSY;
2673 else {
2674 set_bit(Replacement, &rdev->flags);
2675 err = 0;
2676 }
2677 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
2679 if (rdev->mddev->pers)
2680 err = -EBUSY;
2681 else {
2682 clear_bit(Replacement, &rdev->flags);
2683 err = 0;
2684 }
2685 } else if (cmd_match(buf, "re-add")) {
2686 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
			/* For a clustered array, gather the bitmaps from the
			 * other nodes first; only then clear Faulty and try to
			 * add the device back into the array.
			 */
2693 if (!mddev_is_clustered(rdev->mddev) ||
2694 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2695 clear_bit(Faulty, &rdev->flags);
2696 err = add_bound_rdev(rdev);
2697 }
2698 } else
2699 err = -EBUSY;
2700 }
2701 if (!err)
2702 sysfs_notify_dirent_safe(rdev->sysfs_state);
2703 return err ? err : len;
2704}
2705static struct rdev_sysfs_entry rdev_state =
2706__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2707
2708static ssize_t
2709errors_show(struct md_rdev *rdev, char *page)
2710{
2711 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2712}
2713
2714static ssize_t
2715errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2716{
2717 unsigned int n;
2718 int rv;
2719
2720 rv = kstrtouint(buf, 10, &n);
2721 if (rv < 0)
2722 return rv;
2723 atomic_set(&rdev->corrected_errors, n);
2724 return len;
2725}
2726static struct rdev_sysfs_entry rdev_errors =
2727__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2728
2729static ssize_t
2730slot_show(struct md_rdev *rdev, char *page)
2731{
2732 if (test_bit(Journal, &rdev->flags))
2733 return sprintf(page, "journal\n");
2734 else if (rdev->raid_disk < 0)
2735 return sprintf(page, "none\n");
2736 else
2737 return sprintf(page, "%d\n", rdev->raid_disk);
2738}
2739
2740static ssize_t
2741slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2742{
2743 int slot;
2744 int err;
2745
2746 if (test_bit(Journal, &rdev->flags))
2747 return -EBUSY;
2748 if (strncmp(buf, "none", 4)==0)
2749 slot = -1;
2750 else {
2751 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2752 if (err < 0)
2753 return err;
2754 }
2755 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' to 'none' on an active array requires also
		 * updating the 'rd%d' link, and communicating with the
		 * personality via ->hot_remove_disk.  For now we only support
		 * removing failed or spare devices; this normally happens
		 * automatically, but not when the metadata is externally
		 * managed.
		 */
2763 if (rdev->raid_disk == -1)
2764 return -EEXIST;
2765
2766 if (rdev->mddev->pers->hot_remove_disk == NULL)
2767 return -EINVAL;
2768 clear_bit(Blocked, &rdev->flags);
2769 remove_and_add_spares(rdev->mddev, rdev);
2770 if (rdev->raid_disk >= 0)
2771 return -EBUSY;
2772 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2773 md_wakeup_thread(rdev->mddev->thread);
2774 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
2778 int err;
2779
2780 if (rdev->raid_disk != -1)
2781 return -EBUSY;
2782
2783 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2784 return -EBUSY;
2785
2786 if (rdev->mddev->pers->hot_add_disk == NULL)
2787 return -EINVAL;
2788
2789 if (slot >= rdev->mddev->raid_disks &&
2790 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2791 return -ENOSPC;
2792
2793 rdev->raid_disk = slot;
2794 if (test_bit(In_sync, &rdev->flags))
2795 rdev->saved_raid_disk = slot;
2796 else
2797 rdev->saved_raid_disk = -1;
2798 clear_bit(In_sync, &rdev->flags);
2799 clear_bit(Bitmap_sync, &rdev->flags);
2800 err = rdev->mddev->pers->
2801 hot_add_disk(rdev->mddev, rdev);
2802 if (err) {
2803 rdev->raid_disk = -1;
2804 return err;
2805 } else
2806 sysfs_notify_dirent_safe(rdev->sysfs_state);
2807 if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;

2810 } else {
2811 if (slot >= rdev->mddev->raid_disks &&
2812 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2813 return -ENOSPC;
2814 rdev->raid_disk = slot;
2815
2816 clear_bit(Faulty, &rdev->flags);
2817 clear_bit(WriteMostly, &rdev->flags);
2818 set_bit(In_sync, &rdev->flags);
2819 sysfs_notify_dirent_safe(rdev->sysfs_state);
2820 }
2821 return len;
2822}
2823
2824static struct rdev_sysfs_entry rdev_slot =
2825__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2826
2827static ssize_t
2828offset_show(struct md_rdev *rdev, char *page)
2829{
2830 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2831}
2832
2833static ssize_t
2834offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2835{
2836 unsigned long long offset;
2837 if (kstrtoull(buf, 10, &offset) < 0)
2838 return -EINVAL;
2839 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2840 return -EBUSY;
2841 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
2844 return -EBUSY;
2845 rdev->data_offset = offset;
2846 rdev->new_data_offset = offset;
2847 return len;
2848}
2849
2850static struct rdev_sysfs_entry rdev_offset =
2851__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2852
2853static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2854{
2855 return sprintf(page, "%llu\n",
2856 (unsigned long long)rdev->new_data_offset);
2857}
2858
2859static ssize_t new_offset_store(struct md_rdev *rdev,
2860 const char *buf, size_t len)
2861{
2862 unsigned long long new_offset;
2863 struct mddev *mddev = rdev->mddev;
2864
2865 if (kstrtoull(buf, 10, &new_offset) < 0)
2866 return -EINVAL;
2867
2868 if (mddev->sync_thread ||
2869 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2870 return -EBUSY;
2871 if (new_offset == rdev->data_offset)
2872
2873 ;
2874 else if (new_offset > rdev->data_offset) {
		/* must not push the array beyond the end of the device */
2876 if (new_offset - rdev->data_offset
2877 + mddev->dev_sectors > rdev->sectors)
2878 return -E2BIG;
2879 }
	/* The offset may not shrink while a backwards reshape is in effect ... */
2885 if (new_offset < rdev->data_offset &&
2886 mddev->reshape_backwards)
2887 return -EINVAL;
2888
	/* ... and may not grow unless the reshape is backwards. */
2892 if (new_offset > rdev->data_offset &&
2893 !mddev->reshape_backwards)
2894 return -EINVAL;
2895
2896 if (mddev->pers && mddev->persistent &&
2897 !super_types[mddev->major_version]
2898 .allow_new_offset(rdev, new_offset))
2899 return -E2BIG;
2900 rdev->new_data_offset = new_offset;
2901 if (new_offset > rdev->data_offset)
2902 mddev->reshape_backwards = 1;
2903 else if (new_offset < rdev->data_offset)
2904 mddev->reshape_backwards = 0;
2905
2906 return len;
2907}
2908static struct rdev_sysfs_entry rdev_new_offset =
2909__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2910
2911static ssize_t
2912rdev_size_show(struct md_rdev *rdev, char *page)
2913{
2914 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2915}
2916
2917static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2918{
	/* return 1 if the ranges [s1, s1+l1) and [s2, s2+l2) overlap */
2920 if (s1+l1 <= s2)
2921 return 0;
2922 if (s2+l2 <= s1)
2923 return 0;
2924 return 1;
2925}
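/* e.g. overlaps(0, 100, 50, 10) == 1, overlaps(0, 100, 100, 10) == 0 */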
2926
2927static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2928{
2929 unsigned long long blocks;
2930 sector_t new;
2931
2932 if (kstrtoull(buf, 10, &blocks) < 0)
2933 return -EINVAL;
2934
2935 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2936 return -EINVAL;
2937
2938 new = blocks * 2;
2939 if (new != blocks * 2)
2940 return -EINVAL;
2941
2942 *sectors = new;
2943 return 0;
2944}
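/* e.g. strict_blocks_to_sectors("1024", &s) stores 2048: each 1K block is
 * two 512-byte sectors.
 */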
2945
2946static ssize_t
2947rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2948{
2949 struct mddev *my_mddev = rdev->mddev;
2950 sector_t oldsectors = rdev->sectors;
2951 sector_t sectors;
2952
2953 if (test_bit(Journal, &rdev->flags))
2954 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2956 return -EINVAL;
2957 if (rdev->data_offset != rdev->new_data_offset)
2958 return -EINVAL;
2959 if (my_mddev->pers && rdev->raid_disk >= 0) {
2960 if (my_mddev->persistent) {
2961 sectors = super_types[my_mddev->major_version].
2962 rdev_size_change(rdev, sectors);
2963 if (!sectors)
2964 return -EBUSY;
2965 } else if (!sectors)
2966 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2967 rdev->data_offset;
2968 if (!my_mddev->pers->resize)
			/* the personality cannot resize individual devices */
2970 return -EINVAL;
2971 }
2972 if (sectors < my_mddev->dev_sectors)
2973 return -EINVAL;
2974
2975 rdev->sectors = sectors;
2976 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same ->bdev do
		 * not overlap.  'rcu' is sufficient to walk the rdev lists
		 * safely.  This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
2983 struct mddev *mddev;
2984 int overlap = 0;
2985 struct list_head *tmp;
2986
2987 rcu_read_lock();
2988 for_each_mddev(mddev, tmp) {
2989 struct md_rdev *rdev2;
2990
2991 rdev_for_each(rdev2, mddev)
2992 if (rdev->bdev == rdev2->bdev &&
2993 rdev != rdev2 &&
2994 overlaps(rdev->data_offset, rdev->sectors,
2995 rdev2->data_offset,
2996 rdev2->sectors)) {
2997 overlap = 1;
2998 break;
2999 }
3000 if (overlap) {
3001 mddev_put(mddev);
3002 break;
3003 }
3004 }
3005 rcu_read_unlock();
3006 if (overlap) {
		/* Someone else could have slipped in a size change here, but
		 * doing so is just silly.  We put oldsectors back because we
		 * *know* it is safe, and trust userspace not to race with
		 * itself.
		 */
3013 rdev->sectors = oldsectors;
3014 return -EBUSY;
3015 }
3016 }
3017 return len;
3018}
3019
3020static struct rdev_sysfs_entry rdev_size =
3021__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3022
3023static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3024{
3025 unsigned long long recovery_start = rdev->recovery_offset;
3026
3027 if (test_bit(In_sync, &rdev->flags) ||
3028 recovery_start == MaxSector)
3029 return sprintf(page, "none\n");
3030
3031 return sprintf(page, "%llu\n", recovery_start);
3032}
3033
3034static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3035{
3036 unsigned long long recovery_start;
3037
3038 if (cmd_match(buf, "none"))
3039 recovery_start = MaxSector;
3040 else if (kstrtoull(buf, 10, &recovery_start))
3041 return -EINVAL;
3042
3043 if (rdev->mddev->pers &&
3044 rdev->raid_disk >= 0)
3045 return -EBUSY;
3046
3047 rdev->recovery_offset = recovery_start;
3048 if (recovery_start == MaxSector)
3049 set_bit(In_sync, &rdev->flags);
3050 else
3051 clear_bit(In_sync, &rdev->flags);
3052 return len;
3053}
3054
3055static struct rdev_sysfs_entry rdev_recovery_start =
3056__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3057
/* sysfs access to the bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad; writing "sector length" to this file
 *    adds an acknowledged bad block.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged in metadata; writing to this file adds
 *    bad blocks without acknowledging them.
 */
3069static ssize_t bb_show(struct md_rdev *rdev, char *page)
3070{
3071 return badblocks_show(&rdev->badblocks, page, 0);
3072}
3073static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3074{
3075 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3076
3077 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3078 wake_up(&rdev->blocked_wait);
3079 return rv;
3080}
3081static struct rdev_sysfs_entry rdev_bad_blocks =
3082__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3083
3084static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3085{
3086 return badblocks_show(&rdev->badblocks, page, 1);
3087}
3088static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3089{
3090 return badblocks_store(&rdev->badblocks, page, len, 1);
3091}
3092static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3093__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3094
3095static struct attribute *rdev_default_attrs[] = {
3096 &rdev_state.attr,
3097 &rdev_errors.attr,
3098 &rdev_slot.attr,
3099 &rdev_offset.attr,
3100 &rdev_new_offset.attr,
3101 &rdev_size.attr,
3102 &rdev_recovery_start.attr,
3103 &rdev_bad_blocks.attr,
3104 &rdev_unack_bad_blocks.attr,
3105 NULL,
3106};
3107static ssize_t
3108rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3109{
3110 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3111 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3112
3113 if (!entry->show)
3114 return -EIO;
3115 if (!rdev->mddev)
3116 return -EBUSY;
3117 return entry->show(rdev, page);
3118}
3119
3120static ssize_t
3121rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3122 const char *page, size_t length)
3123{
3124 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3125 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3126 ssize_t rv;
3127 struct mddev *mddev = rdev->mddev;
3128
3129 if (!entry->store)
3130 return -EIO;
3131 if (!capable(CAP_SYS_ADMIN))
3132 return -EACCES;
3133 rv = mddev ? mddev_lock(mddev): -EBUSY;
3134 if (!rv) {
3135 if (rdev->mddev == NULL)
3136 rv = -EBUSY;
3137 else
3138 rv = entry->store(rdev, page, length);
3139 mddev_unlock(mddev);
3140 }
3141 return rv;
3142}
3143
3144static void rdev_free(struct kobject *ko)
3145{
3146 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3147 kfree(rdev);
3148}
3149static const struct sysfs_ops rdev_sysfs_ops = {
3150 .show = rdev_attr_show,
3151 .store = rdev_attr_store,
3152};
3153static struct kobj_type rdev_ktype = {
3154 .release = rdev_free,
3155 .sysfs_ops = &rdev_sysfs_ops,
3156 .default_attrs = rdev_default_attrs,
3157};
3158
3159int md_rdev_init(struct md_rdev *rdev)
3160{
3161 rdev->desc_nr = -1;
3162 rdev->saved_raid_disk = -1;
3163 rdev->raid_disk = -1;
3164 rdev->flags = 0;
3165 rdev->data_offset = 0;
3166 rdev->new_data_offset = 0;
3167 rdev->sb_events = 0;
3168 rdev->last_read_error.tv_sec = 0;
3169 rdev->last_read_error.tv_nsec = 0;
3170 rdev->sb_loaded = 0;
3171 rdev->bb_page = NULL;
3172 atomic_set(&rdev->nr_pending, 0);
3173 atomic_set(&rdev->read_errors, 0);
3174 atomic_set(&rdev->corrected_errors, 0);
3175
3176 INIT_LIST_HEAD(&rdev->same_set);
3177 init_waitqueue_head(&rdev->blocked_wait);
	/* Add space to store the bad block list.
	 * This reserves the space even on arrays where it cannot be used.
	 */
3183 return badblocks_init(&rdev->badblocks, 0);
3184}
3185EXPORT_SYMBOL_GPL(md_rdev_init);
3186
/*
 * Import a device.  If 'super_format' >= 0, sanity check the superblock.
 *
 * The import fails if:
 *   - the device is nonexistent (zero size), or
 *   - the device has no valid superblock.
 */
3196static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3197{
3198 char b[BDEVNAME_SIZE];
3199 int err;
3200 struct md_rdev *rdev;
3201 sector_t size;
3202
3203 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3204 if (!rdev) {
3205 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3206 return ERR_PTR(-ENOMEM);
3207 }
3208
3209 err = md_rdev_init(rdev);
3210 if (err)
3211 goto abort_free;
3212 err = alloc_disk_sb(rdev);
3213 if (err)
3214 goto abort_free;
3215
3216 err = lock_rdev(rdev, newdev, super_format == -2);
3217 if (err)
3218 goto abort_free;
3219
3220 kobject_init(&rdev->kobj, &rdev_ktype);
3221
3222 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3223 if (!size) {
3224 printk(KERN_WARNING
3225 "md: %s has zero or unknown size, marking faulty!\n",
3226 bdevname(rdev->bdev,b));
3227 err = -EINVAL;
3228 goto abort_free;
3229 }
3230
3231 if (super_format >= 0) {
3232 err = super_types[super_format].
3233 load_super(rdev, NULL, super_minor);
3234 if (err == -EINVAL) {
3235 printk(KERN_WARNING
3236 "md: %s does not have a valid v%d.%d "
3237 "superblock, not importing!\n",
3238 bdevname(rdev->bdev,b),
3239 super_format, super_minor);
3240 goto abort_free;
3241 }
3242 if (err < 0) {
3243 printk(KERN_WARNING
3244 "md: could not read %s's sb, not importing!\n",
3245 bdevname(rdev->bdev,b));
3246 goto abort_free;
3247 }
3248 }
3249
3250 return rdev;
3251
3252abort_free:
3253 if (rdev->bdev)
3254 unlock_rdev(rdev);
3255 md_rdev_clear(rdev);
3256 kfree(rdev);
3257 return ERR_PTR(err);
3258}
3259
/*
 * Check a full RAID array for plausibility
 */
3264static void analyze_sbs(struct mddev *mddev)
3265{
3266 int i;
3267 struct md_rdev *rdev, *freshest, *tmp;
3268 char b[BDEVNAME_SIZE];
3269
3270 freshest = NULL;
3271 rdev_for_each_safe(rdev, tmp, mddev)
3272 switch (super_types[mddev->major_version].
3273 load_super(rdev, freshest, mddev->minor_version)) {
3274 case 1:
3275 freshest = rdev;
3276 break;
3277 case 0:
3278 break;
3279 default:
3280 printk( KERN_ERR \
3281 "md: fatal superblock inconsistency in %s"
3282 " -- removing from array\n",
3283 bdevname(rdev->bdev,b));
3284 md_kick_rdev_from_array(rdev);
3285 }
3286
3287 super_types[mddev->major_version].
3288 validate_super(mddev, freshest);
3289
3290 i = 0;
3291 rdev_for_each_safe(rdev, tmp, mddev) {
3292 if (mddev->max_disks &&
3293 (rdev->desc_nr >= mddev->max_disks ||
3294 i > mddev->max_disks)) {
3295 printk(KERN_WARNING
3296 "md: %s: %s: only %d devices permitted\n",
3297 mdname(mddev), bdevname(rdev->bdev, b),
3298 mddev->max_disks);
3299 md_kick_rdev_from_array(rdev);
3300 continue;
3301 }
3302 if (rdev != freshest) {
3303 if (super_types[mddev->major_version].
3304 validate_super(mddev, rdev)) {
3305 printk(KERN_WARNING "md: kicking non-fresh %s"
3306 " from array!\n",
3307 bdevname(rdev->bdev,b));
3308 md_kick_rdev_from_array(rdev);
3309 continue;
3310 }
3311 }
3312 if (mddev->level == LEVEL_MULTIPATH) {
3313 rdev->desc_nr = i++;
3314 rdev->raid_disk = rdev->desc_nr;
3315 set_bit(In_sync, &rdev->flags);
3316 } else if (rdev->raid_disk >=
3317 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3318 !test_bit(Journal, &rdev->flags)) {
3319 rdev->raid_disk = -1;
3320 clear_bit(In_sync, &rdev->flags);
3321 }
3322 }
3323}
3324
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.  However we internally use
 * a much smaller unit such as milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * All without any floating-point arithmetic.
 */
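/* Example: strict_strtoul_scaled("1.53", &res, 3) stores 1530 in res. */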
3335int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3336{
3337 unsigned long result = 0;
3338 long decimals = -1;
3339 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3340 if (*cp == '.')
3341 decimals = 0;
3342 else if (decimals < scale) {
3343 unsigned int value;
3344 value = *cp - '0';
3345 result = result * 10 + value;
3346 if (decimals >= 0)
3347 decimals++;
3348 }
3349 cp++;
3350 }
3351 if (*cp == '\n')
3352 cp++;
3353 if (*cp)
3354 return -EINVAL;
3355 if (decimals < 0)
3356 decimals = 0;
3357 while (decimals < scale) {
3358 result *= 10;
3359 decimals ++;
3360 }
3361 *res = result;
3362 return 0;
3363}
3364
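/*
 * safe_mode_delay is shown in seconds with millisecond resolution;
 * internally it is stored in jiffies.
 */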
3365static ssize_t
3366safe_delay_show(struct mddev *mddev, char *page)
3367{
3368 int msec = (mddev->safemode_delay*1000)/HZ;
3369 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3370}
3371static ssize_t
3372safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3373{
3374 unsigned long msec;
3375
3376 if (mddev_is_clustered(mddev)) {
3377 pr_info("md: Safemode is disabled for clustered mode\n");
3378 return -EINVAL;
3379 }
3380
3381 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3382 return -EINVAL;
3383 if (msec == 0)
3384 mddev->safemode_delay = 0;
3385 else {
3386 unsigned long old_delay = mddev->safemode_delay;
3387 unsigned long new_delay = (msec*HZ)/1000;
3388
3389 if (new_delay == 0)
3390 new_delay = 1;
3391 mddev->safemode_delay = new_delay;
3392 if (new_delay < old_delay || old_delay == 0)
3393 mod_timer(&mddev->safemode_timer, jiffies+1);
3394 }
3395 return len;
3396}
3397static struct md_sysfs_entry md_safe_delay =
3398__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3399
3400static ssize_t
3401level_show(struct mddev *mddev, char *page)
3402{
3403 struct md_personality *p;
3404 int ret;
3405 spin_lock(&mddev->lock);
3406 p = mddev->pers;
3407 if (p)
3408 ret = sprintf(page, "%s\n", p->name);
3409 else if (mddev->clevel[0])
3410 ret = sprintf(page, "%s\n", mddev->clevel);
3411 else if (mddev->level != LEVEL_NONE)
3412 ret = sprintf(page, "%d\n", mddev->level);
3413 else
3414 ret = 0;
3415 spin_unlock(&mddev->lock);
3416 return ret;
3417}
3418
3419static ssize_t
3420level_store(struct mddev *mddev, const char *buf, size_t len)
3421{
3422 char clevel[16];
3423 ssize_t rv;
3424 size_t slen = len;
3425 struct md_personality *pers, *oldpers;
3426 long level;
3427 void *priv, *oldpriv;
3428 struct md_rdev *rdev;
3429
3430 if (slen == 0 || slen >= sizeof(clevel))
3431 return -EINVAL;
3432
3433 rv = mddev_lock(mddev);
3434 if (rv)
3435 return rv;
3436
3437 if (mddev->pers == NULL) {
3438 strncpy(mddev->clevel, buf, slen);
3439 if (mddev->clevel[slen-1] == '\n')
3440 slen--;
3441 mddev->clevel[slen] = 0;
3442 mddev->level = LEVEL_NONE;
3443 rv = len;
3444 goto out_unlock;
3445 }
3446 rv = -EROFS;
3447 if (mddev->ro)
3448 goto out_unlock;
3449
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in a resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access the same data as the old one.
	 */
3456 rv = -EBUSY;
3457 if (mddev->sync_thread ||
3458 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3459 mddev->reshape_position != MaxSector ||
3460 mddev->sysfs_active)
3461 goto out_unlock;
3462
3463 rv = -EINVAL;
3464 if (!mddev->pers->quiesce) {
3465 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3466 mdname(mddev), mddev->pers->name);
3467 goto out_unlock;
3468 }
3469
3470
3471 strncpy(clevel, buf, slen);
3472 if (clevel[slen-1] == '\n')
3473 slen--;
3474 clevel[slen] = 0;
3475 if (kstrtol(clevel, 10, &level))
3476 level = LEVEL_NONE;
3477
3478 if (request_module("md-%s", clevel) != 0)
3479 request_module("md-level-%s", clevel);
3480 spin_lock(&pers_lock);
3481 pers = find_pers(level, clevel);
3482 if (!pers || !try_module_get(pers->owner)) {
3483 spin_unlock(&pers_lock);
3484 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3485 rv = -EINVAL;
3486 goto out_unlock;
3487 }
3488 spin_unlock(&pers_lock);
3489
3490 if (pers == mddev->pers) {
3491
3492 module_put(pers->owner);
3493 rv = len;
3494 goto out_unlock;
3495 }
3496 if (!pers->takeover) {
3497 module_put(pers->owner);
3498 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3499 mdname(mddev), clevel);
3500 rv = -EINVAL;
3501 goto out_unlock;
3502 }
3503
3504 rdev_for_each(rdev, mddev)
3505 rdev->new_raid_disk = rdev->raid_disk;
3506
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
3510 priv = pers->takeover(mddev);
3511 if (IS_ERR(priv)) {
3512 mddev->new_level = mddev->level;
3513 mddev->new_layout = mddev->layout;
3514 mddev->new_chunk_sectors = mddev->chunk_sectors;
3515 mddev->raid_disks -= mddev->delta_disks;
3516 mddev->delta_disks = 0;
3517 mddev->reshape_backwards = 0;
3518 module_put(pers->owner);
3519 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3520 mdname(mddev), clevel);
3521 rv = PTR_ERR(priv);
3522 goto out_unlock;
3523 }
3524
3525
3526 mddev_suspend(mddev);
3527 mddev_detach(mddev);
3528
3529 spin_lock(&mddev->lock);
3530 oldpers = mddev->pers;
3531 oldpriv = mddev->private;
3532 mddev->pers = pers;
3533 mddev->private = priv;
3534 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3535 mddev->level = mddev->new_level;
3536 mddev->layout = mddev->new_layout;
3537 mddev->chunk_sectors = mddev->new_chunk_sectors;
3538 mddev->delta_disks = 0;
3539 mddev->reshape_backwards = 0;
3540 mddev->degraded = 0;
3541 spin_unlock(&mddev->lock);
3542
3543 if (oldpers->sync_request == NULL &&
3544 mddev->external) {
		/* We are converting from a no-redundancy array to a
		 * redundancy array and metadata is managed externally, so we
		 * need to be sure that writes won't block due to a need to
		 * update the metadata outside the kernel.  So force the array
		 * to stay 'dirty' and disable safemode.
		 */
3552 mddev->in_sync = 0;
3553 mddev->safemode_delay = 0;
3554 mddev->safemode = 0;
3555 }
3556
3557 oldpers->free(mddev, oldpriv);
3558
3559 if (oldpers->sync_request == NULL &&
3560 pers->sync_request != NULL) {
3561
3562 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3563 printk(KERN_WARNING
3564 "md: cannot register extra attributes for %s\n",
3565 mdname(mddev));
3566 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3567 }
3568 if (oldpers->sync_request != NULL &&
3569 pers->sync_request == NULL) {
3570
3571 if (mddev->to_remove == NULL)
3572 mddev->to_remove = &md_redundancy_group;
3573 }
3574
3575 rdev_for_each(rdev, mddev) {
3576 if (rdev->raid_disk < 0)
3577 continue;
3578 if (rdev->new_raid_disk >= mddev->raid_disks)
3579 rdev->new_raid_disk = -1;
3580 if (rdev->new_raid_disk == rdev->raid_disk)
3581 continue;
3582 sysfs_unlink_rdev(mddev, rdev);
3583 }
3584 rdev_for_each(rdev, mddev) {
3585 if (rdev->raid_disk < 0)
3586 continue;
3587 if (rdev->new_raid_disk == rdev->raid_disk)
3588 continue;
3589 rdev->raid_disk = rdev->new_raid_disk;
3590 if (rdev->raid_disk < 0)
3591 clear_bit(In_sync, &rdev->flags);
3592 else {
3593 if (sysfs_link_rdev(mddev, rdev))
3594 printk(KERN_WARNING "md: cannot register rd%d"
3595 " for %s after level change\n",
3596 rdev->raid_disk, mdname(mddev));
3597 }
3598 }
3599
3600 if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
3604 mddev->in_sync = 1;
3605 del_timer_sync(&mddev->safemode_timer);
3606 }
3607 blk_set_stacking_limits(&mddev->queue->limits);
3608 pers->run(mddev);
3609 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3610 mddev_resume(mddev);
3611 if (!mddev->thread)
3612 md_update_sb(mddev, 1);
3613 sysfs_notify(&mddev->kobj, NULL, "level");
3614 md_new_event(mddev);
3615 rv = len;
3616out_unlock:
3617 mddev_unlock(mddev);
3618 return rv;
3619}
3620
3621static struct md_sysfs_entry md_level =
3622__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3623
3624static ssize_t
3625layout_show(struct mddev *mddev, char *page)
3626{
3627
3628 if (mddev->reshape_position != MaxSector &&
3629 mddev->layout != mddev->new_layout)
3630 return sprintf(page, "%d (%d)\n",
3631 mddev->new_layout, mddev->layout);
3632 return sprintf(page, "%d\n", mddev->layout);
3633}
3634
3635static ssize_t
3636layout_store(struct mddev *mddev, const char *buf, size_t len)
3637{
3638 unsigned int n;
3639 int err;
3640
3641 err = kstrtouint(buf, 10, &n);
3642 if (err < 0)
3643 return err;
3644 err = mddev_lock(mddev);
3645 if (err)
3646 return err;
3647
3648 if (mddev->pers) {
3649 if (mddev->pers->check_reshape == NULL)
3650 err = -EBUSY;
3651 else if (mddev->ro)
3652 err = -EROFS;
3653 else {
3654 mddev->new_layout = n;
3655 err = mddev->pers->check_reshape(mddev);
3656 if (err)
3657 mddev->new_layout = mddev->layout;
3658 }
3659 } else {
3660 mddev->new_layout = n;
3661 if (mddev->reshape_position == MaxSector)
3662 mddev->layout = n;
3663 }
3664 mddev_unlock(mddev);
3665 return err ?: len;
3666}
3667static struct md_sysfs_entry md_layout =
3668__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3669
3670static ssize_t
3671raid_disks_show(struct mddev *mddev, char *page)
3672{
3673 if (mddev->raid_disks == 0)
3674 return 0;
3675 if (mddev->reshape_position != MaxSector &&
3676 mddev->delta_disks != 0)
3677 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3678 mddev->raid_disks - mddev->delta_disks);
3679 return sprintf(page, "%d\n", mddev->raid_disks);
3680}
3681
3682static int update_raid_disks(struct mddev *mddev, int raid_disks);
3683
3684static ssize_t
3685raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3686{
3687 unsigned int n;
3688 int err;
3689
3690 err = kstrtouint(buf, 10, &n);
3691 if (err < 0)
3692 return err;
3693
3694 err = mddev_lock(mddev);
3695 if (err)
3696 return err;
3697 if (mddev->pers)
3698 err = update_raid_disks(mddev, n);
3699 else if (mddev->reshape_position != MaxSector) {
3700 struct md_rdev *rdev;
3701 int olddisks = mddev->raid_disks - mddev->delta_disks;
3702
3703 err = -EINVAL;
3704 rdev_for_each(rdev, mddev) {
3705 if (olddisks < n &&
3706 rdev->data_offset < rdev->new_data_offset)
3707 goto out_unlock;
3708 if (olddisks > n &&
3709 rdev->data_offset > rdev->new_data_offset)
3710 goto out_unlock;
3711 }
3712 err = 0;
3713 mddev->delta_disks = n - olddisks;
3714 mddev->raid_disks = n;
3715 mddev->reshape_backwards = (mddev->delta_disks < 0);
3716 } else
3717 mddev->raid_disks = n;
3718out_unlock:
3719 mddev_unlock(mddev);
3720 return err ? err : len;
3721}
3722static struct md_sysfs_entry md_raid_disks =
3723__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3724
3725static ssize_t
3726chunk_size_show(struct mddev *mddev, char *page)
3727{
3728 if (mddev->reshape_position != MaxSector &&
3729 mddev->chunk_sectors != mddev->new_chunk_sectors)
3730 return sprintf(page, "%d (%d)\n",
3731 mddev->new_chunk_sectors << 9,
3732 mddev->chunk_sectors << 9);
3733 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3734}
3735
3736static ssize_t
3737chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3738{
3739 unsigned long n;
3740 int err;
3741
3742 err = kstrtoul(buf, 10, &n);
3743 if (err < 0)
3744 return err;
3745
3746 err = mddev_lock(mddev);
3747 if (err)
3748 return err;
3749 if (mddev->pers) {
3750 if (mddev->pers->check_reshape == NULL)
3751 err = -EBUSY;
3752 else if (mddev->ro)
3753 err = -EROFS;
3754 else {
3755 mddev->new_chunk_sectors = n >> 9;
3756 err = mddev->pers->check_reshape(mddev);
3757 if (err)
3758 mddev->new_chunk_sectors = mddev->chunk_sectors;
3759 }
3760 } else {
3761 mddev->new_chunk_sectors = n >> 9;
3762 if (mddev->reshape_position == MaxSector)
3763 mddev->chunk_sectors = n >> 9;
3764 }
3765 mddev_unlock(mddev);
3766 return err ?: len;
3767}
3768static struct md_sysfs_entry md_chunk_size =
3769__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3770
3771static ssize_t
3772resync_start_show(struct mddev *mddev, char *page)
3773{
3774 if (mddev->recovery_cp == MaxSector)
3775 return sprintf(page, "none\n");
3776 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3777}
3778
3779static ssize_t
3780resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3781{
3782 unsigned long long n;
3783 int err;
3784
3785 if (cmd_match(buf, "none"))
3786 n = MaxSector;
3787 else {
3788 err = kstrtoull(buf, 10, &n);
3789 if (err < 0)
3790 return err;
3791 if (n != (sector_t)n)
3792 return -EINVAL;
3793 }
3794
3795 err = mddev_lock(mddev);
3796 if (err)
3797 return err;
3798 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3799 err = -EBUSY;
3800
3801 if (!err) {
3802 mddev->recovery_cp = n;
3803 if (mddev->pers)
3804 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3805 }
3806 mddev_unlock(mddev);
3807 return err ?: len;
3808}
3809static struct md_sysfs_entry md_resync_start =
3810__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3811 resync_start_show, resync_start_store);
3812
/*
 * The array state can be:
 *
 * clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 * inactive
 *     May have some settings, but array is not active
 *        all IO results in error
 *     When written, doesn't tear down array, but just stops it
 * suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 * readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 * read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 * clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 * active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 * write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 * active-idle
 *     like active, but no writes have been seen for a while (safe_mode_delay)
 */
3849enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3850 write_pending, active_idle, bad_word};
3851static char *array_states[] = {
3852 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3853 "write-pending", "active-idle", NULL };
3854
3855static int match_word(const char *word, char **list)
3856{
3857 int n;
3858 for (n=0; list[n]; n++)
3859 if (cmd_match(word, list[n]))
3860 break;
3861 return n;
3862}
3863
3864static ssize_t
3865array_state_show(struct mddev *mddev, char *page)
3866{
3867 enum array_state st = inactive;
3868
3869 if (mddev->pers)
3870 switch(mddev->ro) {
3871 case 1:
3872 st = readonly;
3873 break;
3874 case 2:
3875 st = read_auto;
3876 break;
3877 case 0:
3878 if (mddev->in_sync)
3879 st = clean;
3880 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3881 st = write_pending;
3882 else if (mddev->safemode)
3883 st = active_idle;
3884 else
3885 st = active;
3886 }
3887 else {
3888 if (list_empty(&mddev->disks) &&
3889 mddev->raid_disks == 0 &&
3890 mddev->dev_sectors == 0)
3891 st = clear;
3892 else
3893 st = inactive;
3894 }
3895 return sprintf(page, "%s\n", array_states[st]);
3896}
3897
3898static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3899static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3900static int do_md_run(struct mddev *mddev);
3901static int restart_array(struct mddev *mddev);
3902
3903static ssize_t
3904array_state_store(struct mddev *mddev, const char *buf, size_t len)
3905{
3906 int err;
3907 enum array_state st = match_word(buf, array_states);
3908
3909 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
3913 spin_lock(&mddev->lock);
3914 if (st == active) {
3915 restart_array(mddev);
3916 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3917 wake_up(&mddev->sb_wait);
3918 err = 0;
3919 } else {
3920 restart_array(mddev);
3921 if (atomic_read(&mddev->writes_pending) == 0) {
3922 if (mddev->in_sync == 0) {
3923 mddev->in_sync = 1;
3924 if (mddev->safemode == 1)
3925 mddev->safemode = 0;
3926 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3927 }
3928 err = 0;
3929 } else
3930 err = -EBUSY;
3931 }
3932 spin_unlock(&mddev->lock);
3933 return err ?: len;
3934 }
3935 err = mddev_lock(mddev);
3936 if (err)
3937 return err;
3938 err = -EINVAL;
3939 switch(st) {
3940 case bad_word:
3941 break;
3942 case clear:
3943
3944 err = do_md_stop(mddev, 0, NULL);
3945 break;
3946 case inactive:
3947
3948 if (mddev->pers)
3949 err = do_md_stop(mddev, 2, NULL);
3950 else
3951 err = 0;
3952 break;
3953 case suspended:
3954 break;
3955 case readonly:
3956 if (mddev->pers)
3957 err = md_set_readonly(mddev, NULL);
3958 else {
3959 mddev->ro = 1;
3960 set_disk_ro(mddev->gendisk, 1);
3961 err = do_md_run(mddev);
3962 }
3963 break;
3964 case read_auto:
3965 if (mddev->pers) {
3966 if (mddev->ro == 0)
3967 err = md_set_readonly(mddev, NULL);
3968 else if (mddev->ro == 1)
3969 err = restart_array(mddev);
3970 if (err == 0) {
3971 mddev->ro = 2;
3972 set_disk_ro(mddev->gendisk, 0);
3973 }
3974 } else {
3975 mddev->ro = 2;
3976 err = do_md_run(mddev);
3977 }
3978 break;
3979 case clean:
3980 if (mddev->pers) {
3981 err = restart_array(mddev);
3982 if (err)
3983 break;
3984 spin_lock(&mddev->lock);
3985 if (atomic_read(&mddev->writes_pending) == 0) {
3986 if (mddev->in_sync == 0) {
3987 mddev->in_sync = 1;
3988 if (mddev->safemode == 1)
3989 mddev->safemode = 0;
3990 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3991 }
3992 err = 0;
3993 } else
3994 err = -EBUSY;
3995 spin_unlock(&mddev->lock);
3996 } else
3997 err = -EINVAL;
3998 break;
3999 case active:
4000 if (mddev->pers) {
4001 err = restart_array(mddev);
4002 if (err)
4003 break;
4004 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
4005 wake_up(&mddev->sb_wait);
4006 err = 0;
4007 } else {
4008 mddev->ro = 0;
4009 set_disk_ro(mddev->gendisk, 0);
4010 err = do_md_run(mddev);
4011 }
4012 break;
4013 case write_pending:
4014 case active_idle:
		/* these cannot be set directly */
4016 break;
4017 }
4018
4019 if (!err) {
4020 if (mddev->hold_active == UNTIL_IOCTL)
4021 mddev->hold_active = 0;
4022 sysfs_notify_dirent_safe(mddev->sysfs_state);
4023 }
4024 mddev_unlock(mddev);
4025 return err ?: len;
4026}
4027static struct md_sysfs_entry md_array_state =
4028__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4029
4030static ssize_t
4031max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4032 return sprintf(page, "%d\n",
4033 atomic_read(&mddev->max_corr_read_errors));
4034}
4035
4036static ssize_t
4037max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4038{
4039 unsigned int n;
4040 int rv;
4041
4042 rv = kstrtouint(buf, 10, &n);
4043 if (rv < 0)
4044 return rv;
4045 atomic_set(&mddev->max_corr_read_errors, n);
4046 return len;
4047}
4048
4049static struct md_sysfs_entry max_corr_read_errors =
4050__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4051 max_corrected_read_errors_store);
4052
4053static ssize_t
4054null_show(struct mddev *mddev, char *page)
4055{
4056 return -EINVAL;
4057}
4058
4059static ssize_t
4060new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4061{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
4069 char *e;
4070 int major = simple_strtoul(buf, &e, 10);
4071 int minor;
4072 dev_t dev;
4073 struct md_rdev *rdev;
4074 int err;
4075
4076 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4077 return -EINVAL;
4078 minor = simple_strtoul(e+1, &e, 10);
4079 if (*e && *e != '\n')
4080 return -EINVAL;
4081 dev = MKDEV(major, minor);
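	/* reject major/minor numbers that do not survive the MKDEV round-trip */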
4082 if (major != MAJOR(dev) ||
4083 minor != MINOR(dev))
4084 return -EOVERFLOW;
4085
4086 flush_workqueue(md_misc_wq);
4087
4088 err = mddev_lock(mddev);
4089 if (err)
4090 return err;
4091 if (mddev->persistent) {
4092 rdev = md_import_device(dev, mddev->major_version,
4093 mddev->minor_version);
4094 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4095 struct md_rdev *rdev0
4096 = list_entry(mddev->disks.next,
4097 struct md_rdev, same_set);
4098 err = super_types[mddev->major_version]
4099 .load_super(rdev, rdev0, mddev->minor_version);
4100 if (err < 0)
4101 goto out;
4102 }
4103 } else if (mddev->external)
4104 rdev = md_import_device(dev, -2, -1);
4105 else
4106 rdev = md_import_device(dev, -1, -1);
4107
4108 if (IS_ERR(rdev)) {
4109 mddev_unlock(mddev);
4110 return PTR_ERR(rdev);
4111 }
4112 err = bind_rdev_to_array(rdev, mddev);
4113 out:
4114 if (err)
4115 export_rdev(rdev);
4116 mddev_unlock(mddev);
4117 return err ? err : len;
4118}
4119
4120static struct md_sysfs_entry md_new_device =
4121__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4122
4123static ssize_t
4124bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4125{
4126 char *end;
4127 unsigned long chunk, end_chunk;
4128 int err;
4129
4130 err = mddev_lock(mddev);
4131 if (err)
4132 return err;
4133 if (!mddev->bitmap)
4134 goto out;
4135
4136 while (*buf) {
4137 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4138 if (buf == end) break;
4139 if (*end == '-') {
4140 buf = end + 1;
4141 end_chunk = simple_strtoul(buf, &end, 0);
4142 if (buf == end) break;
4143 }
4144 if (*end && !isspace(*end)) break;
4145 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4146 buf = skip_spaces(end);
4147 }
4148 bitmap_unplug(mddev->bitmap);
4149out:
4150 mddev_unlock(mddev);
4151 return len;
4152}
4153
4154static struct md_sysfs_entry md_bitmap =
4155__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4156
4157static ssize_t
4158size_show(struct mddev *mddev, char *page)
4159{
4160 return sprintf(page, "%llu\n",
4161 (unsigned long long)mddev->dev_sectors / 2);
4162}
4163
4164static int update_size(struct mddev *mddev, sector_t num_sectors);
4165
4166static ssize_t
4167size_store(struct mddev *mddev, const char *buf, size_t len)
4168{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
4173 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4175
4176 if (err < 0)
4177 return err;
4178 err = mddev_lock(mddev);
4179 if (err)
4180 return err;
4181 if (mddev->pers) {
4182 err = update_size(mddev, sectors);
4183 md_update_sb(mddev, 1);
4184 } else {
4185 if (mddev->dev_sectors == 0 ||
4186 mddev->dev_sectors > sectors)
4187 mddev->dev_sectors = sectors;
4188 else
4189 err = -ENOSPC;
4190 }
4191 mddev_unlock(mddev);
4192 return err ? err : len;
4193}
4194
4195static struct md_sysfs_entry md_size =
4196__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4197
/* Metadata version.
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4204static ssize_t
4205metadata_show(struct mddev *mddev, char *page)
4206{
4207 if (mddev->persistent)
4208 return sprintf(page, "%d.%d\n",
4209 mddev->major_version, mddev->minor_version);
4210 else if (mddev->external)
4211 return sprintf(page, "external:%s\n", mddev->metadata_type);
4212 else
4213 return sprintf(page, "none\n");
4214}
4215
4216static ssize_t
4217metadata_store(struct mddev *mddev, const char *buf, size_t len)
4218{
4219 int major, minor;
4220 char *e;
4221 int err;
4222
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4227 err = mddev_lock(mddev);
4228 if (err)
4229 return err;
4230 err = -EBUSY;
4231 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4232 ;
4233 else if (!list_empty(&mddev->disks))
4234 goto out_unlock;
4235
4236 err = 0;
4237 if (cmd_match(buf, "none")) {
4238 mddev->persistent = 0;
4239 mddev->external = 0;
4240 mddev->major_version = 0;
4241 mddev->minor_version = 90;
4242 goto out_unlock;
4243 }
4244 if (strncmp(buf, "external:", 9) == 0) {
4245 size_t namelen = len-9;
4246 if (namelen >= sizeof(mddev->metadata_type))
4247 namelen = sizeof(mddev->metadata_type)-1;
4248 strncpy(mddev->metadata_type, buf+9, namelen);
4249 mddev->metadata_type[namelen] = 0;
4250 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4251 mddev->metadata_type[--namelen] = 0;
4252 mddev->persistent = 0;
4253 mddev->external = 1;
4254 mddev->major_version = 0;
4255 mddev->minor_version = 90;
4256 goto out_unlock;
4257 }
4258 major = simple_strtoul(buf, &e, 10);
4259 err = -EINVAL;
4260 if (e==buf || *e != '.')
4261 goto out_unlock;
4262 buf = e+1;
4263 minor = simple_strtoul(buf, &e, 10);
4264 if (e==buf || (*e && *e != '\n') )
4265 goto out_unlock;
4266 err = -ENOENT;
4267 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4268 goto out_unlock;
4269 mddev->major_version = major;
4270 mddev->minor_version = minor;
4271 mddev->persistent = 1;
4272 mddev->external = 0;
4273 err = 0;
4274out_unlock:
4275 mddev_unlock(mddev);
4276 return err ?: len;
4277}
4278
4279static struct md_sysfs_entry md_metadata =
4280__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4281
4282static ssize_t
4283action_show(struct mddev *mddev, char *page)
4284{
4285 char *type = "idle";
4286 unsigned long recovery = mddev->recovery;
4287 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4288 type = "frozen";
4289 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4290 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4291 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4292 type = "reshape";
4293 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4294 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4295 type = "resync";
4296 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4297 type = "check";
4298 else
4299 type = "repair";
4300 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4301 type = "recover";
4302 else if (mddev->reshape_position != MaxSector)
4303 type = "reshape";
4304 }
4305 return sprintf(page, "%s\n", type);
4306}
4307
4308static ssize_t
4309action_store(struct mddev *mddev, const char *page, size_t len)
4310{
4311 if (!mddev->pers || !mddev->pers->sync_request)
4312 return -EINVAL;
4313
4314
4315 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4316 if (cmd_match(page, "frozen"))
4317 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4318 else
4319 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4320 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4321 mddev_lock(mddev) == 0) {
4322 flush_workqueue(md_misc_wq);
4323 if (mddev->sync_thread) {
4324 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4325 md_reap_sync_thread(mddev);
4326 }
4327 mddev_unlock(mddev);
4328 }
4329 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4330 return -EBUSY;
4331 else if (cmd_match(page, "resync"))
4332 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4333 else if (cmd_match(page, "recover")) {
4334 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4335 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4336 } else if (cmd_match(page, "reshape")) {
4337 int err;
4338 if (mddev->pers->start_reshape == NULL)
4339 return -EINVAL;
4340 err = mddev_lock(mddev);
4341 if (!err) {
4342 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4343 err = -EBUSY;
4344 else {
4345 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4346 err = mddev->pers->start_reshape(mddev);
4347 }
4348 mddev_unlock(mddev);
4349 }
4350 if (err)
4351 return err;
4352 sysfs_notify(&mddev->kobj, NULL, "degraded");
4353 } else {
4354 if (cmd_match(page, "check"))
4355 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4356 else if (!cmd_match(page, "repair"))
4357 return -EINVAL;
4358 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4359 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4360 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4361 }
4362 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4366 mddev->ro = 0;
4367 md_wakeup_thread(mddev->sync_thread);
4368 }
4369 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4370 md_wakeup_thread(mddev->thread);
4371 sysfs_notify_dirent_safe(mddev->sysfs_action);
4372 return len;
4373}
4374
4375static struct md_sysfs_entry md_scan_mode =
4376__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4377
4378static ssize_t
4379last_sync_action_show(struct mddev *mddev, char *page)
4380{
4381 return sprintf(page, "%s\n", mddev->last_sync_action);
4382}
4383
4384static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4385
4386static ssize_t
4387mismatch_cnt_show(struct mddev *mddev, char *page)
4388{
4389 return sprintf(page, "%llu\n",
4390 (unsigned long long)
4391 atomic64_read(&mddev->resync_mismatches));
4392}
4393
4394static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4395
4396static ssize_t
4397sync_min_show(struct mddev *mddev, char *page)
4398{
4399 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4400 mddev->sync_speed_min ? "local": "system");
4401}
4402
4403static ssize_t
4404sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4405{
4406 unsigned int min;
4407 int rv;
4408
4409 if (strncmp(buf, "system", 6)==0) {
4410 min = 0;
4411 } else {
4412 rv = kstrtouint(buf, 10, &min);
4413 if (rv < 0)
4414 return rv;
4415 if (min == 0)
4416 return -EINVAL;
4417 }
4418 mddev->sync_speed_min = min;
4419 return len;
4420}
4421
4422static struct md_sysfs_entry md_sync_min =
4423__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4424
4425static ssize_t
4426sync_max_show(struct mddev *mddev, char *page)
4427{
4428 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4429 mddev->sync_speed_max ? "local": "system");
4430}
4431
4432static ssize_t
4433sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4434{
4435 unsigned int max;
4436 int rv;
4437
4438 if (strncmp(buf, "system", 6)==0) {
4439 max = 0;
4440 } else {
4441 rv = kstrtouint(buf, 10, &max);
4442 if (rv < 0)
4443 return rv;
4444 if (max == 0)
4445 return -EINVAL;
4446 }
4447 mddev->sync_speed_max = max;
4448 return len;
4449}
4450
4451static struct md_sysfs_entry md_sync_max =
4452__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4453
4454static ssize_t
4455degraded_show(struct mddev *mddev, char *page)
4456{
4457 return sprintf(page, "%d\n", mddev->degraded);
4458}
4459static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4460
4461static ssize_t
4462sync_force_parallel_show(struct mddev *mddev, char *page)
4463{
4464 return sprintf(page, "%d\n", mddev->parallel_resync);
4465}
4466
4467static ssize_t
4468sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4469{
4470 long n;
4471
4472 if (kstrtol(buf, 10, &n))
4473 return -EINVAL;
4474
4475 if (n != 0 && n != 1)
4476 return -EINVAL;
4477
4478 mddev->parallel_resync = n;
4479
4480 if (mddev->sync_thread)
4481 wake_up(&resync_wait);
4482
4483 return len;
4484}
4485
4486
4487static struct md_sysfs_entry md_sync_force_parallel =
4488__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4489 sync_force_parallel_show, sync_force_parallel_store);
4490
4491static ssize_t
4492sync_speed_show(struct mddev *mddev, char *page)
4493{
4494 unsigned long resync, dt, db;
4495 if (mddev->curr_resync == 0)
4496 return sprintf(page, "none\n");
4497 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4498 dt = (jiffies - mddev->resync_mark) / HZ;
4499 if (!dt) dt++;
4500 db = resync - mddev->resync_mark_cnt;
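	/* db is in sectors, so dividing by 2 reports KiB per second */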
4501 return sprintf(page, "%lu\n", db/dt/2);
4502}
4503
4504static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4505
4506static ssize_t
4507sync_completed_show(struct mddev *mddev, char *page)
4508{
4509 unsigned long long max_sectors, resync;
4510
4511 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4512 return sprintf(page, "none\n");
4513
4514 if (mddev->curr_resync == 1 ||
4515 mddev->curr_resync == 2)
4516 return sprintf(page, "delayed\n");
4517
4518 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4519 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4520 max_sectors = mddev->resync_max_sectors;
4521 else
4522 max_sectors = mddev->dev_sectors;
4523
4524 resync = mddev->curr_resync_completed;
4525 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4526}
4527
4528static struct md_sysfs_entry md_sync_completed =
4529 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4530
4531static ssize_t
4532min_sync_show(struct mddev *mddev, char *page)
4533{
4534 return sprintf(page, "%llu\n",
4535 (unsigned long long)mddev->resync_min);
4536}
4537static ssize_t
4538min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4539{
4540 unsigned long long min;
4541 int err;
4542
4543 if (kstrtoull(buf, 10, &min))
4544 return -EINVAL;
4545
4546 spin_lock(&mddev->lock);
4547 err = -EINVAL;
4548 if (min > mddev->resync_max)
4549 goto out_unlock;
4550
4551 err = -EBUSY;
4552 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4553 goto out_unlock;
4554
4555
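	/* Round down to a multiple of 4K (8 sectors) for safety */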
4556 mddev->resync_min = round_down(min, 8);
4557 err = 0;
4558
4559out_unlock:
4560 spin_unlock(&mddev->lock);
4561 return err ?: len;
4562}
4563
4564static struct md_sysfs_entry md_min_sync =
4565__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4566
4567static ssize_t
4568max_sync_show(struct mddev *mddev, char *page)
4569{
4570 if (mddev->resync_max == MaxSector)
4571 return sprintf(page, "max\n");
4572 else
4573 return sprintf(page, "%llu\n",
4574 (unsigned long long)mddev->resync_max);
4575}
4576static ssize_t
4577max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4578{
4579 int err;
4580 spin_lock(&mddev->lock);
4581 if (strncmp(buf, "max", 3) == 0)
4582 mddev->resync_max = MaxSector;
4583 else {
4584 unsigned long long max;
4585 int chunk;
4586
4587 err = -EINVAL;
4588 if (kstrtoull(buf, 10, &max))
4589 goto out_unlock;
4590 if (max < mddev->resync_min)
4591 goto out_unlock;
4592
4593 err = -EBUSY;
4594 if (max < mddev->resync_max &&
4595 mddev->ro == 0 &&
4596 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4597 goto out_unlock;
4598
4599
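		/* Must be a multiple of chunk_size */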
4600 chunk = mddev->chunk_sectors;
4601 if (chunk) {
4602 sector_t temp = max;
4603
4604 err = -EINVAL;
4605 if (sector_div(temp, chunk))
4606 goto out_unlock;
4607 }
4608 mddev->resync_max = max;
4609 }
4610 wake_up(&mddev->recovery_wait);
4611 err = 0;
4612out_unlock:
4613 spin_unlock(&mddev->lock);
4614 return err ?: len;
4615}
4616
4617static struct md_sysfs_entry md_max_sync =
4618__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4619
4620static ssize_t
4621suspend_lo_show(struct mddev *mddev, char *page)
4622{
4623 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4624}
4625
4626static ssize_t
4627suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4628{
4629 unsigned long long old, new;
4630 int err;
4631
4632 err = kstrtoull(buf, 10, &new);
4633 if (err < 0)
4634 return err;
4635 if (new != (sector_t)new)
4636 return -EINVAL;
4637
4638 err = mddev_lock(mddev);
4639 if (err)
4640 return err;
4641 err = -EINVAL;
4642 if (mddev->pers == NULL ||
4643 mddev->pers->quiesce == NULL)
4644 goto unlock;
4645 old = mddev->suspend_lo;
4646 mddev->suspend_lo = new;
4647 if (new >= old)
4648
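		/* Suspended region is shrinking */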
4649 mddev->pers->quiesce(mddev, 2);
4650 else {
4651
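		/* Suspended region is expanding - quiesce and resume so
		 * in-flight I/O to the newly suspended range is drained */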
4652 mddev->pers->quiesce(mddev, 1);
4653 mddev->pers->quiesce(mddev, 0);
4654 }
4655 err = 0;
4656unlock:
4657 mddev_unlock(mddev);
4658 return err ?: len;
4659}
4660static struct md_sysfs_entry md_suspend_lo =
4661__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4662
4663static ssize_t
4664suspend_hi_show(struct mddev *mddev, char *page)
4665{
4666 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4667}
4668
4669static ssize_t
4670suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4671{
4672 unsigned long long old, new;
4673 int err;
4674
4675 err = kstrtoull(buf, 10, &new);
4676 if (err < 0)
4677 return err;
4678 if (new != (sector_t)new)
4679 return -EINVAL;
4680
4681 err = mddev_lock(mddev);
4682 if (err)
4683 return err;
4684 err = -EINVAL;
4685 if (mddev->pers == NULL ||
4686 mddev->pers->quiesce == NULL)
4687 goto unlock;
4688 old = mddev->suspend_hi;
4689 mddev->suspend_hi = new;
4690 if (new <= old)
4691
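		/* Suspended region is shrinking */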
4692 mddev->pers->quiesce(mddev, 2);
4693 else {
4694
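		/* Suspended region is expanding - quiesce and resume so
		 * in-flight I/O to the newly suspended range is drained */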
4695 mddev->pers->quiesce(mddev, 1);
4696 mddev->pers->quiesce(mddev, 0);
4697 }
4698 err = 0;
4699unlock:
4700 mddev_unlock(mddev);
4701 return err ?: len;
4702}
4703static struct md_sysfs_entry md_suspend_hi =
4704__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4705
4706static ssize_t
4707reshape_position_show(struct mddev *mddev, char *page)
4708{
4709 if (mddev->reshape_position != MaxSector)
4710 return sprintf(page, "%llu\n",
4711 (unsigned long long)mddev->reshape_position);
4712 strcpy(page, "none\n");
4713 return 5;
4714}
4715
4716static ssize_t
4717reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4718{
4719 struct md_rdev *rdev;
4720 unsigned long long new;
4721 int err;
4722
4723 err = kstrtoull(buf, 10, &new);
4724 if (err < 0)
4725 return err;
4726 if (new != (sector_t)new)
4727 return -EINVAL;
4728 err = mddev_lock(mddev);
4729 if (err)
4730 return err;
4731 err = -EBUSY;
4732 if (mddev->pers)
4733 goto unlock;
4734 mddev->reshape_position = new;
4735 mddev->delta_disks = 0;
4736 mddev->reshape_backwards = 0;
4737 mddev->new_level = mddev->level;
4738 mddev->new_layout = mddev->layout;
4739 mddev->new_chunk_sectors = mddev->chunk_sectors;
4740 rdev_for_each(rdev, mddev)
4741 rdev->new_data_offset = rdev->data_offset;
4742 err = 0;
4743unlock:
4744 mddev_unlock(mddev);
4745 return err ?: len;
4746}
4747
4748static struct md_sysfs_entry md_reshape_position =
4749__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4750 reshape_position_store);
4751
4752static ssize_t
4753reshape_direction_show(struct mddev *mddev, char *page)
4754{
4755 return sprintf(page, "%s\n",
4756 mddev->reshape_backwards ? "backwards" : "forwards");
4757}
4758
4759static ssize_t
4760reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4761{
4762 int backwards = 0;
4763 int err;
4764
4765 if (cmd_match(buf, "forwards"))
4766 backwards = 0;
4767 else if (cmd_match(buf, "backwards"))
4768 backwards = 1;
4769 else
4770 return -EINVAL;
4771 if (mddev->reshape_backwards == backwards)
4772 return len;
4773
4774 err = mddev_lock(mddev);
4775 if (err)
4776 return err;
4777
4778 if (mddev->delta_disks)
4779 err = -EBUSY;
4780 else if (mddev->persistent &&
4781 mddev->major_version == 0)
4782 err = -EINVAL;
4783 else
4784 mddev->reshape_backwards = backwards;
4785 mddev_unlock(mddev);
4786 return err ?: len;
4787}
4788
4789static struct md_sysfs_entry md_reshape_direction =
4790__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4791 reshape_direction_store);
4792
4793static ssize_t
4794array_size_show(struct mddev *mddev, char *page)
4795{
4796 if (mddev->external_size)
4797 return sprintf(page, "%llu\n",
4798 (unsigned long long)mddev->array_sectors/2);
4799 else
4800 return sprintf(page, "default\n");
4801}
4802
4803static ssize_t
4804array_size_store(struct mddev *mddev, const char *buf, size_t len)
4805{
4806 sector_t sectors;
4807 int err;
4808
4809 err = mddev_lock(mddev);
4810 if (err)
4811 return err;
4812
4813 if (strncmp(buf, "default", 7) == 0) {
4814 if (mddev->pers)
4815 sectors = mddev->pers->size(mddev, 0, 0);
4816 else
4817 sectors = mddev->array_sectors;
4818
4819 mddev->external_size = 0;
4820 } else {
4821 		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4822 err = -EINVAL;
4823 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4824 err = -E2BIG;
4825 else
4826 mddev->external_size = 1;
4827 }
4828
4829 if (!err) {
4830 mddev->array_sectors = sectors;
4831 if (mddev->pers) {
4832 set_capacity(mddev->gendisk, mddev->array_sectors);
4833 revalidate_disk(mddev->gendisk);
4834 }
4835 }
4836 mddev_unlock(mddev);
4837 return err ?: len;
4838}
4839
4840static struct md_sysfs_entry md_array_size =
4841__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4842 array_size_store);
4843
4844static struct attribute *md_default_attrs[] = {
4845 &md_level.attr,
4846 &md_layout.attr,
4847 &md_raid_disks.attr,
4848 &md_chunk_size.attr,
4849 &md_size.attr,
4850 &md_resync_start.attr,
4851 &md_metadata.attr,
4852 &md_new_device.attr,
4853 &md_safe_delay.attr,
4854 &md_array_state.attr,
4855 &md_reshape_position.attr,
4856 &md_reshape_direction.attr,
4857 &md_array_size.attr,
4858 &max_corr_read_errors.attr,
4859 NULL,
4860};
4861
4862static struct attribute *md_redundancy_attrs[] = {
4863 &md_scan_mode.attr,
4864 &md_last_scan_mode.attr,
4865 &md_mismatches.attr,
4866 &md_sync_min.attr,
4867 &md_sync_max.attr,
4868 &md_sync_speed.attr,
4869 &md_sync_force_parallel.attr,
4870 &md_sync_completed.attr,
4871 &md_min_sync.attr,
4872 &md_max_sync.attr,
4873 &md_suspend_lo.attr,
4874 &md_suspend_hi.attr,
4875 &md_bitmap.attr,
4876 &md_degraded.attr,
4877 NULL,
4878};
4879static struct attribute_group md_redundancy_group = {
4880 .name = NULL,
4881 .attrs = md_redundancy_attrs,
4882};
4883
4884static ssize_t
4885md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4886{
4887 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4888 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4889 ssize_t rv;
4890
4891 if (!entry->show)
4892 return -EIO;
4893 spin_lock(&all_mddevs_lock);
4894 if (list_empty(&mddev->all_mddevs)) {
4895 spin_unlock(&all_mddevs_lock);
4896 return -EBUSY;
4897 }
4898 mddev_get(mddev);
4899 spin_unlock(&all_mddevs_lock);
4900
4901 rv = entry->show(mddev, page);
4902 mddev_put(mddev);
4903 return rv;
4904}
4905
4906static ssize_t
4907md_attr_store(struct kobject *kobj, struct attribute *attr,
4908 const char *page, size_t length)
4909{
4910 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4911 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4912 ssize_t rv;
4913
4914 if (!entry->store)
4915 return -EIO;
4916 if (!capable(CAP_SYS_ADMIN))
4917 return -EACCES;
4918 spin_lock(&all_mddevs_lock);
4919 if (list_empty(&mddev->all_mddevs)) {
4920 spin_unlock(&all_mddevs_lock);
4921 return -EBUSY;
4922 }
4923 mddev_get(mddev);
4924 spin_unlock(&all_mddevs_lock);
4925 rv = entry->store(mddev, page, length);
4926 mddev_put(mddev);
4927 return rv;
4928}
4929
4930static void md_free(struct kobject *ko)
4931{
4932 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4933
4934 if (mddev->sysfs_state)
4935 sysfs_put(mddev->sysfs_state);
4936
4937 if (mddev->queue)
4938 blk_cleanup_queue(mddev->queue);
4939 if (mddev->gendisk) {
4940 del_gendisk(mddev->gendisk);
4941 put_disk(mddev->gendisk);
4942 }
4943
4944 kfree(mddev);
4945}
4946
4947static const struct sysfs_ops md_sysfs_ops = {
4948 .show = md_attr_show,
4949 .store = md_attr_store,
4950};
4951static struct kobj_type md_ktype = {
4952 .release = md_free,
4953 .sysfs_ops = &md_sysfs_ops,
4954 .default_attrs = md_default_attrs,
4955};
4956
4957int mdp_major = 0;
4958
4959static void mddev_delayed_delete(struct work_struct *ws)
4960{
4961 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4962
4963 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4964 kobject_del(&mddev->kobj);
4965 kobject_put(&mddev->kobj);
4966}
4967
4968static int md_alloc(dev_t dev, char *name)
4969{
4970 static DEFINE_MUTEX(disks_mutex);
4971 struct mddev *mddev = mddev_find(dev);
4972 struct gendisk *disk;
4973 int partitioned;
4974 int shift;
4975 int unit;
4976 int error;
4977
4978 if (!mddev)
4979 return -ENODEV;
4980
4981 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4982 shift = partitioned ? MdpMinorShift : 0;
4983 unit = MINOR(mddev->unit) >> shift;
4984
4985
4986
4987
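	/* Make sure any pending delayed device deletion on md_misc_wq has
	 * finished before we (re)create this disk.
	 */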
4988 flush_workqueue(md_misc_wq);
4989
4990 mutex_lock(&disks_mutex);
4991 error = -EEXIST;
4992 if (mddev->gendisk)
4993 goto abort;
4994
4995 if (name) {
4996
4997
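		/* Need to ensure that 'name' is not a duplicate of an
		 * existing array's disk_name.
		 */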
4998 struct mddev *mddev2;
4999 spin_lock(&all_mddevs_lock);
5000
5001 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5002 if (mddev2->gendisk &&
5003 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5004 spin_unlock(&all_mddevs_lock);
5005 goto abort;
5006 }
5007 spin_unlock(&all_mddevs_lock);
5008 }
5009
5010 error = -ENOMEM;
5011 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5012 if (!mddev->queue)
5013 goto abort;
5014 mddev->queue->queuedata = mddev;
5015
5016 blk_queue_make_request(mddev->queue, md_make_request);
5017 blk_set_stacking_limits(&mddev->queue->limits);
5018
5019 disk = alloc_disk(1 << shift);
5020 if (!disk) {
5021 blk_cleanup_queue(mddev->queue);
5022 mddev->queue = NULL;
5023 goto abort;
5024 }
5025 disk->major = MAJOR(mddev->unit);
5026 disk->first_minor = unit << shift;
5027 if (name)
5028 strcpy(disk->disk_name, name);
5029 else if (partitioned)
5030 sprintf(disk->disk_name, "md_d%d", unit);
5031 else
5032 sprintf(disk->disk_name, "md%d", unit);
5033 disk->fops = &md_fops;
5034 disk->private_data = mddev;
5035 disk->queue = mddev->queue;
5036 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
5037
5038
5039
5040
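	/* Allow extended partitions: partition minors are allocated from
	 * the extended dev_t space.
	 */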
5041 disk->flags |= GENHD_FL_EXT_DEVT;
5042 mddev->gendisk = disk;
5043
5044
5045
5046 mutex_lock(&mddev->open_mutex);
5047 add_disk(disk);
5048
5049 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5050 &disk_to_dev(disk)->kobj, "%s", "md");
5051 if (error) {
5052
5053
5054
5055 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
5056 disk->disk_name);
5057 error = 0;
5058 }
5059 if (mddev->kobj.sd &&
5060 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5061 printk(KERN_DEBUG "pointless warning\n");
5062 mutex_unlock(&mddev->open_mutex);
5063 abort:
5064 mutex_unlock(&disks_mutex);
5065 if (!error && mddev->kobj.sd) {
5066 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5067 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5068 }
5069 mddev_put(mddev);
5070 return error;
5071}
5072
5073static struct kobject *md_probe(dev_t dev, int *part, void *data)
5074{
5075 md_alloc(dev, NULL);
5076 return NULL;
5077}
5078
5079static int add_named_array(const char *val, struct kernel_param *kp)
5080{
5081
5082
5083
5084
5085 int len = strlen(val);
5086 char buf[DISK_NAME_LEN];
5087
5088 while (len && val[len-1] == '\n')
5089 len--;
5090 if (len >= DISK_NAME_LEN)
5091 return -E2BIG;
5092 strlcpy(buf, val, len+1);
5093 if (strncmp(buf, "md_", 3) != 0)
5094 return -EINVAL;
5095 return md_alloc(0, buf);
5096}
5097
5098static void md_safemode_timeout(unsigned long data)
5099{
5100 struct mddev *mddev = (struct mddev *) data;
5101
5102 if (!atomic_read(&mddev->writes_pending)) {
5103 mddev->safemode = 1;
5104 if (mddev->external)
5105 sysfs_notify_dirent_safe(mddev->sysfs_state);
5106 }
5107 md_wakeup_thread(mddev->thread);
5108}
5109
5110static int start_dirty_degraded;
5111
5112int md_run(struct mddev *mddev)
5113{
5114 int err;
5115 struct md_rdev *rdev;
5116 struct md_personality *pers;
5117
5118 if (list_empty(&mddev->disks))
5119
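		/* cannot run an array with no devices.. */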
5120 return -EINVAL;
5121
5122 if (mddev->pers)
5123 return -EBUSY;
5124
5125 if (mddev->sysfs_active)
5126 return -EBUSY;
5127
5128
5129
5130
5131 if (!mddev->raid_disks) {
5132 if (!mddev->persistent)
5133 return -EINVAL;
5134 analyze_sbs(mddev);
5135 }
5136
5137 if (mddev->level != LEVEL_NONE)
5138 request_module("md-level-%d", mddev->level);
5139 else if (mddev->clevel[0])
5140 request_module("md-%s", mddev->clevel);
5141
5142
5143
5144
5145
5146
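	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */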
5147 rdev_for_each(rdev, mddev) {
5148 if (test_bit(Faulty, &rdev->flags))
5149 continue;
5150 sync_blockdev(rdev->bdev);
5151 invalidate_bdev(rdev->bdev);
5152
5153
5154
5155
5156
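		/* Sanity check: the data area and the superblock (metadata)
		 * area of each device must not overlap.
		 */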
5157 if (rdev->meta_bdev) {
5158 ;
5159 } else if (rdev->data_offset < rdev->sb_start) {
5160 if (mddev->dev_sectors &&
5161 rdev->data_offset + mddev->dev_sectors
5162 > rdev->sb_start) {
5163 printk("md: %s: data overlaps metadata\n",
5164 mdname(mddev));
5165 return -EINVAL;
5166 }
5167 } else {
5168 if (rdev->sb_start + rdev->sb_size/512
5169 > rdev->data_offset) {
5170 printk("md: %s: metadata overlaps data\n",
5171 mdname(mddev));
5172 return -EINVAL;
5173 }
5174 }
5175 sysfs_notify_dirent_safe(rdev->sysfs_state);
5176 }
5177
5178 if (mddev->bio_set == NULL)
5179 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5180
5181 spin_lock(&pers_lock);
5182 pers = find_pers(mddev->level, mddev->clevel);
5183 if (!pers || !try_module_get(pers->owner)) {
5184 spin_unlock(&pers_lock);
5185 if (mddev->level != LEVEL_NONE)
5186 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5187 mddev->level);
5188 else
5189 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5190 mddev->clevel);
5191 return -EINVAL;
5192 }
5193 spin_unlock(&pers_lock);
5194 if (mddev->level != pers->level) {
5195 mddev->level = pers->level;
5196 mddev->new_level = pers->level;
5197 }
5198 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5199
5200 if (mddev->reshape_position != MaxSector &&
5201 pers->start_reshape == NULL) {
5202
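		/* This personality cannot handle reshaping... */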
5203 module_put(pers->owner);
5204 return -EINVAL;
5205 }
5206
5207 if (pers->sync_request) {
5208
5209
5210
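		/* Warn if any two member devices share the same underlying
		 * whole disk - protection against single-disk failure would
		 * be compromised.
		 */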
5211 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5212 struct md_rdev *rdev2;
5213 int warned = 0;
5214
5215 rdev_for_each(rdev, mddev)
5216 rdev_for_each(rdev2, mddev) {
5217 if (rdev < rdev2 &&
5218 rdev->bdev->bd_contains ==
5219 rdev2->bdev->bd_contains) {
5220 printk(KERN_WARNING
5221 "%s: WARNING: %s appears to be"
5222 " on the same physical disk as"
5223 " %s.\n",
5224 mdname(mddev),
5225 bdevname(rdev->bdev,b),
5226 bdevname(rdev2->bdev,b2));
5227 warned = 1;
5228 }
5229 }
5230
5231 if (warned)
5232 printk(KERN_WARNING
5233 "True protection against single-disk"
5234 " failure might be compromised.\n");
5235 }
5236
5237 mddev->recovery = 0;
5238
5239 mddev->resync_max_sectors = mddev->dev_sectors;
5240
5241 mddev->ok_start_degraded = start_dirty_degraded;
5242
5243 if (start_readonly && mddev->ro == 0)
5244 mddev->ro = 2;
5245
5246 err = pers->run(mddev);
5247 if (err)
5248 printk(KERN_ERR "md: pers->run() failed ...\n");
5249 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5250 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5251 " but 'external_size' not in effect?\n", __func__);
5252 printk(KERN_ERR
5253 "md: invalid array_size %llu > default size %llu\n",
5254 (unsigned long long)mddev->array_sectors / 2,
5255 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5256 err = -EINVAL;
5257 }
5258 if (err == 0 && pers->sync_request &&
5259 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5260 struct bitmap *bitmap;
5261
5262 bitmap = bitmap_create(mddev, -1);
5263 if (IS_ERR(bitmap)) {
5264 err = PTR_ERR(bitmap);
5265 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5266 mdname(mddev), err);
5267 } else
5268 mddev->bitmap = bitmap;
5269
5270 }
5271 if (err) {
5272 mddev_detach(mddev);
5273 if (mddev->private)
5274 pers->free(mddev, mddev->private);
5275 mddev->private = NULL;
5276 module_put(pers->owner);
5277 bitmap_destroy(mddev);
5278 return err;
5279 }
5280 if (mddev->queue) {
5281 mddev->queue->backing_dev_info.congested_data = mddev;
5282 mddev->queue->backing_dev_info.congested_fn = md_congested;
5283 }
5284 if (pers->sync_request) {
5285 if (mddev->kobj.sd &&
5286 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5287 printk(KERN_WARNING
5288 "md: cannot register extra attributes for %s\n",
5289 mdname(mddev));
5290 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5291 } else if (mddev->ro == 2)
5292 mddev->ro = 0;
5293
5294 atomic_set(&mddev->writes_pending,0);
5295 atomic_set(&mddev->max_corr_read_errors,
5296 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5297 mddev->safemode = 0;
5298 if (mddev_is_clustered(mddev))
5299 mddev->safemode_delay = 0;
5300 else
5301 mddev->safemode_delay = (200 * HZ)/1000 +1;
5302 mddev->in_sync = 1;
5303 smp_wmb();
5304 spin_lock(&mddev->lock);
5305 mddev->pers = pers;
5306 spin_unlock(&mddev->lock);
5307 rdev_for_each(rdev, mddev)
5308 if (rdev->raid_disk >= 0)
5309 if (sysfs_link_rdev(mddev, rdev))
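				/* failure here is OK */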
5310 ;
5311
5312 if (mddev->degraded && !mddev->ro)
5313
5314
5315
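		/* Array is degraded: note that a recovery (rebuild onto a
		 * spare) is wanted so the state is reported right away.
		 */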
5316 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5317 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5318
5319 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5320 md_update_sb(mddev, 0);
5321
5322 md_new_event(mddev);
5323 sysfs_notify_dirent_safe(mddev->sysfs_state);
5324 sysfs_notify_dirent_safe(mddev->sysfs_action);
5325 sysfs_notify(&mddev->kobj, NULL, "degraded");
5326 return 0;
5327}
5328EXPORT_SYMBOL_GPL(md_run);
5329
5330static int do_md_run(struct mddev *mddev)
5331{
5332 int err;
5333
5334 err = md_run(mddev);
5335 if (err)
5336 goto out;
5337 err = bitmap_load(mddev);
5338 if (err) {
5339 bitmap_destroy(mddev);
5340 goto out;
5341 }
5342
5343 if (mddev_is_clustered(mddev))
5344 md_allow_write(mddev);
5345
5346 md_wakeup_thread(mddev->thread);
5347 md_wakeup_thread(mddev->sync_thread);
5348
5349 set_capacity(mddev->gendisk, mddev->array_sectors);
5350 revalidate_disk(mddev->gendisk);
5351 mddev->changed = 1;
5352 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5353out:
5354 return err;
5355}
5356
5357static int restart_array(struct mddev *mddev)
5358{
5359 struct gendisk *disk = mddev->gendisk;
5360
5361
5362 if (list_empty(&mddev->disks))
5363 return -ENXIO;
5364 if (!mddev->pers)
5365 return -EINVAL;
5366 if (!mddev->ro)
5367 return -EBUSY;
5368 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5369 struct md_rdev *rdev;
5370 bool has_journal = false;
5371
5372 rcu_read_lock();
5373 rdev_for_each_rcu(rdev, mddev) {
5374 if (test_bit(Journal, &rdev->flags) &&
5375 !test_bit(Faulty, &rdev->flags)) {
5376 has_journal = true;
5377 break;
5378 }
5379 }
5380 rcu_read_unlock();
5381
5382
5383 if (!has_journal)
5384 return -EINVAL;
5385 }
5386
5387 mddev->safemode = 0;
5388 mddev->ro = 0;
5389 set_disk_ro(disk, 0);
5390 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5391 mdname(mddev));
5392
5393 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5394 md_wakeup_thread(mddev->thread);
5395 md_wakeup_thread(mddev->sync_thread);
5396 sysfs_notify_dirent_safe(mddev->sysfs_state);
5397 return 0;
5398}
5399
5400static void md_clean(struct mddev *mddev)
5401{
5402 mddev->array_sectors = 0;
5403 mddev->external_size = 0;
5404 mddev->dev_sectors = 0;
5405 mddev->raid_disks = 0;
5406 mddev->recovery_cp = 0;
5407 mddev->resync_min = 0;
5408 mddev->resync_max = MaxSector;
5409 mddev->reshape_position = MaxSector;
5410 mddev->external = 0;
5411 mddev->persistent = 0;
5412 mddev->level = LEVEL_NONE;
5413 mddev->clevel[0] = 0;
5414 mddev->flags = 0;
5415 mddev->ro = 0;
5416 mddev->metadata_type[0] = 0;
5417 mddev->chunk_sectors = 0;
5418 mddev->ctime = mddev->utime = 0;
5419 mddev->layout = 0;
5420 mddev->max_disks = 0;
5421 mddev->events = 0;
5422 mddev->can_decrease_events = 0;
5423 mddev->delta_disks = 0;
5424 mddev->reshape_backwards = 0;
5425 mddev->new_level = LEVEL_NONE;
5426 mddev->new_layout = 0;
5427 mddev->new_chunk_sectors = 0;
5428 mddev->curr_resync = 0;
5429 atomic64_set(&mddev->resync_mismatches, 0);
5430 mddev->suspend_lo = mddev->suspend_hi = 0;
5431 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5432 mddev->recovery = 0;
5433 mddev->in_sync = 0;
5434 mddev->changed = 0;
5435 mddev->degraded = 0;
5436 mddev->safemode = 0;
5437 mddev->private = NULL;
5438 mddev->bitmap_info.offset = 0;
5439 mddev->bitmap_info.default_offset = 0;
5440 mddev->bitmap_info.default_space = 0;
5441 mddev->bitmap_info.chunksize = 0;
5442 mddev->bitmap_info.daemon_sleep = 0;
5443 mddev->bitmap_info.max_write_behind = 0;
5444}
5445
5446static void __md_stop_writes(struct mddev *mddev)
5447{
5448 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5449 flush_workqueue(md_misc_wq);
5450 if (mddev->sync_thread) {
5451 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5452 md_reap_sync_thread(mddev);
5453 }
5454
5455 del_timer_sync(&mddev->safemode_timer);
5456
5457 bitmap_flush(mddev);
5458 md_super_wait(mddev);
5459
5460 if (mddev->ro == 0 &&
5461 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5462 (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5463
5464 if (!mddev_is_clustered(mddev))
5465 mddev->in_sync = 1;
5466 md_update_sb(mddev, 1);
5467 }
5468}
5469
5470void md_stop_writes(struct mddev *mddev)
5471{
5472 mddev_lock_nointr(mddev);
5473 __md_stop_writes(mddev);
5474 mddev_unlock(mddev);
5475}
5476EXPORT_SYMBOL_GPL(md_stop_writes);
5477
5478static void mddev_detach(struct mddev *mddev)
5479{
5480 struct bitmap *bitmap = mddev->bitmap;
5481
5482 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5483 printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n",
5484 mdname(mddev));
5485
5486 wait_event(bitmap->behind_wait,
5487 atomic_read(&bitmap->behind_writes) == 0);
5488 }
5489 if (mddev->pers && mddev->pers->quiesce) {
5490 mddev->pers->quiesce(mddev, 1);
5491 mddev->pers->quiesce(mddev, 0);
5492 }
5493 md_unregister_thread(&mddev->thread);
5494 if (mddev->queue)
5495 blk_sync_queue(mddev->queue);
5496}
5497
5498static void __md_stop(struct mddev *mddev)
5499{
5500 struct md_personality *pers = mddev->pers;
5501 mddev_detach(mddev);
5502
5503 flush_workqueue(md_misc_wq);
5504 spin_lock(&mddev->lock);
5505 mddev->pers = NULL;
5506 spin_unlock(&mddev->lock);
5507 pers->free(mddev, mddev->private);
5508 mddev->private = NULL;
5509 if (pers->sync_request && mddev->to_remove == NULL)
5510 mddev->to_remove = &md_redundancy_group;
5511 module_put(pers->owner);
5512 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5513}
5514
5515void md_stop(struct mddev *mddev)
5516{
5517
5518
5519
5520 __md_stop(mddev);
5521 bitmap_destroy(mddev);
5522 if (mddev->bio_set)
5523 bioset_free(mddev->bio_set);
5524}
5525
5526EXPORT_SYMBOL_GPL(md_stop);
5527
5528static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5529{
5530 int err = 0;
5531 int did_freeze = 0;
5532
5533 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5534 did_freeze = 1;
5535 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5536 md_wakeup_thread(mddev->thread);
5537 }
5538 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5539 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5540 if (mddev->sync_thread)
5541
5542
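		/* Thread might be blocked waiting for metadata update
		 * which will now never happen.
		 */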
5543 wake_up_process(mddev->sync_thread->tsk);
5544
5545 if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
5546 return -EBUSY;
5547 mddev_unlock(mddev);
5548 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5549 &mddev->recovery));
5550 wait_event(mddev->sb_wait,
5551 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5552 mddev_lock_nointr(mddev);
5553
5554 mutex_lock(&mddev->open_mutex);
5555 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5556 mddev->sync_thread ||
5557 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5558 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5559 printk("md: %s still in use.\n",mdname(mddev));
5560 if (did_freeze) {
5561 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5562 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5563 md_wakeup_thread(mddev->thread);
5564 }
5565 err = -EBUSY;
5566 goto out;
5567 }
5568 if (mddev->pers) {
5569 __md_stop_writes(mddev);
5570
5571 err = -ENXIO;
5572 if (mddev->ro==1)
5573 goto out;
5574 mddev->ro = 1;
5575 set_disk_ro(mddev->gendisk, 1);
5576 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5577 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5578 md_wakeup_thread(mddev->thread);
5579 sysfs_notify_dirent_safe(mddev->sysfs_state);
5580 err = 0;
5581 }
5582out:
5583 mutex_unlock(&mddev->open_mutex);
5584 return err;
5585}
5586
5587
5588
5589
5590
5591static int do_md_stop(struct mddev *mddev, int mode,
5592 struct block_device *bdev)
5593{
5594 struct gendisk *disk = mddev->gendisk;
5595 struct md_rdev *rdev;
5596 int did_freeze = 0;
5597
5598 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5599 did_freeze = 1;
5600 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5601 md_wakeup_thread(mddev->thread);
5602 }
5603 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5604 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5605 if (mddev->sync_thread)
5606
5607
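		/* Thread might be blocked waiting for metadata update
		 * which will now never happen.
		 */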
5608 wake_up_process(mddev->sync_thread->tsk);
5609
5610 mddev_unlock(mddev);
5611 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5612 !test_bit(MD_RECOVERY_RUNNING,
5613 &mddev->recovery)));
5614 mddev_lock_nointr(mddev);
5615
5616 mutex_lock(&mddev->open_mutex);
5617 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5618 mddev->sysfs_active ||
5619 mddev->sync_thread ||
5620 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5621 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5622 printk("md: %s still in use.\n",mdname(mddev));
5623 mutex_unlock(&mddev->open_mutex);
5624 if (did_freeze) {
5625 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5626 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5627 md_wakeup_thread(mddev->thread);
5628 }
5629 return -EBUSY;
5630 }
5631 if (mddev->pers) {
5632 if (mddev->ro)
5633 set_disk_ro(disk, 0);
5634
5635 __md_stop_writes(mddev);
5636 __md_stop(mddev);
5637 mddev->queue->backing_dev_info.congested_fn = NULL;
5638
5639
5640 sysfs_notify_dirent_safe(mddev->sysfs_state);
5641
5642 rdev_for_each(rdev, mddev)
5643 if (rdev->raid_disk >= 0)
5644 sysfs_unlink_rdev(mddev, rdev);
5645
5646 set_capacity(disk, 0);
5647 mutex_unlock(&mddev->open_mutex);
5648 mddev->changed = 1;
5649 revalidate_disk(disk);
5650
5651 if (mddev->ro)
5652 mddev->ro = 0;
5653 } else
5654 mutex_unlock(&mddev->open_mutex);
5655
5656
5657
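	/*
	 * Free resources if this is the final stop (mode == 0).
	 */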
5658 if (mode == 0) {
5659 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5660
5661 bitmap_destroy(mddev);
5662 if (mddev->bitmap_info.file) {
5663 struct file *f = mddev->bitmap_info.file;
5664 spin_lock(&mddev->lock);
5665 mddev->bitmap_info.file = NULL;
5666 spin_unlock(&mddev->lock);
5667 fput(f);
5668 }
5669 mddev->bitmap_info.offset = 0;
5670
5671 export_array(mddev);
5672
5673 md_clean(mddev);
5674 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5675 if (mddev->hold_active == UNTIL_STOP)
5676 mddev->hold_active = 0;
5677 }
5678 md_new_event(mddev);
5679 sysfs_notify_dirent_safe(mddev->sysfs_state);
5680 return 0;
5681}
5682
5683#ifndef MODULE
5684static void autorun_array(struct mddev *mddev)
5685{
5686 struct md_rdev *rdev;
5687 int err;
5688
5689 if (list_empty(&mddev->disks))
5690 return;
5691
5692 printk(KERN_INFO "md: running: ");
5693
5694 rdev_for_each(rdev, mddev) {
5695 char b[BDEVNAME_SIZE];
5696 printk("<%s>", bdevname(rdev->bdev,b));
5697 }
5698 printk("\n");
5699
5700 err = do_md_run(mddev);
5701 if (err) {
5702 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5703 do_md_stop(mddev, 0, NULL);
5704 }
5705}
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
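/*
 * Try to assemble and run arrays from the disks gathered so far on
 * pending_raid_disks: pick the first pending rdev, collect every other
 * pending rdev whose superblock matches it, create or find the matching
 * mddev, bind the candidates to it and run the array.
 */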
5719static void autorun_devices(int part)
5720{
5721 struct md_rdev *rdev0, *rdev, *tmp;
5722 struct mddev *mddev;
5723 char b[BDEVNAME_SIZE];
5724
5725 printk(KERN_INFO "md: autorun ...\n");
5726 while (!list_empty(&pending_raid_disks)) {
5727 int unit;
5728 dev_t dev;
5729 LIST_HEAD(candidates);
5730 rdev0 = list_entry(pending_raid_disks.next,
5731 struct md_rdev, same_set);
5732
5733 printk(KERN_INFO "md: considering %s ...\n",
5734 bdevname(rdev0->bdev,b));
5735 INIT_LIST_HEAD(&candidates);
5736 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5737 if (super_90_load(rdev, rdev0, 0) >= 0) {
5738 printk(KERN_INFO "md: adding %s ...\n",
5739 bdevname(rdev->bdev,b));
5740 list_move(&rdev->same_set, &candidates);
5741 }
5742
5743
5744
5745
5746
5747 if (part) {
5748 dev = MKDEV(mdp_major,
5749 rdev0->preferred_minor << MdpMinorShift);
5750 unit = MINOR(dev) >> MdpMinorShift;
5751 } else {
5752 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5753 unit = MINOR(dev);
5754 }
5755 if (rdev0->preferred_minor != unit) {
5756 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5757 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5758 break;
5759 }
5760
5761 md_probe(dev, NULL, NULL);
5762 mddev = mddev_find(dev);
5763 if (!mddev || !mddev->gendisk) {
5764 if (mddev)
5765 mddev_put(mddev);
5766 printk(KERN_ERR
5767 "md: cannot allocate memory for md drive.\n");
5768 break;
5769 }
5770 if (mddev_lock(mddev))
5771 printk(KERN_WARNING "md: %s locked, cannot run\n",
5772 mdname(mddev));
5773 else if (mddev->raid_disks || mddev->major_version
5774 || !list_empty(&mddev->disks)) {
5775 printk(KERN_WARNING
5776 "md: %s already running, cannot run %s\n",
5777 mdname(mddev), bdevname(rdev0->bdev,b));
5778 mddev_unlock(mddev);
5779 } else {
5780 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5781 mddev->persistent = 1;
5782 rdev_for_each_list(rdev, tmp, &candidates) {
5783 list_del_init(&rdev->same_set);
5784 if (bind_rdev_to_array(rdev, mddev))
5785 export_rdev(rdev);
5786 }
5787 autorun_array(mddev);
5788 mddev_unlock(mddev);
5789 }
5790
5791
5792
5793 rdev_for_each_list(rdev, tmp, &candidates) {
5794 list_del_init(&rdev->same_set);
5795 export_rdev(rdev);
5796 }
5797 mddev_put(mddev);
5798 }
5799 printk(KERN_INFO "md: ... autorun DONE.\n");
5800}
5801#endif
5802
5803static int get_version(void __user *arg)
5804{
5805 mdu_version_t ver;
5806
5807 ver.major = MD_MAJOR_VERSION;
5808 ver.minor = MD_MINOR_VERSION;
5809 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5810
5811 if (copy_to_user(arg, &ver, sizeof(ver)))
5812 return -EFAULT;
5813
5814 return 0;
5815}
5816
5817static int get_array_info(struct mddev *mddev, void __user *arg)
5818{
5819 mdu_array_info_t info;
5820 int nr,working,insync,failed,spare;
5821 struct md_rdev *rdev;
5822
5823 nr = working = insync = failed = spare = 0;
5824 rcu_read_lock();
5825 rdev_for_each_rcu(rdev, mddev) {
5826 nr++;
5827 if (test_bit(Faulty, &rdev->flags))
5828 failed++;
5829 else {
5830 working++;
5831 if (test_bit(In_sync, &rdev->flags))
5832 insync++;
5833 else
5834 spare++;
5835 }
5836 }
5837 rcu_read_unlock();
5838
5839 info.major_version = mddev->major_version;
5840 info.minor_version = mddev->minor_version;
5841 info.patch_version = MD_PATCHLEVEL_VERSION;
5842 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5843 info.level = mddev->level;
5844 info.size = mddev->dev_sectors / 2;
5845 if (info.size != mddev->dev_sectors / 2)
5846 info.size = -1;
5847 info.nr_disks = nr;
5848 info.raid_disks = mddev->raid_disks;
5849 info.md_minor = mddev->md_minor;
5850 info.not_persistent= !mddev->persistent;
5851
5852 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5853 info.state = 0;
5854 if (mddev->in_sync)
5855 info.state = (1<<MD_SB_CLEAN);
5856 if (mddev->bitmap && mddev->bitmap_info.offset)
5857 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5858 if (mddev_is_clustered(mddev))
5859 info.state |= (1<<MD_SB_CLUSTERED);
5860 info.active_disks = insync;
5861 info.working_disks = working;
5862 info.failed_disks = failed;
5863 info.spare_disks = spare;
5864
5865 info.layout = mddev->layout;
5866 info.chunk_size = mddev->chunk_sectors << 9;
5867
5868 if (copy_to_user(arg, &info, sizeof(info)))
5869 return -EFAULT;
5870
5871 return 0;
5872}
5873
5874static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5875{
5876 mdu_bitmap_file_t *file = NULL;
5877 char *ptr;
5878 int err;
5879
5880 file = kzalloc(sizeof(*file), GFP_NOIO);
5881 if (!file)
5882 return -ENOMEM;
5883
5884 err = 0;
5885 spin_lock(&mddev->lock);
5886
5887 if (mddev->bitmap_info.file) {
5888 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5889 sizeof(file->pathname));
5890 if (IS_ERR(ptr))
5891 err = PTR_ERR(ptr);
5892 else
5893 memmove(file->pathname, ptr,
5894 sizeof(file->pathname)-(ptr-file->pathname));
5895 }
5896 spin_unlock(&mddev->lock);
5897
5898 if (err == 0 &&
5899 copy_to_user(arg, file, sizeof(*file)))
5900 err = -EFAULT;
5901
5902 kfree(file);
5903 return err;
5904}
5905
5906static int get_disk_info(struct mddev *mddev, void __user * arg)
5907{
5908 mdu_disk_info_t info;
5909 struct md_rdev *rdev;
5910
5911 if (copy_from_user(&info, arg, sizeof(info)))
5912 return -EFAULT;
5913
5914 rcu_read_lock();
5915 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5916 if (rdev) {
5917 info.major = MAJOR(rdev->bdev->bd_dev);
5918 info.minor = MINOR(rdev->bdev->bd_dev);
5919 info.raid_disk = rdev->raid_disk;
5920 info.state = 0;
5921 if (test_bit(Faulty, &rdev->flags))
5922 info.state |= (1<<MD_DISK_FAULTY);
5923 else if (test_bit(In_sync, &rdev->flags)) {
5924 info.state |= (1<<MD_DISK_ACTIVE);
5925 info.state |= (1<<MD_DISK_SYNC);
5926 }
5927 if (test_bit(Journal, &rdev->flags))
5928 info.state |= (1<<MD_DISK_JOURNAL);
5929 if (test_bit(WriteMostly, &rdev->flags))
5930 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5931 } else {
5932 info.major = info.minor = 0;
5933 info.raid_disk = -1;
5934 info.state = (1<<MD_DISK_REMOVED);
5935 }
5936 rcu_read_unlock();
5937
5938 if (copy_to_user(arg, &info, sizeof(info)))
5939 return -EFAULT;
5940
5941 return 0;
5942}
5943
5944static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5945{
5946 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5947 struct md_rdev *rdev;
5948 dev_t dev = MKDEV(info->major,info->minor);
5949
5950 if (mddev_is_clustered(mddev) &&
5951 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
5952 pr_err("%s: Cannot add to clustered mddev.\n",
5953 mdname(mddev));
5954 return -EINVAL;
5955 }
5956
5957 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5958 return -EOVERFLOW;
5959
5960 if (!mddev->raid_disks) {
5961 int err;
5962
5963 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5964 if (IS_ERR(rdev)) {
5965 printk(KERN_WARNING
5966 "md: md_import_device returned %ld\n",
5967 PTR_ERR(rdev));
5968 return PTR_ERR(rdev);
5969 }
5970 if (!list_empty(&mddev->disks)) {
5971 struct md_rdev *rdev0
5972 = list_entry(mddev->disks.next,
5973 struct md_rdev, same_set);
5974 err = super_types[mddev->major_version]
5975 .load_super(rdev, rdev0, mddev->minor_version);
5976 if (err < 0) {
5977 printk(KERN_WARNING
5978 "md: %s has different UUID to %s\n",
5979 bdevname(rdev->bdev,b),
5980 bdevname(rdev0->bdev,b2));
5981 export_rdev(rdev);
5982 return -EINVAL;
5983 }
5984 }
5985 err = bind_rdev_to_array(rdev, mddev);
5986 if (err)
5987 export_rdev(rdev);
5988 return err;
5989 }
5990
5991
5992
5993
5994
5995
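	/*
	 * If the array already has an active personality, the new device
	 * is hot-added to the running array.
	 */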
5996 if (mddev->pers) {
5997 int err;
5998 if (!mddev->pers->hot_add_disk) {
5999 printk(KERN_WARNING
6000 "%s: personality does not support diskops!\n",
6001 mdname(mddev));
6002 return -EINVAL;
6003 }
6004 if (mddev->persistent)
6005 rdev = md_import_device(dev, mddev->major_version,
6006 mddev->minor_version);
6007 else
6008 rdev = md_import_device(dev, -1, -1);
6009 if (IS_ERR(rdev)) {
6010 printk(KERN_WARNING
6011 "md: md_import_device returned %ld\n",
6012 PTR_ERR(rdev));
6013 return PTR_ERR(rdev);
6014 }
6015
6016 if (!mddev->persistent) {
6017 if (info->state & (1<<MD_DISK_SYNC) &&
6018 info->raid_disk < mddev->raid_disks) {
6019 rdev->raid_disk = info->raid_disk;
6020 set_bit(In_sync, &rdev->flags);
6021 clear_bit(Bitmap_sync, &rdev->flags);
6022 } else
6023 rdev->raid_disk = -1;
6024 rdev->saved_raid_disk = rdev->raid_disk;
6025 } else
6026 super_types[mddev->major_version].
6027 validate_super(mddev, rdev);
6028 if ((info->state & (1<<MD_DISK_SYNC)) &&
6029 rdev->raid_disk != info->raid_disk) {
6030
6031
6032
6033 export_rdev(rdev);
6034 return -EINVAL;
6035 }
6036
6037 clear_bit(In_sync, &rdev->flags);
6038 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6039 set_bit(WriteMostly, &rdev->flags);
6040 else
6041 clear_bit(WriteMostly, &rdev->flags);
6042
6043 if (info->state & (1<<MD_DISK_JOURNAL)) {
6044 struct md_rdev *rdev2;
6045 bool has_journal = false;
6046
6047
6048 rdev_for_each(rdev2, mddev) {
6049 if (test_bit(Journal, &rdev2->flags)) {
6050 has_journal = true;
6051 break;
6052 }
6053 }
6054 if (has_journal) {
6055 export_rdev(rdev);
6056 return -EBUSY;
6057 }
6058 set_bit(Journal, &rdev->flags);
6059 }
6060
6061
6062
6063 if (mddev_is_clustered(mddev)) {
6064 if (info->state & (1 << MD_DISK_CANDIDATE))
6065 set_bit(Candidate, &rdev->flags);
6066 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6067
6068 err = md_cluster_ops->add_new_disk(mddev, rdev);
6069 if (err) {
6070 export_rdev(rdev);
6071 return err;
6072 }
6073 }
6074 }
6075
6076 rdev->raid_disk = -1;
6077 err = bind_rdev_to_array(rdev, mddev);
6078
6079 if (err)
6080 export_rdev(rdev);
6081
6082 if (mddev_is_clustered(mddev)) {
6083 if (info->state & (1 << MD_DISK_CANDIDATE))
6084 md_cluster_ops->new_disk_ack(mddev, (err == 0));
6085 else {
6086 if (err)
6087 md_cluster_ops->add_new_disk_cancel(mddev);
6088 else
6089 err = add_bound_rdev(rdev);
6090 }
6091
6092 } else if (!err)
6093 err = add_bound_rdev(rdev);
6094
6095 return err;
6096 }
6097
6098
6099
6100
6101 if (mddev->major_version != 0) {
6102 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
6103 mdname(mddev));
6104 return -EINVAL;
6105 }
6106
6107 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6108 int err;
6109 rdev = md_import_device(dev, -1, 0);
6110 if (IS_ERR(rdev)) {
6111 printk(KERN_WARNING
6112 "md: error, md_import_device() returned %ld\n",
6113 PTR_ERR(rdev));
6114 return PTR_ERR(rdev);
6115 }
6116 rdev->desc_nr = info->number;
6117 if (info->raid_disk < mddev->raid_disks)
6118 rdev->raid_disk = info->raid_disk;
6119 else
6120 rdev->raid_disk = -1;
6121
6122 if (rdev->raid_disk < mddev->raid_disks)
6123 if (info->state & (1<<MD_DISK_SYNC))
6124 set_bit(In_sync, &rdev->flags);
6125
6126 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6127 set_bit(WriteMostly, &rdev->flags);
6128
6129 if (!mddev->persistent) {
6130 printk(KERN_INFO "md: nonpersistent superblock ...\n");
6131 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6132 } else
6133 rdev->sb_start = calc_dev_sboffset(rdev);
6134 rdev->sectors = rdev->sb_start;
6135
6136 err = bind_rdev_to_array(rdev, mddev);
6137 if (err) {
6138 export_rdev(rdev);
6139 return err;
6140 }
6141 }
6142
6143 return 0;
6144}
6145
6146static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6147{
6148 char b[BDEVNAME_SIZE];
6149 struct md_rdev *rdev;
6150
6151 rdev = find_rdev(mddev, dev);
6152 if (!rdev)
6153 return -ENXIO;
6154
6155 if (rdev->raid_disk < 0)
6156 goto kick_rdev;
6157
6158 clear_bit(Blocked, &rdev->flags);
6159 remove_and_add_spares(mddev, rdev);
6160
6161 if (rdev->raid_disk >= 0)
6162 goto busy;
6163
6164kick_rdev:
6165 if (mddev_is_clustered(mddev))
6166 md_cluster_ops->remove_disk(mddev, rdev);
6167
6168 md_kick_rdev_from_array(rdev);
6169 md_update_sb(mddev, 1);
6170 md_new_event(mddev);
6171
6172 return 0;
6173busy:
6174 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
6175 bdevname(rdev->bdev,b), mdname(mddev));
6176 return -EBUSY;
6177}
6178
6179static int hot_add_disk(struct mddev *mddev, dev_t dev)
6180{
6181 char b[BDEVNAME_SIZE];
6182 int err;
6183 struct md_rdev *rdev;
6184
6185 if (!mddev->pers)
6186 return -ENODEV;
6187
6188 if (mddev->major_version != 0) {
6189 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
6190 " version-0 superblocks.\n",
6191 mdname(mddev));
6192 return -EINVAL;
6193 }
6194 if (!mddev->pers->hot_add_disk) {
6195 printk(KERN_WARNING
6196 "%s: personality does not support diskops!\n",
6197 mdname(mddev));
6198 return -EINVAL;
6199 }
6200
6201 rdev = md_import_device(dev, -1, 0);
6202 if (IS_ERR(rdev)) {
6203 printk(KERN_WARNING
6204 "md: error, md_import_device() returned %ld\n",
6205 PTR_ERR(rdev));
6206 return -EINVAL;
6207 }
6208
6209 if (mddev->persistent)
6210 rdev->sb_start = calc_dev_sboffset(rdev);
6211 else
6212 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6213
6214 rdev->sectors = rdev->sb_start;
6215
6216 if (test_bit(Faulty, &rdev->flags)) {
6217 printk(KERN_WARNING
6218 "md: can not hot-add faulty %s disk to %s!\n",
6219 bdevname(rdev->bdev,b), mdname(mddev));
6220 err = -EINVAL;
6221 goto abort_export;
6222 }
6223
6224 clear_bit(In_sync, &rdev->flags);
6225 rdev->desc_nr = -1;
6226 rdev->saved_raid_disk = -1;
6227 err = bind_rdev_to_array(rdev, mddev);
6228 if (err)
6229 goto abort_export;
6230
6231
6232
6233
6234
6235
6236 rdev->raid_disk = -1;
6237
6238 md_update_sb(mddev, 1);
6239
6240
6241
6242
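	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */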
6243 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6244 md_wakeup_thread(mddev->thread);
6245 md_new_event(mddev);
6246 return 0;
6247
6248abort_export:
6249 export_rdev(rdev);
6250 return err;
6251}
6252
6253static int set_bitmap_file(struct mddev *mddev, int fd)
6254{
6255 int err = 0;
6256
6257 if (mddev->pers) {
6258 if (!mddev->pers->quiesce || !mddev->thread)
6259 return -EBUSY;
6260 if (mddev->recovery || mddev->sync_thread)
6261 return -EBUSY;
6262
6263 }
6264
6265 if (fd >= 0) {
6266 struct inode *inode;
6267 struct file *f;
6268
6269 if (mddev->bitmap || mddev->bitmap_info.file)
6270 return -EEXIST;
6271 f = fget(fd);
6272
6273 if (f == NULL) {
6274 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
6275 mdname(mddev));
6276 return -EBADF;
6277 }
6278
6279 inode = f->f_mapping->host;
6280 if (!S_ISREG(inode->i_mode)) {
6281 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
6282 mdname(mddev));
6283 err = -EBADF;
6284 } else if (!(f->f_mode & FMODE_WRITE)) {
6285 printk(KERN_ERR "%s: error: bitmap file must open for write\n",
6286 mdname(mddev));
6287 err = -EBADF;
6288 } else if (atomic_read(&inode->i_writecount) != 1) {
6289 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6290 mdname(mddev));
6291 err = -EBUSY;
6292 }
6293 if (err) {
6294 fput(f);
6295 return err;
6296 }
6297 mddev->bitmap_info.file = f;
6298 mddev->bitmap_info.offset = 0;
6299 } else if (mddev->bitmap == NULL)
6300 return -ENOENT;
6301 err = 0;
6302 if (mddev->pers) {
6303 mddev->pers->quiesce(mddev, 1);
6304 if (fd >= 0) {
6305 struct bitmap *bitmap;
6306
6307 bitmap = bitmap_create(mddev, -1);
6308 if (!IS_ERR(bitmap)) {
6309 mddev->bitmap = bitmap;
6310 err = bitmap_load(mddev);
6311 } else
6312 err = PTR_ERR(bitmap);
6313 }
6314 if (fd < 0 || err) {
6315 bitmap_destroy(mddev);
6316 fd = -1;
6317 }
6318 mddev->pers->quiesce(mddev, 0);
6319 }
6320 if (fd < 0) {
6321 struct file *f = mddev->bitmap_info.file;
6322 if (f) {
6323 spin_lock(&mddev->lock);
6324 mddev->bitmap_info.file = NULL;
6325 spin_unlock(&mddev->lock);
6326 fput(f);
6327 }
6328 }
6329
6330 return err;
6331}
6332
6333
6334
6335
6336
6337
6338
6339
6340
6341
6342
6343
6344
6345
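/*
 * set_array_info is used in two ways:
 *  - with raid_disks == 0 it only records the superblock version fields,
 *    so that member superblocks can be loaded later (assembly);
 *  - otherwise level, size, raid_disks, layout and chunk_size describe a
 *    brand new array using a version-0.90 style superblock.
 */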
6346static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6347{
6348
6349 if (info->raid_disks == 0) {
6350
6351 if (info->major_version < 0 ||
6352 info->major_version >= ARRAY_SIZE(super_types) ||
6353 super_types[info->major_version].name == NULL) {
6354
6355 printk(KERN_INFO
6356 "md: superblock version %d not known\n",
6357 info->major_version);
6358 return -EINVAL;
6359 }
6360 mddev->major_version = info->major_version;
6361 mddev->minor_version = info->minor_version;
6362 mddev->patch_version = info->patch_version;
6363 mddev->persistent = !info->not_persistent;
6364
6365
6366
6367 mddev->ctime = ktime_get_real_seconds();
6368 return 0;
6369 }
6370 mddev->major_version = MD_MAJOR_VERSION;
6371 mddev->minor_version = MD_MINOR_VERSION;
6372 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6373 mddev->ctime = ktime_get_real_seconds();
6374
6375 mddev->level = info->level;
6376 mddev->clevel[0] = 0;
6377 mddev->dev_sectors = 2 * (sector_t)info->size;
6378 mddev->raid_disks = info->raid_disks;
6379
6380
6381
6382 if (info->state & (1<<MD_SB_CLEAN))
6383 mddev->recovery_cp = MaxSector;
6384 else
6385 mddev->recovery_cp = 0;
6386 mddev->persistent = ! info->not_persistent;
6387 mddev->external = 0;
6388
6389 mddev->layout = info->layout;
6390 mddev->chunk_sectors = info->chunk_size >> 9;
6391
6392 mddev->max_disks = MD_SB_DISKS;
6393
6394 if (mddev->persistent)
6395 mddev->flags = 0;
6396 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6397
6398 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6399 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6400 mddev->bitmap_info.offset = 0;
6401
6402 mddev->reshape_position = MaxSector;
6403
6404
6405
6406
6407 get_random_bytes(mddev->uuid, 16);
6408
6409 mddev->new_level = mddev->level;
6410 mddev->new_chunk_sectors = mddev->chunk_sectors;
6411 mddev->new_layout = mddev->layout;
6412 mddev->delta_disks = 0;
6413 mddev->reshape_backwards = 0;
6414
6415 return 0;
6416}
6417
6418void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6419{
6420 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6421
6422 if (mddev->external_size)
6423 return;
6424
6425 mddev->array_sectors = array_sectors;
6426}
6427EXPORT_SYMBOL(md_set_array_sectors);
6428
6429static int update_size(struct mddev *mddev, sector_t num_sectors)
6430{
6431 struct md_rdev *rdev;
6432 int rv;
6433 int fit = (num_sectors == 0);
6434
6435 if (mddev->pers->resize == NULL)
6436 return -EINVAL;
6437
6438
6439
6440
6441
6442
6443
6444
6445
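	/* "num_sectors" is the amount of each device to use.  Zero means
	 * "the largest size that fits on every member".  Resizing is
	 * refused while a resync/reshape is running or the array is
	 * read-only.
	 */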
6446 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6447 mddev->sync_thread)
6448 return -EBUSY;
6449 if (mddev->ro)
6450 return -EROFS;
6451
6452 rdev_for_each(rdev, mddev) {
6453 sector_t avail = rdev->sectors;
6454
6455 if (fit && (num_sectors == 0 || num_sectors > avail))
6456 num_sectors = avail;
6457 if (avail < num_sectors)
6458 return -ENOSPC;
6459 }
6460 rv = mddev->pers->resize(mddev, num_sectors);
6461 if (!rv)
6462 revalidate_disk(mddev->gendisk);
6463 return rv;
6464}
6465
6466static int update_raid_disks(struct mddev *mddev, int raid_disks)
6467{
6468 int rv;
6469 struct md_rdev *rdev;
6470
6471 if (mddev->pers->check_reshape == NULL)
6472 return -EINVAL;
6473 if (mddev->ro)
6474 return -EROFS;
6475 if (raid_disks <= 0 ||
6476 (mddev->max_disks && raid_disks >= mddev->max_disks))
6477 return -EINVAL;
6478 if (mddev->sync_thread ||
6479 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6480 mddev->reshape_position != MaxSector)
6481 return -EBUSY;
6482
6483 rdev_for_each(rdev, mddev) {
6484 if (mddev->raid_disks < raid_disks &&
6485 rdev->data_offset < rdev->new_data_offset)
6486 return -EINVAL;
6487 if (mddev->raid_disks > raid_disks &&
6488 rdev->data_offset > rdev->new_data_offset)
6489 return -EINVAL;
6490 }
6491
6492 mddev->delta_disks = raid_disks - mddev->raid_disks;
6493 if (mddev->delta_disks < 0)
6494 mddev->reshape_backwards = 1;
6495 else if (mddev->delta_disks > 0)
6496 mddev->reshape_backwards = 0;
6497
6498 rv = mddev->pers->check_reshape(mddev);
6499 if (rv < 0) {
6500 mddev->delta_disks = 0;
6501 mddev->reshape_backwards = 0;
6502 }
6503 return rv;
6504}
6505
6506
6507
6508
6509
6510
6511
6512
6513
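/*
 * update_array_info changes the configuration of an active array.
 * Version, ctime, level, persistence and chunk_size cannot change, and
 * only one of size, raid_disks, layout or the bitmap-present flag may be
 * changed per call.
 */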
6514static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6515{
6516 int rv = 0;
6517 int cnt = 0;
6518 int state = 0;
6519
6520
6521 if (mddev->bitmap && mddev->bitmap_info.offset)
6522 state |= (1 << MD_SB_BITMAP_PRESENT);
6523
6524 if (mddev->major_version != info->major_version ||
6525 mddev->minor_version != info->minor_version ||
6526
6527 mddev->ctime != info->ctime ||
6528 mddev->level != info->level ||
6529
6530 mddev->persistent != !info->not_persistent ||
6531 mddev->chunk_sectors != info->chunk_size >> 9 ||
6532
6533 ((state^info->state) & 0xfffffe00)
6534 )
6535 return -EINVAL;
6536
6537 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6538 cnt++;
6539 if (mddev->raid_disks != info->raid_disks)
6540 cnt++;
6541 if (mddev->layout != info->layout)
6542 cnt++;
6543 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6544 cnt++;
6545 if (cnt == 0)
6546 return 0;
6547 if (cnt > 1)
6548 return -EINVAL;
6549
6550 if (mddev->layout != info->layout) {
6551
6552
6553
6554
6555 if (mddev->pers->check_reshape == NULL)
6556 return -EINVAL;
6557 else {
6558 mddev->new_layout = info->layout;
6559 rv = mddev->pers->check_reshape(mddev);
6560 if (rv)
6561 mddev->new_layout = mddev->layout;
6562 return rv;
6563 }
6564 }
6565 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6566 rv = update_size(mddev, (sector_t)info->size * 2);
6567
6568 if (mddev->raid_disks != info->raid_disks)
6569 rv = update_raid_disks(mddev, info->raid_disks);
6570
6571 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6572 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6573 rv = -EINVAL;
6574 goto err;
6575 }
6576 if (mddev->recovery || mddev->sync_thread) {
6577 rv = -EBUSY;
6578 goto err;
6579 }
6580 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6581 struct bitmap *bitmap;
6582
6583 if (mddev->bitmap) {
6584 rv = -EEXIST;
6585 goto err;
6586 }
6587 if (mddev->bitmap_info.default_offset == 0) {
6588 rv = -EINVAL;
6589 goto err;
6590 }
6591 mddev->bitmap_info.offset =
6592 mddev->bitmap_info.default_offset;
6593 mddev->bitmap_info.space =
6594 mddev->bitmap_info.default_space;
6595 mddev->pers->quiesce(mddev, 1);
6596 bitmap = bitmap_create(mddev, -1);
6597 if (!IS_ERR(bitmap)) {
6598 mddev->bitmap = bitmap;
6599 rv = bitmap_load(mddev);
6600 } else
6601 rv = PTR_ERR(bitmap);
6602 if (rv)
6603 bitmap_destroy(mddev);
6604 mddev->pers->quiesce(mddev, 0);
6605 } else {
6606
6607 if (!mddev->bitmap) {
6608 rv = -ENOENT;
6609 goto err;
6610 }
6611 if (mddev->bitmap->storage.file) {
6612 rv = -EINVAL;
6613 goto err;
6614 }
6615 if (mddev->bitmap_info.nodes) {
6616
6617 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6618 printk("md: can't change bitmap to none since the"
6619 " array is in use by more than one node\n");
6620 rv = -EPERM;
6621 md_cluster_ops->unlock_all_bitmaps(mddev);
6622 goto err;
6623 }
6624
6625 mddev->bitmap_info.nodes = 0;
6626 md_cluster_ops->leave(mddev);
6627 }
6628 mddev->pers->quiesce(mddev, 1);
6629 bitmap_destroy(mddev);
6630 mddev->pers->quiesce(mddev, 0);
6631 mddev->bitmap_info.offset = 0;
6632 }
6633 }
6634 md_update_sb(mddev, 1);
6635 return rv;
6636err:
6637 return rv;
6638}
6639
6640static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6641{
6642 struct md_rdev *rdev;
6643 int err = 0;
6644
6645 if (mddev->pers == NULL)
6646 return -ENODEV;
6647
6648 rcu_read_lock();
6649 rdev = find_rdev_rcu(mddev, dev);
6650 if (!rdev)
6651 err = -ENODEV;
6652 else {
6653 md_error(mddev, rdev);
6654 if (!test_bit(Faulty, &rdev->flags))
6655 err = -EBUSY;
6656 }
6657 rcu_read_unlock();
6658 return err;
6659}
6660
6661
6662
6663
6664
6665
6666
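/*
 * There is no meaningful CHS geometry for an md array, so fake one:
 * 2 heads, 4 sectors per track, and whatever cylinder count that implies
 * for the array size.
 */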
6667static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6668{
6669 struct mddev *mddev = bdev->bd_disk->private_data;
6670
6671 geo->heads = 2;
6672 geo->sectors = 4;
6673 geo->cylinders = mddev->array_sectors / 8;
6674 return 0;
6675}
6676
6677static inline bool md_ioctl_valid(unsigned int cmd)
6678{
6679 switch (cmd) {
6680 case ADD_NEW_DISK:
6681 case BLKROSET:
6682 case GET_ARRAY_INFO:
6683 case GET_BITMAP_FILE:
6684 case GET_DISK_INFO:
6685 case HOT_ADD_DISK:
6686 case HOT_REMOVE_DISK:
6687 case RAID_AUTORUN:
6688 case RAID_VERSION:
6689 case RESTART_ARRAY_RW:
6690 case RUN_ARRAY:
6691 case SET_ARRAY_INFO:
6692 case SET_BITMAP_FILE:
6693 case SET_DISK_FAULTY:
6694 case STOP_ARRAY:
6695 case STOP_ARRAY_RO:
6696 case CLUSTERED_DISK_NACK:
6697 return true;
6698 default:
6699 return false;
6700 }
6701}
6702
6703static int md_ioctl(struct block_device *bdev, fmode_t mode,
6704 unsigned int cmd, unsigned long arg)
6705{
6706 int err = 0;
6707 void __user *argp = (void __user *)arg;
6708 struct mddev *mddev = NULL;
6709 int ro;
6710
6711 if (!md_ioctl_valid(cmd))
6712 return -ENOTTY;
6713
6714 switch (cmd) {
6715 case RAID_VERSION:
6716 case GET_ARRAY_INFO:
6717 case GET_DISK_INFO:
6718 break;
6719 default:
6720 if (!capable(CAP_SYS_ADMIN))
6721 return -EACCES;
6722 }
6723
6724
6725
6726
6727
6728 switch (cmd) {
6729 case RAID_VERSION:
6730 err = get_version(argp);
6731 goto out;
6732
6733#ifndef MODULE
6734 case RAID_AUTORUN:
6735 err = 0;
6736 autostart_arrays(arg);
6737 goto out;
6738#endif
6739 default:;
6740 }
6741
6742
6743
6744
6745
6746 mddev = bdev->bd_disk->private_data;
6747
6748 if (!mddev) {
6749 BUG();
6750 goto out;
6751 }
6752
6753
6754 switch (cmd) {
6755 case GET_ARRAY_INFO:
6756 if (!mddev->raid_disks && !mddev->external)
6757 err = -ENODEV;
6758 else
6759 err = get_array_info(mddev, argp);
6760 goto out;
6761
6762 case GET_DISK_INFO:
6763 if (!mddev->raid_disks && !mddev->external)
6764 err = -ENODEV;
6765 else
6766 err = get_disk_info(mddev, argp);
6767 goto out;
6768
6769 case SET_DISK_FAULTY:
6770 err = set_disk_faulty(mddev, new_decode_dev(arg));
6771 goto out;
6772
6773 case GET_BITMAP_FILE:
6774 err = get_bitmap_file(mddev, argp);
6775 goto out;
6776
6777 }
6778
6779 if (cmd == ADD_NEW_DISK)
6780
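		/* make sure any pending work on md_misc_wq (such as delayed
		 * device deletion) has completed first */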
6781 flush_workqueue(md_misc_wq);
6782
6783 if (cmd == HOT_REMOVE_DISK)
6784
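		/* need to ensure the recovery thread has had a chance to run */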
6785 wait_event_interruptible_timeout(mddev->sb_wait,
6786 !test_bit(MD_RECOVERY_NEEDED,
6787 					  &mddev->recovery),
6788 msecs_to_jiffies(5000));
6789 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6790
6791
6792
6793 mutex_lock(&mddev->open_mutex);
6794 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6795 mutex_unlock(&mddev->open_mutex);
6796 err = -EBUSY;
6797 goto out;
6798 }
6799 set_bit(MD_STILL_CLOSED, &mddev->flags);
6800 mutex_unlock(&mddev->open_mutex);
6801 sync_blockdev(bdev);
6802 }
6803 err = mddev_lock(mddev);
6804 if (err) {
6805 printk(KERN_INFO
6806 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6807 err, cmd);
6808 goto out;
6809 }
6810
6811 if (cmd == SET_ARRAY_INFO) {
6812 mdu_array_info_t info;
6813 if (!arg)
6814 memset(&info, 0, sizeof(info));
6815 else if (copy_from_user(&info, argp, sizeof(info))) {
6816 err = -EFAULT;
6817 goto unlock;
6818 }
6819 if (mddev->pers) {
6820 err = update_array_info(mddev, &info);
6821 if (err) {
6822 printk(KERN_WARNING "md: couldn't update"
6823 " array info. %d\n", err);
6824 goto unlock;
6825 }
6826 goto unlock;
6827 }
6828 if (!list_empty(&mddev->disks)) {
6829 printk(KERN_WARNING
6830 "md: array %s already has disks!\n",
6831 mdname(mddev));
6832 err = -EBUSY;
6833 goto unlock;
6834 }
6835 if (mddev->raid_disks) {
6836 printk(KERN_WARNING
6837 "md: array %s already initialised!\n",
6838 mdname(mddev));
6839 err = -EBUSY;
6840 goto unlock;
6841 }
6842 err = set_array_info(mddev, &info);
6843 if (err) {
6844 printk(KERN_WARNING "md: couldn't set"
6845 " array info. %d\n", err);
6846 goto unlock;
6847 }
6848 goto unlock;
6849 }
6850
	/*
	 * The remaining ioctls need a configured array (raid_disks set
	 * or externally managed metadata), except those that create,
	 * start or stop one.
	 */
6856 if ((!mddev->raid_disks && !mddev->external)
6857 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6858 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6859 && cmd != GET_BITMAP_FILE) {
6860 err = -ENODEV;
6861 goto unlock;
6862 }
6863
	/*
	 * Commands even a read-only array can execute:
	 */
6867 switch (cmd) {
6868 case RESTART_ARRAY_RW:
6869 err = restart_array(mddev);
6870 goto unlock;
6871
6872 case STOP_ARRAY:
6873 err = do_md_stop(mddev, 0, bdev);
6874 goto unlock;
6875
6876 case STOP_ARRAY_RO:
6877 err = md_set_readonly(mddev, bdev);
6878 goto unlock;
6879
6880 case HOT_REMOVE_DISK:
6881 err = hot_remove_disk(mddev, new_decode_dev(arg));
6882 goto unlock;
6883
6884 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
6889 if (mddev->pers) {
6890 mdu_disk_info_t info;
6891 if (copy_from_user(&info, argp, sizeof(info)))
6892 err = -EFAULT;
6893 else if (!(info.state & (1<<MD_DISK_SYNC)))
			/* Not a re-add: handled by the read-write path below */
6895 break;
6896 else
6897 err = add_new_disk(mddev, &info);
6898 goto unlock;
6899 }
6900 break;
6901
6902 case BLKROSET:
6903 if (get_user(ro, (int __user *)(arg))) {
6904 err = -EFAULT;
6905 goto unlock;
6906 }
6907 err = -EINVAL;
		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
6912 if (ro)
6913 goto unlock;
6914
		/* are we already prepared for writes? */
6916 if (mddev->ro != 1)
6917 goto unlock;
6918
		/* transitioning to read-auto need only happen for
		 * arrays that call md_write_start
		 */
6922 if (mddev->pers) {
6923 err = restart_array(mddev);
6924 if (err == 0) {
6925 mddev->ro = 2;
6926 set_disk_ro(mddev->gendisk, 0);
6927 }
6928 }
6929 goto unlock;
6930 }
6931
	/*
	 * The remaining ioctls need write access to the array.  An
	 * auto-read-only array (ro == 2) is switched back to read-write
	 * here; a genuinely read-only array gets -EROFS.
	 */
6936 if (mddev->ro && mddev->pers) {
6937 if (mddev->ro == 2) {
6938 mddev->ro = 0;
6939 sysfs_notify_dirent_safe(mddev->sysfs_state);
6940 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* If the superblock needs updating (e.g. a device
			 * failed while the array was read-only), wait for
			 * that write to complete before proceeding.
			 */
6945 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6946 mddev_unlock(mddev);
6947 wait_event(mddev->sb_wait,
6948 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6949 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6950 mddev_lock_nointr(mddev);
6951 }
6952 } else {
6953 err = -EROFS;
6954 goto unlock;
6955 }
6956 }
6957
6958 switch (cmd) {
6959 case ADD_NEW_DISK:
6960 {
6961 mdu_disk_info_t info;
6962 if (copy_from_user(&info, argp, sizeof(info)))
6963 err = -EFAULT;
6964 else
6965 err = add_new_disk(mddev, &info);
6966 goto unlock;
6967 }
6968
6969 case CLUSTERED_DISK_NACK:
6970 if (mddev_is_clustered(mddev))
6971 md_cluster_ops->new_disk_ack(mddev, false);
6972 else
6973 err = -EINVAL;
6974 goto unlock;
6975
6976 case HOT_ADD_DISK:
6977 err = hot_add_disk(mddev, new_decode_dev(arg));
6978 goto unlock;
6979
6980 case RUN_ARRAY:
6981 err = do_md_run(mddev);
6982 goto unlock;
6983
6984 case SET_BITMAP_FILE:
6985 err = set_bitmap_file(mddev, (int)arg);
6986 goto unlock;
6987
6988 default:
6989 err = -EINVAL;
6990 goto unlock;
6991 }
6992
6993unlock:
6994 if (mddev->hold_active == UNTIL_IOCTL &&
6995 err != -EINVAL)
6996 mddev->hold_active = 0;
6997 mddev_unlock(mddev);
6998out:
6999 return err;
7000}
7001#ifdef CONFIG_COMPAT
7002static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7003 unsigned int cmd, unsigned long arg)
7004{
7005 switch (cmd) {
7006 case HOT_REMOVE_DISK:
7007 case HOT_ADD_DISK:
7008 case SET_DISK_FAULTY:
7009 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
7011 break;
7012 default:
7013 arg = (unsigned long)compat_ptr(arg);
7014 break;
7015 }
7016
7017 return md_ioctl(bdev, mode, cmd, arg);
7018}
7019#endif
7020
7021static int md_open(struct block_device *bdev, fmode_t mode)
7022{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7027 struct mddev *mddev = mddev_find(bdev->bd_dev);
7028 int err;
7029
7030 if (!mddev)
7031 return -ENODEV;
7032
7033 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
7037 mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
7039 flush_workqueue(md_misc_wq);
		/* Then retry the open from the start of this function */
7041 return -ERESTARTSYS;
7042 }
7043 BUG_ON(mddev != bdev->bd_disk->private_data);
7044
7045 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7046 goto out;
7047
7048 err = 0;
7049 atomic_inc(&mddev->openers);
7050 clear_bit(MD_STILL_CLOSED, &mddev->flags);
7051 mutex_unlock(&mddev->open_mutex);
7052
7053 check_disk_change(bdev);
7054 out:
7055 return err;
7056}
7057
7058static void md_release(struct gendisk *disk, fmode_t mode)
7059{
7060 struct mddev *mddev = disk->private_data;
7061
7062 BUG_ON(!mddev);
7063 atomic_dec(&mddev->openers);
7064 mddev_put(mddev);
7065}
7066
7067static int md_media_changed(struct gendisk *disk)
7068{
7069 struct mddev *mddev = disk->private_data;
7070
7071 return mddev->changed;
7072}
7073
7074static int md_revalidate(struct gendisk *disk)
7075{
7076 struct mddev *mddev = disk->private_data;
7077
7078 mddev->changed = 0;
7079 return 0;
7080}
7081static const struct block_device_operations md_fops =
7082{
7083 .owner = THIS_MODULE,
7084 .open = md_open,
7085 .release = md_release,
7086 .ioctl = md_ioctl,
7087#ifdef CONFIG_COMPAT
7088 .compat_ioctl = md_compat_ioctl,
7089#endif
7090 .getgeo = md_getgeo,
7091 .media_changed = md_media_changed,
7092 .revalidate_disk= md_revalidate,
7093};
7094
7095static int md_thread(void *arg)
7096{
7097 struct md_thread *thread = arg;
7098
	/*
	 * md_thread is a generic worker for one MD array: it sleeps until
	 * md_wakeup_thread() sets THREAD_WAKEUP (or the optional timeout
	 * expires) and then calls the registered run() handler.  It exits
	 * only when kthread_stop() is called from md_unregister_thread().
	 */
7111 allow_signal(SIGKILL);
7112 while (!kthread_should_stop()) {
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7119 if (signal_pending(current))
7120 flush_signals(current);
7121
7122 wait_event_interruptible_timeout
7123 (thread->wqueue,
7124 test_bit(THREAD_WAKEUP, &thread->flags)
7125 || kthread_should_stop(),
7126 thread->timeout);
7127
7128 clear_bit(THREAD_WAKEUP, &thread->flags);
7129 if (!kthread_should_stop())
7130 thread->run(thread);
7131 }
7132
7133 return 0;
7134}
7135
7136void md_wakeup_thread(struct md_thread *thread)
7137{
7138 if (thread) {
7139 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7140 set_bit(THREAD_WAKEUP, &thread->flags);
7141 wake_up(&thread->wqueue);
7142 }
7143}
7144EXPORT_SYMBOL(md_wakeup_thread);
7145
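/*
 * md_register_thread()/md_unregister_thread() manage the per-array worker
 * described above.  A typical (purely illustrative) use by a personality
 * looks roughly like:
 *
 *	conf->thread = md_register_thread(raidXd, mddev, "raidX");
 *	if (!conf->thread)
 *		goto abort;
 *	...
 *	md_unregister_thread(&conf->thread);
 *
 * where raidXd is the personality's own handler; the names here are
 * placeholders, not functions defined in this file.
 */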
7146struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7147 struct mddev *mddev, const char *name)
7148{
7149 struct md_thread *thread;
7150
7151 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7152 if (!thread)
7153 return NULL;
7154
7155 init_waitqueue_head(&thread->wqueue);
7156
7157 thread->run = run;
7158 thread->mddev = mddev;
7159 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7160 thread->tsk = kthread_run(md_thread, thread,
7161 "%s_%s",
7162 mdname(thread->mddev),
7163 name);
7164 if (IS_ERR(thread->tsk)) {
7165 kfree(thread);
7166 return NULL;
7167 }
7168 return thread;
7169}
7170EXPORT_SYMBOL(md_register_thread);
7171
7172void md_unregister_thread(struct md_thread **threadp)
7173{
7174 struct md_thread *thread = *threadp;
7175 if (!thread)
7176 return;
7177 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
7181 spin_lock(&pers_lock);
7182 *threadp = NULL;
7183 spin_unlock(&pers_lock);
7184
7185 kthread_stop(thread->tsk);
7186 kfree(thread);
7187}
7188EXPORT_SYMBOL(md_unregister_thread);
7189
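/*
 * md_error() is called by personalities and by the ioctl path above to
 * mark a device as failed: it lets the personality record the failure,
 * then flags recovery as needed and wakes the array's thread.
 */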
7190void md_error(struct mddev *mddev, struct md_rdev *rdev)
7191{
7192 if (!rdev || test_bit(Faulty, &rdev->flags))
7193 return;
7194
7195 if (!mddev->pers || !mddev->pers->error_handler)
7196 return;
7197 mddev->pers->error_handler(mddev,rdev);
7198 if (mddev->degraded)
7199 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7200 sysfs_notify_dirent_safe(rdev->sysfs_state);
7201 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7202 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7203 md_wakeup_thread(mddev->thread);
7204 if (mddev->event_work.func)
7205 queue_work(md_misc_wq, &mddev->event_work);
7206 md_new_event(mddev);
7207}
7208EXPORT_SYMBOL(md_error);
7209
/* seq_file implementation for /proc/mdstat */
7212static void status_unused(struct seq_file *seq)
7213{
7214 int i = 0;
7215 struct md_rdev *rdev;
7216
7217 seq_printf(seq, "unused devices: ");
7218
7219 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7220 char b[BDEVNAME_SIZE];
7221 i++;
7222 seq_printf(seq, "%s ",
7223 bdevname(rdev->bdev,b));
7224 }
7225 if (!i)
7226 seq_printf(seq, "<none>");
7227
7228 seq_printf(seq, "\n");
7229}
7230
7231static int status_resync(struct seq_file *seq, struct mddev *mddev)
7232{
7233 sector_t max_sectors, resync, res;
7234 unsigned long dt, db;
7235 sector_t rt;
7236 int scale;
7237 unsigned int per_milli;
7238
7239 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7240 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7241 max_sectors = mddev->resync_max_sectors;
7242 else
7243 max_sectors = mddev->dev_sectors;
7244
7245 resync = mddev->curr_resync;
7246 if (resync <= 3) {
7247 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
7249 resync = max_sectors;
7250 } else
7251 resync -= atomic_read(&mddev->recovery_active);
7252
7253 if (resync == 0) {
7254 if (mddev->recovery_cp < MaxSector) {
7255 seq_printf(seq, "\tresync=PENDING");
7256 return 1;
7257 }
7258 return 0;
7259 }
7260 if (resync < 3) {
7261 seq_printf(seq, "\tresync=DELAYED");
7262 return 1;
7263 }
7264
7265 WARN_ON(max_sectors == 0);
7266
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 */
7271 scale = 10;
7272 if (sizeof(sector_t) > sizeof(unsigned long)) {
7273 while ( max_sectors/2 > (1ULL<<(scale+32)))
7274 scale++;
7275 }
7276 res = (resync>>scale)*1000;
7277 sector_div(res, (u32)((max_sectors>>scale)+1));
7278
7279 per_milli = res;
7280 {
7281 int i, x = per_milli/50, y = 20-x;
7282 seq_printf(seq, "[");
7283 for (i = 0; i < x; i++)
7284 seq_printf(seq, "=");
7285 seq_printf(seq, ">");
7286 for (i = 0; i < y; i++)
7287 seq_printf(seq, ".");
7288 seq_printf(seq, "] ");
7289 }
7290 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7291 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7292 "reshape" :
7293 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7294 "check" :
7295 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7296 "resync" : "recovery"))),
7297 per_milli/10, per_milli % 10,
7298 (unsigned long long) resync/2,
7299 (unsigned long long) max_sectors/2);
7300
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so could be 32bit or 64bit.
	 * So we divide before multiply in case it is
	 * 32bit and close to the limit.
	 * We scale the divisor (db) by 32 to avoid losing precision
	 * near the end of resync when the number of remaining sectors
	 * is close to the 'db' value.
	 * We then divide rt by 32 after multiplying by db to compensate.
	 * The '+1' avoids division by zero if db is very small.
	 */
7315 dt = ((jiffies - mddev->resync_mark) / HZ);
7316 if (!dt) dt++;
7317 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7318 - mddev->resync_mark_cnt;
7319
7320 rt = max_sectors - resync;
7321 sector_div(rt, db/32+1);
7322 rt *= dt;
7323 rt >>= 5;
7324
7325 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7326 ((unsigned long)rt % 60)/6);
7327
7328 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7329 return 1;
7330}
7331
7332static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7333{
7334 struct list_head *tmp;
7335 loff_t l = *pos;
7336 struct mddev *mddev;
7337
7338 if (l >= 0x10000)
7339 return NULL;
7340 if (!l--)
		/* header */
7342 return (void*)1;
7343
7344 spin_lock(&all_mddevs_lock);
7345 list_for_each(tmp,&all_mddevs)
7346 if (!l--) {
7347 mddev = list_entry(tmp, struct mddev, all_mddevs);
7348 mddev_get(mddev);
7349 spin_unlock(&all_mddevs_lock);
7350 return mddev;
7351 }
7352 spin_unlock(&all_mddevs_lock);
7353 if (!l--)
7354 return (void*)2;
7355 return NULL;
7356}
7357
7358static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7359{
7360 struct list_head *tmp;
7361 struct mddev *next_mddev, *mddev = v;
7362
7363 ++*pos;
7364 if (v == (void*)2)
7365 return NULL;
7366
7367 spin_lock(&all_mddevs_lock);
7368 if (v == (void*)1)
7369 tmp = all_mddevs.next;
7370 else
7371 tmp = mddev->all_mddevs.next;
7372 if (tmp != &all_mddevs)
7373 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7374 else {
7375 next_mddev = (void*)2;
7376 *pos = 0x10000;
7377 }
7378 spin_unlock(&all_mddevs_lock);
7379
7380 if (v != (void*)1)
7381 mddev_put(mddev);
7382 return next_mddev;
7383
7384}
7385
7386static void md_seq_stop(struct seq_file *seq, void *v)
7387{
7388 struct mddev *mddev = v;
7389
7390 if (mddev && v != (void*)1 && v != (void*)2)
7391 mddev_put(mddev);
7392}
7393
7394static int md_seq_show(struct seq_file *seq, void *v)
7395{
7396 struct mddev *mddev = v;
7397 sector_t sectors;
7398 struct md_rdev *rdev;
7399
7400 if (v == (void*)1) {
7401 struct md_personality *pers;
7402 seq_printf(seq, "Personalities : ");
7403 spin_lock(&pers_lock);
7404 list_for_each_entry(pers, &pers_list, list)
7405 seq_printf(seq, "[%s] ", pers->name);
7406
7407 spin_unlock(&pers_lock);
7408 seq_printf(seq, "\n");
7409 seq->poll_event = atomic_read(&md_event_count);
7410 return 0;
7411 }
7412 if (v == (void*)2) {
7413 status_unused(seq);
7414 return 0;
7415 }
7416
7417 spin_lock(&mddev->lock);
7418 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7419 seq_printf(seq, "%s : %sactive", mdname(mddev),
7420 mddev->pers ? "" : "in");
7421 if (mddev->pers) {
7422 if (mddev->ro==1)
7423 seq_printf(seq, " (read-only)");
7424 if (mddev->ro==2)
7425 seq_printf(seq, " (auto-read-only)");
7426 seq_printf(seq, " %s", mddev->pers->name);
7427 }
7428
7429 sectors = 0;
7430 rcu_read_lock();
7431 rdev_for_each_rcu(rdev, mddev) {
7432 char b[BDEVNAME_SIZE];
7433 seq_printf(seq, " %s[%d]",
7434 bdevname(rdev->bdev,b), rdev->desc_nr);
7435 if (test_bit(WriteMostly, &rdev->flags))
7436 seq_printf(seq, "(W)");
7437 if (test_bit(Journal, &rdev->flags))
7438 seq_printf(seq, "(J)");
7439 if (test_bit(Faulty, &rdev->flags)) {
7440 seq_printf(seq, "(F)");
7441 continue;
7442 }
7443 if (rdev->raid_disk < 0)
7444 seq_printf(seq, "(S)");
7445 if (test_bit(Replacement, &rdev->flags))
7446 seq_printf(seq, "(R)");
7447 sectors += rdev->sectors;
7448 }
7449 rcu_read_unlock();
7450
7451 if (!list_empty(&mddev->disks)) {
7452 if (mddev->pers)
7453 seq_printf(seq, "\n %llu blocks",
7454 (unsigned long long)
7455 mddev->array_sectors / 2);
7456 else
7457 seq_printf(seq, "\n %llu blocks",
7458 (unsigned long long)sectors / 2);
7459 }
7460 if (mddev->persistent) {
7461 if (mddev->major_version != 0 ||
7462 mddev->minor_version != 90) {
7463 seq_printf(seq," super %d.%d",
7464 mddev->major_version,
7465 mddev->minor_version);
7466 }
7467 } else if (mddev->external)
7468 seq_printf(seq, " super external:%s",
7469 mddev->metadata_type);
7470 else
7471 seq_printf(seq, " super non-persistent");
7472
7473 if (mddev->pers) {
7474 mddev->pers->status(seq, mddev);
7475 seq_printf(seq, "\n ");
7476 if (mddev->pers->sync_request) {
7477 if (status_resync(seq, mddev))
7478 seq_printf(seq, "\n ");
7479 }
7480 } else
7481 seq_printf(seq, "\n ");
7482
7483 bitmap_status(seq, mddev->bitmap);
7484
7485 seq_printf(seq, "\n");
7486 }
7487 spin_unlock(&mddev->lock);
7488
7489 return 0;
7490}
7491
7492static const struct seq_operations md_seq_ops = {
7493 .start = md_seq_start,
7494 .next = md_seq_next,
7495 .stop = md_seq_stop,
7496 .show = md_seq_show,
7497};
7498
7499static int md_seq_open(struct inode *inode, struct file *file)
7500{
7501 struct seq_file *seq;
7502 int error;
7503
7504 error = seq_open(file, &md_seq_ops);
7505 if (error)
7506 return error;
7507
7508 seq = file->private_data;
7509 seq->poll_event = atomic_read(&md_event_count);
7510 return error;
7511}
7512
7513static int md_unloading;
7514static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7515{
7516 struct seq_file *seq = filp->private_data;
7517 int mask;
7518
7519 if (md_unloading)
7520 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7521 poll_wait(filp, &md_event_waiters, wait);
	/* always allow read */
7524 mask = POLLIN | POLLRDNORM;
7525
7526 if (seq->poll_event != atomic_read(&md_event_count))
7527 mask |= POLLERR | POLLPRI;
7528 return mask;
7529}
7530
7531static const struct file_operations md_seq_fops = {
7532 .owner = THIS_MODULE,
7533 .open = md_seq_open,
7534 .read = seq_read,
7535 .llseek = seq_lseek,
7536 .release = seq_release_private,
7537 .poll = mdstat_poll,
7538};
7539
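/*
 * Personality modules register themselves here, normally from their
 * module_init(), and unregister from module_exit().  An illustrative
 * sketch (names are placeholders, not code from this file):
 *
 *	static struct md_personality raidX_personality = {
 *		.name	= "raidX",
 *		.level	= X,
 *		.owner	= THIS_MODULE,
 *		...
 *	};
 *
 *	static int __init raidX_init(void)
 *	{
 *		return register_md_personality(&raidX_personality);
 *	}
 */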
7540int register_md_personality(struct md_personality *p)
7541{
7542 printk(KERN_INFO "md: %s personality registered for level %d\n",
7543 p->name, p->level);
7544 spin_lock(&pers_lock);
7545 list_add_tail(&p->list, &pers_list);
7546 spin_unlock(&pers_lock);
7547 return 0;
7548}
7549EXPORT_SYMBOL(register_md_personality);
7550
7551int unregister_md_personality(struct md_personality *p)
7552{
7553 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7554 spin_lock(&pers_lock);
7555 list_del_init(&p->list);
7556 spin_unlock(&pers_lock);
7557 return 0;
7558}
7559EXPORT_SYMBOL(unregister_md_personality);
7560
7561int register_md_cluster_operations(struct md_cluster_operations *ops,
7562 struct module *module)
7563{
7564 int ret = 0;
7565 spin_lock(&pers_lock);
7566 if (md_cluster_ops != NULL)
7567 ret = -EALREADY;
7568 else {
7569 md_cluster_ops = ops;
7570 md_cluster_mod = module;
7571 }
7572 spin_unlock(&pers_lock);
7573 return ret;
7574}
7575EXPORT_SYMBOL(register_md_cluster_operations);
7576
7577int unregister_md_cluster_operations(void)
7578{
7579 spin_lock(&pers_lock);
7580 md_cluster_ops = NULL;
7581 spin_unlock(&pers_lock);
7582 return 0;
7583}
7584EXPORT_SYMBOL(unregister_md_cluster_operations);
7585
7586int md_setup_cluster(struct mddev *mddev, int nodes)
7587{
7588 int err;
7589
7590 err = request_module("md-cluster");
7591 if (err) {
7592 pr_err("md-cluster module not found.\n");
7593 return -ENOENT;
7594 }
7595
7596 spin_lock(&pers_lock);
7597 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7598 spin_unlock(&pers_lock);
7599 return -ENOENT;
7600 }
7601 spin_unlock(&pers_lock);
7602
7603 return md_cluster_ops->join(mddev, nodes);
7604}
7605
7606void md_cluster_stop(struct mddev *mddev)
7607{
7608 if (!md_cluster_ops)
7609 return;
7610 md_cluster_ops->leave(mddev);
7611 module_put(md_cluster_mod);
7612}
7613
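/*
 * is_mddev_idle() estimates whether any I/O other than resync is hitting
 * the member devices, by comparing each disk's total sector count with
 * the sync I/O it has issued; resync is throttled when the array is busy.
 */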
7614static int is_mddev_idle(struct mddev *mddev, int init)
7615{
7616 struct md_rdev *rdev;
7617 int idle;
7618 int curr_events;
7619
7620 idle = 1;
7621 rcu_read_lock();
7622 rdev_for_each_rcu(rdev, mddev) {
7623 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7624 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7625 (int)part_stat_read(&disk->part0, sectors[1]) -
7626 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_events to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-resync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync requests, and further resync activity will
		 * always make curr_events less than last_events.
		 */
7649 if (init || curr_events - rdev->last_events > 64) {
7650 rdev->last_events = curr_events;
7651 idle = 0;
7652 }
7653 }
7654 rcu_read_unlock();
7655 return idle;
7656}
7657
7658void md_done_sync(struct mddev *mddev, int blocks, int ok)
7659{
	/* another "blocks" (512 byte) blocks have been synced */
7661 atomic_sub(blocks, &mddev->recovery_active);
7662 wake_up(&mddev->recovery_wait);
7663 if (!ok) {
7664 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7665 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7666 md_wakeup_thread(mddev->thread);
		/* stop recovery, signal do_sync */
7668 }
7669}
7670EXPORT_SYMBOL(md_done_sync);
7671
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 */
7677void md_write_start(struct mddev *mddev, struct bio *bi)
7678{
7679 int did_change = 0;
7680 if (bio_data_dir(bi) != WRITE)
7681 return;
7682
7683 BUG_ON(mddev->ro == 1);
7684 if (mddev->ro == 2) {
		/* need to switch to read/write */
7686 mddev->ro = 0;
7687 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7688 md_wakeup_thread(mddev->thread);
7689 md_wakeup_thread(mddev->sync_thread);
7690 did_change = 1;
7691 }
7692 atomic_inc(&mddev->writes_pending);
7693 if (mddev->safemode == 1)
7694 mddev->safemode = 0;
7695 if (mddev->in_sync) {
7696 spin_lock(&mddev->lock);
7697 if (mddev->in_sync) {
7698 mddev->in_sync = 0;
7699 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7700 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7701 md_wakeup_thread(mddev->thread);
7702 did_change = 1;
7703 }
7704 spin_unlock(&mddev->lock);
7705 }
7706 if (did_change)
7707 sysfs_notify_dirent_safe(mddev->sysfs_state);
7708 wait_event(mddev->sb_wait,
7709 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7710}
7711EXPORT_SYMBOL(md_write_start);
7712
7713void md_write_end(struct mddev *mddev)
7714{
7715 if (atomic_dec_and_test(&mddev->writes_pending)) {
7716 if (mddev->safemode == 2)
7717 md_wakeup_thread(mddev->thread);
7718 else if (mddev->safemode_delay)
7719 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7720 }
7721}
7722EXPORT_SYMBOL(md_write_end);
7723
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 *
 * In the ->external case MD_CHANGE_PENDING can not be cleared until
 * mddev->lock is dropped, so return -EAGAIN after notifying userspace.
 */
7733int md_allow_write(struct mddev *mddev)
7734{
7735 if (!mddev->pers)
7736 return 0;
7737 if (mddev->ro)
7738 return 0;
7739 if (!mddev->pers->sync_request)
7740 return 0;
7741
7742 spin_lock(&mddev->lock);
7743 if (mddev->in_sync) {
7744 mddev->in_sync = 0;
7745 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7746 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7747 if (mddev->safemode_delay &&
7748 mddev->safemode == 0)
7749 mddev->safemode = 1;
7750 spin_unlock(&mddev->lock);
7751 md_update_sb(mddev, 0);
7752 sysfs_notify_dirent_safe(mddev->sysfs_state);
7753 } else
7754 spin_unlock(&mddev->lock);
7755
7756 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7757 return -EAGAIN;
7758 else
7759 return 0;
7760}
7761EXPORT_SYMBOL_GPL(md_allow_write);
7762
7763#define SYNC_MARKS 10
7764#define SYNC_MARK_STEP (3*HZ)
7765#define UPDATE_FREQUENCY (5*60*HZ)
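/*
 * md_do_sync() is the main loop of the per-array sync_thread.  It drives
 * resync, recovery, reshape and check/repair passes by repeatedly calling
 * the personality's sync_request(), while pacing itself between
 * speed_limit_min and speed_limit_max and checkpointing progress.
 */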
7766void md_do_sync(struct md_thread *thread)
7767{
7768 struct mddev *mddev = thread->mddev;
7769 struct mddev *mddev2;
7770 unsigned int currspeed = 0,
7771 window;
7772 sector_t max_sectors,j, io_sectors, recovery_done;
7773 unsigned long mark[SYNC_MARKS];
7774 unsigned long update_time;
7775 sector_t mark_cnt[SYNC_MARKS];
7776 int last_mark,m;
7777 struct list_head *tmp;
7778 sector_t last_check;
7779 int skipped = 0;
7780 struct md_rdev *rdev;
7781 char *desc, *action = NULL;
7782 struct blk_plug plug;
7783 bool cluster_resync_finished = false;
7784
	/* just in case the thread restarts... */
7786 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7787 return;
7788 if (mddev->ro) {
7789 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7790 return;
7791 }
7792
7793 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7794 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7795 desc = "data-check";
7796 action = "check";
7797 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7798 desc = "requested-resync";
7799 action = "repair";
7800 } else
7801 desc = "resync";
7802 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7803 desc = "reshape";
7804 else
7805 desc = "recovery";
7806
7807 mddev->last_sync_action = action ?: desc;
7808
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 */
7825 do {
7826 mddev->curr_resync = 2;
7827
7828 try_again:
7829 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7830 goto skip;
7831 for_each_mddev(mddev2, tmp) {
7832 if (mddev2 == mddev)
7833 continue;
7834 if (!mddev->parallel_resync
7835 && mddev2->curr_resync
7836 && match_mddev_units(mddev, mddev2)) {
7837 DEFINE_WAIT(wq);
7838 if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
7840 mddev->curr_resync = 1;
7841 wake_up(&resync_wait);
7842 }
7843 if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here, we can wait the next
				 * time 'round when curr_resync == 2
				 */
7847 continue;
				/* We need to wait 'interruptible' so as not to
				 * contribute to the load average, and not to
				 * be caught by 'softlockup'
				 */
7852 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7853 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7854 mddev2->curr_resync >= mddev->curr_resync) {
7855 printk(KERN_INFO "md: delaying %s of %s"
7856 " until %s has finished (they"
7857 " share one or more physical units)\n",
7858 desc, mdname(mddev), mdname(mddev2));
7859 mddev_put(mddev2);
7860 if (signal_pending(current))
7861 flush_signals(current);
7862 schedule();
7863 finish_wait(&resync_wait, &wq);
7864 goto try_again;
7865 }
7866 finish_wait(&resync_wait, &wq);
7867 }
7868 }
7869 } while (mddev->curr_resync < 2);
7870
7871 j = 0;
7872 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
7876 max_sectors = mddev->resync_max_sectors;
7877 atomic64_set(&mddev->resync_mismatches, 0);
7878
7879 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7880 j = mddev->resync_min;
7881 else if (!mddev->bitmap)
7882 j = mddev->recovery_cp;
7883
7884 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7885 max_sectors = mddev->resync_max_sectors;
7886 else {
		/* recovery follows the physical size of devices */
7888 max_sectors = mddev->dev_sectors;
7889 j = MaxSector;
7890 rcu_read_lock();
7891 rdev_for_each_rcu(rdev, mddev)
7892 if (rdev->raid_disk >= 0 &&
7893 !test_bit(Journal, &rdev->flags) &&
7894 !test_bit(Faulty, &rdev->flags) &&
7895 !test_bit(In_sync, &rdev->flags) &&
7896 rdev->recovery_offset < j)
7897 j = rdev->recovery_offset;
7898 rcu_read_unlock();
		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
7908 if (mddev->bitmap) {
7909 mddev->pers->quiesce(mddev, 1);
7910 mddev->pers->quiesce(mddev, 0);
7911 }
7912 }
7913
7914 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7915 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7916 " %d KB/sec/disk.\n", speed_min(mddev));
7917 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7918 "(but not more than %d KB/sec) for %s.\n",
7919 speed_max(mddev), desc);
7920
7921 is_mddev_idle(mddev, 1);
7922
7923 io_sectors = 0;
7924 for (m = 0; m < SYNC_MARKS; m++) {
7925 mark[m] = jiffies;
7926 mark_cnt[m] = io_sectors;
7927 }
7928 last_mark = 0;
7929 mddev->resync_mark = mark[last_mark];
7930 mddev->resync_mark_cnt = mark_cnt[last_mark];
7931
	/*
	 * Tune reconstruction:
	 */
7935 window = 32*(PAGE_SIZE/512);
7936 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7937 window/2, (unsigned long long)max_sectors/2);
7938
7939 atomic_set(&mddev->recovery_active, 0);
7940 last_check = 0;
7941
7942 if (j>2) {
7943 printk(KERN_INFO
7944 "md: resuming %s of %s from checkpoint.\n",
7945 desc, mdname(mddev));
7946 mddev->curr_resync = j;
7947 } else
7948 mddev->curr_resync = 3;
7949 mddev->curr_resync_completed = j;
7950 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7951 md_new_event(mddev);
7952 update_time = jiffies;
7953
7954 blk_start_plug(&plug);
7955 while (j < max_sectors) {
7956 sector_t sectors;
7957
7958 skipped = 0;
7959
7960 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7961 ((mddev->curr_resync > mddev->curr_resync_completed &&
7962 (mddev->curr_resync - mddev->curr_resync_completed)
7963 > (max_sectors >> 4)) ||
7964 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7965 (j - mddev->curr_resync_completed)*2
7966 >= mddev->resync_max - mddev->curr_resync_completed ||
7967 mddev->curr_resync_completed > mddev->resync_max
7968 )) {
			/* time to update curr_resync_completed */
7970 wait_event(mddev->recovery_wait,
7971 atomic_read(&mddev->recovery_active) == 0);
7972 mddev->curr_resync_completed = j;
7973 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7974 j > mddev->recovery_cp)
7975 mddev->recovery_cp = j;
7976 update_time = jiffies;
7977 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7978 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7979 }
7980
7981 while (j >= mddev->resync_max &&
7982 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* resync_max is controlled from user-space, so we
			 * may wait here for a long time; wait interruptibly
			 * (flushing any pending signal first) so the wait
			 * can be woken when resync_max changes.
			 */
7987 flush_signals(current);
7988 wait_event_interruptible(mddev->recovery_wait,
7989 mddev->resync_max > j
7990 || test_bit(MD_RECOVERY_INTR,
7991 &mddev->recovery));
7992 }
7993
7994 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7995 break;
7996
7997 sectors = mddev->pers->sync_request(mddev, j, &skipped);
7998 if (sectors == 0) {
7999 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8000 break;
8001 }
8002
8003 if (!skipped) {
8004 io_sectors += sectors;
8005 atomic_add(sectors, &mddev->recovery_active);
8006 }
8007
8008 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8009 break;
8010
8011 j += sectors;
8012 if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
8014 j = max_sectors;
8015 if (j > 2)
8016 mddev->curr_resync = j;
8017 mddev->curr_mark_cnt = io_sectors;
8018 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
8022 md_new_event(mddev);
8023
8024 if (last_check + window > io_sectors || j == max_sectors)
8025 continue;
8026
8027 last_check = io_sectors;
8028 repeat:
8029 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks */
8031 int next = (last_mark+1) % SYNC_MARKS;
8032
8033 mddev->resync_mark = mark[next];
8034 mddev->resync_mark_cnt = mark_cnt[next];
8035 mark[next] = jiffies;
8036 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8037 last_mark = next;
8038 }
8039
8040 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8041 break;
8042
		/*
		 * this loop exits only if either when we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8051 cond_resched();
8052
8053 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8054 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8055 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8056
8057 if (currspeed > speed_min(mddev)) {
8058 if (currspeed > speed_max(mddev)) {
8059 msleep(500);
8060 goto repeat;
8061 }
8062 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8067 wait_event(mddev->recovery_wait,
8068 !atomic_read(&mddev->recovery_active));
8069 }
8070 }
8071 }
8072 printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
8073 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8074 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8078 blk_finish_plug(&plug);
8079 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8080
8081 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8082 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8083 mddev->curr_resync > 2) {
8084 mddev->curr_resync_completed = mddev->curr_resync;
8085 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8086 }
8087
8088 if (mddev_is_clustered(mddev)) {
8089 md_cluster_ops->resync_finish(mddev);
8090 cluster_resync_finished = true;
8091 }
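	/* tell the personality that we are finished */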
8092 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8093
8094 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8095 mddev->curr_resync > 2) {
8096 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8097 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8098 if (mddev->curr_resync >= mddev->recovery_cp) {
8099 printk(KERN_INFO
8100 "md: checkpointing %s of %s.\n",
8101 desc, mdname(mddev));
8102 if (test_bit(MD_RECOVERY_ERROR,
8103 &mddev->recovery))
8104 mddev->recovery_cp =
8105 mddev->curr_resync_completed;
8106 else
8107 mddev->recovery_cp =
8108 mddev->curr_resync;
8109 }
8110 } else
8111 mddev->recovery_cp = MaxSector;
8112 } else {
8113 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8114 mddev->curr_resync = MaxSector;
8115 rcu_read_lock();
8116 rdev_for_each_rcu(rdev, mddev)
8117 if (rdev->raid_disk >= 0 &&
8118 mddev->delta_disks >= 0 &&
8119 !test_bit(Journal, &rdev->flags) &&
8120 !test_bit(Faulty, &rdev->flags) &&
8121 !test_bit(In_sync, &rdev->flags) &&
8122 rdev->recovery_offset < mddev->curr_resync)
8123 rdev->recovery_offset = mddev->curr_resync;
8124 rcu_read_unlock();
8125 }
8126 }
8127 skip:
8128 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8129
8130 if (mddev_is_clustered(mddev) &&
8131 test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8132 !cluster_resync_finished)
8133 md_cluster_ops->resync_finish(mddev);
8134
8135 spin_lock(&mddev->lock);
8136 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
8138 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8139 mddev->resync_min = 0;
8140 mddev->resync_max = MaxSector;
8141 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8142 mddev->resync_min = mddev->curr_resync_completed;
8143 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8144 mddev->curr_resync = 0;
8145 spin_unlock(&mddev->lock);
8146
8147 wake_up(&resync_wait);
8148 md_wakeup_thread(mddev->thread);
8149 return;
8150}
8151EXPORT_SYMBOL_GPL(md_do_sync);
8152
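/*
 * remove_and_add_spares() detaches failed or superfluous devices that are
 * idle, then offers remaining spares to the personality via hot_add_disk().
 * It returns the number of devices that need to be synced (newly added
 * spares plus partially-recovered members).
 */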
8153static int remove_and_add_spares(struct mddev *mddev,
8154 struct md_rdev *this)
8155{
8156 struct md_rdev *rdev;
8157 int spares = 0;
8158 int removed = 0;
8159
8160 rdev_for_each(rdev, mddev)
8161 if ((this == NULL || rdev == this) &&
8162 rdev->raid_disk >= 0 &&
8163 !test_bit(Blocked, &rdev->flags) &&
8164 (test_bit(Faulty, &rdev->flags) ||
8165 (!test_bit(In_sync, &rdev->flags) &&
8166 !test_bit(Journal, &rdev->flags))) &&
8167 atomic_read(&rdev->nr_pending)==0) {
8168 if (mddev->pers->hot_remove_disk(
8169 mddev, rdev) == 0) {
8170 sysfs_unlink_rdev(mddev, rdev);
8171 rdev->raid_disk = -1;
8172 removed++;
8173 }
8174 }
8175 if (removed && mddev->kobj.sd)
8176 sysfs_notify(&mddev->kobj, NULL, "degraded");
8177
8178 if (this && removed)
8179 goto no_add;
8180
8181 rdev_for_each(rdev, mddev) {
8182 if (this && this != rdev)
8183 continue;
8184 if (test_bit(Candidate, &rdev->flags))
8185 continue;
8186 if (rdev->raid_disk >= 0 &&
8187 !test_bit(In_sync, &rdev->flags) &&
8188 !test_bit(Journal, &rdev->flags) &&
8189 !test_bit(Faulty, &rdev->flags))
8190 spares++;
8191 if (rdev->raid_disk >= 0)
8192 continue;
8193 if (test_bit(Faulty, &rdev->flags))
8194 continue;
8195 if (!test_bit(Journal, &rdev->flags)) {
8196 if (mddev->ro &&
8197 ! (rdev->saved_raid_disk >= 0 &&
8198 !test_bit(Bitmap_sync, &rdev->flags)))
8199 continue;
8200
8201 rdev->recovery_offset = 0;
8202 }
8203 if (mddev->pers->
8204 hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
8207 if (!test_bit(Journal, &rdev->flags))
8208 spares++;
8209 md_new_event(mddev);
8210 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8211 }
8212 }
8213no_add:
8214 if (removed)
8215 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8216 return spares;
8217}
8218
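/*
 * md_start_sync() runs from md_misc_wq.  It creates the sync_thread (after
 * asking the cluster module for permission on clustered arrays) and cleans
 * up the recovery flags if the thread could not be started.
 */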
8219static void md_start_sync(struct work_struct *ws)
8220{
8221 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8222 int ret = 0;
8223
8224 if (mddev_is_clustered(mddev)) {
8225 ret = md_cluster_ops->resync_start(mddev);
8226 if (ret) {
8227 mddev->sync_thread = NULL;
8228 goto out;
8229 }
8230 }
8231
8232 mddev->sync_thread = md_register_thread(md_do_sync,
8233 mddev,
8234 "resync");
8235out:
8236 if (!mddev->sync_thread) {
8237 if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
8238 printk(KERN_ERR "%s: could not start resync"
8239 " thread...\n",
8240 mdname(mddev));
8241
8242 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8243 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8244 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8245 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8246 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8247 wake_up(&resync_wait);
8248 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8249 &mddev->recovery))
8250 if (mddev->sysfs_action)
8251 sysfs_notify_dirent_safe(mddev->sysfs_action);
8252 } else
8253 md_wakeup_thread(mddev->sync_thread);
8254 sysfs_notify_dirent_safe(mddev->sysfs_action);
8255 md_new_event(mddev);
8256}
8257
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that in the background if it is necessary.
 *
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread, which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there is no recovery thread running, start one, removing failed
 *     devices and adding spares first if appropriate.
 */
8280void md_check_recovery(struct mddev *mddev)
8281{
8282 if (mddev->suspended)
8283 return;
8284
8285 if (mddev->bitmap)
8286 bitmap_daemon_work(mddev);
8287
8288 if (signal_pending(current)) {
8289 if (mddev->pers->sync_request && !mddev->external) {
8290 printk(KERN_INFO "md: %s in immediate safe mode\n",
8291 mdname(mddev));
8292 mddev->safemode = 2;
8293 }
8294 flush_signals(current);
8295 }
8296
8297 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8298 return;
8299 if ( ! (
8300 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
8301 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8302 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8303 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8304 (mddev->external == 0 && mddev->safemode == 1) ||
8305 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8306 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8307 ))
8308 return;
8309
8310 if (mddev_trylock(mddev)) {
8311 int spares = 0;
8312
8313 if (mddev->ro) {
8314 struct md_rdev *rdev;
8315 if (!mddev->external && mddev->in_sync)
			/* 'Blocked' flag not needed as failed devices
			 * will be recorded if array switched to read/write.
			 * Leaving it set will prevent the device
			 * from being removed.
			 */
8321 rdev_for_each(rdev, mddev)
8322 clear_bit(Blocked, &rdev->flags);
8323
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
8330 remove_and_add_spares(mddev, NULL);
8331
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
8334 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8335 md_reap_sync_thread(mddev);
8336 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8337 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8338 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
8339 goto unlock;
8340 }
8341
8342 if (mddev_is_clustered(mddev)) {
8343 struct md_rdev *rdev;
8344
			/* kick the device if another node issued a
			 * remove disk.
			 */
8347 rdev_for_each(rdev, mddev) {
8348 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8349 rdev->raid_disk < 0)
8350 md_kick_rdev_from_array(rdev);
8351 }
8352
8353 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8354 md_reload_sb(mddev, mddev->good_device_nr);
8355 }
8356
8357 if (!mddev->external) {
8358 int did_change = 0;
8359 spin_lock(&mddev->lock);
8360 if (mddev->safemode &&
8361 !atomic_read(&mddev->writes_pending) &&
8362 !mddev->in_sync &&
8363 mddev->recovery_cp == MaxSector) {
8364 mddev->in_sync = 1;
8365 did_change = 1;
8366 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8367 }
8368 if (mddev->safemode == 1)
8369 mddev->safemode = 0;
8370 spin_unlock(&mddev->lock);
8371 if (did_change)
8372 sysfs_notify_dirent_safe(mddev->sysfs_state);
8373 }
8374
8375 if (mddev->flags & MD_UPDATE_SB_FLAGS)
8376 md_update_sb(mddev, 0);
8377
8378 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8379 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
8381 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8382 goto unlock;
8383 }
8384 if (mddev->sync_thread) {
8385 md_reap_sync_thread(mddev);
8386 goto unlock;
8387 }
8388
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
8391 mddev->curr_resync_completed = 0;
8392 spin_lock(&mddev->lock);
8393 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8394 spin_unlock(&mddev->lock);
8395
		/* Clear any state left over from a previous run before
		 * deciding what, if anything, to start.
		 */
8398 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8399 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8400
8401 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8402 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8403 goto not_running;
8404
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */
8411 if (mddev->reshape_position != MaxSector) {
8412 if (mddev->pers->check_reshape == NULL ||
8413 mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
8415 goto not_running;
8416 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8417 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8418 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8419 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8420 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8421 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8422 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8423 } else if (mddev->recovery_cp < MaxSector) {
8424 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8425 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8426 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
8428 goto not_running;
8429
8430 if (mddev->pers->sync_request) {
8431 if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
8436 bitmap_write_all(mddev->bitmap);
8437 }
8438 INIT_WORK(&mddev->del_work, md_start_sync);
8439 queue_work(md_misc_wq, &mddev->del_work);
8440 goto unlock;
8441 }
8442 not_running:
8443 if (!mddev->sync_thread) {
8444 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8445 wake_up(&resync_wait);
8446 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8447 &mddev->recovery))
8448 if (mddev->sysfs_action)
8449 sysfs_notify_dirent_safe(mddev->sysfs_action);
8450 }
8451 unlock:
8452 wake_up(&mddev->sb_wait);
8453 mddev_unlock(mddev);
8454 }
8455}
8456EXPORT_SYMBOL(md_check_recovery);
8457
8458void md_reap_sync_thread(struct mddev *mddev)
8459{
8460 struct md_rdev *rdev;
8461
	/* resync has finished, collect result */
8463 md_unregister_thread(&mddev->sync_thread);
8464 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8465 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success... */
		/* activate any spares */
8468 if (mddev->pers->spare_active(mddev)) {
8469 sysfs_notify(&mddev->kobj, NULL,
8470 "degraded");
8471 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8472 }
8473 }
8474 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8475 mddev->pers->finish_reshape)
8476 mddev->pers->finish_reshape(mddev);
8477
	/* If array is no-longer degraded, then any saved_raid_disk
	 * information must be scrapped.
	 */
8481 if (!mddev->degraded)
8482 rdev_for_each(rdev, mddev)
8483 rdev->saved_raid_disk = -1;
8484
8485 md_update_sb(mddev, 1);
8486 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8487 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8488 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8489 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8490 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8491 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8492 wake_up(&resync_wait);
	/* flag recovery needed just to double check */
8494 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8495 sysfs_notify_dirent_safe(mddev->sysfs_action);
8496 md_new_event(mddev);
8497 if (mddev->event_work.func)
8498 queue_work(md_misc_wq, &mddev->event_work);
8499}
8500EXPORT_SYMBOL(md_reap_sync_thread);
8501
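/*
 * Wait (briefly) for a device that a personality has marked Blocked,
 * typically while updated metadata such as a new bad-block record is
 * written out, before retrying I/O to it.
 */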
8502void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8503{
8504 sysfs_notify_dirent_safe(rdev->sysfs_state);
8505 wait_event_timeout(rdev->blocked_wait,
8506 !test_bit(Blocked, &rdev->flags) &&
8507 !test_bit(BlockedBadBlocks, &rdev->flags),
8508 msecs_to_jiffies(5000));
8509 rdev_dec_pending(rdev, mddev);
8510}
8511EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8512
8513void md_finish_reshape(struct mddev *mddev)
8514{
	/* called by personality module when reshape completes. */
8516 struct md_rdev *rdev;
8517
8518 rdev_for_each(rdev, mddev) {
8519 if (rdev->data_offset > rdev->new_data_offset)
8520 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8521 else
8522 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8523 rdev->data_offset = rdev->new_data_offset;
8524 }
8525}
8526EXPORT_SYMBOL(md_finish_reshape);
8527
/* Bad block management.
 * These helpers translate data-area-relative sectors into device offsets
 * before updating the per-device bad block list.
 */
8531int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8532 int is_new)
8533{
8534 int rv;
8535 if (is_new)
8536 s += rdev->new_data_offset;
8537 else
8538 s += rdev->data_offset;
8539 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8540 if (rv == 0) {
		/* Make sure they get written out promptly */
8542 sysfs_notify_dirent_safe(rdev->sysfs_state);
8543 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8544 set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
8545 md_wakeup_thread(rdev->mddev->thread);
8546 return 1;
8547 } else
8548 return 0;
8549}
8550EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8551
8552int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8553 int is_new)
8554{
8555 if (is_new)
8556 s += rdev->new_data_offset;
8557 else
8558 s += rdev->data_offset;
8559 return badblocks_clear(&rdev->badblocks,
8560 s, sectors);
8561}
8562EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8563
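/*
 * Reboot notifier: on shutdown or reboot, stop writes to every array we
 * can lock and put persistent arrays into safe mode, so that a resync is
 * not needed after the restart.
 */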
8564static int md_notify_reboot(struct notifier_block *this,
8565 unsigned long code, void *x)
8566{
8567 struct list_head *tmp;
8568 struct mddev *mddev;
8569 int need_delay = 0;
8570
8571 for_each_mddev(mddev, tmp) {
8572 if (mddev_trylock(mddev)) {
8573 if (mddev->pers)
8574 __md_stop_writes(mddev);
8575 if (mddev->persistent)
8576 mddev->safemode = 2;
8577 mddev_unlock(mddev);
8578 }
8579 need_delay = 1;
8580 }
8581
	/*
	 * certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
8587 if (need_delay)
8588 mdelay(1000*1);
8589
8590 return NOTIFY_DONE;
8591}
8592
8593static struct notifier_block md_notifier = {
8594 .notifier_call = md_notify_reboot,
8595 .next = NULL,
8596 .priority = INT_MAX,
8597};
8598
8599static void md_geninit(void)
8600{
8601 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8602
8603 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8604}
8605
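/*
 * Module initialisation: create the md workqueues, register the "md" and
 * "mdp" (partitionable) block majors and their regions, hook up the reboot
 * notifier and sysctls, and create /proc/mdstat.
 */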
8606static int __init md_init(void)
8607{
8608 int ret = -ENOMEM;
8609
8610 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8611 if (!md_wq)
8612 goto err_wq;
8613
8614 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8615 if (!md_misc_wq)
8616 goto err_misc_wq;
8617
8618 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8619 goto err_md;
8620
8621 if ((ret = register_blkdev(0, "mdp")) < 0)
8622 goto err_mdp;
8623 mdp_major = ret;
8624
8625 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8626 md_probe, NULL, NULL);
8627 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8628 md_probe, NULL, NULL);
8629
8630 register_reboot_notifier(&md_notifier);
8631 raid_table_header = register_sysctl_table(raid_root_table);
8632
8633 md_geninit();
8634 return 0;
8635
8636err_mdp:
8637 unregister_blkdev(MD_MAJOR, "md");
8638err_md:
8639 destroy_workqueue(md_misc_wq);
8640err_misc_wq:
8641 destroy_workqueue(md_wq);
8642err_wq:
8643 return ret;
8644}
8645
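/*
 * For clustered arrays: another node has updated the superblock; compare
 * the newly read copy against our in-memory state and apply any role
 * changes (activations, failures, removals) locally.
 */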
8646static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8647{
8648 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8649 struct md_rdev *rdev2;
8650 int role, ret;
8651 char b[BDEVNAME_SIZE];
8652
	/* Check for change of roles in the active devices */
8654 rdev_for_each(rdev2, mddev) {
8655 if (test_bit(Faulty, &rdev2->flags))
8656 continue;
8657
8658
8659 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8660
8661 if (test_bit(Candidate, &rdev2->flags)) {
8662 if (role == 0xfffe) {
8663 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8664 md_kick_rdev_from_array(rdev2);
8665 continue;
8666 }
8667 else
8668 clear_bit(Candidate, &rdev2->flags);
8669 }
8670
8671 if (role != rdev2->raid_disk) {
			/* got activated */
8673 if (rdev2->raid_disk == -1 && role != 0xffff) {
8674 rdev2->saved_raid_disk = role;
8675 ret = remove_and_add_spares(mddev, rdev2);
8676 pr_info("Activated spare: %s\n",
8677 bdevname(rdev2->bdev,b));
8678 }
8679
			/*
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
8684 if ((role == 0xfffe) || (role == 0xfffd)) {
8685 md_error(mddev, rdev2);
8686 clear_bit(Blocked, &rdev2->flags);
8687 }
8688 }
8689 }
8690
8691 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8692 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8693
8694
8695 mddev->events = le64_to_cpu(sb->events);
8696}
8697
8698static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8699{
8700 int err;
8701 struct page *swapout = rdev->sb_page;
8702 struct mdp_superblock_1 *sb;
8703
	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
8707 rdev->sb_page = NULL;
8708 alloc_disk_sb(rdev);
8709 ClearPageUptodate(rdev->sb_page);
8710 rdev->sb_loaded = 0;
8711 err = super_types[mddev->major_version].load_super(rdev, NULL, mddev->minor_version);
8712
8713 if (err < 0) {
8714 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8715 __func__, __LINE__, rdev->desc_nr, err);
8716 put_page(rdev->sb_page);
8717 rdev->sb_page = swapout;
8718 rdev->sb_loaded = 1;
8719 return err;
8720 }
8721
8722 sb = page_address(rdev->sb_page);
8723
	/* Pick up the recovery offset recorded by the other node, if any,
	 * so that a partially-recovered device resumes from the right place.
	 */
8727 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8728 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8729
	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
8733 if (rdev->recovery_offset == MaxSector &&
8734 !test_bit(In_sync, &rdev->flags) &&
8735 mddev->pers->spare_active(mddev))
8736 sysfs_notify(&mddev->kobj, NULL, "degraded");
8737
8738 put_page(swapout);
8739 return 0;
8740}
8741
8742void md_reload_sb(struct mddev *mddev, int nr)
8743{
8744 struct md_rdev *rdev;
8745 int err;
8746
	/* Find the rdev */
8748 rdev_for_each_rcu(rdev, mddev) {
8749 if (rdev->desc_nr == nr)
8750 break;
8751 }
8752
8753 if (!rdev || rdev->desc_nr != nr) {
8754 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8755 return;
8756 }
8757
8758 err = read_rdev(mddev, rdev);
8759 if (err < 0)
8760 return;
8761
8762 check_sb_changes(mddev, rdev);
8763
	/* Read all rdev's to update recovery_offset */
8765 rdev_for_each_rcu(rdev, mddev)
8766 read_rdev(mddev, rdev);
8767}
8768EXPORT_SYMBOL(md_reload_sb);
8769
8770#ifndef MODULE
8771
/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */
8777static LIST_HEAD(all_detected_devices);
8778struct detected_devices_node {
8779 struct list_head list;
8780 dev_t dev;
8781};
8782
8783void md_autodetect_dev(dev_t dev)
8784{
8785 struct detected_devices_node *node_detected_dev;
8786
8787 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8788 if (node_detected_dev) {
8789 node_detected_dev->dev = dev;
8790 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8791 } else {
8792 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8793 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8794 }
8795}
8796
8797static void autostart_arrays(int part)
8798{
8799 struct md_rdev *rdev;
8800 struct detected_devices_node *node_detected_dev;
8801 dev_t dev;
8802 int i_scanned, i_passed;
8803
8804 i_scanned = 0;
8805 i_passed = 0;
8806
8807 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8808
8809 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8810 i_scanned++;
8811 node_detected_dev = list_entry(all_detected_devices.next,
8812 struct detected_devices_node, list);
8813 list_del(&node_detected_dev->list);
8814 dev = node_detected_dev->dev;
8815 kfree(node_detected_dev);
8816 rdev = md_import_device(dev,0, 90);
8817 if (IS_ERR(rdev))
8818 continue;
8819
8820 if (test_bit(Faulty, &rdev->flags))
8821 continue;
8822
8823 set_bit(AutoDetected, &rdev->flags);
8824 list_add(&rdev->same_set, &pending_raid_disks);
8825 i_passed++;
8826 }
8827
8828 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8829 i_scanned, i_passed);
8830
8831 autorun_devices(part);
8832}
8833
8834#endif
8835
8836static __exit void md_exit(void)
8837{
8838 struct mddev *mddev;
8839 struct list_head *tmp;
8840 int delay = 1;
8841
8842 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8843 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8844
8845 unregister_blkdev(MD_MAJOR,"md");
8846 unregister_blkdev(mdp_major, "mdp");
8847 unregister_reboot_notifier(&md_notifier);
8848 unregister_sysctl_table(raid_table_header);
8849
	/*
	 * We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
8853 md_unloading = 1;
8854 while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
8856 wake_up(&md_event_waiters);
8857 msleep(delay);
8858 delay += delay;
8859 }
8860 remove_proc_entry("mdstat", NULL);
8861
8862 for_each_mddev(mddev, tmp) {
8863 export_array(mddev);
8864 mddev->hold_active = 0;
8865 }
8866 destroy_workqueue(md_misc_wq);
8867 destroy_workqueue(md_wq);
8868}
8869
8870subsys_initcall(md_init);
8871module_exit(md_exit)
8872
8873static int get_ro(char *buffer, struct kernel_param *kp)
8874{
8875 return sprintf(buffer, "%d", start_readonly);
8876}
8877static int set_ro(const char *val, struct kernel_param *kp)
8878{
8879 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8880}
8881
8882module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8883module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8884module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8885
8886MODULE_LICENSE("GPL");
8887MODULE_DESCRIPTION("MD RAID framework");
8888MODULE_ALIAS("md");
8889MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8890