/*
 * md.c : Multiple Devices driver for Linux
 */
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/badblocks.h>
38#include <linux/sysctl.h>
39#include <linux/seq_file.h>
40#include <linux/fs.h>
41#include <linux/poll.h>
42#include <linux/ctype.h>
43#include <linux/string.h>
44#include <linux/hdreg.h>
45#include <linux/proc_fs.h>
46#include <linux/random.h>
47#include <linux/module.h>
48#include <linux/reboot.h>
49#include <linux/file.h>
50#include <linux/compat.h>
51#include <linux/delay.h>
52#include <linux/raid/md_p.h>
53#include <linux/raid/md_u.h>
54#include <linux/slab.h>
55#include "md.h"
56#include "bitmap.h"
57#include "md-cluster.h"
58
59#ifndef MODULE
60static void autostart_arrays(int part);
61#endif

/*
 * List of registered RAID "personalities" (raid0, raid1, raid5, ...),
 * protected by pers_lock.
 */
68static LIST_HEAD(pers_list);
69static DEFINE_SPINLOCK(pers_lock);
70
71struct md_cluster_operations *md_cluster_ops;
72EXPORT_SYMBOL(md_cluster_ops);
73struct module *md_cluster_mod;
74EXPORT_SYMBOL(md_cluster_mod);
75
76static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
77static struct workqueue_struct *md_wq;
78static struct workqueue_struct *md_misc_wq;
79
80static int remove_and_add_spares(struct mddev *mddev,
81 struct md_rdev *this);
82static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.
 */
89#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Resync/recovery is paced between a guaranteed minimum speed (default
 * 1000 KB/sec per device) and an absolute maximum (default 200,000 KB/sec).
 * Both limits can be tuned via /proc/sys/dev/raid/speed_limit_{min,max}
 * or the per-array /sys/block/mdX/md/sync_speed_{min,max} attributes.
 */
103static int sysctl_speed_limit_min = 1000;
104static int sysctl_speed_limit_max = 200000;
105static inline int speed_min(struct mddev *mddev)
106{
107 return mddev->sync_speed_min ?
108 mddev->sync_speed_min : sysctl_speed_limit_min;
109}
110
111static inline int speed_max(struct mddev *mddev)
112{
113 return mddev->sync_speed_max ?
114 mddev->sync_speed_max : sysctl_speed_limit_max;
115}
116
117static struct ctl_table_header *raid_table_header;
118
119static struct ctl_table raid_table[] = {
120 {
121 .procname = "speed_limit_min",
122 .data = &sysctl_speed_limit_min,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 {
128 .procname = "speed_limit_max",
129 .data = &sysctl_speed_limit_max,
130 .maxlen = sizeof(int),
131 .mode = S_IRUGO|S_IWUSR,
132 .proc_handler = proc_dointvec,
133 },
134 { }
135};
136
137static struct ctl_table raid_dir_table[] = {
138 {
139 .procname = "raid",
140 .maxlen = 0,
141 .mode = S_IRUGO|S_IXUGO,
142 .child = raid_table,
143 },
144 { }
145};
146
147static struct ctl_table raid_root_table[] = {
148 {
149 .procname = "dev",
150 .maxlen = 0,
151 .mode = 0555,
152 .child = raid_dir_table,
153 },
154 { }
155};
156
157static const struct block_device_operations md_fops;
158
159static int start_readonly;

/*
 * Allocate bios from the array's private bio_set when one exists, falling
 * back to the global bio pool otherwise.
 */
165struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
166 struct mddev *mddev)
167{
168 struct bio *b;
169
170 if (!mddev || !mddev->bio_set)
171 return bio_alloc(gfp_mask, nr_iovecs);
172
173 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
174 if (!b)
175 return NULL;
176 return b;
177}
178EXPORT_SYMBOL_GPL(bio_alloc_mddev);
179
180struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
181 struct mddev *mddev)
182{
183 if (!mddev || !mddev->bio_set)
184 return bio_clone(bio, gfp_mask);
185
186 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
187}
188EXPORT_SYMBOL_GPL(bio_clone_mddev);

/*
 * A system-wide event counter.  It is incremented on every "interesting"
 * array event; pollers of /proc/mdstat sleep on md_event_waiters and use
 * the count to detect that something has changed.
 */
200static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
201static atomic_t md_event_count;
202void md_new_event(struct mddev *mddev)
203{
204 atomic_inc(&md_event_count);
205 wake_up(&md_event_waiters);
206}
207EXPORT_SYMBOL_GPL(md_new_event);

/*
 * List of all known md arrays, protected by all_mddevs_lock.
 */
213static LIST_HEAD(all_mddevs);
214static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterate over every mddev in the system.  all_mddevs_lock is taken while
 * walking the list and a reference is held on the current mddev whenever
 * the lock is dropped; code that breaks out of the loop early must call
 * mddev_put() on the mddev it stopped at.
 */
223#define for_each_mddev(_mddev,_tmp) \
224 \
225 for (({ spin_lock(&all_mddevs_lock); \
226 _tmp = all_mddevs.next; \
227 _mddev = NULL;}); \
228 ({ if (_tmp != &all_mddevs) \
229 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
230 spin_unlock(&all_mddevs_lock); \
231 if (_mddev) mddev_put(_mddev); \
232 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
233 _tmp != &all_mddevs;}); \
234 ({ spin_lock(&all_mddevs_lock); \
235 _tmp = _tmp->next;}) \
236 )

/*
 * Generic md request routing: every bio submitted to an md device enters
 * here and is passed on to the personality's make_request function, after
 * suspend/resume and read-only restrictions have been handled.
 */
245static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
246{
247 const int rw = bio_data_dir(bio);
248 struct mddev *mddev = q->queuedata;
249 unsigned int sectors;
250 int cpu;
251
252 blk_queue_split(q, &bio, q->bio_split);
253
254 if (mddev == NULL || mddev->pers == NULL) {
255 bio_io_error(bio);
256 return BLK_QC_T_NONE;
257 }
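 /*
  * Writes to a read-only array fail with -EROFS; zero-length (flush-only)
  * bios are simply completed.
  */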
258 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
259 if (bio_sectors(bio) != 0)
260 bio->bi_error = -EROFS;
261 bio_endio(bio);
262 return BLK_QC_T_NONE;
263 }
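 /*
  * If the array is suspended, wait here until it is resumed before handing
  * the bio to the personality; active_io counts bios that were let through.
  */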
264 smp_rmb();
265 rcu_read_lock();
266 if (mddev->suspended) {
267 DEFINE_WAIT(__wait);
268 for (;;) {
269 prepare_to_wait(&mddev->sb_wait, &__wait,
270 TASK_UNINTERRUPTIBLE);
271 if (!mddev->suspended)
272 break;
273 rcu_read_unlock();
274 schedule();
275 rcu_read_lock();
276 }
277 finish_wait(&mddev->sb_wait, &__wait);
278 }
279 atomic_inc(&mddev->active_io);
280 rcu_read_unlock();

 /*
  * Save the number of sectors now, since the bio may be freed by the
  * personality's make_request before we account the I/O statistics.
  */
286 sectors = bio_sectors(bio);
287
288 bio->bi_opf &= ~REQ_NOMERGE;
289 mddev->pers->make_request(mddev, bio);
290
291 cpu = part_stat_lock();
292 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
293 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
294 part_stat_unlock();
295
296 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
297 wake_up(&mddev->sb_wait);
298
299 return BLK_QC_T_NONE;
300}

/*
 * mddev_suspend() ensures that no new requests reach the personality and
 * that all requests already submitted have completed, so that callers can
 * safely reconfigure the array.  mddev_resume() reverses it.
 */
308void mddev_suspend(struct mddev *mddev)
309{
310 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
311 if (mddev->suspended++)
312 return;
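 /* Wait for all in-flight IO to drain, then quiesce the personality. */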
313 synchronize_rcu();
314 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
315 mddev->pers->quiesce(mddev, 1);
316
317 del_timer_sync(&mddev->safemode_timer);
318}
319EXPORT_SYMBOL_GPL(mddev_suspend);
320
321void mddev_resume(struct mddev *mddev)
322{
323 if (--mddev->suspended)
324 return;
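 /* Let queued IO through again and kick the threads to resume work. */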
325 wake_up(&mddev->sb_wait);
326 mddev->pers->quiesce(mddev, 0);
327
328 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
329 md_wakeup_thread(mddev->thread);
330 md_wakeup_thread(mddev->sync_thread);
331}
332EXPORT_SYMBOL_GPL(mddev_resume);
333
334int mddev_congested(struct mddev *mddev, int bits)
335{
336 struct md_personality *pers = mddev->pers;
337 int ret = 0;
338
339 rcu_read_lock();
340 if (mddev->suspended)
341 ret = 1;
342 else if (pers && pers->congested)
343 ret = pers->congested(mddev, bits);
344 rcu_read_unlock();
345 return ret;
346}
347EXPORT_SYMBOL_GPL(mddev_congested);
348static int md_congested(void *data, int bits)
349{
350 struct mddev *mddev = data;
351 return mddev_congested(mddev, bits);
352}

/* Generic flush handling for md */
358static void md_end_flush(struct bio *bio)
359{
360 struct md_rdev *rdev = bio->bi_private;
361 struct mddev *mddev = rdev->mddev;
362
363 rdev_dec_pending(rdev, mddev);
364
365 if (atomic_dec_and_test(&mddev->flush_pending)) {
366
367 queue_work(md_wq, &mddev->flush_work);
368 }
369 bio_put(bio);
370}
371
372static void md_submit_flush_data(struct work_struct *ws);
373
374static void submit_flushes(struct work_struct *ws)
375{
376 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
377 struct md_rdev *rdev;
378
379 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
380 atomic_set(&mddev->flush_pending, 1);
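 /*
  * flush_pending starts at 1 so that md_submit_flush_data is scheduled
  * exactly once, by whichever final decrement brings the count to zero
  * after every per-device flush bio has been submitted and completed.
  */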
381 rcu_read_lock();
382 rdev_for_each_rcu(rdev, mddev)
383 if (rdev->raid_disk >= 0 &&
384 !test_bit(Faulty, &rdev->flags)) {
 /*
  * Take two references on the rdev: one is dropped when the flush bio
  * completes, the other after we re-take rcu_read_lock below.
  */
389 struct bio *bi;
390 atomic_inc(&rdev->nr_pending);
391 atomic_inc(&rdev->nr_pending);
392 rcu_read_unlock();
393 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
394 bi->bi_end_io = md_end_flush;
395 bi->bi_private = rdev;
396 bi->bi_bdev = rdev->bdev;
397 bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
398 atomic_inc(&mddev->flush_pending);
399 submit_bio(bi);
400 rcu_read_lock();
401 rdev_dec_pending(rdev, mddev);
402 }
403 rcu_read_unlock();
404 if (atomic_dec_and_test(&mddev->flush_pending))
405 queue_work(md_wq, &mddev->flush_work);
406}
407
408static void md_submit_flush_data(struct work_struct *ws)
409{
410 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
411 struct bio *bio = mddev->flush_bio;
412
413 if (bio->bi_iter.bi_size == 0)
 /* an empty flush: nothing to pass down, just complete it */
415 bio_endio(bio);
416 else {
417 bio->bi_opf &= ~REQ_PREFLUSH;
418 mddev->pers->make_request(mddev, bio);
419 }
420
421 mddev->flush_bio = NULL;
422 wake_up(&mddev->sb_wait);
423}
424
425void md_flush_request(struct mddev *mddev, struct bio *bio)
426{
427 spin_lock_irq(&mddev->lock);
428 wait_event_lock_irq(mddev->sb_wait,
429 !mddev->flush_bio,
430 mddev->lock);
431 mddev->flush_bio = bio;
432 spin_unlock_irq(&mddev->lock);
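 /* Send a pre-flush to every member device from the workqueue. */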
433
434 INIT_WORK(&mddev->flush_work, submit_flushes);
435 queue_work(md_wq, &mddev->flush_work);
436}
437EXPORT_SYMBOL(md_flush_request);
438
439void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
440{
441 struct mddev *mddev = cb->data;
442 md_wakeup_thread(mddev->thread);
443 kfree(cb);
444}
445EXPORT_SYMBOL(md_unplug);
446
447static inline struct mddev *mddev_get(struct mddev *mddev)
448{
449 atomic_inc(&mddev->active);
450 return mddev;
451}
452
453static void mddev_delayed_delete(struct work_struct *ws);
454
455static void mddev_put(struct mddev *mddev)
456{
457 struct bio_set *bs = NULL;
458
459 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
460 return;
461 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
462 mddev->ctime == 0 && !mddev->hold_active) {
 /* Array is not configured at all, and not held active, so destroy it */
465 list_del_init(&mddev->all_mddevs);
466 bs = mddev->bio_set;
467 mddev->bio_set = NULL;
468 if (mddev->gendisk) {
 /*
  * We did a probe, so the gendisk needs to be torn down.  Queue the work
  * while still holding all_mddevs_lock so that a later flush_workqueue()
  * is guaranteed to wait for this deletion.
  */
474 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
475 queue_work(md_misc_wq, &mddev->del_work);
476 } else
477 kfree(mddev);
478 }
479 spin_unlock(&all_mddevs_lock);
480 if (bs)
481 bioset_free(bs);
482}
483
484static void md_safemode_timeout(unsigned long data);
485
486void mddev_init(struct mddev *mddev)
487{
488 mutex_init(&mddev->open_mutex);
489 mutex_init(&mddev->reconfig_mutex);
490 mutex_init(&mddev->bitmap_info.mutex);
491 INIT_LIST_HEAD(&mddev->disks);
492 INIT_LIST_HEAD(&mddev->all_mddevs);
493 setup_timer(&mddev->safemode_timer, md_safemode_timeout,
494 (unsigned long) mddev);
495 atomic_set(&mddev->active, 1);
496 atomic_set(&mddev->openers, 0);
497 atomic_set(&mddev->active_io, 0);
498 spin_lock_init(&mddev->lock);
499 atomic_set(&mddev->flush_pending, 0);
500 init_waitqueue_head(&mddev->sb_wait);
501 init_waitqueue_head(&mddev->recovery_wait);
502 mddev->reshape_position = MaxSector;
503 mddev->reshape_backwards = 0;
504 mddev->last_sync_action = "none";
505 mddev->resync_min = 0;
506 mddev->resync_max = MaxSector;
507 mddev->level = LEVEL_NONE;
508}
509EXPORT_SYMBOL_GPL(mddev_init);
510
511static struct mddev *mddev_find(dev_t unit)
512{
513 struct mddev *mddev, *new = NULL;
 /* Partitioned md devices (mdp) share one mddev per whole device. */
515 if (unit && MAJOR(unit) != MD_MAJOR)
516 unit &= ~((1<<MdpMinorShift)-1);
517
518 retry:
519 spin_lock(&all_mddevs_lock);
520
521 if (unit) {
522 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
523 if (mddev->unit == unit) {
524 mddev_get(mddev);
525 spin_unlock(&all_mddevs_lock);
526 kfree(new);
527 return mddev;
528 }
529
530 if (new) {
531 list_add(&new->all_mddevs, &all_mddevs);
532 spin_unlock(&all_mddevs_lock);
533 new->hold_active = UNTIL_IOCTL;
534 return new;
535 }
536 } else if (new) {
 /* find an unused unit number */
538 static int next_minor = 512;
539 int start = next_minor;
540 int is_free = 0;
541 int dev = 0;
542 while (!is_free) {
543 dev = MKDEV(MD_MAJOR, next_minor);
544 next_minor++;
545 if (next_minor > MINORMASK)
546 next_minor = 0;
547 if (next_minor == start) {
548
549 spin_unlock(&all_mddevs_lock);
550 kfree(new);
551 return NULL;
552 }
553
554 is_free = 1;
555 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
556 if (mddev->unit == dev) {
557 is_free = 0;
558 break;
559 }
560 }
561 new->unit = dev;
562 new->md_minor = MINOR(dev);
563 new->hold_active = UNTIL_STOP;
564 list_add(&new->all_mddevs, &all_mddevs);
565 spin_unlock(&all_mddevs_lock);
566 return new;
567 }
568 spin_unlock(&all_mddevs_lock);
569
570 new = kzalloc(sizeof(*new), GFP_KERNEL);
571 if (!new)
572 return NULL;
573
574 new->unit = unit;
575 if (MAJOR(unit) == MD_MAJOR)
576 new->md_minor = MINOR(unit);
577 else
578 new->md_minor = MINOR(unit) >> MdpMinorShift;
579
580 mddev_init(new);
581
582 goto retry;
583}
584
585static struct attribute_group md_redundancy_group;
586
587void mddev_unlock(struct mddev *mddev)
588{
589 if (mddev->to_remove) {
 /*
  * Sysfs groups cannot be removed while reconfig_mutex is held: an
  * attribute method may itself be waiting for the mutex, which would
  * deadlock.  So note what has to go, set sysfs_active as a guard, drop
  * the mutex and do the removal afterwards.  Anyone racing to change the
  * sysfs namespace will see sysfs_active and back off with -EBUSY.
  */
602 struct attribute_group *to_remove = mddev->to_remove;
603 mddev->to_remove = NULL;
604 mddev->sysfs_active = 1;
605 mutex_unlock(&mddev->reconfig_mutex);
606
607 if (mddev->kobj.sd) {
608 if (to_remove != &md_redundancy_group)
609 sysfs_remove_group(&mddev->kobj, to_remove);
610 if (mddev->pers == NULL ||
611 mddev->pers->sync_request == NULL) {
612 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
613 if (mddev->sysfs_action)
614 sysfs_put(mddev->sysfs_action);
615 mddev->sysfs_action = NULL;
616 }
617 }
618 mddev->sysfs_active = 0;
619 } else
620 mutex_unlock(&mddev->reconfig_mutex);

 /*
  * The mutex has been dropped, so take pers_lock to make sure the array's
  * thread cannot be freed underneath md_wakeup_thread().
  */
625 spin_lock(&pers_lock);
626 md_wakeup_thread(mddev->thread);
627 spin_unlock(&pers_lock);
628}
629EXPORT_SYMBOL_GPL(mddev_unlock);
630
631struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
632{
633 struct md_rdev *rdev;
634
635 rdev_for_each_rcu(rdev, mddev)
636 if (rdev->desc_nr == nr)
637 return rdev;
638
639 return NULL;
640}
641EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
642
643static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
644{
645 struct md_rdev *rdev;
646
647 rdev_for_each(rdev, mddev)
648 if (rdev->bdev->bd_dev == dev)
649 return rdev;
650
651 return NULL;
652}
653
654static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
655{
656 struct md_rdev *rdev;
657
658 rdev_for_each_rcu(rdev, mddev)
659 if (rdev->bdev->bd_dev == dev)
660 return rdev;
661
662 return NULL;
663}
664
665static struct md_personality *find_pers(int level, char *clevel)
666{
667 struct md_personality *pers;
668 list_for_each_entry(pers, &pers_list, list) {
669 if (level != LEVEL_NONE && pers->level == level)
670 return pers;
671 if (strcmp(pers->name, clevel)==0)
672 return pers;
673 }
674 return NULL;
675}

/* Return the offset, in 512-byte sectors, of a 0.90 superblock on this device. */
678static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
679{
680 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
681 return MD_NEW_SIZE_SECTORS(num_sectors);
682}
683
684static int alloc_disk_sb(struct md_rdev *rdev)
685{
686 rdev->sb_page = alloc_page(GFP_KERNEL);
687 if (!rdev->sb_page) {
688 printk(KERN_ALERT "md: out of memory.\n");
689 return -ENOMEM;
690 }
691
692 return 0;
693}
694
695void md_rdev_clear(struct md_rdev *rdev)
696{
697 if (rdev->sb_page) {
698 put_page(rdev->sb_page);
699 rdev->sb_loaded = 0;
700 rdev->sb_page = NULL;
701 rdev->sb_start = 0;
702 rdev->sectors = 0;
703 }
704 if (rdev->bb_page) {
705 put_page(rdev->bb_page);
706 rdev->bb_page = NULL;
707 }
708 badblocks_exit(&rdev->badblocks);
709}
710EXPORT_SYMBOL_GPL(md_rdev_clear);
711
712static void super_written(struct bio *bio)
713{
714 struct md_rdev *rdev = bio->bi_private;
715 struct mddev *mddev = rdev->mddev;
716
717 if (bio->bi_error) {
718 printk("md: super_written gets error=%d\n", bio->bi_error);
719 md_error(mddev, rdev);
720 }
721
722 if (atomic_dec_and_test(&mddev->pending_writes))
723 wake_up(&mddev->sb_wait);
724 rdev_dec_pending(rdev, mddev);
725 bio_put(bio);
726}
727
728void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
729 sector_t sector, int size, struct page *page)
{
 /*
  * Write the first 'size' bytes of 'page' to 'sector' of rdev.
  * mddev->pending_writes is incremented before submission and dropped in
  * super_written(), which wakes sb_wait when it reaches zero and calls
  * md_error() if the write failed.
  */
737 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
738
739 atomic_inc(&rdev->nr_pending);
740
741 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
742 bio->bi_iter.bi_sector = sector;
743 bio_add_page(bio, page, size, 0);
744 bio->bi_private = rdev;
745 bio->bi_end_io = super_written;
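 /* Superblock writes go out as flush+FUA so they are durable on completion. */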
746 bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
747
748 atomic_inc(&mddev->pending_writes);
749 submit_bio(bio);
750}
751
752void md_super_wait(struct mddev *mddev)
753{
 /* wait for all scheduled superblock writes to complete */
755 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
756}
757
758int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
759 struct page *page, int op, int op_flags, bool metadata_op)
760{
761 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
762 int ret;
763
764 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
765 rdev->meta_bdev : rdev->bdev;
766 bio_set_op_attrs(bio, op, op_flags);
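 /*
  * Metadata I/O is addressed relative to the superblock; data I/O uses the
  * old or new data offset depending on reshape progress.
  */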
767 if (metadata_op)
768 bio->bi_iter.bi_sector = sector + rdev->sb_start;
769 else if (rdev->mddev->reshape_position != MaxSector &&
770 (rdev->mddev->reshape_backwards ==
771 (sector >= rdev->mddev->reshape_position)))
772 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
773 else
774 bio->bi_iter.bi_sector = sector + rdev->data_offset;
775 bio_add_page(bio, page, size, 0);
776
777 submit_bio_wait(bio);
778
779 ret = !bio->bi_error;
780 bio_put(bio);
781 return ret;
782}
783EXPORT_SYMBOL_GPL(sync_page_io);
784
785static int read_disk_sb(struct md_rdev *rdev, int size)
786{
787 char b[BDEVNAME_SIZE];
788
789 if (rdev->sb_loaded)
790 return 0;
791
792 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
793 goto fail;
794 rdev->sb_loaded = 1;
795 return 0;
796
797fail:
798 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
799 bdevname(rdev->bdev,b));
800 return -EINVAL;
801}
802
803static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
804{
805 return sb1->set_uuid0 == sb2->set_uuid0 &&
806 sb1->set_uuid1 == sb2->set_uuid1 &&
807 sb1->set_uuid2 == sb2->set_uuid2 &&
808 sb1->set_uuid3 == sb2->set_uuid3;
809}
810
811static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
812{
813 int ret;
814 mdp_super_t *tmp1, *tmp2;
815
816 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
817 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
818
819 if (!tmp1 || !tmp2) {
820 ret = 0;
821 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
822 goto abort;
823 }
824
825 *tmp1 = *sb1;
826 *tmp2 = *sb2;
827
828
829
830
831 tmp1->nr_disks = 0;
832 tmp2->nr_disks = 0;
833
834 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
835abort:
836 kfree(tmp1);
837 kfree(tmp2);
838 return ret;
839}
840
841static u32 md_csum_fold(u32 csum)
842{
843 csum = (csum & 0xffff) + (csum >> 16);
844 return (csum & 0xffff) + (csum >> 16);
845}
846
847static unsigned int calc_sb_csum(mdp_super_t *sb)
848{
849 u64 newcsum = 0;
850 u32 *sb32 = (u32*)sb;
851 int i;
852 unsigned int disk_csum, csum;
853
854 disk_csum = sb->sb_csum;
855 sb->sb_csum = 0;
856
857 for (i = 0; i < MD_SB_BYTES/4 ; i++)
858 newcsum += sb32[i];
859 csum = (newcsum & 0xffffffff) + (newcsum>>32);
860
861#ifdef CONFIG_ALPHA
862
863
864
865
866
867
868
869
870 sb->sb_csum = md_csum_fold(disk_csum);
871#else
872 sb->sb_csum = disk_csum;
873#endif
874 return csum;
875}

/*
 * Handle superblock details.
 * md supports several on-disk metadata formats, so each format provides a
 * set of handlers collected in a struct super_type:
 *
 *   load_super(rdev, refdev, minor_version)
 *      Read and sanity-check the superblock on rdev, comparing it with
 *      refdev (if any) to decide which device is newest.
 *   validate_super(mddev, rdev)
 *      Fill in mddev (for a fresh array) or check/record this device's
 *      role from its superblock.
 *   sync_super(mddev, rdev)
 *      Regenerate rdev's in-memory superblock from the current mddev state.
 *   rdev_size_change(rdev, num_sectors)
 *      Resize the device, relocating the superblock if necessary.
 *   allow_new_offset(rdev, new_offset)
 *      Check whether the data can be moved to a new offset on the device.
 */
907struct super_type {
908 char *name;
909 struct module *owner;
910 int (*load_super)(struct md_rdev *rdev,
911 struct md_rdev *refdev,
912 int minor_version);
913 int (*validate_super)(struct mddev *mddev,
914 struct md_rdev *rdev);
915 void (*sync_super)(struct mddev *mddev,
916 struct md_rdev *rdev);
917 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
918 sector_t num_sectors);
919 int (*allow_new_offset)(struct md_rdev *rdev,
920 unsigned long long new_offset);
921};

/*
 * Check that the given mddev has no bitmap.
 *
 * Called from the run method of personalities that do not support bitmaps;
 * prints an error and returns non-zero if a bitmap is configured.
 */
931int md_check_no_bitmap(struct mddev *mddev)
932{
933 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
934 return 0;
935 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
936 mdname(mddev), mddev->pers->name);
937 return 1;
938}
939EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0 metadata
 */
944static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
945{
946 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
947 mdp_super_t *sb;
948 int ret;

 /*
  * Calculate the position of the superblock (in 512-byte sectors): for
  * 0.90 metadata it lives near the end of the device, 4K-aligned.
  */
956 rdev->sb_start = calc_dev_sboffset(rdev);
957
958 ret = read_disk_sb(rdev, MD_SB_BYTES);
959 if (ret) return ret;
960
961 ret = -EINVAL;
962
963 bdevname(rdev->bdev, b);
964 sb = page_address(rdev->sb_page);
965
966 if (sb->md_magic != MD_SB_MAGIC) {
967 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
968 b);
969 goto abort;
970 }
971
972 if (sb->major_version != 0 ||
973 sb->minor_version < 90 ||
974 sb->minor_version > 91) {
975 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
976 sb->major_version, sb->minor_version,
977 b);
978 goto abort;
979 }
980
981 if (sb->raid_disks <= 0)
982 goto abort;
983
984 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
985 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
986 b);
987 goto abort;
988 }
989
990 rdev->preferred_minor = sb->md_minor;
991 rdev->data_offset = 0;
992 rdev->new_data_offset = 0;
993 rdev->sb_size = MD_SB_BYTES;
994 rdev->badblocks.shift = -1;
995
996 if (sb->level == LEVEL_MULTIPATH)
997 rdev->desc_nr = -1;
998 else
999 rdev->desc_nr = sb->this_disk.number;
1000
1001 if (!refdev) {
1002 ret = 1;
1003 } else {
1004 __u64 ev1, ev2;
1005 mdp_super_t *refsb = page_address(refdev->sb_page);
1006 if (!uuid_equal(refsb, sb)) {
1007 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1008 b, bdevname(refdev->bdev,b2));
1009 goto abort;
1010 }
1011 if (!sb_equal(refsb, sb)) {
1012 printk(KERN_WARNING "md: %s has same UUID"
1013 " but different superblock to %s\n",
1014 b, bdevname(refdev->bdev, b2));
1015 goto abort;
1016 }
1017 ev1 = md_event(sb);
1018 ev2 = md_event(refsb);
1019 if (ev1 > ev2)
1020 ret = 1;
1021 else
1022 ret = 0;
1023 }
1024 rdev->sectors = rdev->sb_start;

 /*
  * 0.90 metadata cannot record component sizes above 4TB, so clamp
  * (not needed for linear/RAID0, which don't record this size).
  */
1029 if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
1030 sb->level >= 1)
1031 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1032
1033 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1034
1035 ret = -EINVAL;
1036
1037 abort:
1038 return ret;
1039}

/*
 * validate_super for 0.90.0 metadata
 */
1044static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1045{
1046 mdp_disk_t *desc;
1047 mdp_super_t *sb = page_address(rdev->sb_page);
1048 __u64 ev1 = md_event(sb);
1049
1050 rdev->raid_disk = -1;
1051 clear_bit(Faulty, &rdev->flags);
1052 clear_bit(In_sync, &rdev->flags);
1053 clear_bit(Bitmap_sync, &rdev->flags);
1054 clear_bit(WriteMostly, &rdev->flags);
1055
1056 if (mddev->raid_disks == 0) {
1057 mddev->major_version = 0;
1058 mddev->minor_version = sb->minor_version;
1059 mddev->patch_version = sb->patch_version;
1060 mddev->external = 0;
1061 mddev->chunk_sectors = sb->chunk_size >> 9;
1062 mddev->ctime = sb->ctime;
1063 mddev->utime = sb->utime;
1064 mddev->level = sb->level;
1065 mddev->clevel[0] = 0;
1066 mddev->layout = sb->layout;
1067 mddev->raid_disks = sb->raid_disks;
1068 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1069 mddev->events = ev1;
1070 mddev->bitmap_info.offset = 0;
1071 mddev->bitmap_info.space = 0;
1072
1073 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1074 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1075 mddev->reshape_backwards = 0;
1076
1077 if (mddev->minor_version >= 91) {
1078 mddev->reshape_position = sb->reshape_position;
1079 mddev->delta_disks = sb->delta_disks;
1080 mddev->new_level = sb->new_level;
1081 mddev->new_layout = sb->new_layout;
1082 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1083 if (mddev->delta_disks < 0)
1084 mddev->reshape_backwards = 1;
1085 } else {
1086 mddev->reshape_position = MaxSector;
1087 mddev->delta_disks = 0;
1088 mddev->new_level = mddev->level;
1089 mddev->new_layout = mddev->layout;
1090 mddev->new_chunk_sectors = mddev->chunk_sectors;
1091 }
1092
1093 if (sb->state & (1<<MD_SB_CLEAN))
1094 mddev->recovery_cp = MaxSector;
1095 else {
1096 if (sb->events_hi == sb->cp_events_hi &&
1097 sb->events_lo == sb->cp_events_lo) {
1098 mddev->recovery_cp = sb->recovery_cp;
1099 } else
1100 mddev->recovery_cp = 0;
1101 }
1102
1103 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1104 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1105 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1106 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1107
1108 mddev->max_disks = MD_SB_DISKS;
1109
1110 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1111 mddev->bitmap_info.file == NULL) {
1112 mddev->bitmap_info.offset =
1113 mddev->bitmap_info.default_offset;
1114 mddev->bitmap_info.space =
1115 mddev->bitmap_info.default_space;
1116 }
1117
1118 } else if (mddev->pers == NULL) {
 /*
  * Insist on a good event counter while assembling, except for spares
  * (which don't need an event count).
  */
1121 ++ev1;
1122 if (sb->disks[rdev->desc_nr].state & (
1123 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1124 if (ev1 < mddev->events)
1125 return -EINVAL;
1126 } else if (mddev->bitmap) {
 /*
  * If adding to an array with a bitmap, we can accept an older device,
  * but not too old.
  */
1130 if (ev1 < mddev->bitmap->events_cleared)
1131 return 0;
1132 if (ev1 < mddev->events)
1133 set_bit(Bitmap_sync, &rdev->flags);
1134 } else {
1135 if (ev1 < mddev->events)
1136
1137 return 0;
1138 }
1139
1140 if (mddev->level != LEVEL_MULTIPATH) {
1141 desc = sb->disks + rdev->desc_nr;
1142
1143 if (desc->state & (1<<MD_DISK_FAULTY))
1144 set_bit(Faulty, &rdev->flags);
 else if (desc->state & (1<<MD_DISK_SYNC)) {
1147 set_bit(In_sync, &rdev->flags);
1148 rdev->raid_disk = desc->raid_disk;
1149 rdev->saved_raid_disk = desc->raid_disk;
1150 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
 /*
  * Active but not in sync implies recovery up to the reshape position.
  * We don't know exactly where that is, so set recovery_offset to zero
  * for now.
  */
1154 if (mddev->minor_version >= 91) {
1155 rdev->recovery_offset = 0;
1156 rdev->raid_disk = desc->raid_disk;
1157 }
1158 }
1159 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1160 set_bit(WriteMostly, &rdev->flags);
1161 } else
1162 set_bit(In_sync, &rdev->flags);
1163 return 0;
1164}

/*
 * sync_super for 0.90.0 metadata
 */
1169static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1170{
1171 mdp_super_t *sb;
1172 struct md_rdev *rdev2;
1173 int next_spare = mddev->raid_disks;
 /*
  * Make rdev->sb match the mddev data:
  * 1/ zero out the disk table;
  * 2/ add an entry for each disk, tracking the highest desc_nr handed to
  *    spares (next_spare);
  * 3/ any empty slot below next_spare becomes removed/faulty.
  *
  * disks[0] starts out REMOVED because we cannot tell from the other
  * fields whether it has been initialised or not.
  */
1185 int i;
1186 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1187
1188 rdev->sb_size = MD_SB_BYTES;
1189
1190 sb = page_address(rdev->sb_page);
1191
1192 memset(sb, 0, sizeof(*sb));
1193
1194 sb->md_magic = MD_SB_MAGIC;
1195 sb->major_version = mddev->major_version;
1196 sb->patch_version = mddev->patch_version;
1197 sb->gvalid_words = 0;
1198 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1199 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1200 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1201 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1202
1203 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1204 sb->level = mddev->level;
1205 sb->size = mddev->dev_sectors / 2;
1206 sb->raid_disks = mddev->raid_disks;
1207 sb->md_minor = mddev->md_minor;
1208 sb->not_persistent = 0;
1209 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1210 sb->state = 0;
1211 sb->events_hi = (mddev->events>>32);
1212 sb->events_lo = (u32)mddev->events;
1213
1214 if (mddev->reshape_position == MaxSector)
1215 sb->minor_version = 90;
1216 else {
1217 sb->minor_version = 91;
1218 sb->reshape_position = mddev->reshape_position;
1219 sb->new_level = mddev->new_level;
1220 sb->delta_disks = mddev->delta_disks;
1221 sb->new_layout = mddev->new_layout;
1222 sb->new_chunk = mddev->new_chunk_sectors << 9;
1223 }
1224 mddev->minor_version = sb->minor_version;
1225 if (mddev->in_sync)
1226 {
1227 sb->recovery_cp = mddev->recovery_cp;
1228 sb->cp_events_hi = (mddev->events>>32);
1229 sb->cp_events_lo = (u32)mddev->events;
1230 if (mddev->recovery_cp == MaxSector)
1231 sb->state = (1<< MD_SB_CLEAN);
1232 } else
1233 sb->recovery_cp = 0;
1234
1235 sb->layout = mddev->layout;
1236 sb->chunk_size = mddev->chunk_sectors << 9;
1237
1238 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1239 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1240
1241 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1242 rdev_for_each(rdev2, mddev) {
1243 mdp_disk_t *d;
1244 int desc_nr;
1245 int is_active = test_bit(In_sync, &rdev2->flags);
1246
1247 if (rdev2->raid_disk >= 0 &&
1248 sb->minor_version >= 91)
 /*
  * There is nowhere in 0.90 metadata to store recovery_offset, but if it
  * is not below the reshape position we can piggy-back on that.
  */
1253 is_active = 1;
1254 if (rdev2->raid_disk < 0 ||
1255 test_bit(Faulty, &rdev2->flags))
1256 is_active = 0;
1257 if (is_active)
1258 desc_nr = rdev2->raid_disk;
1259 else
1260 desc_nr = next_spare++;
1261 rdev2->desc_nr = desc_nr;
1262 d = &sb->disks[rdev2->desc_nr];
1263 nr_disks++;
1264 d->number = rdev2->desc_nr;
1265 d->major = MAJOR(rdev2->bdev->bd_dev);
1266 d->minor = MINOR(rdev2->bdev->bd_dev);
1267 if (is_active)
1268 d->raid_disk = rdev2->raid_disk;
1269 else
1270 d->raid_disk = rdev2->desc_nr;
1271 if (test_bit(Faulty, &rdev2->flags))
1272 d->state = (1<<MD_DISK_FAULTY);
1273 else if (is_active) {
1274 d->state = (1<<MD_DISK_ACTIVE);
1275 if (test_bit(In_sync, &rdev2->flags))
1276 d->state |= (1<<MD_DISK_SYNC);
1277 active++;
1278 working++;
1279 } else {
1280 d->state = 0;
1281 spare++;
1282 working++;
1283 }
1284 if (test_bit(WriteMostly, &rdev2->flags))
1285 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1286 }
1287
1288 for (i=0 ; i < mddev->raid_disks ; i++) {
1289 mdp_disk_t *d = &sb->disks[i];
1290 if (d->state == 0 && d->number == 0) {
1291 d->number = i;
1292 d->raid_disk = i;
1293 d->state = (1<<MD_DISK_REMOVED);
1294 d->state |= (1<<MD_DISK_FAULTY);
1295 failed++;
1296 }
1297 }
1298 sb->nr_disks = nr_disks;
1299 sb->active_disks = active;
1300 sb->working_disks = working;
1301 sb->failed_disks = failed;
1302 sb->spare_disks = spare;
1303
1304 sb->this_disk = sb->disks[rdev->desc_nr];
1305 sb->sb_csum = calc_sb_csum(sb);
1306}

/*
 * rdev_size_change for 0.90.0 metadata
 */
1311static unsigned long long
1312super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1313{
1314 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1315 return 0;
1316 if (rdev->mddev->bitmap_info.offset)
1317 return 0;
1318 rdev->sb_start = calc_dev_sboffset(rdev);
1319 if (!num_sectors || num_sectors > rdev->sb_start)
1320 num_sectors = rdev->sb_start;
 /* 0.90 metadata cannot record component sizes above 4TB, so clamp. */
1324 if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
1325 rdev->mddev->level >= 1)
1326 num_sectors = (sector_t)(2ULL << 32) - 2;
1327 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1328 rdev->sb_page);
1329 md_super_wait(rdev->mddev);
1330 return num_sectors;
1331}
1332
1333static int
1334super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1335{
 /* non-zero data offsets are not possible with 0.90 metadata */
1337 return new_offset == 0;
1338}

/*
 * version 1 superblock
 */
1344static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1345{
1346 __le32 disk_csum;
1347 u32 csum;
1348 unsigned long long newcsum;
1349 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1350 __le32 *isuper = (__le32*)sb;
1351
1352 disk_csum = sb->sb_csum;
1353 sb->sb_csum = 0;
1354 newcsum = 0;
1355 for (; size >= 4; size -= 4)
1356 newcsum += le32_to_cpu(*isuper++);
1357
1358 if (size == 2)
1359 newcsum += le16_to_cpu(*(__le16*) isuper);
1360
1361 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1362 sb->sb_csum = disk_csum;
1363 return cpu_to_le32(csum);
1364}
1365
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
 struct mdp_superblock_1 *sb;
1368 struct mdp_superblock_1 *sb;
1369 int ret;
1370 sector_t sb_start;
1371 sector_t sectors;
1372 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1373 int bmask;

 /*
  * Calculate the position of the superblock in 512-byte sectors.
  * It is always aligned to 4K and, depending on minor_version, lives:
  * 0: at least 8K, but less than 12K, from the end of the device
  * 1: at the start of the device
  * 2: 4K from the start of the device
  */
1383 switch(minor_version) {
1384 case 0:
1385 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1386 sb_start -= 8*2;
1387 sb_start &= ~(sector_t)(4*2-1);
1388 break;
1389 case 1:
1390 sb_start = 0;
1391 break;
1392 case 2:
1393 sb_start = 8;
1394 break;
1395 default:
1396 return -EINVAL;
1397 }
1398 rdev->sb_start = sb_start;

 /*
  * The superblock is rarely larger than 1K, but it can be larger, and it
  * is safe to read 4K, so do that.
  */
1403 ret = read_disk_sb(rdev, 4096);
1404 if (ret) return ret;
1405
1406 sb = page_address(rdev->sb_page);
1407
1408 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1409 sb->major_version != cpu_to_le32(1) ||
1410 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1411 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1412 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1413 return -EINVAL;
1414
1415 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1416 printk("md: invalid superblock checksum on %s\n",
1417 bdevname(rdev->bdev,b));
1418 return -EINVAL;
1419 }
1420 if (le64_to_cpu(sb->data_size) < 10) {
1421 printk("md: data_size too small on %s\n",
1422 bdevname(rdev->bdev,b));
1423 return -EINVAL;
1424 }
1425 if (sb->pad0 ||
1426 sb->pad3[0] ||
1427 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
 /* some padding is non-zero: might be a newer, unknown feature */
1429 return -EINVAL;
1430
1431 rdev->preferred_minor = 0xffff;
1432 rdev->data_offset = le64_to_cpu(sb->data_offset);
1433 rdev->new_data_offset = rdev->data_offset;
1434 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1435 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1436 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1437 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1438
1439 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1440 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1441 if (rdev->sb_size & bmask)
1442 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1443
1444 if (minor_version
1445 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1446 return -EINVAL;
1447 if (minor_version
1448 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1449 return -EINVAL;
1450
1451 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1452 rdev->desc_nr = -1;
1453 else
1454 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1455
1456 if (!rdev->bb_page) {
1457 rdev->bb_page = alloc_page(GFP_KERNEL);
1458 if (!rdev->bb_page)
1459 return -ENOMEM;
1460 }
1461 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1462 rdev->badblocks.count == 0) {
 /*
  * Need to load the bad block list.
  * Currently we limit it to one page.
  */
1466 s32 offset;
1467 sector_t bb_sector;
1468 u64 *bbp;
1469 int i;
1470 int sectors = le16_to_cpu(sb->bblog_size);
1471 if (sectors > (PAGE_SIZE / 512))
1472 return -EINVAL;
1473 offset = le32_to_cpu(sb->bblog_offset);
1474 if (offset == 0)
1475 return -EINVAL;
1476 bb_sector = (long long)offset;
1477 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1478 rdev->bb_page, REQ_OP_READ, 0, true))
1479 return -EIO;
1480 bbp = (u64 *)page_address(rdev->bb_page);
1481 rdev->badblocks.shift = sb->bblog_shift;
1482 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1483 u64 bb = le64_to_cpu(*bbp);
1484 int count = bb & (0x3ff);
1485 u64 sector = bb >> 10;
1486 sector <<= sb->bblog_shift;
1487 count <<= sb->bblog_shift;
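 /* an all-ones entry terminates the bad block table */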
1488 if (bb + 1 == 0)
1489 break;
1490 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1491 return -EINVAL;
1492 }
1493 } else if (sb->bblog_offset != 0)
1494 rdev->badblocks.shift = 0;
1495
1496 if (!refdev) {
1497 ret = 1;
1498 } else {
1499 __u64 ev1, ev2;
1500 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1501
1502 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1503 sb->level != refsb->level ||
1504 sb->layout != refsb->layout ||
1505 sb->chunksize != refsb->chunksize) {
1506 printk(KERN_WARNING "md: %s has strangely different"
1507 " superblock to %s\n",
1508 bdevname(rdev->bdev,b),
1509 bdevname(refdev->bdev,b2));
1510 return -EINVAL;
1511 }
1512 ev1 = le64_to_cpu(sb->events);
1513 ev2 = le64_to_cpu(refsb->events);
1514
1515 if (ev1 > ev2)
1516 ret = 1;
1517 else
1518 ret = 0;
1519 }
1520 if (minor_version) {
1521 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1522 sectors -= rdev->data_offset;
1523 } else
1524 sectors = rdev->sb_start;
1525 if (sectors < le64_to_cpu(sb->data_size))
1526 return -EINVAL;
1527 rdev->sectors = le64_to_cpu(sb->data_size);
1528 return ret;
1529}
1530
1531static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1532{
1533 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1534 __u64 ev1 = le64_to_cpu(sb->events);
1535
1536 rdev->raid_disk = -1;
1537 clear_bit(Faulty, &rdev->flags);
1538 clear_bit(In_sync, &rdev->flags);
1539 clear_bit(Bitmap_sync, &rdev->flags);
1540 clear_bit(WriteMostly, &rdev->flags);
1541
1542 if (mddev->raid_disks == 0) {
1543 mddev->major_version = 1;
1544 mddev->patch_version = 0;
1545 mddev->external = 0;
1546 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1547 mddev->ctime = le64_to_cpu(sb->ctime);
1548 mddev->utime = le64_to_cpu(sb->utime);
1549 mddev->level = le32_to_cpu(sb->level);
1550 mddev->clevel[0] = 0;
1551 mddev->layout = le32_to_cpu(sb->layout);
1552 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1553 mddev->dev_sectors = le64_to_cpu(sb->size);
1554 mddev->events = ev1;
1555 mddev->bitmap_info.offset = 0;
1556 mddev->bitmap_info.space = 0;
 /*
  * Default bitmap location is 1K after the superblock,
  * using 3K, for a 4K total.
  */
1560 mddev->bitmap_info.default_offset = 1024 >> 9;
1561 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1562 mddev->reshape_backwards = 0;
1563
1564 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1565 memcpy(mddev->uuid, sb->set_uuid, 16);
1566
1567 mddev->max_disks = (4096-256)/2;
1568
1569 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1570 mddev->bitmap_info.file == NULL) {
1571 mddev->bitmap_info.offset =
1572 (__s32)le32_to_cpu(sb->bitmap_offset);
 /*
  * The metadata doesn't record how much space is available.
  * For 1.0, assume we can use up to the superblock if the bitmap sits
  * before it, else up to 4K beyond the superblock.
  * For other versions, assume no change is possible.
  */
1578 if (mddev->minor_version > 0)
1579 mddev->bitmap_info.space = 0;
1580 else if (mddev->bitmap_info.offset > 0)
1581 mddev->bitmap_info.space =
1582 8 - mddev->bitmap_info.offset;
1583 else
1584 mddev->bitmap_info.space =
1585 -mddev->bitmap_info.offset;
1586 }
1587
1588 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1589 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1590 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1591 mddev->new_level = le32_to_cpu(sb->new_level);
1592 mddev->new_layout = le32_to_cpu(sb->new_layout);
1593 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1594 if (mddev->delta_disks < 0 ||
1595 (mddev->delta_disks == 0 &&
1596 (le32_to_cpu(sb->feature_map)
1597 & MD_FEATURE_RESHAPE_BACKWARDS)))
1598 mddev->reshape_backwards = 1;
1599 } else {
1600 mddev->reshape_position = MaxSector;
1601 mddev->delta_disks = 0;
1602 mddev->new_level = mddev->level;
1603 mddev->new_layout = mddev->layout;
1604 mddev->new_chunk_sectors = mddev->chunk_sectors;
1605 }
1606
1607 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1608 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1609 } else if (mddev->pers == NULL) {
 /*
  * Insist on a good event counter while assembling, except for spares
  * (which don't need an event count).
  */
1612 ++ev1;
1613 if (rdev->desc_nr >= 0 &&
1614 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1615 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1616 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1617 if (ev1 < mddev->events)
1618 return -EINVAL;
1619 } else if (mddev->bitmap) {
 /*
  * If adding to an array with a bitmap, we can accept an older device,
  * but not too old.
  */
1623 if (ev1 < mddev->bitmap->events_cleared)
1624 return 0;
1625 if (ev1 < mddev->events)
1626 set_bit(Bitmap_sync, &rdev->flags);
1627 } else {
1628 if (ev1 < mddev->events)
1629
1630 return 0;
1631 }
1632 if (mddev->level != LEVEL_MULTIPATH) {
1633 int role;
1634 if (rdev->desc_nr < 0 ||
1635 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1636 role = MD_DISK_ROLE_SPARE;
1637 rdev->desc_nr = -1;
1638 } else
1639 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1640 switch(role) {
1641 case MD_DISK_ROLE_SPARE:
1642 break;
1643 case MD_DISK_ROLE_FAULTY:
1644 set_bit(Faulty, &rdev->flags);
1645 break;
1646 case MD_DISK_ROLE_JOURNAL:
1647 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1648
1649 printk(KERN_WARNING
1650 "md: journal device provided without journal feature, ignoring the device\n");
1651 return -EINVAL;
1652 }
1653 set_bit(Journal, &rdev->flags);
1654 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1655 rdev->raid_disk = 0;
1656 break;
1657 default:
1658 rdev->saved_raid_disk = role;
1659 if ((le32_to_cpu(sb->feature_map) &
1660 MD_FEATURE_RECOVERY_OFFSET)) {
1661 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1662 if (!(le32_to_cpu(sb->feature_map) &
1663 MD_FEATURE_RECOVERY_BITMAP))
1664 rdev->saved_raid_disk = -1;
1665 } else
1666 set_bit(In_sync, &rdev->flags);
1667 rdev->raid_disk = role;
1668 break;
1669 }
1670 if (sb->devflags & WriteMostly1)
1671 set_bit(WriteMostly, &rdev->flags);
1672 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1673 set_bit(Replacement, &rdev->flags);
1674 } else
1675 set_bit(In_sync, &rdev->flags);
1676
1677 return 0;
1678}
1679
1680static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1681{
1682 struct mdp_superblock_1 *sb;
1683 struct md_rdev *rdev2;
1684 int max_dev, i;
1685
1686
1687 sb = page_address(rdev->sb_page);
1688
1689 sb->feature_map = 0;
1690 sb->pad0 = 0;
1691 sb->recovery_offset = cpu_to_le64(0);
1692 memset(sb->pad3, 0, sizeof(sb->pad3));
1693
1694 sb->utime = cpu_to_le64((__u64)mddev->utime);
1695 sb->events = cpu_to_le64(mddev->events);
1696 if (mddev->in_sync)
1697 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1698 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1699 sb->resync_offset = cpu_to_le64(MaxSector);
1700 else
1701 sb->resync_offset = cpu_to_le64(0);
1702
1703 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1704
1705 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1706 sb->size = cpu_to_le64(mddev->dev_sectors);
1707 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1708 sb->level = cpu_to_le32(mddev->level);
1709 sb->layout = cpu_to_le32(mddev->layout);
1710
1711 if (test_bit(WriteMostly, &rdev->flags))
1712 sb->devflags |= WriteMostly1;
1713 else
1714 sb->devflags &= ~WriteMostly1;
1715 sb->data_offset = cpu_to_le64(rdev->data_offset);
1716 sb->data_size = cpu_to_le64(rdev->sectors);
1717
1718 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1719 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1720 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1721 }
1722
1723 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1724 !test_bit(In_sync, &rdev->flags)) {
1725 sb->feature_map |=
1726 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1727 sb->recovery_offset =
1728 cpu_to_le64(rdev->recovery_offset);
1729 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1730 sb->feature_map |=
1731 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1732 }
1733
1734 if (test_bit(Journal, &rdev->flags))
1735 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1736 if (test_bit(Replacement, &rdev->flags))
1737 sb->feature_map |=
1738 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1739
1740 if (mddev->reshape_position != MaxSector) {
1741 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1742 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1743 sb->new_layout = cpu_to_le32(mddev->new_layout);
1744 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1745 sb->new_level = cpu_to_le32(mddev->new_level);
1746 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1747 if (mddev->delta_disks == 0 &&
1748 mddev->reshape_backwards)
1749 sb->feature_map
1750 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1751 if (rdev->new_data_offset != rdev->data_offset) {
1752 sb->feature_map
1753 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1754 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1755 - rdev->data_offset));
1756 }
1757 }
1758
1759 if (mddev_is_clustered(mddev))
1760 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
1761
1762 if (rdev->badblocks.count == 0)
1763 ;
1764 else if (sb->bblog_offset == 0)
1765
1766 md_error(mddev, rdev);
1767 else {
1768 struct badblocks *bb = &rdev->badblocks;
1769 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1770 u64 *p = bb->page;
1771 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1772 if (bb->changed) {
1773 unsigned seq;
1774
1775retry:
1776 seq = read_seqbegin(&bb->lock);
1777
1778 memset(bbp, 0xff, PAGE_SIZE);
1779
1780 for (i = 0 ; i < bb->count ; i++) {
1781 u64 internal_bb = p[i];
1782 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1783 | BB_LEN(internal_bb));
1784 bbp[i] = cpu_to_le64(store_bb);
1785 }
1786 bb->changed = 0;
1787 if (read_seqretry(&bb->lock, seq))
1788 goto retry;
1789
1790 bb->sector = (rdev->sb_start +
1791 (int)le32_to_cpu(sb->bblog_offset));
1792 bb->size = le16_to_cpu(sb->bblog_size);
1793 }
1794 }
1795
1796 max_dev = 0;
1797 rdev_for_each(rdev2, mddev)
1798 if (rdev2->desc_nr+1 > max_dev)
1799 max_dev = rdev2->desc_nr+1;
1800
1801 if (max_dev > le32_to_cpu(sb->max_dev)) {
1802 int bmask;
1803 sb->max_dev = cpu_to_le32(max_dev);
1804 rdev->sb_size = max_dev * 2 + 256;
1805 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1806 if (rdev->sb_size & bmask)
1807 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1808 } else
1809 max_dev = le32_to_cpu(sb->max_dev);
1810
1811 for (i=0; i<max_dev;i++)
1812 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1813
1814 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
1815 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
1816
1817 rdev_for_each(rdev2, mddev) {
1818 i = rdev2->desc_nr;
1819 if (test_bit(Faulty, &rdev2->flags))
1820 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1821 else if (test_bit(In_sync, &rdev2->flags))
1822 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1823 else if (test_bit(Journal, &rdev2->flags))
1824 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
1825 else if (rdev2->raid_disk >= 0)
1826 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1827 else
1828 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
1829 }
1830
1831 sb->sb_csum = calc_sb_1_csum(sb);
1832}
1833
1834static unsigned long long
1835super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1836{
1837 struct mdp_superblock_1 *sb;
1838 sector_t max_sectors;
1839 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1840 return 0;
1841 if (rdev->data_offset != rdev->new_data_offset)
1842 return 0;
1843 if (rdev->sb_start < rdev->data_offset) {
1844
1845 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1846 max_sectors -= rdev->data_offset;
1847 if (!num_sectors || num_sectors > max_sectors)
1848 num_sectors = max_sectors;
1849 } else if (rdev->mddev->bitmap_info.offset) {
1850
1851 return 0;
1852 } else {
1853
1854 sector_t sb_start;
1855 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1856 sb_start &= ~(sector_t)(4*2 - 1);
1857 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1858 if (!num_sectors || num_sectors > max_sectors)
1859 num_sectors = max_sectors;
1860 rdev->sb_start = sb_start;
1861 }
1862 sb = page_address(rdev->sb_page);
1863 sb->data_size = cpu_to_le64(num_sectors);
1864 sb->super_offset = rdev->sb_start;
1865 sb->sb_csum = calc_sb_1_csum(sb);
1866 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1867 rdev->sb_page);
1868 md_super_wait(rdev->mddev);
1869 return num_sectors;
1870
1871}
1872
1873static int
1874super_1_allow_new_offset(struct md_rdev *rdev,
1875 unsigned long long new_offset)
1876{
1877
1878 struct bitmap *bitmap;
1879 if (new_offset >= rdev->data_offset)
1880 return 1;

 /*
  * With 1.0 metadata (minor_version == 0) there is no metadata before the
  * data, so the data can always be moved back.
  */
1884 if (rdev->mddev->minor_version == 0)
1885 return 1;
 /*
  * Otherwise we must be sure not to step on any metadata, so stay:
  *  - 36K beyond the start of the superblock,
  *  - beyond the end of the bad block log,
  *  - beyond the write-intent bitmap.
  */
1893 if (rdev->sb_start + (32+4)*2 > new_offset)
1894 return 0;
1895 bitmap = rdev->mddev->bitmap;
1896 if (bitmap && !rdev->mddev->bitmap_info.file &&
1897 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1898 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1899 return 0;
1900 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1901 return 0;
1902
1903 return 1;
1904}
1905
1906static struct super_type super_types[] = {
1907 [0] = {
1908 .name = "0.90.0",
1909 .owner = THIS_MODULE,
1910 .load_super = super_90_load,
1911 .validate_super = super_90_validate,
1912 .sync_super = super_90_sync,
1913 .rdev_size_change = super_90_rdev_size_change,
1914 .allow_new_offset = super_90_allow_new_offset,
1915 },
1916 [1] = {
1917 .name = "md-1",
1918 .owner = THIS_MODULE,
1919 .load_super = super_1_load,
1920 .validate_super = super_1_validate,
1921 .sync_super = super_1_sync,
1922 .rdev_size_change = super_1_rdev_size_change,
1923 .allow_new_offset = super_1_allow_new_offset,
1924 },
1925};
1926
1927static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1928{
1929 if (mddev->sync_super) {
1930 mddev->sync_super(mddev, rdev);
1931 return;
1932 }
1933
1934 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1935
1936 super_types[mddev->major_version].sync_super(mddev, rdev);
1937}
1938
1939static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1940{
1941 struct md_rdev *rdev, *rdev2;
1942
1943 rcu_read_lock();
1944 rdev_for_each_rcu(rdev, mddev1) {
1945 if (test_bit(Faulty, &rdev->flags) ||
1946 test_bit(Journal, &rdev->flags) ||
1947 rdev->raid_disk == -1)
1948 continue;
1949 rdev_for_each_rcu(rdev2, mddev2) {
1950 if (test_bit(Faulty, &rdev2->flags) ||
1951 test_bit(Journal, &rdev2->flags) ||
1952 rdev2->raid_disk == -1)
1953 continue;
1954 if (rdev->bdev->bd_contains ==
1955 rdev2->bdev->bd_contains) {
1956 rcu_read_unlock();
1957 return 1;
1958 }
1959 }
1960 }
1961 rcu_read_unlock();
1962 return 0;
1963}
1964
1965static LIST_HEAD(pending_raid_disks);

/*
 * Try to register a data integrity profile for an mddev.
 *
 * This succeeds only if every active member device exposes an integrity
 * profile and all of those profiles are compatible.
 */
1974int md_integrity_register(struct mddev *mddev)
1975{
1976 struct md_rdev *rdev, *reference = NULL;
1977
1978 if (list_empty(&mddev->disks))
1979 return 0;
1980 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1981 return 0;
1982 rdev_for_each(rdev, mddev) {
 /* skip faulty devices and spares */
1984 if (test_bit(Faulty, &rdev->flags))
1985 continue;
1986 if (rdev->raid_disk < 0)
1987 continue;
1988 if (!reference) {
 /* use the first qualifying device as the reference profile */
1990 reference = rdev;
1991 continue;
1992 }
 /* does this rdev's integrity profile match the reference? */
1994 if (blk_integrity_compare(reference->bdev->bd_disk,
1995 rdev->bdev->bd_disk) < 0)
1996 return -EINVAL;
1997 }
1998 if (!reference || !bdev_get_integrity(reference->bdev))
1999 return 0;

 /*
  * All component devices are integrity capable and the profiles match;
  * register the common profile for the md device itself.
  */
2004 blk_integrity_register(mddev->gendisk,
2005 bdev_get_integrity(reference->bdev));
2006
2007 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2008 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2009 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2010 mdname(mddev));
2011 return -EINVAL;
2012 }
2013 return 0;
2014}
2015EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile of the array.
 */
2021int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2022{
2023 struct blk_integrity *bi_rdev;
2024 struct blk_integrity *bi_mddev;
2025 char name[BDEVNAME_SIZE];
2026
2027 if (!mddev->gendisk)
2028 return 0;
2029
2030 bi_rdev = bdev_get_integrity(rdev->bdev);
2031 bi_mddev = blk_get_integrity(mddev->gendisk);
2032
2033 if (!bi_mddev)
2034 return 0;
2035
2036 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2037 printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
2038 mdname(mddev), bdevname(rdev->bdev, name));
2039 return -ENXIO;
2040 }
2041
2042 return 0;
2043}
2044EXPORT_SYMBOL(md_integrity_add_rdev);
2045
2046static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2047{
2048 char b[BDEVNAME_SIZE];
2049 struct kobject *ko;
2050 int err;

 /* prevent duplicates */
2053 if (find_rdev(mddev, rdev->bdev->bd_dev))
2054 return -EEXIST;

 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2057 if (!test_bit(Journal, &rdev->flags) &&
2058 rdev->sectors &&
2059 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2060 if (mddev->pers) {
 /*
  * Cannot change the size of an active array, so fail.
  * If level <= 0 (e.g. linear) the sizes need not match, so we don't care.
  */
2065 if (mddev->level > 0)
2066 return -ENOSPC;
2067 } else
2068 mddev->dev_sectors = rdev->sectors;
2069 }

 /*
  * Verify rdev->desc_nr is unique.
  * If it is -1, assign a free number; otherwise check that the number is
  * not already in use.
  */
2075 rcu_read_lock();
2076 if (rdev->desc_nr < 0) {
2077 int choice = 0;
2078 if (mddev->pers)
2079 choice = mddev->raid_disks;
2080 while (md_find_rdev_nr_rcu(mddev, choice))
2081 choice++;
2082 rdev->desc_nr = choice;
2083 } else {
2084 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2085 rcu_read_unlock();
2086 return -EBUSY;
2087 }
2088 }
2089 rcu_read_unlock();
2090 if (!test_bit(Journal, &rdev->flags) &&
2091 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2092 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2093 mdname(mddev), mddev->max_disks);
2094 return -EBUSY;
2095 }
2096 bdevname(rdev->bdev,b);
2097 strreplace(b, '/', '!');
2098
2099 rdev->mddev = mddev;
2100 printk(KERN_INFO "md: bind<%s>\n", b);
2101
2102 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2103 goto fail;
2104
2105 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2106 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2107 ;
2108 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2109
2110 list_add_rcu(&rdev->same_set, &mddev->disks);
2111 bd_link_disk_holder(rdev->bdev, mddev->gendisk);

 /* May as well allow recovery to be retried once */
2114 mddev->recovery_disabled++;
2115
2116 return 0;
2117
2118 fail:
2119 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2120 b, mdname(mddev));
2121 return err;
2122}
2123
2124static void md_delayed_delete(struct work_struct *ws)
2125{
2126 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2127 kobject_del(&rdev->kobj);
2128 kobject_put(&rdev->kobj);
2129}
2130
2131static void unbind_rdev_from_array(struct md_rdev *rdev)
2132{
2133 char b[BDEVNAME_SIZE];
2134
2135 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2136 list_del_rcu(&rdev->same_set);
2137 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2138 rdev->mddev = NULL;
2139 sysfs_remove_link(&rdev->kobj, "block");
2140 sysfs_put(rdev->sysfs_state);
2141 rdev->sysfs_state = NULL;
2142 rdev->badblocks.count = 0;

 /*
  * Delete the kobject from a workqueue: synchronize_rcu() first makes sure
  * no rcu-protected walker of the disk list still sees this rdev.
  */
2147 synchronize_rcu();
2148 INIT_WORK(&rdev->del_work, md_delayed_delete);
2149 kobject_get(&rdev->kobj);
2150 queue_work(md_misc_wq, &rdev->del_work);
2151}

/*
 * Claim the block device exclusively so it cannot be mounted, repartitioned
 * or grabbed by another RAID array while it belongs to this one.
 */
2158static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2159{
2160 int err = 0;
2161 struct block_device *bdev;
2162 char b[BDEVNAME_SIZE];
2163
2164 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2165 shared ? (struct md_rdev *)lock_rdev : rdev);
2166 if (IS_ERR(bdev)) {
2167 printk(KERN_ERR "md: could not open %s.\n",
2168 __bdevname(dev, b));
2169 return PTR_ERR(bdev);
2170 }
2171 rdev->bdev = bdev;
2172 return err;
2173}
2174
2175static void unlock_rdev(struct md_rdev *rdev)
2176{
2177 struct block_device *bdev = rdev->bdev;
2178 rdev->bdev = NULL;
2179 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2180}
2181
2182void md_autodetect_dev(dev_t dev);
2183
2184static void export_rdev(struct md_rdev *rdev)
2185{
2186 char b[BDEVNAME_SIZE];
2187
2188 printk(KERN_INFO "md: export_rdev(%s)\n",
2189 bdevname(rdev->bdev,b));
2190 md_rdev_clear(rdev);
2191#ifndef MODULE
2192 if (test_bit(AutoDetected, &rdev->flags))
2193 md_autodetect_dev(rdev->bdev->bd_dev);
2194#endif
2195 unlock_rdev(rdev);
2196 kobject_put(&rdev->kobj);
2197}
2198
2199void md_kick_rdev_from_array(struct md_rdev *rdev)
2200{
2201 unbind_rdev_from_array(rdev);
2202 export_rdev(rdev);
2203}
2204EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2205
2206static void export_array(struct mddev *mddev)
2207{
2208 struct md_rdev *rdev;
2209
2210 while (!list_empty(&mddev->disks)) {
2211 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2212 same_set);
2213 md_kick_rdev_from_array(rdev);
2214 }
2215 mddev->raid_disks = 0;
2216 mddev->major_version = 0;
2217}
2218
2219static void sync_sbs(struct mddev *mddev, int nospares)
2220{
 /*
  * Update each superblock (in-memory image), but if we are allowed to,
  * skip spares which already have the right event counter, or have one
  * earlier (which would mean they aren't being marked as dirty with the
  * rest of the array).
  */
2227 struct md_rdev *rdev;
2228 rdev_for_each(rdev, mddev) {
2229 if (rdev->sb_events == mddev->events ||
2230 (nospares &&
2231 rdev->raid_disk < 0 &&
2232 rdev->sb_events+1 == mddev->events)) {
 /* Don't update this superblock */
2234 rdev->sb_loaded = 2;
2235 } else {
2236 sync_super(mddev, rdev);
2237 rdev->sb_loaded = 1;
2238 }
2239 }
2240}
2241
2242static bool does_sb_need_changing(struct mddev *mddev)
2243{
2244 struct md_rdev *rdev;
2245 struct mdp_superblock_1 *sb;
2246 int role;

 /* find a usable, non-faulty member whose superblock we can inspect */
2249 rdev_for_each(rdev, mddev)
2250 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2251 break;

 /* no usable device found */
2254 if (!rdev)
2255 return false;
2256
2257 sb = page_address(rdev->sb_page);
2258
2259 rdev_for_each(rdev, mddev) {
2260 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2261
2262 if (role == 0xffff && rdev->raid_disk >=0 &&
2263 !test_bit(Faulty, &rdev->flags))
2264 return true;
2265
2266 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2267 return true;
2268 }
2269
2270
2271 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2272 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2273 (mddev->layout != le64_to_cpu(sb->layout)) ||
2274 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2275 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2276 return true;
2277
2278 return false;
2279}
2280
2281void md_update_sb(struct mddev *mddev, int force_change)
2282{
2283 struct md_rdev *rdev;
2284 int sync_req;
2285 int nospares = 0;
2286 int any_badblocks_changed = 0;
2287 int ret = -1;
2288
2289 if (mddev->ro) {
2290 if (force_change)
2291 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2292 return;
2293 }
2294
2295repeat:
2296 if (mddev_is_clustered(mddev)) {
2297 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2298 force_change = 1;
2299 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2300 nospares = 1;
2301 ret = md_cluster_ops->metadata_update_start(mddev);
2302 /* Has someone else already updated the superblock? */
2303 if (!does_sb_need_changing(mddev)) {
2304 if (ret == 0)
2305 md_cluster_ops->metadata_update_cancel(mddev);
2306 bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2307 BIT(MD_CHANGE_DEVS) |
2308 BIT(MD_CHANGE_CLEAN));
2309 return;
2310 }
2311 }
2312
2313 /* First make sure individual recovery_offsets are correct */
2314 rdev_for_each(rdev, mddev) {
2315 if (rdev->raid_disk >= 0 &&
2316 mddev->delta_disks >= 0 &&
2317 !test_bit(Journal, &rdev->flags) &&
2318 !test_bit(In_sync, &rdev->flags) &&
2319 mddev->curr_resync_completed > rdev->recovery_offset)
2320 rdev->recovery_offset = mddev->curr_resync_completed;
2321
2322 }
2323 if (!mddev->persistent) {
2324 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2325 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2326 if (!mddev->external) {
2327 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2328 rdev_for_each(rdev, mddev) {
2329 if (rdev->badblocks.changed) {
2330 rdev->badblocks.changed = 0;
2331 ack_all_badblocks(&rdev->badblocks);
2332 md_error(mddev, rdev);
2333 }
2334 clear_bit(Blocked, &rdev->flags);
2335 clear_bit(BlockedBadBlocks, &rdev->flags);
2336 wake_up(&rdev->blocked_wait);
2337 }
2338 }
2339 wake_up(&mddev->sb_wait);
2340 return;
2341 }
2342
2343 spin_lock(&mddev->lock);
2344
2345 mddev->utime = ktime_get_real_seconds();
2346
2347 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2348 force_change = 1;
2349 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2350 /* just a clean<->dirty transition, possibly leave spares alone,
2351  * though if events isn't the right even/odd, we will have to do
2352  * spares after all
2353  */
2354 nospares = 1;
2355 if (force_change)
2356 nospares = 0;
2357 if (mddev->degraded)
2358 /* If the array is degraded, then skipping spares is both
2359  * dangerous and fairly pointless.
2360  * Dangerous because a device that was removed from the array
2361  * might have an event count that still looks up-to-date,
2362  * so it can be re-added without a resync.
2363  * Pointless because if there are any spares to skip,
2364  * then a recovery will happen and soon that array won't
2365  * be degraded any more and the spare can go back to sleep then.
2366  */
2367 nospares = 0;
2368
2369 sync_req = mddev->in_sync;
2370 /* If this is just a dirty<->clean transition, and the array is
2371  * clean and 'events' is odd, we can roll back to the previous
2372  * clean state */
2373 if (nospares
2374 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2375 && mddev->can_decrease_events
2376 && mddev->events != 1) {
2377 mddev->events--;
2378 mddev->can_decrease_events = 0;
2379 } else {
2380 /* otherwise we have to go forward and bump the event count */
2381 mddev->events ++;
2382 mddev->can_decrease_events = nospares;
2383 }
2384
2385 /*
2386  * This 64-bit counter should never wrap.
2387  * Either we are in around ~1 trillion A.C., assuming
2388  * 1 reboot per second, or we have a bug...
2389  */
2390 WARN_ON(mddev->events == 0);
2391
2392 rdev_for_each(rdev, mddev) {
2393 if (rdev->badblocks.changed)
2394 any_badblocks_changed++;
2395 if (test_bit(Faulty, &rdev->flags))
2396 set_bit(FaultRecorded, &rdev->flags);
2397 }
2398
2399 sync_sbs(mddev, nospares);
2400 spin_unlock(&mddev->lock);
2401
2402 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2403 mdname(mddev), mddev->in_sync);
2404
2405 bitmap_update_sb(mddev->bitmap);
2406 rdev_for_each(rdev, mddev) {
2407 char b[BDEVNAME_SIZE];
2408
2409 if (rdev->sb_loaded != 1)
2410 continue;
2411
2412 if (!test_bit(Faulty, &rdev->flags)) {
2413 md_super_write(mddev,rdev,
2414 rdev->sb_start, rdev->sb_size,
2415 rdev->sb_page);
2416 pr_debug("md: (write) %s's sb offset: %llu\n",
2417 bdevname(rdev->bdev, b),
2418 (unsigned long long)rdev->sb_start);
2419 rdev->sb_events = mddev->events;
2420 if (rdev->badblocks.size) {
2421 md_super_write(mddev, rdev,
2422 rdev->badblocks.sector,
2423 rdev->badblocks.size << 9,
2424 rdev->bb_page);
2425 rdev->badblocks.size = 0;
2426 }
2427
2428 } else
2429 pr_debug("md: %s (skipping faulty)\n",
2430 bdevname(rdev->bdev, b));
2431
2432 if (mddev->level == LEVEL_MULTIPATH)
2433 /* only need to write one superblock */
2434 break;
2435 }
2436 md_super_wait(mddev);
2437 /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write the super */
2438
2439 if (mddev_is_clustered(mddev) && ret == 0)
2440 md_cluster_ops->metadata_update_finish(mddev);
2441
2442 if (mddev->in_sync != sync_req ||
2443 !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2444 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
2445 /* have to write it out again */
2446 goto repeat;
2447 wake_up(&mddev->sb_wait);
2448 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2449 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2450
2451 rdev_for_each(rdev, mddev) {
2452 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2453 clear_bit(Blocked, &rdev->flags);
2454
2455 if (any_badblocks_changed)
2456 ack_all_badblocks(&rdev->badblocks);
2457 clear_bit(BlockedBadBlocks, &rdev->flags);
2458 wake_up(&rdev->blocked_wait);
2459 }
2460}
2461EXPORT_SYMBOL(md_update_sb);
2462
2463static int add_bound_rdev(struct md_rdev *rdev)
2464{
2465 struct mddev *mddev = rdev->mddev;
2466 int err = 0;
2467 bool add_journal = test_bit(Journal, &rdev->flags);
2468
2469 if (!mddev->pers->hot_remove_disk || add_journal) {
2470 /* If the personality cannot hot-remove disks (or this is a
2471  * journal device), the disk is validated and handed to
2472  * ->hot_add_disk() immediately rather than waiting for the
2473  * recovery thread to pick it up. */
2474 super_types[mddev->major_version].
2475 validate_super(mddev, rdev);
2476 if (add_journal)
2477 mddev_suspend(mddev);
2478 err = mddev->pers->hot_add_disk(mddev, rdev);
2479 if (add_journal)
2480 mddev_resume(mddev);
2481 if (err) {
2482 md_kick_rdev_from_array(rdev);
2483 return err;
2484 }
2485 }
2486 sysfs_notify_dirent_safe(rdev->sysfs_state);
2487
2488 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2489 if (mddev->degraded)
2490 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2491 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2492 md_new_event(mddev);
2493 md_wakeup_thread(mddev->thread);
2494 return 0;
2495}
2496
2497 /* words written to sysfs files may, or may not, be \n terminated.
2498  * We want to accept them either way, which is what cmd_match()
2499  * below implements. */
2500static int cmd_match(const char *cmd, const char *str)
2501{
2502 /* See if cmd, written into a sysfs file, matches
2503  * str.  They must either be the same, or cmd can
2504  * have a trailing newline.
2505  */
2506 while (*cmd && *str && *cmd == *str) {
2507 cmd++;
2508 str++;
2509 }
2510 if (*cmd == '\n')
2511 cmd++;
2512 if (*str || *cmd)
2513 return 0;
2514 return 1;
2515}
2516
2517struct rdev_sysfs_entry {
2518 struct attribute attr;
2519 ssize_t (*show)(struct md_rdev *, char *);
2520 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2521};
2522
2523static ssize_t
2524state_show(struct md_rdev *rdev, char *page)
2525{
2526 char *sep = "";
2527 size_t len = 0;
2528 unsigned long flags = ACCESS_ONCE(rdev->flags);
2529
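 /* Emit a comma-separated list of the states that apply, based on the
  * flags snapshot taken above so the output stays self-consistent even
  * if the flags change while we are printing. */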
2530 if (test_bit(Faulty, &flags) ||
2531 rdev->badblocks.unacked_exist) {
2532 len+= sprintf(page+len, "%sfaulty",sep);
2533 sep = ",";
2534 }
2535 if (test_bit(In_sync, &flags)) {
2536 len += sprintf(page+len, "%sin_sync",sep);
2537 sep = ",";
2538 }
2539 if (test_bit(Journal, &flags)) {
2540 len += sprintf(page+len, "%sjournal",sep);
2541 sep = ",";
2542 }
2543 if (test_bit(WriteMostly, &flags)) {
2544 len += sprintf(page+len, "%swrite_mostly",sep);
2545 sep = ",";
2546 }
2547 if (test_bit(Blocked, &flags) ||
2548 (rdev->badblocks.unacked_exist
2549 && !test_bit(Faulty, &flags))) {
2550 len += sprintf(page+len, "%sblocked", sep);
2551 sep = ",";
2552 }
2553 if (!test_bit(Faulty, &flags) &&
2554 !test_bit(Journal, &flags) &&
2555 !test_bit(In_sync, &flags)) {
2556 len += sprintf(page+len, "%sspare", sep);
2557 sep = ",";
2558 }
2559 if (test_bit(WriteErrorSeen, &flags)) {
2560 len += sprintf(page+len, "%swrite_error", sep);
2561 sep = ",";
2562 }
2563 if (test_bit(WantReplacement, &flags)) {
2564 len += sprintf(page+len, "%swant_replacement", sep);
2565 sep = ",";
2566 }
2567 if (test_bit(Replacement, &flags)) {
2568 len += sprintf(page+len, "%sreplacement", sep);
2569 sep = ",";
2570 }
2571
2572 return len+sprintf(page+len, "\n");
2573}
2574
2575static ssize_t
2576state_store(struct md_rdev *rdev, const char *buf, size_t len)
2577{
2578 /* Recognised values:
2579  *  faulty        - simulates an error on the device
2580  *  remove        - disconnects the device from the array
2581  *  writemostly / -writemostly - set/clear WriteMostly
2582  *  blocked       - sets the Blocked flag
2583  *  -blocked      - clears Blocked and BlockedBadBlocks, faulting the
2584  *                  device if unacknowledged bad blocks exist
2585  *  insync        - sets In_sync provided the device has no slot
2586  *  -insync       - clears In_sync for a device with a slot assigned
2587  *                  (inactive arrays only)
2588  *  write_error / -write_error - set/clear WriteErrorSeen
2589  *  want_replacement / -want_replacement, replacement / -replacement,
2590  *  re-add        - see the individual branches below */
2591 int err = -EINVAL;
2592 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2593 md_error(rdev->mddev, rdev);
2594 if (test_bit(Faulty, &rdev->flags))
2595 err = 0;
2596 else
2597 err = -EBUSY;
2598 } else if (cmd_match(buf, "remove")) {
2599 if (rdev->mddev->pers) {
2600 clear_bit(Blocked, &rdev->flags);
2601 remove_and_add_spares(rdev->mddev, rdev);
2602 }
2603 if (rdev->raid_disk >= 0)
2604 err = -EBUSY;
2605 else {
2606 struct mddev *mddev = rdev->mddev;
2607 err = 0;
2608 if (mddev_is_clustered(mddev))
2609 err = md_cluster_ops->remove_disk(mddev, rdev);
2610
2611 if (err == 0) {
2612 md_kick_rdev_from_array(rdev);
2613 if (mddev->pers)
2614 md_update_sb(mddev, 1);
2615 md_new_event(mddev);
2616 }
2617 }
2618 } else if (cmd_match(buf, "writemostly")) {
2619 set_bit(WriteMostly, &rdev->flags);
2620 err = 0;
2621 } else if (cmd_match(buf, "-writemostly")) {
2622 clear_bit(WriteMostly, &rdev->flags);
2623 err = 0;
2624 } else if (cmd_match(buf, "blocked")) {
2625 set_bit(Blocked, &rdev->flags);
2626 err = 0;
2627 } else if (cmd_match(buf, "-blocked")) {
2628 if (!test_bit(Faulty, &rdev->flags) &&
2629 rdev->badblocks.unacked_exist) {
2630 /* metadata handler doesn't understand badblocks,
2631  * so we need to fail the device
2632  */
2633 md_error(rdev->mddev, rdev);
2634 }
2635 clear_bit(Blocked, &rdev->flags);
2636 clear_bit(BlockedBadBlocks, &rdev->flags);
2637 wake_up(&rdev->blocked_wait);
2638 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2639 md_wakeup_thread(rdev->mddev->thread);
2640
2641 err = 0;
2642 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2643 set_bit(In_sync, &rdev->flags);
2644 err = 0;
2645 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2646 !test_bit(Journal, &rdev->flags)) {
2647 if (rdev->mddev->pers == NULL) {
2648 clear_bit(In_sync, &rdev->flags);
2649 rdev->saved_raid_disk = rdev->raid_disk;
2650 rdev->raid_disk = -1;
2651 err = 0;
2652 }
2653 } else if (cmd_match(buf, "write_error")) {
2654 set_bit(WriteErrorSeen, &rdev->flags);
2655 err = 0;
2656 } else if (cmd_match(buf, "-write_error")) {
2657 clear_bit(WriteErrorSeen, &rdev->flags);
2658 err = 0;
2659 } else if (cmd_match(buf, "want_replacement")) {
2660 /* Any non-spare device that is not a replacement can
2661  * become want_replacement at any time, but we then need to
2662  * check if recovery is needed.
2663  */
2664 if (rdev->raid_disk >= 0 &&
2665 !test_bit(Journal, &rdev->flags) &&
2666 !test_bit(Replacement, &rdev->flags))
2667 set_bit(WantReplacement, &rdev->flags);
2668 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2669 md_wakeup_thread(rdev->mddev->thread);
2670 err = 0;
2671 } else if (cmd_match(buf, "-want_replacement")) {
2672 /* Clearing 'want_replacement' is always allowed.
2673  * Once a replacement starts it is too late though.
2674  */
2675 err = 0;
2676 clear_bit(WantReplacement, &rdev->flags);
2677 } else if (cmd_match(buf, "replacement")) {
2678 /* Can only set a device as a replacement when the array has
2679  * not yet been started.  Once running, replacement is automatic
2680  * from spares, and this flag can no longer be set by hand.
2681  */
2682 if (rdev->mddev->pers)
2683 err = -EBUSY;
2684 else {
2685 set_bit(Replacement, &rdev->flags);
2686 err = 0;
2687 }
2688 } else if (cmd_match(buf, "-replacement")) {
2689 /* Similarly, can only clear Replacement before the array is started */
2690 if (rdev->mddev->pers)
2691 err = -EBUSY;
2692 else {
2693 clear_bit(Replacement, &rdev->flags);
2694 err = 0;
2695 }
2696 } else if (cmd_match(buf, "re-add")) {
2697 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2698 /* clear_bit is performed _after_ all the devices
2699  * have their local Faulty bit cleared. If any writes
2700  * happen in the meantime in the local node, they
2701  * will land in the local bitmap, which will be synced
2702  * by this node eventually
2703  */
2704 if (!mddev_is_clustered(rdev->mddev) ||
2705 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2706 clear_bit(Faulty, &rdev->flags);
2707 err = add_bound_rdev(rdev);
2708 }
2709 } else
2710 err = -EBUSY;
2711 }
2712 if (!err)
2713 sysfs_notify_dirent_safe(rdev->sysfs_state);
2714 return err ? err : len;
2715}
2716static struct rdev_sysfs_entry rdev_state =
2717__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2718
2719static ssize_t
2720errors_show(struct md_rdev *rdev, char *page)
2721{
2722 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2723}
2724
2725static ssize_t
2726errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2727{
2728 unsigned int n;
2729 int rv;
2730
2731 rv = kstrtouint(buf, 10, &n);
2732 if (rv < 0)
2733 return rv;
2734 atomic_set(&rdev->corrected_errors, n);
2735 return len;
2736}
2737static struct rdev_sysfs_entry rdev_errors =
2738__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2739
2740static ssize_t
2741slot_show(struct md_rdev *rdev, char *page)
2742{
2743 if (test_bit(Journal, &rdev->flags))
2744 return sprintf(page, "journal\n");
2745 else if (rdev->raid_disk < 0)
2746 return sprintf(page, "none\n");
2747 else
2748 return sprintf(page, "%d\n", rdev->raid_disk);
2749}
2750
2751static ssize_t
2752slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2753{
2754 int slot;
2755 int err;
2756
2757 if (test_bit(Journal, &rdev->flags))
2758 return -EBUSY;
2759 if (strncmp(buf, "none", 4)==0)
2760 slot = -1;
2761 else {
2762 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2763 if (err < 0)
2764 return err;
2765 }
2766 if (rdev->mddev->pers && slot == -1) {
2767 /* Setting 'slot' on an active array requires also
2768  * updating the 'rd%d' link, and communicating
2769  * with the personality with ->hot_*_disk.
2770  * For now we only support removing
2771  * failed/spare devices.  This normally happens automatically,
2772  * but not when the metadata is externally managed.
2773  */
2774 if (rdev->raid_disk == -1)
2775 return -EEXIST;
2776 /* personality does all needed checks */
2777 if (rdev->mddev->pers->hot_remove_disk == NULL)
2778 return -EINVAL;
2779 clear_bit(Blocked, &rdev->flags);
2780 remove_and_add_spares(rdev->mddev, rdev);
2781 if (rdev->raid_disk >= 0)
2782 return -EBUSY;
2783 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2784 md_wakeup_thread(rdev->mddev->thread);
2785 } else if (rdev->mddev->pers) {
2786 /* Activating a spare .. or possibly reactivating
2787  * if we ever get bitmaps working here.
2788  */
2789 int err;
2790
2791 if (rdev->raid_disk != -1)
2792 return -EBUSY;
2793
2794 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2795 return -EBUSY;
2796
2797 if (rdev->mddev->pers->hot_add_disk == NULL)
2798 return -EINVAL;
2799
2800 if (slot >= rdev->mddev->raid_disks &&
2801 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2802 return -ENOSPC;
2803
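 /* Remember the slot in saved_raid_disk if the device was in_sync
  * there before, so (presumably) the personality can attempt a
  * bitmap-based re-add instead of a full recovery. */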
2804 rdev->raid_disk = slot;
2805 if (test_bit(In_sync, &rdev->flags))
2806 rdev->saved_raid_disk = slot;
2807 else
2808 rdev->saved_raid_disk = -1;
2809 clear_bit(In_sync, &rdev->flags);
2810 clear_bit(Bitmap_sync, &rdev->flags);
2811 err = rdev->mddev->pers->
2812 hot_add_disk(rdev->mddev, rdev);
2813 if (err) {
2814 rdev->raid_disk = -1;
2815 return err;
2816 } else
2817 sysfs_notify_dirent_safe(rdev->sysfs_state);
2818 if (sysfs_link_rdev(rdev->mddev, rdev))
2819 /* failure here is OK */;
2820
2821 } else {
2822 if (slot >= rdev->mddev->raid_disks &&
2823 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2824 return -ENOSPC;
2825 rdev->raid_disk = slot;
2826 /* assume it is working */
2827 clear_bit(Faulty, &rdev->flags);
2828 clear_bit(WriteMostly, &rdev->flags);
2829 set_bit(In_sync, &rdev->flags);
2830 sysfs_notify_dirent_safe(rdev->sysfs_state);
2831 }
2832 return len;
2833}
2834
2835static struct rdev_sysfs_entry rdev_slot =
2836__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2837
2838static ssize_t
2839offset_show(struct md_rdev *rdev, char *page)
2840{
2841 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2842}
2843
2844static ssize_t
2845offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2846{
2847 unsigned long long offset;
2848 if (kstrtoull(buf, 10, &offset) < 0)
2849 return -EINVAL;
2850 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2851 return -EBUSY;
2852 if (rdev->sectors && rdev->mddev->external)
2853 /* Must set offset before size, so overlap checks
2854  * can be sane */
2855 return -EBUSY;
2856 rdev->data_offset = offset;
2857 rdev->new_data_offset = offset;
2858 return len;
2859}
2860
2861static struct rdev_sysfs_entry rdev_offset =
2862__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2863
2864static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2865{
2866 return sprintf(page, "%llu\n",
2867 (unsigned long long)rdev->new_data_offset);
2868}
2869
2870static ssize_t new_offset_store(struct md_rdev *rdev,
2871 const char *buf, size_t len)
2872{
2873 unsigned long long new_offset;
2874 struct mddev *mddev = rdev->mddev;
2875
2876 if (kstrtoull(buf, 10, &new_offset) < 0)
2877 return -EINVAL;
2878
2879 if (mddev->sync_thread ||
2880 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2881 return -EBUSY;
2882 if (new_offset == rdev->data_offset)
2883 /* reset is always permitted */
2884 ;
2885 else if (new_offset > rdev->data_offset) {
2886 /* must not push array size beyond rdev->sectors */
2887 if (new_offset - rdev->data_offset
2888 + mddev->dev_sectors > rdev->sectors)
2889 return -E2BIG;
2890 }
2891 /* Metadata worries about other space details. */
2892
2893 /* decreasing the offset is inconsistent with a backwards
2894  * reshape.
2895  */
2896 if (new_offset < rdev->data_offset &&
2897 mddev->reshape_backwards)
2898 return -EINVAL;
2899
2900 /* Increasing the offset is inconsistent with a forwards
2901  * reshape.
2902  */
2903 if (new_offset > rdev->data_offset &&
2904 !mddev->reshape_backwards)
2905 return -EINVAL;
2906
2907 if (mddev->pers && mddev->persistent &&
2908 !super_types[mddev->major_version]
2909 .allow_new_offset(rdev, new_offset))
2910 return -E2BIG;
2911 rdev->new_data_offset = new_offset;
2912 if (new_offset > rdev->data_offset)
2913 mddev->reshape_backwards = 1;
2914 else if (new_offset < rdev->data_offset)
2915 mddev->reshape_backwards = 0;
2916
2917 return len;
2918}
2919static struct rdev_sysfs_entry rdev_new_offset =
2920__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2921
2922static ssize_t
2923rdev_size_show(struct md_rdev *rdev, char *page)
2924{
2925 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2926}
2927
2928static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2929{
2930 /* check if two start/length pairs overlap */
2931 if (s1+l1 <= s2)
2932 return 0;
2933 if (s2+l2 <= s1)
2934 return 0;
2935 return 1;
2936}
2937
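/* Parse a block count written to sysfs (in 1K blocks) and convert it to
 * sectors, rejecting values that would overflow; e.g. writing "1024"
 * yields 2048 sectors. */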
2938static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2939{
2940 unsigned long long blocks;
2941 sector_t new;
2942
2943 if (kstrtoull(buf, 10, &blocks) < 0)
2944 return -EINVAL;
2945
2946 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2947 return -EINVAL; /* would overflow when converted to sectors */
2948
2949 new = blocks * 2;
2950 if (new != blocks * 2)
2951 return -EINVAL; /* blocks * 2 does not fit in sector_t */
2952
2953 *sectors = new;
2954 return 0;
2955}
2956
2957static ssize_t
2958rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2959{
2960 struct mddev *my_mddev = rdev->mddev;
2961 sector_t oldsectors = rdev->sectors;
2962 sector_t sectors;
2963
2964 if (test_bit(Journal, &rdev->flags))
2965 return -EBUSY;
2966 if (strict_blocks_to_sectors(buf, &sectors) < 0)
2967 return -EINVAL;
2968 if (rdev->data_offset != rdev->new_data_offset)
2969 return -EINVAL;
2970 if (my_mddev->pers && rdev->raid_disk >= 0) {
2971 if (my_mddev->persistent) {
2972 sectors = super_types[my_mddev->major_version].
2973 rdev_size_change(rdev, sectors);
2974 if (!sectors)
2975 return -EBUSY;
2976 } else if (!sectors)
2977 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2978 rdev->data_offset;
2979 if (!my_mddev->pers->resize)
2980 /* Cannot change size for RAID0 or Linear etc */
2981 return -EINVAL;
2982 }
2983 if (sectors < my_mddev->dev_sectors)
2984 return -EINVAL; /* component must fit device */
2985
2986 rdev->sectors = sectors;
2987 if (sectors > oldsectors && my_mddev->external) {
2988 /* Need to check that all other rdevs with the same
2989  * ->bdev do not overlap.  'rcu' is sufficient to walk
2990  * the rdev lists safely.
2991  * This check does not provide a hard guarantee, it
2992  * just helps avoid dangerous mistakes.
2993  */
2994 struct mddev *mddev;
2995 int overlap = 0;
2996 struct list_head *tmp;
2997
2998 rcu_read_lock();
2999 for_each_mddev(mddev, tmp) {
3000 struct md_rdev *rdev2;
3001
3002 rdev_for_each(rdev2, mddev)
3003 if (rdev->bdev == rdev2->bdev &&
3004 rdev != rdev2 &&
3005 overlaps(rdev->data_offset, rdev->sectors,
3006 rdev2->data_offset,
3007 rdev2->sectors)) {
3008 overlap = 1;
3009 break;
3010 }
3011 if (overlap) {
3012 mddev_put(mddev);
3013 break;
3014 }
3015 }
3016 rcu_read_unlock();
3017 if (overlap) {
3018 /* Someone else could have slipped in a size
3019  * change here, but doing so is just silly.
3020  * We put oldsectors back because we *know* it is
3021  * safe, and trust userspace not to race with
3022  * itself.
3023  */
3024 rdev->sectors = oldsectors;
3025 return -EBUSY;
3026 }
3027 }
3028 return len;
3029}
3030
3031static struct rdev_sysfs_entry rdev_size =
3032__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3033
3034static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3035{
3036 unsigned long long recovery_start = rdev->recovery_offset;
3037
3038 if (test_bit(In_sync, &rdev->flags) ||
3039 recovery_start == MaxSector)
3040 return sprintf(page, "none\n");
3041
3042 return sprintf(page, "%llu\n", recovery_start);
3043}
3044
3045static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3046{
3047 unsigned long long recovery_start;
3048
3049 if (cmd_match(buf, "none"))
3050 recovery_start = MaxSector;
3051 else if (kstrtoull(buf, 10, &recovery_start))
3052 return -EINVAL;
3053
3054 if (rdev->mddev->pers &&
3055 rdev->raid_disk >= 0)
3056 return -EBUSY;
3057
3058 rdev->recovery_offset = recovery_start;
3059 if (recovery_start == MaxSector)
3060 set_bit(In_sync, &rdev->flags);
3061 else
3062 clear_bit(In_sync, &rdev->flags);
3063 return len;
3064}
3065
3066static struct rdev_sysfs_entry rdev_recovery_start =
3067__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3068
3069 /* sysfs access to the bad-blocks list.
3070  * We present two files.
3071  * 'bad_blocks' lists sector numbers and lengths of ranges that
3072  *    are recorded as bad.  The list is truncated to fit within
3073  *    the one-page limit of sysfs.
3074  *    Writing "sector length" to this file adds an acknowledged
3075  *    bad block.
3076  * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
3077  *    been acknowledged.  Writing to this file adds bad blocks
3078  *    without acknowledging them.  This is largely for testing.
3079  */
3080static ssize_t bb_show(struct md_rdev *rdev, char *page)
3081{
3082 return badblocks_show(&rdev->badblocks, page, 0);
3083}
3084static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3085{
3086 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3087
3088 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3089 wake_up(&rdev->blocked_wait);
3090 return rv;
3091}
3092static struct rdev_sysfs_entry rdev_bad_blocks =
3093__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3094
3095static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3096{
3097 return badblocks_show(&rdev->badblocks, page, 1);
3098}
3099static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3100{
3101 return badblocks_store(&rdev->badblocks, page, len, 1);
3102}
3103static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3104__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3105
3106static struct attribute *rdev_default_attrs[] = {
3107 &rdev_state.attr,
3108 &rdev_errors.attr,
3109 &rdev_slot.attr,
3110 &rdev_offset.attr,
3111 &rdev_new_offset.attr,
3112 &rdev_size.attr,
3113 &rdev_recovery_start.attr,
3114 &rdev_bad_blocks.attr,
3115 &rdev_unack_bad_blocks.attr,
3116 NULL,
3117};
3118static ssize_t
3119rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3120{
3121 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3122 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3123
3124 if (!entry->show)
3125 return -EIO;
3126 if (!rdev->mddev)
3127 return -EBUSY;
3128 return entry->show(rdev, page);
3129}
3130
3131static ssize_t
3132rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3133 const char *page, size_t length)
3134{
3135 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3136 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3137 ssize_t rv;
3138 struct mddev *mddev = rdev->mddev;
3139
3140 if (!entry->store)
3141 return -EIO;
3142 if (!capable(CAP_SYS_ADMIN))
3143 return -EACCES;
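 /* rdev->mddev can be cleared concurrently (see unbind_rdev_from_array),
  * so take the array lock and re-check it before calling the handler. */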
3144 rv = mddev ? mddev_lock(mddev): -EBUSY;
3145 if (!rv) {
3146 if (rdev->mddev == NULL)
3147 rv = -EBUSY;
3148 else
3149 rv = entry->store(rdev, page, length);
3150 mddev_unlock(mddev);
3151 }
3152 return rv;
3153}
3154
3155static void rdev_free(struct kobject *ko)
3156{
3157 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3158 kfree(rdev);
3159}
3160static const struct sysfs_ops rdev_sysfs_ops = {
3161 .show = rdev_attr_show,
3162 .store = rdev_attr_store,
3163};
3164static struct kobj_type rdev_ktype = {
3165 .release = rdev_free,
3166 .sysfs_ops = &rdev_sysfs_ops,
3167 .default_attrs = rdev_default_attrs,
3168};
3169
3170int md_rdev_init(struct md_rdev *rdev)
3171{
3172 rdev->desc_nr = -1;
3173 rdev->saved_raid_disk = -1;
3174 rdev->raid_disk = -1;
3175 rdev->flags = 0;
3176 rdev->data_offset = 0;
3177 rdev->new_data_offset = 0;
3178 rdev->sb_events = 0;
3179 rdev->last_read_error = 0;
3180 rdev->sb_loaded = 0;
3181 rdev->bb_page = NULL;
3182 atomic_set(&rdev->nr_pending, 0);
3183 atomic_set(&rdev->read_errors, 0);
3184 atomic_set(&rdev->corrected_errors, 0);
3185
3186 INIT_LIST_HEAD(&rdev->same_set);
3187 init_waitqueue_head(&rdev->blocked_wait);
3188
3189
3190 /* Add space to store the bad block list.
3191  * This reserves the space even on arrays where it cannot
3192  * be used - I wonder if that matters */
3193 return badblocks_init(&rdev->badblocks, 0);
3194}
3195EXPORT_SYMBOL_GPL(md_rdev_init);
3196
3197 /*
3198  * Import a device. If 'super_format' >= 0, then sanity check the
3199  * superblock.
3200  *
3201  * The import fails if:
3202  *   - the device is nonexistent (zero size)
3203  *   - the device has no valid superblock
3204  * On failure an ERR_PTR is returned and the device is released.
3205  */
3206static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3207{
3208 char b[BDEVNAME_SIZE];
3209 int err;
3210 struct md_rdev *rdev;
3211 sector_t size;
3212
3213 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3214 if (!rdev) {
3215 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3216 return ERR_PTR(-ENOMEM);
3217 }
3218
3219 err = md_rdev_init(rdev);
3220 if (err)
3221 goto abort_free;
3222 err = alloc_disk_sb(rdev);
3223 if (err)
3224 goto abort_free;
3225
3226 err = lock_rdev(rdev, newdev, super_format == -2);
3227 if (err)
3228 goto abort_free;
3229
3230 kobject_init(&rdev->kobj, &rdev_ktype);
3231
3232 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3233 if (!size) {
3234 printk(KERN_WARNING
3235 "md: %s has zero or unknown size, marking faulty!\n",
3236 bdevname(rdev->bdev,b));
3237 err = -EINVAL;
3238 goto abort_free;
3239 }
3240
3241 if (super_format >= 0) {
3242 err = super_types[super_format].
3243 load_super(rdev, NULL, super_minor);
3244 if (err == -EINVAL) {
3245 printk(KERN_WARNING
3246 "md: %s does not have a valid v%d.%d "
3247 "superblock, not importing!\n",
3248 bdevname(rdev->bdev,b),
3249 super_format, super_minor);
3250 goto abort_free;
3251 }
3252 if (err < 0) {
3253 printk(KERN_WARNING
3254 "md: could not read %s's sb, not importing!\n",
3255 bdevname(rdev->bdev,b));
3256 goto abort_free;
3257 }
3258 }
3259
3260 return rdev;
3261
3262abort_free:
3263 if (rdev->bdev)
3264 unlock_rdev(rdev);
3265 md_rdev_clear(rdev);
3266 kfree(rdev);
3267 return ERR_PTR(err);
3268}
3269
3270 /*
3271  * Check a full RAID array for plausibility
3272  */
3273
3274static void analyze_sbs(struct mddev *mddev)
3275{
3276 int i;
3277 struct md_rdev *rdev, *freshest, *tmp;
3278 char b[BDEVNAME_SIZE];
3279
3280 freshest = NULL;
3281 rdev_for_each_safe(rdev, tmp, mddev)
3282 switch (super_types[mddev->major_version].
3283 load_super(rdev, freshest, mddev->minor_version)) {
3284 case 1:
3285 freshest = rdev;
3286 break;
3287 case 0:
3288 break;
3289 default:
3290 printk( KERN_ERR \
3291 "md: fatal superblock inconsistency in %s"
3292 " -- removing from array\n",
3293 bdevname(rdev->bdev,b));
3294 md_kick_rdev_from_array(rdev);
3295 }
3296
3297 super_types[mddev->major_version].
3298 validate_super(mddev, freshest);
3299
3300 i = 0;
3301 rdev_for_each_safe(rdev, tmp, mddev) {
3302 if (mddev->max_disks &&
3303 (rdev->desc_nr >= mddev->max_disks ||
3304 i > mddev->max_disks)) {
3305 printk(KERN_WARNING
3306 "md: %s: %s: only %d devices permitted\n",
3307 mdname(mddev), bdevname(rdev->bdev, b),
3308 mddev->max_disks);
3309 md_kick_rdev_from_array(rdev);
3310 continue;
3311 }
3312 if (rdev != freshest) {
3313 if (super_types[mddev->major_version].
3314 validate_super(mddev, rdev)) {
3315 printk(KERN_WARNING "md: kicking non-fresh %s"
3316 " from array!\n",
3317 bdevname(rdev->bdev,b));
3318 md_kick_rdev_from_array(rdev);
3319 continue;
3320 }
3321 }
3322 if (mddev->level == LEVEL_MULTIPATH) {
3323 rdev->desc_nr = i++;
3324 rdev->raid_disk = rdev->desc_nr;
3325 set_bit(In_sync, &rdev->flags);
3326 } else if (rdev->raid_disk >=
3327 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3328 !test_bit(Journal, &rdev->flags)) {
3329 rdev->raid_disk = -1;
3330 clear_bit(In_sync, &rdev->flags);
3331 }
3332 }
3333}
3334
3335 /* Read a fixed-point number.
3336  * Numbers in sysfs attributes should be in "standard" units where
3337  * possible, so time should be in seconds.
3338  * However we internally use a much smaller unit such as
3339  * milliseconds or jiffies.
3340  * This function takes a decimal number with a possible fractional
3341  * component, and produces an integer which is the result of
3342  * multiplying that number by 10^'scale',
3343  * all without any floating-point arithmetic.
3344  */
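/* For example, strict_strtoul_scaled("5.73", &res, 3) stores 5730 in res,
 * and strict_strtoul_scaled("2", &res, 3) stores 2000. */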
3345int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3346{
3347 unsigned long result = 0;
3348 long decimals = -1;
3349 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3350 if (*cp == '.')
3351 decimals = 0;
3352 else if (decimals < scale) {
3353 unsigned int value;
3354 value = *cp - '0';
3355 result = result * 10 + value;
3356 if (decimals >= 0)
3357 decimals++;
3358 }
3359 cp++;
3360 }
3361 if (*cp == '\n')
3362 cp++;
3363 if (*cp)
3364 return -EINVAL;
3365 if (decimals < 0)
3366 decimals = 0;
3367 while (decimals < scale) {
3368 result *= 10;
3369 decimals ++;
3370 }
3371 *res = result;
3372 return 0;
3373}
3374
3375static ssize_t
3376safe_delay_show(struct mddev *mddev, char *page)
3377{
3378 int msec = (mddev->safemode_delay*1000)/HZ;
3379 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3380}
3381static ssize_t
3382safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3383{
3384 unsigned long msec;
3385
3386 if (mddev_is_clustered(mddev)) {
3387 pr_info("md: Safemode is disabled for clustered mode\n");
3388 return -EINVAL;
3389 }
3390
3391 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3392 return -EINVAL;
3393 if (msec == 0)
3394 mddev->safemode_delay = 0;
3395 else {
3396 unsigned long old_delay = mddev->safemode_delay;
3397 unsigned long new_delay = (msec*HZ)/1000;
3398
3399 if (new_delay == 0)
3400 new_delay = 1;
3401 mddev->safemode_delay = new_delay;
3402 if (new_delay < old_delay || old_delay == 0)
3403 mod_timer(&mddev->safemode_timer, jiffies+1);
3404 }
3405 return len;
3406}
3407static struct md_sysfs_entry md_safe_delay =
3408__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3409
3410static ssize_t
3411level_show(struct mddev *mddev, char *page)
3412{
3413 struct md_personality *p;
3414 int ret;
3415 spin_lock(&mddev->lock);
3416 p = mddev->pers;
3417 if (p)
3418 ret = sprintf(page, "%s\n", p->name);
3419 else if (mddev->clevel[0])
3420 ret = sprintf(page, "%s\n", mddev->clevel);
3421 else if (mddev->level != LEVEL_NONE)
3422 ret = sprintf(page, "%d\n", mddev->level);
3423 else
3424 ret = 0;
3425 spin_unlock(&mddev->lock);
3426 return ret;
3427}
3428
3429static ssize_t
3430level_store(struct mddev *mddev, const char *buf, size_t len)
3431{
3432 char clevel[16];
3433 ssize_t rv;
3434 size_t slen = len;
3435 struct md_personality *pers, *oldpers;
3436 long level;
3437 void *priv, *oldpriv;
3438 struct md_rdev *rdev;
3439
3440 if (slen == 0 || slen >= sizeof(clevel))
3441 return -EINVAL;
3442
3443 rv = mddev_lock(mddev);
3444 if (rv)
3445 return rv;
3446
3447 if (mddev->pers == NULL) {
3448 strncpy(mddev->clevel, buf, slen);
3449 if (mddev->clevel[slen-1] == '\n')
3450 slen--;
3451 mddev->clevel[slen] = 0;
3452 mddev->level = LEVEL_NONE;
3453 rv = len;
3454 goto out_unlock;
3455 }
3456 rv = -EROFS;
3457 if (mddev->ro)
3458 goto out_unlock;
3459
3460 /* request to change the personality.  Need to ensure:
3461  *  - array is not engaged in resync/recovery/reshape
3462  *  - old personality can be suspended
3463  *  - new personality will access other array.
3464  */
3465
3466 rv = -EBUSY;
3467 if (mddev->sync_thread ||
3468 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3469 mddev->reshape_position != MaxSector ||
3470 mddev->sysfs_active)
3471 goto out_unlock;
3472
3473 rv = -EINVAL;
3474 if (!mddev->pers->quiesce) {
3475 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3476 mdname(mddev), mddev->pers->name);
3477 goto out_unlock;
3478 }
3479
3480 /* Now find the new personality */
3481 strncpy(clevel, buf, slen);
3482 if (clevel[slen-1] == '\n')
3483 slen--;
3484 clevel[slen] = 0;
3485 if (kstrtol(clevel, 10, &level))
3486 level = LEVEL_NONE;
3487
3488 if (request_module("md-%s", clevel) != 0)
3489 request_module("md-level-%s", clevel);
3490 spin_lock(&pers_lock);
3491 pers = find_pers(level, clevel);
3492 if (!pers || !try_module_get(pers->owner)) {
3493 spin_unlock(&pers_lock);
3494 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3495 rv = -EINVAL;
3496 goto out_unlock;
3497 }
3498 spin_unlock(&pers_lock);
3499
3500 if (pers == mddev->pers) {
3501 /* Nothing to do! */
3502 module_put(pers->owner);
3503 rv = len;
3504 goto out_unlock;
3505 }
3506 if (!pers->takeover) {
3507 module_put(pers->owner);
3508 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3509 mdname(mddev), clevel);
3510 rv = -EINVAL;
3511 goto out_unlock;
3512 }
3513
3514 rdev_for_each(rdev, mddev)
3515 rdev->new_raid_disk = rdev->raid_disk;
3516
3517 /* ->takeover must set new_* and close up holes before
3518  * setting mddev->pers
3519  */
3520 priv = pers->takeover(mddev);
3521 if (IS_ERR(priv)) {
3522 mddev->new_level = mddev->level;
3523 mddev->new_layout = mddev->layout;
3524 mddev->new_chunk_sectors = mddev->chunk_sectors;
3525 mddev->raid_disks -= mddev->delta_disks;
3526 mddev->delta_disks = 0;
3527 mddev->reshape_backwards = 0;
3528 module_put(pers->owner);
3529 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3530 mdname(mddev), clevel);
3531 rv = PTR_ERR(priv);
3532 goto out_unlock;
3533 }
3534
3535 /* Looks like we have a winner */
3536 mddev_suspend(mddev);
3537 mddev_detach(mddev);
3538
3539 spin_lock(&mddev->lock);
3540 oldpers = mddev->pers;
3541 oldpriv = mddev->private;
3542 mddev->pers = pers;
3543 mddev->private = priv;
3544 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3545 mddev->level = mddev->new_level;
3546 mddev->layout = mddev->new_layout;
3547 mddev->chunk_sectors = mddev->new_chunk_sectors;
3548 mddev->delta_disks = 0;
3549 mddev->reshape_backwards = 0;
3550 mddev->degraded = 0;
3551 spin_unlock(&mddev->lock);
3552
3553 if (oldpers->sync_request == NULL &&
3554 mddev->external) {
3555
3556 /* We are converting from a no-redundancy array
3557  * to a redundancy array and metadata is managed
3558  * externally so we need to be sure that writes
3559  * won't block due to a need to update the metadata
3560  * layer.
3561  */
3562 mddev->in_sync = 0;
3563 mddev->safemode_delay = 0;
3564 mddev->safemode = 0;
3565 }
3566
3567 oldpers->free(mddev, oldpriv);
3568
3569 if (oldpers->sync_request == NULL &&
3570 pers->sync_request != NULL) {
3571 /* need to add the md_redundancy_group */
3572 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3573 printk(KERN_WARNING
3574 "md: cannot register extra attributes for %s\n",
3575 mdname(mddev));
3576 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3577 }
3578 if (oldpers->sync_request != NULL &&
3579 pers->sync_request == NULL) {
3580 /* need to remove the md_redundancy_group */
3581 if (mddev->to_remove == NULL)
3582 mddev->to_remove = &md_redundancy_group;
3583 }
3584
3585 module_put(oldpers->owner);
3586
3587 rdev_for_each(rdev, mddev) {
3588 if (rdev->raid_disk < 0)
3589 continue;
3590 if (rdev->new_raid_disk >= mddev->raid_disks)
3591 rdev->new_raid_disk = -1;
3592 if (rdev->new_raid_disk == rdev->raid_disk)
3593 continue;
3594 sysfs_unlink_rdev(mddev, rdev);
3595 }
3596 rdev_for_each(rdev, mddev) {
3597 if (rdev->raid_disk < 0)
3598 continue;
3599 if (rdev->new_raid_disk == rdev->raid_disk)
3600 continue;
3601 rdev->raid_disk = rdev->new_raid_disk;
3602 if (rdev->raid_disk < 0)
3603 clear_bit(In_sync, &rdev->flags);
3604 else {
3605 if (sysfs_link_rdev(mddev, rdev))
3606 printk(KERN_WARNING "md: cannot register rd%d"
3607 " for %s after level change\n",
3608 rdev->raid_disk, mdname(mddev));
3609 }
3610 }
3611
3612 if (pers->sync_request == NULL) {
3613 /* this is now an array without redundancy, so
3614  * it must always be in_sync
3615  */
3616 mddev->in_sync = 1;
3617 del_timer_sync(&mddev->safemode_timer);
3618 }
3619 blk_set_stacking_limits(&mddev->queue->limits);
3620 pers->run(mddev);
3621 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3622 mddev_resume(mddev);
3623 if (!mddev->thread)
3624 md_update_sb(mddev, 1);
3625 sysfs_notify(&mddev->kobj, NULL, "level");
3626 md_new_event(mddev);
3627 rv = len;
3628out_unlock:
3629 mddev_unlock(mddev);
3630 return rv;
3631}
3632
3633static struct md_sysfs_entry md_level =
3634__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3635
3636static ssize_t
3637layout_show(struct mddev *mddev, char *page)
3638{
3639 /* just a number, not meaningful for all levels */
3640 if (mddev->reshape_position != MaxSector &&
3641 mddev->layout != mddev->new_layout)
3642 return sprintf(page, "%d (%d)\n",
3643 mddev->new_layout, mddev->layout);
3644 return sprintf(page, "%d\n", mddev->layout);
3645}
3646
3647static ssize_t
3648layout_store(struct mddev *mddev, const char *buf, size_t len)
3649{
3650 unsigned int n;
3651 int err;
3652
3653 err = kstrtouint(buf, 10, &n);
3654 if (err < 0)
3655 return err;
3656 err = mddev_lock(mddev);
3657 if (err)
3658 return err;
3659
3660 if (mddev->pers) {
3661 if (mddev->pers->check_reshape == NULL)
3662 err = -EBUSY;
3663 else if (mddev->ro)
3664 err = -EROFS;
3665 else {
3666 mddev->new_layout = n;
3667 err = mddev->pers->check_reshape(mddev);
3668 if (err)
3669 mddev->new_layout = mddev->layout;
3670 }
3671 } else {
3672 mddev->new_layout = n;
3673 if (mddev->reshape_position == MaxSector)
3674 mddev->layout = n;
3675 }
3676 mddev_unlock(mddev);
3677 return err ?: len;
3678}
3679static struct md_sysfs_entry md_layout =
3680__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3681
3682static ssize_t
3683raid_disks_show(struct mddev *mddev, char *page)
3684{
3685 if (mddev->raid_disks == 0)
3686 return 0;
3687 if (mddev->reshape_position != MaxSector &&
3688 mddev->delta_disks != 0)
3689 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3690 mddev->raid_disks - mddev->delta_disks);
3691 return sprintf(page, "%d\n", mddev->raid_disks);
3692}
3693
3694static int update_raid_disks(struct mddev *mddev, int raid_disks);
3695
3696static ssize_t
3697raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3698{
3699 unsigned int n;
3700 int err;
3701
3702 err = kstrtouint(buf, 10, &n);
3703 if (err < 0)
3704 return err;
3705
3706 err = mddev_lock(mddev);
3707 if (err)
3708 return err;
3709 if (mddev->pers)
3710 err = update_raid_disks(mddev, n);
3711 else if (mddev->reshape_position != MaxSector) {
3712 struct md_rdev *rdev;
3713 int olddisks = mddev->raid_disks - mddev->delta_disks;
3714
3715 err = -EINVAL;
3716 rdev_for_each(rdev, mddev) {
3717 if (olddisks < n &&
3718 rdev->data_offset < rdev->new_data_offset)
3719 goto out_unlock;
3720 if (olddisks > n &&
3721 rdev->data_offset > rdev->new_data_offset)
3722 goto out_unlock;
3723 }
3724 err = 0;
3725 mddev->delta_disks = n - olddisks;
3726 mddev->raid_disks = n;
3727 mddev->reshape_backwards = (mddev->delta_disks < 0);
3728 } else
3729 mddev->raid_disks = n;
3730out_unlock:
3731 mddev_unlock(mddev);
3732 return err ? err : len;
3733}
3734static struct md_sysfs_entry md_raid_disks =
3735__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3736
3737static ssize_t
3738chunk_size_show(struct mddev *mddev, char *page)
3739{
3740 if (mddev->reshape_position != MaxSector &&
3741 mddev->chunk_sectors != mddev->new_chunk_sectors)
3742 return sprintf(page, "%d (%d)\n",
3743 mddev->new_chunk_sectors << 9,
3744 mddev->chunk_sectors << 9);
3745 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3746}
3747
3748static ssize_t
3749chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3750{
3751 unsigned long n;
3752 int err;
3753
3754 err = kstrtoul(buf, 10, &n);
3755 if (err < 0)
3756 return err;
3757
3758 err = mddev_lock(mddev);
3759 if (err)
3760 return err;
3761 if (mddev->pers) {
3762 if (mddev->pers->check_reshape == NULL)
3763 err = -EBUSY;
3764 else if (mddev->ro)
3765 err = -EROFS;
3766 else {
3767 mddev->new_chunk_sectors = n >> 9;
3768 err = mddev->pers->check_reshape(mddev);
3769 if (err)
3770 mddev->new_chunk_sectors = mddev->chunk_sectors;
3771 }
3772 } else {
3773 mddev->new_chunk_sectors = n >> 9;
3774 if (mddev->reshape_position == MaxSector)
3775 mddev->chunk_sectors = n >> 9;
3776 }
3777 mddev_unlock(mddev);
3778 return err ?: len;
3779}
3780static struct md_sysfs_entry md_chunk_size =
3781__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3782
3783static ssize_t
3784resync_start_show(struct mddev *mddev, char *page)
3785{
3786 if (mddev->recovery_cp == MaxSector)
3787 return sprintf(page, "none\n");
3788 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3789}
3790
3791static ssize_t
3792resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3793{
3794 unsigned long long n;
3795 int err;
3796
3797 if (cmd_match(buf, "none"))
3798 n = MaxSector;
3799 else {
3800 err = kstrtoull(buf, 10, &n);
3801 if (err < 0)
3802 return err;
3803 if (n != (sector_t)n)
3804 return -EINVAL;
3805 }
3806
3807 err = mddev_lock(mddev);
3808 if (err)
3809 return err;
3810 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3811 err = -EBUSY;
3812
3813 if (!err) {
3814 mddev->recovery_cp = n;
3815 if (mddev->pers)
3816 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3817 }
3818 mddev_unlock(mddev);
3819 return err ?: len;
3820}
3821static struct md_sysfs_entry md_resync_start =
3822__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3823 resync_start_show, resync_start_store);
3824
3825 /*
3826  * The array state can be:
3827  *
3828  * clear
3829  *     No devices, no size, no level
3830  *     Equivalent to STOP_ARRAY ioctl
3831  * inactive
3832  *     May have some settings and devices, but the array is not
3833  *     active.  All IO results in error.
3834  *     When written, doesn't tear down the array, but just stops it
3835  * suspended (not supported yet)
3836  *     All IO requests will block. The array can be reconfigured.
3837  *     Writing this, if accepted, will block until the array is quiescent
3838  * readonly
3839  *     no resync can happen.  no superblocks get written.
3840  *     write requests fail
3841  * read-auto
3842  *     like readonly, but behaves like 'clean' on a write request.
3843  *
3844  * clean - no pending writes, but otherwise active.
3845  *     When written to an inactive array, starts without resync
3846  *     If a write request arrives then
3847  *       if metadata is known, mark 'dirty' and switch to 'active'.
3848  *       if not known, block and switch to write-pending
3849  *     If written to an active array that has pending writes, then fails.
3850  * active
3851  *     fully active: IO and resync can be happening.
3852  *     When written to an inactive array, starts with resync
3853  *
3854  * write-pending
3855  *     clean, but writes are blocked waiting for 'active' to be written.
3856  *
3857  * active-idle
3858  *     like active, but no writes have been seen for a while (safe_mode_delay).
3859  *
3860  */
3861enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3862 write_pending, active_idle, bad_word};
3863static char *array_states[] = {
3864 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3865 "write-pending", "active-idle", NULL };
3866
3867static int match_word(const char *word, char **list)
3868{
3869 int n;
3870 for (n=0; list[n]; n++)
3871 if (cmd_match(word, list[n]))
3872 break;
3873 return n;
3874}
3875
3876static ssize_t
3877array_state_show(struct mddev *mddev, char *page)
3878{
3879 enum array_state st = inactive;
3880
3881 if (mddev->pers)
3882 switch(mddev->ro) {
3883 case 1:
3884 st = readonly;
3885 break;
3886 case 2:
3887 st = read_auto;
3888 break;
3889 case 0:
3890 if (mddev->in_sync)
3891 st = clean;
3892 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3893 st = write_pending;
3894 else if (mddev->safemode)
3895 st = active_idle;
3896 else
3897 st = active;
3898 }
3899 else {
3900 if (list_empty(&mddev->disks) &&
3901 mddev->raid_disks == 0 &&
3902 mddev->dev_sectors == 0)
3903 st = clear;
3904 else
3905 st = inactive;
3906 }
3907 return sprintf(page, "%s\n", array_states[st]);
3908}
3909
3910static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3911static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3912static int do_md_run(struct mddev *mddev);
3913static int restart_array(struct mddev *mddev);
3914
3915static ssize_t
3916array_state_store(struct mddev *mddev, const char *buf, size_t len)
3917{
3918 int err;
3919 enum array_state st = match_word(buf, array_states);
3920
3921 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3922 /* don't take reconfig_mutex when toggling between
3923  * clean and active
3924  */
3925 spin_lock(&mddev->lock);
3926 if (st == active) {
3927 restart_array(mddev);
3928 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3929 wake_up(&mddev->sb_wait);
3930 err = 0;
3931 } else {
3932 restart_array(mddev);
3933 if (atomic_read(&mddev->writes_pending) == 0) {
3934 if (mddev->in_sync == 0) {
3935 mddev->in_sync = 1;
3936 if (mddev->safemode == 1)
3937 mddev->safemode = 0;
3938 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3939 }
3940 err = 0;
3941 } else
3942 err = -EBUSY;
3943 }
3944 if (!err)
3945 sysfs_notify_dirent_safe(mddev->sysfs_state);
3946 spin_unlock(&mddev->lock);
3947 return err ?: len;
3948 }
3949 err = mddev_lock(mddev);
3950 if (err)
3951 return err;
3952 err = -EINVAL;
3953 switch(st) {
3954 case bad_word:
3955 break;
3956 case clear:
3957 /* stopping an active array */
3958 err = do_md_stop(mddev, 0, NULL);
3959 break;
3960 case inactive:
3961 /* stopping an active array */
3962 if (mddev->pers)
3963 err = do_md_stop(mddev, 2, NULL);
3964 else
3965 err = 0;
3966 break;
3967 case suspended:
3968 break; /* not supported yet */
3969 case readonly:
3970 if (mddev->pers)
3971 err = md_set_readonly(mddev, NULL);
3972 else {
3973 mddev->ro = 1;
3974 set_disk_ro(mddev->gendisk, 1);
3975 err = do_md_run(mddev);
3976 }
3977 break;
3978 case read_auto:
3979 if (mddev->pers) {
3980 if (mddev->ro == 0)
3981 err = md_set_readonly(mddev, NULL);
3982 else if (mddev->ro == 1)
3983 err = restart_array(mddev);
3984 if (err == 0) {
3985 mddev->ro = 2;
3986 set_disk_ro(mddev->gendisk, 0);
3987 }
3988 } else {
3989 mddev->ro = 2;
3990 err = do_md_run(mddev);
3991 }
3992 break;
3993 case clean:
3994 if (mddev->pers) {
3995 err = restart_array(mddev);
3996 if (err)
3997 break;
3998 spin_lock(&mddev->lock);
3999 if (atomic_read(&mddev->writes_pending) == 0) {
4000 if (mddev->in_sync == 0) {
4001 mddev->in_sync = 1;
4002 if (mddev->safemode == 1)
4003 mddev->safemode = 0;
4004 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
4005 }
4006 err = 0;
4007 } else
4008 err = -EBUSY;
4009 spin_unlock(&mddev->lock);
4010 } else
4011 err = -EINVAL;
4012 break;
4013 case active:
4014 if (mddev->pers) {
4015 err = restart_array(mddev);
4016 if (err)
4017 break;
4018 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
4019 wake_up(&mddev->sb_wait);
4020 err = 0;
4021 } else {
4022 mddev->ro = 0;
4023 set_disk_ro(mddev->gendisk, 0);
4024 err = do_md_run(mddev);
4025 }
4026 break;
4027 case write_pending:
4028 case active_idle:
4029 /* these cannot be set directly */
4030 break;
4031 }
4032
4033 if (!err) {
4034 if (mddev->hold_active == UNTIL_IOCTL)
4035 mddev->hold_active = 0;
4036 sysfs_notify_dirent_safe(mddev->sysfs_state);
4037 }
4038 mddev_unlock(mddev);
4039 return err ?: len;
4040}
4041static struct md_sysfs_entry md_array_state =
4042__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4043
4044static ssize_t
4045max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4046 return sprintf(page, "%d\n",
4047 atomic_read(&mddev->max_corr_read_errors));
4048}
4049
4050static ssize_t
4051max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4052{
4053 unsigned int n;
4054 int rv;
4055
4056 rv = kstrtouint(buf, 10, &n);
4057 if (rv < 0)
4058 return rv;
4059 atomic_set(&mddev->max_corr_read_errors, n);
4060 return len;
4061}
4062
4063static struct md_sysfs_entry max_corr_read_errors =
4064__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4065 max_corrected_read_errors_store);
4066
4067static ssize_t
4068null_show(struct mddev *mddev, char *page)
4069{
4070 return -EINVAL;
4071}
4072
4073static ssize_t
4074new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4075{
4076 /* buf must be %d:%d\n? giving major and minor numbers */
4077 /* The new device is added to the array.
4078  * If the array has a persistent superblock, we read the
4079  * superblock to initialise info and check validity.
4080  * Otherwise, the only checking done is that in bind_rdev_to_array,
4081  * which mainly checks size.
4082  */
4083 char *e;
4084 int major = simple_strtoul(buf, &e, 10);
4085 int minor;
4086 dev_t dev;
4087 struct md_rdev *rdev;
4088 int err;
4089
4090 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4091 return -EINVAL;
4092 minor = simple_strtoul(e+1, &e, 10);
4093 if (*e && *e != '\n')
4094 return -EINVAL;
4095 dev = MKDEV(major, minor);
4096 if (major != MAJOR(dev) ||
4097 minor != MINOR(dev))
4098 return -EOVERFLOW;
4099
4100 flush_workqueue(md_misc_wq);
4101
4102 err = mddev_lock(mddev);
4103 if (err)
4104 return err;
4105 if (mddev->persistent) {
4106 rdev = md_import_device(dev, mddev->major_version,
4107 mddev->minor_version);
4108 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4109 struct md_rdev *rdev0
4110 = list_entry(mddev->disks.next,
4111 struct md_rdev, same_set);
4112 err = super_types[mddev->major_version]
4113 .load_super(rdev, rdev0, mddev->minor_version);
4114 if (err < 0)
4115 goto out;
4116 }
4117 } else if (mddev->external)
4118 rdev = md_import_device(dev, -2, -1);
4119 else
4120 rdev = md_import_device(dev, -1, -1);
4121
4122 if (IS_ERR(rdev)) {
4123 mddev_unlock(mddev);
4124 return PTR_ERR(rdev);
4125 }
4126 err = bind_rdev_to_array(rdev, mddev);
4127 out:
4128 if (err)
4129 export_rdev(rdev);
4130 mddev_unlock(mddev);
4131 return err ? err : len;
4132}
4133
4134static struct md_sysfs_entry md_new_device =
4135__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4136
4137static ssize_t
4138bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4139{
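 /* buf should hold a space-separated list of chunk numbers, each either
  * a single chunk or a "start-end" range, whose bits are to be set
  * dirty in the bitmap. */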
4140 char *end;
4141 unsigned long chunk, end_chunk;
4142 int err;
4143
4144 err = mddev_lock(mddev);
4145 if (err)
4146 return err;
4147 if (!mddev->bitmap)
4148 goto out;
4149
4150 while (*buf) {
4151 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4152 if (buf == end) break;
4153 if (*end == '-') {
4154 buf = end + 1;
4155 end_chunk = simple_strtoul(buf, &end, 0);
4156 if (buf == end) break;
4157 }
4158 if (*end && !isspace(*end)) break;
4159 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4160 buf = skip_spaces(end);
4161 }
4162 bitmap_unplug(mddev->bitmap);
4163out:
4164 mddev_unlock(mddev);
4165 return len;
4166}
4167
4168static struct md_sysfs_entry md_bitmap =
4169__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4170
4171static ssize_t
4172size_show(struct mddev *mddev, char *page)
4173{
4174 return sprintf(page, "%llu\n",
4175 (unsigned long long)mddev->dev_sectors / 2);
4176}
4177
4178static int update_size(struct mddev *mddev, sector_t num_sectors);
4179
4180static ssize_t
4181size_store(struct mddev *mddev, const char *buf, size_t len)
4182{
4183 /* If the array is inactive, we can reduce the component size, but
4184  * not increase it (except from 0).
4185  * If the array is active, we can try an on-line resize
4186  */
4187 sector_t sectors;
4188 int err = strict_blocks_to_sectors(buf, &sectors);
4189
4190 if (err < 0)
4191 return err;
4192 err = mddev_lock(mddev);
4193 if (err)
4194 return err;
4195 if (mddev->pers) {
4196 err = update_size(mddev, sectors);
4197 if (err == 0)
4198 md_update_sb(mddev, 1);
4199 } else {
4200 if (mddev->dev_sectors == 0 ||
4201 mddev->dev_sectors > sectors)
4202 mddev->dev_sectors = sectors;
4203 else
4204 err = -ENOSPC;
4205 }
4206 mddev_unlock(mddev);
4207 return err ? err : len;
4208}
4209
4210static struct md_sysfs_entry md_size =
4211__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4212
4213 /* Metadata version.
4214  * This is one of
4215  *   'none' for arrays with no metadata (good luck...)
4216  *   'external' for arrays with externally managed metadata,
4217  *   or N.M for internally known formats.
4218  */
4219static ssize_t
4220metadata_show(struct mddev *mddev, char *page)
4221{
4222 if (mddev->persistent)
4223 return sprintf(page, "%d.%d\n",
4224 mddev->major_version, mddev->minor_version);
4225 else if (mddev->external)
4226 return sprintf(page, "external:%s\n", mddev->metadata_type);
4227 else
4228 return sprintf(page, "none\n");
4229}
4230
4231static ssize_t
4232metadata_store(struct mddev *mddev, const char *buf, size_t len)
4233{
4234 int major, minor;
4235 char *e;
4236 int err;
4237
4238 /* Changing the details of 'external' metadata is
4239  * always permitted.  Otherwise there must be
4240  * no devices attached to the array.
4241  */
4242 err = mddev_lock(mddev);
4243 if (err)
4244 return err;
4245 err = -EBUSY;
4246 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4247 ;
4248 else if (!list_empty(&mddev->disks))
4249 goto out_unlock;
4250
4251 err = 0;
4252 if (cmd_match(buf, "none")) {
4253 mddev->persistent = 0;
4254 mddev->external = 0;
4255 mddev->major_version = 0;
4256 mddev->minor_version = 90;
4257 goto out_unlock;
4258 }
4259 if (strncmp(buf, "external:", 9) == 0) {
4260 size_t namelen = len-9;
4261 if (namelen >= sizeof(mddev->metadata_type))
4262 namelen = sizeof(mddev->metadata_type)-1;
4263 strncpy(mddev->metadata_type, buf+9, namelen);
4264 mddev->metadata_type[namelen] = 0;
4265 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4266 mddev->metadata_type[--namelen] = 0;
4267 mddev->persistent = 0;
4268 mddev->external = 1;
4269 mddev->major_version = 0;
4270 mddev->minor_version = 90;
4271 goto out_unlock;
4272 }
4273 major = simple_strtoul(buf, &e, 10);
4274 err = -EINVAL;
4275 if (e==buf || *e != '.')
4276 goto out_unlock;
4277 buf = e+1;
4278 minor = simple_strtoul(buf, &e, 10);
4279 if (e==buf || (*e && *e != '\n') )
4280 goto out_unlock;
4281 err = -ENOENT;
4282 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4283 goto out_unlock;
4284 mddev->major_version = major;
4285 mddev->minor_version = minor;
4286 mddev->persistent = 1;
4287 mddev->external = 0;
4288 err = 0;
4289out_unlock:
4290 mddev_unlock(mddev);
4291 return err ?: len;
4292}
4293
4294static struct md_sysfs_entry md_metadata =
4295__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4296
4297static ssize_t
4298action_show(struct mddev *mddev, char *page)
4299{
4300 char *type = "idle";
4301 unsigned long recovery = mddev->recovery;
4302 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4303 type = "frozen";
4304 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4305 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4306 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4307 type = "reshape";
4308 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4309 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4310 type = "resync";
4311 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4312 type = "check";
4313 else
4314 type = "repair";
4315 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4316 type = "recover";
4317 else if (mddev->reshape_position != MaxSector)
4318 type = "reshape";
4319 }
4320 return sprintf(page, "%s\n", type);
4321}
4322
4323static ssize_t
4324action_store(struct mddev *mddev, const char *page, size_t len)
4325{
4326 if (!mddev->pers || !mddev->pers->sync_request)
4327 return -EINVAL;
4328
4329
4330 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4331 if (cmd_match(page, "frozen"))
4332 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4333 else
4334 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4335 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4336 mddev_lock(mddev) == 0) {
4337 flush_workqueue(md_misc_wq);
4338 if (mddev->sync_thread) {
4339 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4340 md_reap_sync_thread(mddev);
4341 }
4342 mddev_unlock(mddev);
4343 }
4344 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4345 return -EBUSY;
4346 else if (cmd_match(page, "resync"))
4347 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4348 else if (cmd_match(page, "recover")) {
4349 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4350 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4351 } else if (cmd_match(page, "reshape")) {
4352 int err;
4353 if (mddev->pers->start_reshape == NULL)
4354 return -EINVAL;
4355 err = mddev_lock(mddev);
4356 if (!err) {
4357 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4358 err = -EBUSY;
4359 else {
4360 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4361 err = mddev->pers->start_reshape(mddev);
4362 }
4363 mddev_unlock(mddev);
4364 }
4365 if (err)
4366 return err;
4367 sysfs_notify(&mddev->kobj, NULL, "degraded");
4368 } else {
4369 if (cmd_match(page, "check"))
4370 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4371 else if (!cmd_match(page, "repair"))
4372 return -EINVAL;
4373 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4374 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4375 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4376 }
4377 if (mddev->ro == 2) {
4378 /* A write to sync_action is enough to justify
4379  * canceling read-auto mode
4380  */
4381 mddev->ro = 0;
4382 md_wakeup_thread(mddev->sync_thread);
4383 }
4384 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4385 md_wakeup_thread(mddev->thread);
4386 sysfs_notify_dirent_safe(mddev->sysfs_action);
4387 return len;
4388}
4389
4390static struct md_sysfs_entry md_scan_mode =
4391__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4392
4393static ssize_t
4394last_sync_action_show(struct mddev *mddev, char *page)
4395{
4396 return sprintf(page, "%s\n", mddev->last_sync_action);
4397}
4398
4399static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4400
4401static ssize_t
4402mismatch_cnt_show(struct mddev *mddev, char *page)
4403{
4404 return sprintf(page, "%llu\n",
4405 (unsigned long long)
4406 atomic64_read(&mddev->resync_mismatches));
4407}
4408
4409static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4410
4411static ssize_t
4412sync_min_show(struct mddev *mddev, char *page)
4413{
4414 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4415 mddev->sync_speed_min ? "local": "system");
4416}
4417
4418static ssize_t
4419sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4420{
4421 unsigned int min;
4422 int rv;
4423
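 /* "system" selects the system-wide default (speed_limit_min), which is
  * stored internally as 0. */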
4424 if (strncmp(buf, "system", 6)==0) {
4425 min = 0;
4426 } else {
4427 rv = kstrtouint(buf, 10, &min);
4428 if (rv < 0)
4429 return rv;
4430 if (min == 0)
4431 return -EINVAL;
4432 }
4433 mddev->sync_speed_min = min;
4434 return len;
4435}
4436
4437static struct md_sysfs_entry md_sync_min =
4438__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4439
4440static ssize_t
4441sync_max_show(struct mddev *mddev, char *page)
4442{
4443 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4444 mddev->sync_speed_max ? "local": "system");
4445}
4446
4447static ssize_t
4448sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4449{
4450 unsigned int max;
4451 int rv;
4452
4453 if (strncmp(buf, "system", 6)==0) {
4454 max = 0;
4455 } else {
4456 rv = kstrtouint(buf, 10, &max);
4457 if (rv < 0)
4458 return rv;
4459 if (max == 0)
4460 return -EINVAL;
4461 }
4462 mddev->sync_speed_max = max;
4463 return len;
4464}
4465
4466static struct md_sysfs_entry md_sync_max =
4467__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4468
4469static ssize_t
4470degraded_show(struct mddev *mddev, char *page)
4471{
4472 return sprintf(page, "%d\n", mddev->degraded);
4473}
4474static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4475
4476static ssize_t
4477sync_force_parallel_show(struct mddev *mddev, char *page)
4478{
4479 return sprintf(page, "%d\n", mddev->parallel_resync);
4480}
4481
4482static ssize_t
4483sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4484{
4485 long n;
4486
4487 if (kstrtol(buf, 10, &n))
4488 return -EINVAL;
4489
4490 if (n != 0 && n != 1)
4491 return -EINVAL;
4492
4493 mddev->parallel_resync = n;
4494
4495 if (mddev->sync_thread)
4496 wake_up(&resync_wait);
4497
4498 return len;
4499}
4500
4501 /* force parallel resync, even with shared block devices */
4502static struct md_sysfs_entry md_sync_force_parallel =
4503__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4504 sync_force_parallel_show, sync_force_parallel_store);
4505
4506static ssize_t
4507sync_speed_show(struct mddev *mddev, char *page)
4508{
4509 unsigned long resync, dt, db;
4510 if (mddev->curr_resync == 0)
4511 return sprintf(page, "none\n");
4512 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4513 dt = (jiffies - mddev->resync_mark) / HZ;
4514 if (!dt) dt++;
4515 db = resync - mddev->resync_mark_cnt;
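 /* db sectors completed in dt seconds; divide by 2 to report KiB/sec */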
4516 return sprintf(page, "%lu\n", db/dt/2);
4517}
4518
4519static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4520
4521static ssize_t
4522sync_completed_show(struct mddev *mddev, char *page)
4523{
4524 unsigned long long max_sectors, resync;
4525
4526 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4527 return sprintf(page, "none\n");
4528
4529 if (mddev->curr_resync == 1 ||
4530 mddev->curr_resync == 2)
4531 return sprintf(page, "delayed\n");
4532
4533 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4534 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4535 max_sectors = mddev->resync_max_sectors;
4536 else
4537 max_sectors = mddev->dev_sectors;
4538
4539 resync = mddev->curr_resync_completed;
4540 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4541}
4542
4543static struct md_sysfs_entry md_sync_completed =
4544 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4545
4546static ssize_t
4547min_sync_show(struct mddev *mddev, char *page)
4548{
4549 return sprintf(page, "%llu\n",
4550 (unsigned long long)mddev->resync_min);
4551}
4552static ssize_t
4553min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4554{
4555 unsigned long long min;
4556 int err;
4557
4558 if (kstrtoull(buf, 10, &min))
4559 return -EINVAL;
4560
4561 spin_lock(&mddev->lock);
4562 err = -EINVAL;
4563 if (min > mddev->resync_max)
4564 goto out_unlock;
4565
4566 err = -EBUSY;
4567 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4568 goto out_unlock;
4569
4570 /* Round down to a multiple of 4K for safety */
4571 mddev->resync_min = round_down(min, 8);
4572 err = 0;
4573
4574out_unlock:
4575 spin_unlock(&mddev->lock);
4576 return err ?: len;
4577}
4578
4579static struct md_sysfs_entry md_min_sync =
4580__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4581
4582static ssize_t
4583max_sync_show(struct mddev *mddev, char *page)
4584{
4585 if (mddev->resync_max == MaxSector)
4586 return sprintf(page, "max\n");
4587 else
4588 return sprintf(page, "%llu\n",
4589 (unsigned long long)mddev->resync_max);
4590}
4591static ssize_t
4592max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4593{
4594 int err;
4595 spin_lock(&mddev->lock);
4596 if (strncmp(buf, "max", 3) == 0)
4597 mddev->resync_max = MaxSector;
4598 else {
4599 unsigned long long max;
4600 int chunk;
4601
4602 err = -EINVAL;
4603 if (kstrtoull(buf, 10, &max))
4604 goto out_unlock;
4605 if (max < mddev->resync_min)
4606 goto out_unlock;
4607
4608 err = -EBUSY;
4609 if (max < mddev->resync_max &&
4610 mddev->ro == 0 &&
4611 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4612 goto out_unlock;
4613
4614 /* Must be a multiple of chunk_sectors */
4615 chunk = mddev->chunk_sectors;
4616 if (chunk) {
4617 sector_t temp = max;
4618
4619 err = -EINVAL;
4620 if (sector_div(temp, chunk))
4621 goto out_unlock;
4622 }
4623 mddev->resync_max = max;
4624 }
4625 wake_up(&mddev->recovery_wait);
4626 err = 0;
4627out_unlock:
4628 spin_unlock(&mddev->lock);
4629 return err ?: len;
4630}
4631
4632static struct md_sysfs_entry md_max_sync =
4633__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4634
4635static ssize_t
4636suspend_lo_show(struct mddev *mddev, char *page)
4637{
4638 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4639}
4640
4641static ssize_t
4642suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4643{
4644 unsigned long long old, new;
4645 int err;
4646
4647 err = kstrtoull(buf, 10, &new);
4648 if (err < 0)
4649 return err;
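 /* reject values that do not fit in sector_t */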
4650 if (new != (sector_t)new)
4651 return -EINVAL;
4652
4653 err = mddev_lock(mddev);
4654 if (err)
4655 return err;
4656 err = -EINVAL;
4657 if (mddev->pers == NULL ||
4658 mddev->pers->quiesce == NULL)
4659 goto unlock;
4660 old = mddev->suspend_lo;
4661 mddev->suspend_lo = new;
4662 if (new >= old)
4663 /* Shrinking suspended region */
4664 mddev->pers->quiesce(mddev, 2);
4665 else {
4666 /* Expanding suspended region - need to wait */
4667 mddev->pers->quiesce(mddev, 1);
4668 mddev->pers->quiesce(mddev, 0);
4669 }
4670 err = 0;
4671unlock:
4672 mddev_unlock(mddev);
4673 return err ?: len;
4674}
4675static struct md_sysfs_entry md_suspend_lo =
4676__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4677
4678static ssize_t
4679suspend_hi_show(struct mddev *mddev, char *page)
4680{
4681 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4682}
4683
4684static ssize_t
4685suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4686{
4687 unsigned long long old, new;
4688 int err;
4689
4690 err = kstrtoull(buf, 10, &new);
4691 if (err < 0)
4692 return err;
4693 if (new != (sector_t)new)
4694 return -EINVAL;
4695
4696 err = mddev_lock(mddev);
4697 if (err)
4698 return err;
4699 err = -EINVAL;
4700 if (mddev->pers == NULL ||
4701 mddev->pers->quiesce == NULL)
4702 goto unlock;
4703 old = mddev->suspend_hi;
4704 mddev->suspend_hi = new;
4705 if (new <= old)
4706 /* Shrinking suspended region */
4707 mddev->pers->quiesce(mddev, 2);
4708 else {
4709 /* Expanding suspended region - need to wait */
4710 mddev->pers->quiesce(mddev, 1);
4711 mddev->pers->quiesce(mddev, 0);
4712 }
4713 err = 0;
4714unlock:
4715 mddev_unlock(mddev);
4716 return err ?: len;
4717}
4718static struct md_sysfs_entry md_suspend_hi =
4719__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4720
4721static ssize_t
4722reshape_position_show(struct mddev *mddev, char *page)
4723{
4724 if (mddev->reshape_position != MaxSector)
4725 return sprintf(page, "%llu\n",
4726 (unsigned long long)mddev->reshape_position);
4727 strcpy(page, "none\n");
4728 return 5;
4729}
4730
4731static ssize_t
4732reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4733{
4734 struct md_rdev *rdev;
4735 unsigned long long new;
4736 int err;
4737
4738 err = kstrtoull(buf, 10, &new);
4739 if (err < 0)
4740 return err;
4741 if (new != (sector_t)new)
4742 return -EINVAL;
4743 err = mddev_lock(mddev);
4744 if (err)
4745 return err;
4746 err = -EBUSY;
4747 if (mddev->pers)
4748 goto unlock;
4749 mddev->reshape_position = new;
4750 mddev->delta_disks = 0;
4751 mddev->reshape_backwards = 0;
4752 mddev->new_level = mddev->level;
4753 mddev->new_layout = mddev->layout;
4754 mddev->new_chunk_sectors = mddev->chunk_sectors;
4755 rdev_for_each(rdev, mddev)
4756 rdev->new_data_offset = rdev->data_offset;
4757 err = 0;
4758unlock:
4759 mddev_unlock(mddev);
4760 return err ?: len;
4761}
4762
4763static struct md_sysfs_entry md_reshape_position =
4764__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4765 reshape_position_store);
4766
4767static ssize_t
4768reshape_direction_show(struct mddev *mddev, char *page)
4769{
4770 return sprintf(page, "%s\n",
4771 mddev->reshape_backwards ? "backwards" : "forwards");
4772}
4773
4774static ssize_t
4775reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4776{
4777 int backwards = 0;
4778 int err;
4779
4780 if (cmd_match(buf, "forwards"))
4781 backwards = 0;
4782 else if (cmd_match(buf, "backwards"))
4783 backwards = 1;
4784 else
4785 return -EINVAL;
4786 if (mddev->reshape_backwards == backwards)
4787 return len;
4788
4789 err = mddev_lock(mddev);
4790 if (err)
4791 return err;
4792
4793 if (mddev->delta_disks)
4794 err = -EBUSY;
4795 else if (mddev->persistent &&
4796 mddev->major_version == 0)
4797 err = -EINVAL;
4798 else
4799 mddev->reshape_backwards = backwards;
4800 mddev_unlock(mddev);
4801 return err ?: len;
4802}
4803
4804static struct md_sysfs_entry md_reshape_direction =
4805__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4806 reshape_direction_store);
4807
4808static ssize_t
4809array_size_show(struct mddev *mddev, char *page)
4810{
4811 if (mddev->external_size)
4812 return sprintf(page, "%llu\n",
4813 (unsigned long long)mddev->array_sectors/2);
4814 else
4815 return sprintf(page, "default\n");
4816}
4817
4818static ssize_t
4819array_size_store(struct mddev *mddev, const char *buf, size_t len)
4820{
4821 sector_t sectors;
4822 int err;
4823
4824 err = mddev_lock(mddev);
4825 if (err)
4826 return err;
4827
4828 /* cluster raid doesn't support changing array_sectors */
4829 if (mddev_is_clustered(mddev)) {
4830 mddev_unlock(mddev);
 return -EINVAL;
 }
4831
4832 if (strncmp(buf, "default", 7) == 0) {
4833 if (mddev->pers)
4834 sectors = mddev->pers->size(mddev, 0, 0);
4835 else
4836 sectors = mddev->array_sectors;
4837
4838 mddev->external_size = 0;
4839 } else {
4840 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4841 err = -EINVAL;
4842 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4843 err = -E2BIG;
4844 else
4845 mddev->external_size = 1;
4846 }
4847
4848 if (!err) {
4849 mddev->array_sectors = sectors;
4850 if (mddev->pers) {
4851 set_capacity(mddev->gendisk, mddev->array_sectors);
4852 revalidate_disk(mddev->gendisk);
4853 }
4854 }
4855 mddev_unlock(mddev);
4856 return err ?: len;
4857}
4858
4859static struct md_sysfs_entry md_array_size =
4860__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4861 array_size_store);
4862
4863static struct attribute *md_default_attrs[] = {
4864 &md_level.attr,
4865 &md_layout.attr,
4866 &md_raid_disks.attr,
4867 &md_chunk_size.attr,
4868 &md_size.attr,
4869 &md_resync_start.attr,
4870 &md_metadata.attr,
4871 &md_new_device.attr,
4872 &md_safe_delay.attr,
4873 &md_array_state.attr,
4874 &md_reshape_position.attr,
4875 &md_reshape_direction.attr,
4876 &md_array_size.attr,
4877 &max_corr_read_errors.attr,
4878 NULL,
4879};
4880
4881static struct attribute *md_redundancy_attrs[] = {
4882 &md_scan_mode.attr,
4883 &md_last_scan_mode.attr,
4884 &md_mismatches.attr,
4885 &md_sync_min.attr,
4886 &md_sync_max.attr,
4887 &md_sync_speed.attr,
4888 &md_sync_force_parallel.attr,
4889 &md_sync_completed.attr,
4890 &md_min_sync.attr,
4891 &md_max_sync.attr,
4892 &md_suspend_lo.attr,
4893 &md_suspend_hi.attr,
4894 &md_bitmap.attr,
4895 &md_degraded.attr,
4896 NULL,
4897};
4898static struct attribute_group md_redundancy_group = {
4899 .name = NULL,
4900 .attrs = md_redundancy_attrs,
4901};
4902
4903static ssize_t
4904md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4905{
4906 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4907 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4908 ssize_t rv;
4909
4910 if (!entry->show)
4911 return -EIO;
4912 spin_lock(&all_mddevs_lock);
4913 if (list_empty(&mddev->all_mddevs)) {
4914 spin_unlock(&all_mddevs_lock);
4915 return -EBUSY;
4916 }
4917 mddev_get(mddev);
4918 spin_unlock(&all_mddevs_lock);
4919
4920 rv = entry->show(mddev, page);
4921 mddev_put(mddev);
4922 return rv;
4923}
4924
4925static ssize_t
4926md_attr_store(struct kobject *kobj, struct attribute *attr,
4927 const char *page, size_t length)
4928{
4929 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4930 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4931 ssize_t rv;
4932
4933 if (!entry->store)
4934 return -EIO;
4935 if (!capable(CAP_SYS_ADMIN))
4936 return -EACCES;
4937 spin_lock(&all_mddevs_lock);
4938 if (list_empty(&mddev->all_mddevs)) {
4939 spin_unlock(&all_mddevs_lock);
4940 return -EBUSY;
4941 }
4942 mddev_get(mddev);
4943 spin_unlock(&all_mddevs_lock);
4944 rv = entry->store(mddev, page, length);
4945 mddev_put(mddev);
4946 return rv;
4947}
4948
4949static void md_free(struct kobject *ko)
4950{
4951 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4952
4953 if (mddev->sysfs_state)
4954 sysfs_put(mddev->sysfs_state);
4955
4956 if (mddev->queue)
4957 blk_cleanup_queue(mddev->queue);
4958 if (mddev->gendisk) {
4959 del_gendisk(mddev->gendisk);
4960 put_disk(mddev->gendisk);
4961 }
4962
4963 kfree(mddev);
4964}
4965
4966static const struct sysfs_ops md_sysfs_ops = {
4967 .show = md_attr_show,
4968 .store = md_attr_store,
4969};
4970static struct kobj_type md_ktype = {
4971 .release = md_free,
4972 .sysfs_ops = &md_sysfs_ops,
4973 .default_attrs = md_default_attrs,
4974};
4975
4976int mdp_major = 0;
4977
4978static void mddev_delayed_delete(struct work_struct *ws)
4979{
4980 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4981
4982 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4983 kobject_del(&mddev->kobj);
4984 kobject_put(&mddev->kobj);
4985}
4986
4987static int md_alloc(dev_t dev, char *name)
4988{
4989 static DEFINE_MUTEX(disks_mutex);
4990 struct mddev *mddev = mddev_find(dev);
4991 struct gendisk *disk;
4992 int partitioned;
4993 int shift;
4994 int unit;
4995 int error;
4996
4997 if (!mddev)
4998 return -ENODEV;
4999
5000 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5001 shift = partitioned ? MdpMinorShift : 0;
5002 unit = MINOR(mddev->unit) >> shift;
5003
5004 /* wait for any previous instance of this device to be
5005 * completely removed (mddev_delayed_delete).
5006 */
5007 flush_workqueue(md_misc_wq);
5008
5009 mutex_lock(&disks_mutex);
5010 error = -EEXIST;
5011 if (mddev->gendisk)
5012 goto abort;
5013
5014 if (name) {
5015 /* Need to ensure that 'name' is not a duplicate.
5016 */
5017 struct mddev *mddev2;
5018 spin_lock(&all_mddevs_lock);
5019
5020 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5021 if (mddev2->gendisk &&
5022 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5023 spin_unlock(&all_mddevs_lock);
5024 goto abort;
5025 }
5026 spin_unlock(&all_mddevs_lock);
5027 }
5028
5029 error = -ENOMEM;
5030 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5031 if (!mddev->queue)
5032 goto abort;
5033 mddev->queue->queuedata = mddev;
5034
5035 blk_queue_make_request(mddev->queue, md_make_request);
5036 blk_set_stacking_limits(&mddev->queue->limits);
5037
5038 disk = alloc_disk(1 << shift);
5039 if (!disk) {
5040 blk_cleanup_queue(mddev->queue);
5041 mddev->queue = NULL;
5042 goto abort;
5043 }
5044 disk->major = MAJOR(mddev->unit);
5045 disk->first_minor = unit << shift;
5046 if (name)
5047 strcpy(disk->disk_name, name);
5048 else if (partitioned)
5049 sprintf(disk->disk_name, "md_d%d", unit);
5050 else
5051 sprintf(disk->disk_name, "md%d", unit);
5052 disk->fops = &md_fops;
5053 disk->private_data = mddev;
5054 disk->queue = mddev->queue;
5055 blk_queue_write_cache(mddev->queue, true, true);
5056 /* Allow extended partitions.  This makes the
5057 * 'mdp' device redundant, but we can't really
5058 * remove it now.
5059 */
5060 disk->flags |= GENHD_FL_EXT_DEVT;
5061 mddev->gendisk = disk;
5062
5063
5064
5065 mutex_lock(&mddev->open_mutex);
5066 add_disk(disk);
5067
5068 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5069 &disk_to_dev(disk)->kobj, "%s", "md");
5070 if (error) {
5071 /* This isn't possible, but as kobject_init_and_add is marked
5072 * __must_check, we must do something with the result
5073 */
5074 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
5075 disk->disk_name);
5076 error = 0;
5077 }
5078 if (mddev->kobj.sd &&
5079 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5080 printk(KERN_DEBUG "pointless warning\n");
5081 mutex_unlock(&mddev->open_mutex);
5082 abort:
5083 mutex_unlock(&disks_mutex);
5084 if (!error && mddev->kobj.sd) {
5085 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5086 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5087 }
5088 mddev_put(mddev);
5089 return error;
5090}
5091
5092static struct kobject *md_probe(dev_t dev, int *part, void *data)
5093{
5094 md_alloc(dev, NULL);
5095 return NULL;
5096}
5097
5098static int add_named_array(const char *val, struct kernel_param *kp)
5099{
5100
5101
5102
5103
5104 int len = strlen(val);
5105 char buf[DISK_NAME_LEN];
5106
5107 while (len && val[len-1] == '\n')
5108 len--;
5109 if (len >= DISK_NAME_LEN)
5110 return -E2BIG;
5111 strlcpy(buf, val, len+1);
5112 if (strncmp(buf, "md_", 3) != 0)
5113 return -EINVAL;
5114 return md_alloc(0, buf);
5115}
5116
5117static void md_safemode_timeout(unsigned long data)
5118{
5119 struct mddev *mddev = (struct mddev *) data;
5120
5121 if (!atomic_read(&mddev->writes_pending)) {
5122 mddev->safemode = 1;
5123 if (mddev->external)
5124 sysfs_notify_dirent_safe(mddev->sysfs_state);
5125 }
5126 md_wakeup_thread(mddev->thread);
5127}
5128
5129static int start_dirty_degraded;
5130
5131int md_run(struct mddev *mddev)
5132{
5133 int err;
5134 struct md_rdev *rdev;
5135 struct md_personality *pers;
5136
5137 if (list_empty(&mddev->disks))
5138 /* cannot run an array with no devices */
5139 return -EINVAL;
5140
5141 if (mddev->pers)
5142 return -EBUSY;
5143
5144 if (mddev->sysfs_active)
5145 return -EBUSY;
5146
5147 /*
5148 * Analyze all RAID superblock(s)
5149 */
5150 if (!mddev->raid_disks) {
5151 if (!mddev->persistent)
5152 return -EINVAL;
5153 analyze_sbs(mddev);
5154 }
5155
5156 if (mddev->level != LEVEL_NONE)
5157 request_module("md-level-%d", mddev->level);
5158 else if (mddev->clevel[0])
5159 request_module("md-%s", mddev->clevel);
5160
5161 /*
5162 * Drop all container device buffers, from now on
5163 * the only valid external interface is through the md
5164 * device.
5165 */
5166 rdev_for_each(rdev, mddev) {
5167 if (test_bit(Faulty, &rdev->flags))
5168 continue;
5169 sync_blockdev(rdev->bdev);
5170 invalidate_bdev(rdev->bdev);
5171
5172 /* perform some consistency tests on the device.
5173 * We don't want the data to overlap the metadata,
5174 * and we don't want the metadata to overlap the data.
5175 */
5176 if (rdev->meta_bdev) {
5177 ;
5178 } else if (rdev->data_offset < rdev->sb_start) {
5179 if (mddev->dev_sectors &&
5180 rdev->data_offset + mddev->dev_sectors
5181 > rdev->sb_start) {
5182 printk("md: %s: data overlaps metadata\n",
5183 mdname(mddev));
5184 return -EINVAL;
5185 }
5186 } else {
5187 if (rdev->sb_start + rdev->sb_size/512
5188 > rdev->data_offset) {
5189 printk("md: %s: metadata overlaps data\n",
5190 mdname(mddev));
5191 return -EINVAL;
5192 }
5193 }
5194 sysfs_notify_dirent_safe(rdev->sysfs_state);
5195 }
5196
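 /* lazily allocate the per-array bio set used by bio_alloc_mddev() */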
5197 if (mddev->bio_set == NULL)
5198 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5199
5200 spin_lock(&pers_lock);
5201 pers = find_pers(mddev->level, mddev->clevel);
5202 if (!pers || !try_module_get(pers->owner)) {
5203 spin_unlock(&pers_lock);
5204 if (mddev->level != LEVEL_NONE)
5205 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5206 mddev->level);
5207 else
5208 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5209 mddev->clevel);
5210 return -EINVAL;
5211 }
5212 spin_unlock(&pers_lock);
5213 if (mddev->level != pers->level) {
5214 mddev->level = pers->level;
5215 mddev->new_level = pers->level;
5216 }
5217 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5218
5219 if (mddev->reshape_position != MaxSector &&
5220 pers->start_reshape == NULL) {
5221 /* This personality cannot handle reshaping... */
5222 module_put(pers->owner);
5223 return -EINVAL;
5224 }
5225
5226 if (pers->sync_request) {
5227 /* Warn if this is a potentially silly
5228 * configuration.
5229 */
5230 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5231 struct md_rdev *rdev2;
5232 int warned = 0;
5233
5234 rdev_for_each(rdev, mddev)
5235 rdev_for_each(rdev2, mddev) {
5236 if (rdev < rdev2 &&
5237 rdev->bdev->bd_contains ==
5238 rdev2->bdev->bd_contains) {
5239 printk(KERN_WARNING
5240 "%s: WARNING: %s appears to be"
5241 " on the same physical disk as"
5242 " %s.\n",
5243 mdname(mddev),
5244 bdevname(rdev->bdev,b),
5245 bdevname(rdev2->bdev,b2));
5246 warned = 1;
5247 }
5248 }
5249
5250 if (warned)
5251 printk(KERN_WARNING
5252 "True protection against single-disk"
5253 " failure might be compromised.\n");
5254 }
5255
5256 mddev->recovery = 0;
5257
5258 mddev->resync_max_sectors = mddev->dev_sectors;
5259
5260 mddev->ok_start_degraded = start_dirty_degraded;
5261
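 /* ro == 2 is "auto-read-only": the array switches to read-write
  * on the first write request.
  */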
5262 if (start_readonly && mddev->ro == 0)
5263 mddev->ro = 2;
5264
5265 err = pers->run(mddev);
5266 if (err)
5267 printk(KERN_ERR "md: pers->run() failed ...\n");
5268 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5269 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5270 " but 'external_size' not in effect?\n", __func__);
5271 printk(KERN_ERR
5272 "md: invalid array_size %llu > default size %llu\n",
5273 (unsigned long long)mddev->array_sectors / 2,
5274 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5275 err = -EINVAL;
5276 }
5277 if (err == 0 && pers->sync_request &&
5278 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5279 struct bitmap *bitmap;
5280
5281 bitmap = bitmap_create(mddev, -1);
5282 if (IS_ERR(bitmap)) {
5283 err = PTR_ERR(bitmap);
5284 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5285 mdname(mddev), err);
5286 } else
5287 mddev->bitmap = bitmap;
5288
5289 }
5290 if (err) {
5291 mddev_detach(mddev);
5292 if (mddev->private)
5293 pers->free(mddev, mddev->private);
5294 mddev->private = NULL;
5295 module_put(pers->owner);
5296 bitmap_destroy(mddev);
5297 return err;
5298 }
5299 if (mddev->queue) {
5300 mddev->queue->backing_dev_info.congested_data = mddev;
5301 mddev->queue->backing_dev_info.congested_fn = md_congested;
5302 }
5303 if (pers->sync_request) {
5304 if (mddev->kobj.sd &&
5305 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5306 printk(KERN_WARNING
5307 "md: cannot register extra attributes for %s\n",
5308 mdname(mddev));
5309 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5310 } else if (mddev->ro == 2)
5311 mddev->ro = 0;
5312
5313 atomic_set(&mddev->writes_pending,0);
5314 atomic_set(&mddev->max_corr_read_errors,
5315 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5316 mddev->safemode = 0;
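 /* clustered arrays never use a safemode delay; others default to ~200ms */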
5317 if (mddev_is_clustered(mddev))
5318 mddev->safemode_delay = 0;
5319 else
5320 mddev->safemode_delay = (200 * HZ)/1000 +1;
5321 mddev->in_sync = 1;
5322 smp_wmb();
5323 spin_lock(&mddev->lock);
5324 mddev->pers = pers;
5325 spin_unlock(&mddev->lock);
5326 rdev_for_each(rdev, mddev)
5327 if (rdev->raid_disk >= 0)
5328 if (sysfs_link_rdev(mddev, rdev))
5329 /* failure here is OK */;
5330
5331 if (mddev->degraded && !mddev->ro)
5332 /* This ensures that recovering status is reported immediately
5333 * via sysfs - until a lack of spares is confirmed.
5334 */
5335 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5336 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5337
5338 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5339 md_update_sb(mddev, 0);
5340
5341 md_new_event(mddev);
5342 sysfs_notify_dirent_safe(mddev->sysfs_state);
5343 sysfs_notify_dirent_safe(mddev->sysfs_action);
5344 sysfs_notify(&mddev->kobj, NULL, "degraded");
5345 return 0;
5346}
5347EXPORT_SYMBOL_GPL(md_run);
5348
5349static int do_md_run(struct mddev *mddev)
5350{
5351 int err;
5352
5353 err = md_run(mddev);
5354 if (err)
5355 goto out;
5356 err = bitmap_load(mddev);
5357 if (err) {
5358 bitmap_destroy(mddev);
5359 goto out;
5360 }
5361
5362 if (mddev_is_clustered(mddev))
5363 md_allow_write(mddev);
5364
5365 md_wakeup_thread(mddev->thread);
5366 md_wakeup_thread(mddev->sync_thread);
5367
5368 set_capacity(mddev->gendisk, mddev->array_sectors);
5369 revalidate_disk(mddev->gendisk);
5370 mddev->changed = 1;
5371 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5372out:
5373 return err;
5374}
5375
5376static int restart_array(struct mddev *mddev)
5377{
5378 struct gendisk *disk = mddev->gendisk;
5379
5380 /* Complain if it has no devices */
5381 if (list_empty(&mddev->disks))
5382 return -ENXIO;
5383 if (!mddev->pers)
5384 return -EINVAL;
5385 if (!mddev->ro)
5386 return -EBUSY;
5387 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5388 struct md_rdev *rdev;
5389 bool has_journal = false;
5390
5391 rcu_read_lock();
5392 rdev_for_each_rcu(rdev, mddev) {
5393 if (test_bit(Journal, &rdev->flags) &&
5394 !test_bit(Faulty, &rdev->flags)) {
5395 has_journal = true;
5396 break;
5397 }
5398 }
5399 rcu_read_unlock();
5400
5401 /* Don't restart rw with journal missing/faulty */
5402 if (!has_journal)
5403 return -EINVAL;
5404 }
5405
5406 mddev->safemode = 0;
5407 mddev->ro = 0;
5408 set_disk_ro(disk, 0);
5409 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5410 mdname(mddev));
5411
5412 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5413 md_wakeup_thread(mddev->thread);
5414 md_wakeup_thread(mddev->sync_thread);
5415 sysfs_notify_dirent_safe(mddev->sysfs_state);
5416 return 0;
5417}
5418
5419static void md_clean(struct mddev *mddev)
5420{
5421 mddev->array_sectors = 0;
5422 mddev->external_size = 0;
5423 mddev->dev_sectors = 0;
5424 mddev->raid_disks = 0;
5425 mddev->recovery_cp = 0;
5426 mddev->resync_min = 0;
5427 mddev->resync_max = MaxSector;
5428 mddev->reshape_position = MaxSector;
5429 mddev->external = 0;
5430 mddev->persistent = 0;
5431 mddev->level = LEVEL_NONE;
5432 mddev->clevel[0] = 0;
5433 mddev->flags = 0;
5434 mddev->ro = 0;
5435 mddev->metadata_type[0] = 0;
5436 mddev->chunk_sectors = 0;
5437 mddev->ctime = mddev->utime = 0;
5438 mddev->layout = 0;
5439 mddev->max_disks = 0;
5440 mddev->events = 0;
5441 mddev->can_decrease_events = 0;
5442 mddev->delta_disks = 0;
5443 mddev->reshape_backwards = 0;
5444 mddev->new_level = LEVEL_NONE;
5445 mddev->new_layout = 0;
5446 mddev->new_chunk_sectors = 0;
5447 mddev->curr_resync = 0;
5448 atomic64_set(&mddev->resync_mismatches, 0);
5449 mddev->suspend_lo = mddev->suspend_hi = 0;
5450 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5451 mddev->recovery = 0;
5452 mddev->in_sync = 0;
5453 mddev->changed = 0;
5454 mddev->degraded = 0;
5455 mddev->safemode = 0;
5456 mddev->private = NULL;
5457 mddev->bitmap_info.offset = 0;
5458 mddev->bitmap_info.default_offset = 0;
5459 mddev->bitmap_info.default_space = 0;
5460 mddev->bitmap_info.chunksize = 0;
5461 mddev->bitmap_info.daemon_sleep = 0;
5462 mddev->bitmap_info.max_write_behind = 0;
5463}
5464
5465static void __md_stop_writes(struct mddev *mddev)
5466{
5467 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5468 flush_workqueue(md_misc_wq);
5469 if (mddev->sync_thread) {
5470 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5471 md_reap_sync_thread(mddev);
5472 }
5473
5474 del_timer_sync(&mddev->safemode_timer);
5475
5476 bitmap_flush(mddev);
5477 md_super_wait(mddev);
5478
5479 if (mddev->ro == 0 &&
5480 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5481 (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5482 /* mark array as shutdown cleanly */
5483 if (!mddev_is_clustered(mddev))
5484 mddev->in_sync = 1;
5485 md_update_sb(mddev, 1);
5486 }
5487}
5488
5489void md_stop_writes(struct mddev *mddev)
5490{
5491 mddev_lock_nointr(mddev);
5492 __md_stop_writes(mddev);
5493 mddev_unlock(mddev);
5494}
5495EXPORT_SYMBOL_GPL(md_stop_writes);
5496
5497static void mddev_detach(struct mddev *mddev)
5498{
5499 struct bitmap *bitmap = mddev->bitmap;
5500
5501 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5502 printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n",
5503 mdname(mddev));
5504
5505 wait_event(bitmap->behind_wait,
5506 atomic_read(&bitmap->behind_writes) == 0);
5507 }
5508 if (mddev->pers && mddev->pers->quiesce) {
5509 mddev->pers->quiesce(mddev, 1);
5510 mddev->pers->quiesce(mddev, 0);
5511 }
5512 md_unregister_thread(&mddev->thread);
5513 if (mddev->queue)
5514 blk_sync_queue(mddev->queue);
5515}
5516
5517static void __md_stop(struct mddev *mddev)
5518{
5519 struct md_personality *pers = mddev->pers;
5520 mddev_detach(mddev);
5521
5522 flush_workqueue(md_misc_wq);
5523 spin_lock(&mddev->lock);
5524 mddev->pers = NULL;
5525 spin_unlock(&mddev->lock);
5526 pers->free(mddev, mddev->private);
5527 mddev->private = NULL;
5528 if (pers->sync_request && mddev->to_remove == NULL)
5529 mddev->to_remove = &md_redundancy_group;
5530 module_put(pers->owner);
5531 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5532}
5533
5534void md_stop(struct mddev *mddev)
5535{
5536 /* stop the array and free any attached data structures.
5537 * This is called from dm-raid
5538 */
5539 __md_stop(mddev);
5540 bitmap_destroy(mddev);
5541 if (mddev->bio_set)
5542 bioset_free(mddev->bio_set);
5543}
5544
5545EXPORT_SYMBOL_GPL(md_stop);
5546
5547static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5548{
5549 int err = 0;
5550 int did_freeze = 0;
5551
5552 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5553 did_freeze = 1;
5554 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5555 md_wakeup_thread(mddev->thread);
5556 }
5557 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5558 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5559 if (mddev->sync_thread)
5560 /* Thread might be blocked waiting for metadata update
5561 * which will now never happen */
5562 wake_up_process(mddev->sync_thread->tsk);
5563
5564 if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
5565 return -EBUSY;
5566 mddev_unlock(mddev);
5567 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5568 &mddev->recovery));
5569 wait_event(mddev->sb_wait,
5570 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5571 mddev_lock_nointr(mddev);
5572
5573 mutex_lock(&mddev->open_mutex);
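 /* refuse if anyone other than the caller has the array open */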
5574 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5575 mddev->sync_thread ||
5576 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5577 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5578 printk("md: %s still in use.\n",mdname(mddev));
5579 if (did_freeze) {
5580 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5581 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5582 md_wakeup_thread(mddev->thread);
5583 }
5584 err = -EBUSY;
5585 goto out;
5586 }
5587 if (mddev->pers) {
5588 __md_stop_writes(mddev);
5589
5590 err = -ENXIO;
5591 if (mddev->ro==1)
5592 goto out;
5593 mddev->ro = 1;
5594 set_disk_ro(mddev->gendisk, 1);
5595 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5596 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5597 md_wakeup_thread(mddev->thread);
5598 sysfs_notify_dirent_safe(mddev->sysfs_state);
5599 err = 0;
5600 }
5601out:
5602 mutex_unlock(&mddev->open_mutex);
5603 return err;
5604}
5605
5606/* mode:
5607 *   0 - completely stop and dis-assemble array
5608 *   2 - stop but do not disassemble array
5609 */
5610static int do_md_stop(struct mddev *mddev, int mode,
5611 struct block_device *bdev)
5612{
5613 struct gendisk *disk = mddev->gendisk;
5614 struct md_rdev *rdev;
5615 int did_freeze = 0;
5616
5617 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5618 did_freeze = 1;
5619 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5620 md_wakeup_thread(mddev->thread);
5621 }
5622 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5623 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5624 if (mddev->sync_thread)
5625 /* Thread might be blocked waiting for metadata update
5626 * which will now never happen */
5627 wake_up_process(mddev->sync_thread->tsk);
5628
5629 mddev_unlock(mddev);
5630 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5631 !test_bit(MD_RECOVERY_RUNNING,
5632 &mddev->recovery)));
5633 mddev_lock_nointr(mddev);
5634
5635 mutex_lock(&mddev->open_mutex);
5636 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5637 mddev->sysfs_active ||
5638 mddev->sync_thread ||
5639 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
5640 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5641 printk("md: %s still in use.\n",mdname(mddev));
5642 mutex_unlock(&mddev->open_mutex);
5643 if (did_freeze) {
5644 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5645 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5646 md_wakeup_thread(mddev->thread);
5647 }
5648 return -EBUSY;
5649 }
5650 if (mddev->pers) {
5651 if (mddev->ro)
5652 set_disk_ro(disk, 0);
5653
5654 __md_stop_writes(mddev);
5655 __md_stop(mddev);
5656 mddev->queue->backing_dev_info.congested_fn = NULL;
5657
5658 /* tell userspace to handle 'inactive' */
5659 sysfs_notify_dirent_safe(mddev->sysfs_state);
5660
5661 rdev_for_each(rdev, mddev)
5662 if (rdev->raid_disk >= 0)
5663 sysfs_unlink_rdev(mddev, rdev);
5664
5665 set_capacity(disk, 0);
5666 mutex_unlock(&mddev->open_mutex);
5667 mddev->changed = 1;
5668 revalidate_disk(disk);
5669
5670 if (mddev->ro)
5671 mddev->ro = 0;
5672 } else
5673 mutex_unlock(&mddev->open_mutex);
5674
5675
5676 /* free resources if this is the final stop (mode 0) */
5677 if (mode == 0) {
5678 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5679
5680 bitmap_destroy(mddev);
5681 if (mddev->bitmap_info.file) {
5682 struct file *f = mddev->bitmap_info.file;
5683 spin_lock(&mddev->lock);
5684 mddev->bitmap_info.file = NULL;
5685 spin_unlock(&mddev->lock);
5686 fput(f);
5687 }
5688 mddev->bitmap_info.offset = 0;
5689
5690 export_array(mddev);
5691
5692 md_clean(mddev);
5693 if (mddev->hold_active == UNTIL_STOP)
5694 mddev->hold_active = 0;
5695 }
5696 md_new_event(mddev);
5697 sysfs_notify_dirent_safe(mddev->sysfs_state);
5698 return 0;
5699}
5700
5701#ifndef MODULE
5702static void autorun_array(struct mddev *mddev)
5703{
5704 struct md_rdev *rdev;
5705 int err;
5706
5707 if (list_empty(&mddev->disks))
5708 return;
5709
5710 printk(KERN_INFO "md: running: ");
5711
5712 rdev_for_each(rdev, mddev) {
5713 char b[BDEVNAME_SIZE];
5714 printk("<%s>", bdevname(rdev->bdev,b));
5715 }
5716 printk("\n");
5717
5718 err = do_md_run(mddev);
5719 if (err) {
5720 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5721 do_md_stop(mddev, 0, NULL);
5722 }
5723}
5724
5725/*
5726 * lets try to run arrays based on all disks that have arrived
5727 * until now. (those are in pending_raid_disks)
5728 *
5729 * the method: pick the first pending disk, collect all other pending
5730 * disks whose superblock matches it, move them onto a 'candidates'
5731 * list, allocate the corresponding mddev, bind the candidates to it
5732 * and run the array.  Anything left on the candidates list after a
5733 * failure is exported again.
5734 *
5735 * repeat until the pending list is empty.
5736 */
5737static void autorun_devices(int part)
5738{
5739 struct md_rdev *rdev0, *rdev, *tmp;
5740 struct mddev *mddev;
5741 char b[BDEVNAME_SIZE];
5742
5743 printk(KERN_INFO "md: autorun ...\n");
5744 while (!list_empty(&pending_raid_disks)) {
5745 int unit;
5746 dev_t dev;
5747 LIST_HEAD(candidates);
5748 rdev0 = list_entry(pending_raid_disks.next,
5749 struct md_rdev, same_set);
5750
5751 printk(KERN_INFO "md: considering %s ...\n",
5752 bdevname(rdev0->bdev,b));
5753 INIT_LIST_HEAD(&candidates);
5754 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5755 if (super_90_load(rdev, rdev0, 0) >= 0) {
5756 printk(KERN_INFO "md: adding %s ...\n",
5757 bdevname(rdev->bdev,b));
5758 list_move(&rdev->same_set, &candidates);
5759 }
5760
5761 /* now we have a set of devices, with all of them having
5762 * mostly sane superblocks. It's time to allocate the
5763 * mddev.
5764 */
5765 if (part) {
5766 dev = MKDEV(mdp_major,
5767 rdev0->preferred_minor << MdpMinorShift);
5768 unit = MINOR(dev) >> MdpMinorShift;
5769 } else {
5770 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5771 unit = MINOR(dev);
5772 }
5773 if (rdev0->preferred_minor != unit) {
5774 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5775 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5776 break;
5777 }
5778
5779 md_probe(dev, NULL, NULL);
5780 mddev = mddev_find(dev);
5781 if (!mddev || !mddev->gendisk) {
5782 if (mddev)
5783 mddev_put(mddev);
5784 printk(KERN_ERR
5785 "md: cannot allocate memory for md drive.\n");
5786 break;
5787 }
5788 if (mddev_lock(mddev))
5789 printk(KERN_WARNING "md: %s locked, cannot run\n",
5790 mdname(mddev));
5791 else if (mddev->raid_disks || mddev->major_version
5792 || !list_empty(&mddev->disks)) {
5793 printk(KERN_WARNING
5794 "md: %s already running, cannot run %s\n",
5795 mdname(mddev), bdevname(rdev0->bdev,b));
5796 mddev_unlock(mddev);
5797 } else {
5798 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5799 mddev->persistent = 1;
5800 rdev_for_each_list(rdev, tmp, &candidates) {
5801 list_del_init(&rdev->same_set);
5802 if (bind_rdev_to_array(rdev, mddev))
5803 export_rdev(rdev);
5804 }
5805 autorun_array(mddev);
5806 mddev_unlock(mddev);
5807 }
5808 /* on success, candidates will be empty, on error
5809 * it won't...
5810 */
5811 rdev_for_each_list(rdev, tmp, &candidates) {
5812 list_del_init(&rdev->same_set);
5813 export_rdev(rdev);
5814 }
5815 mddev_put(mddev);
5816 }
5817 printk(KERN_INFO "md: ... autorun DONE.\n");
5818}
5819#endif
5820
5821static int get_version(void __user *arg)
5822{
5823 mdu_version_t ver;
5824
5825 ver.major = MD_MAJOR_VERSION;
5826 ver.minor = MD_MINOR_VERSION;
5827 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5828
5829 if (copy_to_user(arg, &ver, sizeof(ver)))
5830 return -EFAULT;
5831
5832 return 0;
5833}
5834
5835static int get_array_info(struct mddev *mddev, void __user *arg)
5836{
5837 mdu_array_info_t info;
5838 int nr,working,insync,failed,spare;
5839 struct md_rdev *rdev;
5840
5841 nr = working = insync = failed = spare = 0;
5842 rcu_read_lock();
5843 rdev_for_each_rcu(rdev, mddev) {
5844 nr++;
5845 if (test_bit(Faulty, &rdev->flags))
5846 failed++;
5847 else {
5848 working++;
5849 if (test_bit(In_sync, &rdev->flags))
5850 insync++;
5851 else if (test_bit(Journal, &rdev->flags))
5852
5853 ;
5854 else
5855 spare++;
5856 }
5857 }
5858 rcu_read_unlock();
5859
5860 info.major_version = mddev->major_version;
5861 info.minor_version = mddev->minor_version;
5862 info.patch_version = MD_PATCHLEVEL_VERSION;
5863 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5864 info.level = mddev->level;
5865 info.size = mddev->dev_sectors / 2;
5866 if (info.size != mddev->dev_sectors / 2)
5867 info.size = -1;
5868 info.nr_disks = nr;
5869 info.raid_disks = mddev->raid_disks;
5870 info.md_minor = mddev->md_minor;
5871 info.not_persistent= !mddev->persistent;
5872
5873 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5874 info.state = 0;
5875 if (mddev->in_sync)
5876 info.state = (1<<MD_SB_CLEAN);
5877 if (mddev->bitmap && mddev->bitmap_info.offset)
5878 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5879 if (mddev_is_clustered(mddev))
5880 info.state |= (1<<MD_SB_CLUSTERED);
5881 info.active_disks = insync;
5882 info.working_disks = working;
5883 info.failed_disks = failed;
5884 info.spare_disks = spare;
5885
5886 info.layout = mddev->layout;
5887 info.chunk_size = mddev->chunk_sectors << 9;
5888
5889 if (copy_to_user(arg, &info, sizeof(info)))
5890 return -EFAULT;
5891
5892 return 0;
5893}
5894
5895static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5896{
5897 mdu_bitmap_file_t *file = NULL;
5898 char *ptr;
5899 int err;
5900
5901 file = kzalloc(sizeof(*file), GFP_NOIO);
5902 if (!file)
5903 return -ENOMEM;
5904
5905 err = 0;
5906 spin_lock(&mddev->lock);
5907
5908 if (mddev->bitmap_info.file) {
5909 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5910 sizeof(file->pathname));
5911 if (IS_ERR(ptr))
5912 err = PTR_ERR(ptr);
5913 else
5914 memmove(file->pathname, ptr,
5915 sizeof(file->pathname)-(ptr-file->pathname));
5916 }
5917 spin_unlock(&mddev->lock);
5918
5919 if (err == 0 &&
5920 copy_to_user(arg, file, sizeof(*file)))
5921 err = -EFAULT;
5922
5923 kfree(file);
5924 return err;
5925}
5926
5927static int get_disk_info(struct mddev *mddev, void __user * arg)
5928{
5929 mdu_disk_info_t info;
5930 struct md_rdev *rdev;
5931
5932 if (copy_from_user(&info, arg, sizeof(info)))
5933 return -EFAULT;
5934
5935 rcu_read_lock();
5936 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5937 if (rdev) {
5938 info.major = MAJOR(rdev->bdev->bd_dev);
5939 info.minor = MINOR(rdev->bdev->bd_dev);
5940 info.raid_disk = rdev->raid_disk;
5941 info.state = 0;
5942 if (test_bit(Faulty, &rdev->flags))
5943 info.state |= (1<<MD_DISK_FAULTY);
5944 else if (test_bit(In_sync, &rdev->flags)) {
5945 info.state |= (1<<MD_DISK_ACTIVE);
5946 info.state |= (1<<MD_DISK_SYNC);
5947 }
5948 if (test_bit(Journal, &rdev->flags))
5949 info.state |= (1<<MD_DISK_JOURNAL);
5950 if (test_bit(WriteMostly, &rdev->flags))
5951 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5952 } else {
5953 info.major = info.minor = 0;
5954 info.raid_disk = -1;
5955 info.state = (1<<MD_DISK_REMOVED);
5956 }
5957 rcu_read_unlock();
5958
5959 if (copy_to_user(arg, &info, sizeof(info)))
5960 return -EFAULT;
5961
5962 return 0;
5963}
5964
5965static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5966{
5967 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5968 struct md_rdev *rdev;
5969 dev_t dev = MKDEV(info->major,info->minor);
5970
5971 if (mddev_is_clustered(mddev) &&
5972 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
5973 pr_err("%s: Cannot add to clustered mddev.\n",
5974 mdname(mddev));
5975 return -EINVAL;
5976 }
5977
5978 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5979 return -EOVERFLOW;
5980
5981 if (!mddev->raid_disks) {
5982 int err;
5983
5984 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5985 if (IS_ERR(rdev)) {
5986 printk(KERN_WARNING
5987 "md: md_import_device returned %ld\n",
5988 PTR_ERR(rdev));
5989 return PTR_ERR(rdev);
5990 }
5991 if (!list_empty(&mddev->disks)) {
5992 struct md_rdev *rdev0
5993 = list_entry(mddev->disks.next,
5994 struct md_rdev, same_set);
5995 err = super_types[mddev->major_version]
5996 .load_super(rdev, rdev0, mddev->minor_version);
5997 if (err < 0) {
5998 printk(KERN_WARNING
5999 "md: %s has different UUID to %s\n",
6000 bdevname(rdev->bdev,b),
6001 bdevname(rdev0->bdev,b2));
6002 export_rdev(rdev);
6003 return -EINVAL;
6004 }
6005 }
6006 err = bind_rdev_to_array(rdev, mddev);
6007 if (err)
6008 export_rdev(rdev);
6009 return err;
6010 }
6011
6012 /*
6013 * add_new_disk can be used once the array is assembled
6014 * to add "hot spares".  They must already have a superblock
6015 * written
6016 */
6017 if (mddev->pers) {
6018 int err;
6019 if (!mddev->pers->hot_add_disk) {
6020 printk(KERN_WARNING
6021 "%s: personality does not support diskops!\n",
6022 mdname(mddev));
6023 return -EINVAL;
6024 }
6025 if (mddev->persistent)
6026 rdev = md_import_device(dev, mddev->major_version,
6027 mddev->minor_version);
6028 else
6029 rdev = md_import_device(dev, -1, -1);
6030 if (IS_ERR(rdev)) {
6031 printk(KERN_WARNING
6032 "md: md_import_device returned %ld\n",
6033 PTR_ERR(rdev));
6034 return PTR_ERR(rdev);
6035 }
6036
6037 if (!mddev->persistent) {
6038 if (info->state & (1<<MD_DISK_SYNC) &&
6039 info->raid_disk < mddev->raid_disks) {
6040 rdev->raid_disk = info->raid_disk;
6041 set_bit(In_sync, &rdev->flags);
6042 clear_bit(Bitmap_sync, &rdev->flags);
6043 } else
6044 rdev->raid_disk = -1;
6045 rdev->saved_raid_disk = rdev->raid_disk;
6046 } else
6047 super_types[mddev->major_version].
6048 validate_super(mddev, rdev);
6049 if ((info->state & (1<<MD_DISK_SYNC)) &&
6050 rdev->raid_disk != info->raid_disk) {
6051 /* This was a hot-add request, but the validated slot
6052 * doesn't match, so reject it.
6053 */
6054 export_rdev(rdev);
6055 return -EINVAL;
6056 }
6057
6058 clear_bit(In_sync, &rdev->flags);
6059 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6060 set_bit(WriteMostly, &rdev->flags);
6061 else
6062 clear_bit(WriteMostly, &rdev->flags);
6063
6064 if (info->state & (1<<MD_DISK_JOURNAL)) {
6065 struct md_rdev *rdev2;
6066 bool has_journal = false;
6067
6068 /* make sure no existing journal disk */
6069 rdev_for_each(rdev2, mddev) {
6070 if (test_bit(Journal, &rdev2->flags)) {
6071 has_journal = true;
6072 break;
6073 }
6074 }
6075 if (has_journal) {
6076 export_rdev(rdev);
6077 return -EBUSY;
6078 }
6079 set_bit(Journal, &rdev->flags);
6080 }
6081
6082
6083
6084 if (mddev_is_clustered(mddev)) {
6085 if (info->state & (1 << MD_DISK_CANDIDATE))
6086 set_bit(Candidate, &rdev->flags);
6087 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6088
6089 err = md_cluster_ops->add_new_disk(mddev, rdev);
6090 if (err) {
6091 export_rdev(rdev);
6092 return err;
6093 }
6094 }
6095 }
6096
6097 rdev->raid_disk = -1;
6098 err = bind_rdev_to_array(rdev, mddev);
6099
6100 if (err)
6101 export_rdev(rdev);
6102
6103 if (mddev_is_clustered(mddev)) {
6104 if (info->state & (1 << MD_DISK_CANDIDATE))
6105 md_cluster_ops->new_disk_ack(mddev, (err == 0));
6106 else {
6107 if (err)
6108 md_cluster_ops->add_new_disk_cancel(mddev);
6109 else
6110 err = add_bound_rdev(rdev);
6111 }
6112
6113 } else if (!err)
6114 err = add_bound_rdev(rdev);
6115
6116 return err;
6117 }
6118
6119 /* otherwise, add_new_disk is only supported for arrays
6120 * with version-0 superblocks that have not been started yet
6121 */
6122 if (mddev->major_version != 0) {
6123 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
6124 mdname(mddev));
6125 return -EINVAL;
6126 }
6127
6128 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6129 int err;
6130 rdev = md_import_device(dev, -1, 0);
6131 if (IS_ERR(rdev)) {
6132 printk(KERN_WARNING
6133 "md: error, md_import_device() returned %ld\n",
6134 PTR_ERR(rdev));
6135 return PTR_ERR(rdev);
6136 }
6137 rdev->desc_nr = info->number;
6138 if (info->raid_disk < mddev->raid_disks)
6139 rdev->raid_disk = info->raid_disk;
6140 else
6141 rdev->raid_disk = -1;
6142
6143 if (rdev->raid_disk < mddev->raid_disks)
6144 if (info->state & (1<<MD_DISK_SYNC))
6145 set_bit(In_sync, &rdev->flags);
6146
6147 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6148 set_bit(WriteMostly, &rdev->flags);
6149
6150 if (!mddev->persistent) {
6151 printk(KERN_INFO "md: nonpersistent superblock ...\n");
6152 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6153 } else
6154 rdev->sb_start = calc_dev_sboffset(rdev);
6155 rdev->sectors = rdev->sb_start;
6156
6157 err = bind_rdev_to_array(rdev, mddev);
6158 if (err) {
6159 export_rdev(rdev);
6160 return err;
6161 }
6162 }
6163
6164 return 0;
6165}
6166
6167static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6168{
6169 char b[BDEVNAME_SIZE];
6170 struct md_rdev *rdev;
6171
6172 rdev = find_rdev(mddev, dev);
6173 if (!rdev)
6174 return -ENXIO;
6175
6176 if (rdev->raid_disk < 0)
6177 goto kick_rdev;
6178
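 /* still bound to a slot in the array: try to detach it first */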
6179 clear_bit(Blocked, &rdev->flags);
6180 remove_and_add_spares(mddev, rdev);
6181
6182 if (rdev->raid_disk >= 0)
6183 goto busy;
6184
6185kick_rdev:
6186 if (mddev_is_clustered(mddev))
6187 md_cluster_ops->remove_disk(mddev, rdev);
6188
6189 md_kick_rdev_from_array(rdev);
6190 md_update_sb(mddev, 1);
6191 md_new_event(mddev);
6192
6193 return 0;
6194busy:
6195 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
6196 bdevname(rdev->bdev,b), mdname(mddev));
6197 return -EBUSY;
6198}
6199
6200static int hot_add_disk(struct mddev *mddev, dev_t dev)
6201{
6202 char b[BDEVNAME_SIZE];
6203 int err;
6204 struct md_rdev *rdev;
6205
6206 if (!mddev->pers)
6207 return -ENODEV;
6208
6209 if (mddev->major_version != 0) {
6210 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
6211 " version-0 superblocks.\n",
6212 mdname(mddev));
6213 return -EINVAL;
6214 }
6215 if (!mddev->pers->hot_add_disk) {
6216 printk(KERN_WARNING
6217 "%s: personality does not support diskops!\n",
6218 mdname(mddev));
6219 return -EINVAL;
6220 }
6221
6222 rdev = md_import_device(dev, -1, 0);
6223 if (IS_ERR(rdev)) {
6224 printk(KERN_WARNING
6225 "md: error, md_import_device() returned %ld\n",
6226 PTR_ERR(rdev));
6227 return -EINVAL;
6228 }
6229
6230 if (mddev->persistent)
6231 rdev->sb_start = calc_dev_sboffset(rdev);
6232 else
6233 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6234
6235 rdev->sectors = rdev->sb_start;
6236
6237 if (test_bit(Faulty, &rdev->flags)) {
6238 printk(KERN_WARNING
6239 "md: can not hot-add faulty %s disk to %s!\n",
6240 bdevname(rdev->bdev,b), mdname(mddev));
6241 err = -EINVAL;
6242 goto abort_export;
6243 }
6244
6245 clear_bit(In_sync, &rdev->flags);
6246 rdev->desc_nr = -1;
6247 rdev->saved_raid_disk = -1;
6248 err = bind_rdev_to_array(rdev, mddev);
6249 if (err)
6250 goto abort_export;
6251
6252
6253 /*
6254 * The rest should better be atomic, we can have disk failures
6255 * noticed in interrupt contexts ...
6256 */
6257 rdev->raid_disk = -1;
6258
6259 md_update_sb(mddev, 1);
6260
6261 /* Kick recovery, maybe this spare has to be added to the
6262 * array immediately.
6263 */
6264 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6265 md_wakeup_thread(mddev->thread);
6266 md_new_event(mddev);
6267 return 0;
6268
6269abort_export:
6270 export_rdev(rdev);
6271 return err;
6272}
6273
6274static int set_bitmap_file(struct mddev *mddev, int fd)
6275{
6276 int err = 0;
6277
6278 if (mddev->pers) {
6279 if (!mddev->pers->quiesce || !mddev->thread)
6280 return -EBUSY;
6281 if (mddev->recovery || mddev->sync_thread)
6282 return -EBUSY;
6283 /* we should be able to change the bitmap.. */
6284 }
6285
6286 if (fd >= 0) {
6287 struct inode *inode;
6288 struct file *f;
6289
6290 if (mddev->bitmap || mddev->bitmap_info.file)
6291 return -EEXIST;
6292 f = fget(fd);
6293
6294 if (f == NULL) {
6295 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
6296 mdname(mddev));
6297 return -EBADF;
6298 }
6299
6300 inode = f->f_mapping->host;
6301 if (!S_ISREG(inode->i_mode)) {
6302 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
6303 mdname(mddev));
6304 err = -EBADF;
6305 } else if (!(f->f_mode & FMODE_WRITE)) {
6306 printk(KERN_ERR "%s: error: bitmap file must open for write\n",
6307 mdname(mddev));
6308 err = -EBADF;
6309 } else if (atomic_read(&inode->i_writecount) != 1) {
6310 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6311 mdname(mddev));
6312 err = -EBUSY;
6313 }
6314 if (err) {
6315 fput(f);
6316 return err;
6317 }
6318 mddev->bitmap_info.file = f;
6319 mddev->bitmap_info.offset = 0;
6320 } else if (mddev->bitmap == NULL)
6321 return -ENOENT;
6322 err = 0;
6323 if (mddev->pers) {
6324 mddev->pers->quiesce(mddev, 1);
6325 if (fd >= 0) {
6326 struct bitmap *bitmap;
6327
6328 bitmap = bitmap_create(mddev, -1);
6329 if (!IS_ERR(bitmap)) {
6330 mddev->bitmap = bitmap;
6331 err = bitmap_load(mddev);
6332 } else
6333 err = PTR_ERR(bitmap);
6334 }
6335 if (fd < 0 || err) {
6336 bitmap_destroy(mddev);
6337 fd = -1;
6338 }
6339 mddev->pers->quiesce(mddev, 0);
6340 }
6341 if (fd < 0) {
6342 struct file *f = mddev->bitmap_info.file;
6343 if (f) {
6344 spin_lock(&mddev->lock);
6345 mddev->bitmap_info.file = NULL;
6346 spin_unlock(&mddev->lock);
6347 fput(f);
6348 }
6349 }
6350
6351 return err;
6352}
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
6364
6365
6366
6367static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6368{
6369
6370 if (info->raid_disks == 0) {
6371 /* just setting version number for superblock loading */
6372 if (info->major_version < 0 ||
6373 info->major_version >= ARRAY_SIZE(super_types) ||
6374 super_types[info->major_version].name == NULL) {
6375
6376 printk(KERN_INFO
6377 "md: superblock version %d not known\n",
6378 info->major_version);
6379 return -EINVAL;
6380 }
6381 mddev->major_version = info->major_version;
6382 mddev->minor_version = info->minor_version;
6383 mddev->patch_version = info->patch_version;
6384 mddev->persistent = !info->not_persistent;
6385
6386
6387
6388 mddev->ctime = ktime_get_real_seconds();
6389 return 0;
6390 }
6391 mddev->major_version = MD_MAJOR_VERSION;
6392 mddev->minor_version = MD_MINOR_VERSION;
6393 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6394 mddev->ctime = ktime_get_real_seconds();
6395
6396 mddev->level = info->level;
6397 mddev->clevel[0] = 0;
6398 mddev->dev_sectors = 2 * (sector_t)info->size;
6399 mddev->raid_disks = info->raid_disks;
6400 /* don't set md_minor, it is determined by which /dev/md* was
6401 * opened
6402 */
6403 if (info->state & (1<<MD_SB_CLEAN))
6404 mddev->recovery_cp = MaxSector;
6405 else
6406 mddev->recovery_cp = 0;
6407 mddev->persistent = ! info->not_persistent;
6408 mddev->external = 0;
6409
6410 mddev->layout = info->layout;
6411 mddev->chunk_sectors = info->chunk_size >> 9;
6412
6413 mddev->max_disks = MD_SB_DISKS;
6414
6415 if (mddev->persistent)
6416 mddev->flags = 0;
6417 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6418
6419 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6420 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6421 mddev->bitmap_info.offset = 0;
6422
6423 mddev->reshape_position = MaxSector;
6424
6425 /*
6426 * Generate a 128 bit UUID
6427 */
6428 get_random_bytes(mddev->uuid, 16);
6429
6430 mddev->new_level = mddev->level;
6431 mddev->new_chunk_sectors = mddev->chunk_sectors;
6432 mddev->new_layout = mddev->layout;
6433 mddev->delta_disks = 0;
6434 mddev->reshape_backwards = 0;
6435
6436 return 0;
6437}
6438
6439void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6440{
6441 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6442
6443 if (mddev->external_size)
6444 return;
6445
6446 mddev->array_sectors = array_sectors;
6447}
6448EXPORT_SYMBOL(md_set_array_sectors);
6449
6450static int update_size(struct mddev *mddev, sector_t num_sectors)
6451{
6452 struct md_rdev *rdev;
6453 int rv;
6454 int fit = (num_sectors == 0);
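 /* num_sectors == 0 means use the largest size that fits on every device */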
6455
6456 /* cluster raid doesn't support changing the device size */
6457 if (mddev_is_clustered(mddev))
6458 return -EINVAL;
6459
6460 if (mddev->pers->resize == NULL)
6461 return -EINVAL;
6462
6463 /* "num_sectors" is the number of sectors to use from each device.
6464 * This only makes sense for arrays with redundancy; linear and
6465 * raid0 always use whatever space is available.  The size can only
6466 * be changed while no resync or reconstruction is running, and the
6467 * new size must fit on every member device.  A num_sectors of zero
6468 * means use the largest size that fits on all devices.
6469 */
6470
6471 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6472 mddev->sync_thread)
6473 return -EBUSY;
6474 if (mddev->ro)
6475 return -EROFS;
6476
6477 rdev_for_each(rdev, mddev) {
6478 sector_t avail = rdev->sectors;
6479
6480 if (fit && (num_sectors == 0 || num_sectors > avail))
6481 num_sectors = avail;
6482 if (avail < num_sectors)
6483 return -ENOSPC;
6484 }
6485 rv = mddev->pers->resize(mddev, num_sectors);
6486 if (!rv)
6487 revalidate_disk(mddev->gendisk);
6488 return rv;
6489}
6490
6491static int update_raid_disks(struct mddev *mddev, int raid_disks)
6492{
6493 int rv;
6494 struct md_rdev *rdev;
6495
6496 if (mddev->pers->check_reshape == NULL)
6497 return -EINVAL;
6498 if (mddev->ro)
6499 return -EROFS;
6500 if (raid_disks <= 0 ||
6501 (mddev->max_disks && raid_disks >= mddev->max_disks))
6502 return -EINVAL;
6503 if (mddev->sync_thread ||
6504 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6505 mddev->reshape_position != MaxSector)
6506 return -EBUSY;
6507
6508 rdev_for_each(rdev, mddev) {
6509 if (mddev->raid_disks < raid_disks &&
6510 rdev->data_offset < rdev->new_data_offset)
6511 return -EINVAL;
6512 if (mddev->raid_disks > raid_disks &&
6513 rdev->data_offset > rdev->new_data_offset)
6514 return -EINVAL;
6515 }
6516
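 /* shrinking the array implies reshaping backwards, growing forwards */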
6517 mddev->delta_disks = raid_disks - mddev->raid_disks;
6518 if (mddev->delta_disks < 0)
6519 mddev->reshape_backwards = 1;
6520 else if (mddev->delta_disks > 0)
6521 mddev->reshape_backwards = 0;
6522
6523 rv = mddev->pers->check_reshape(mddev);
6524 if (rv < 0) {
6525 mddev->delta_disks = 0;
6526 mddev->reshape_backwards = 0;
6527 }
6528 return rv;
6529}
6530
6531
6532/*
6533 * update_array_info is used to change the configuration of an
6534 * on-line array.
6535 * The version, ctime, level, and persistence cannot be changed.
6536 * Only one of size, raid_disks, layout, or bitmap presence may be
6537 * changed per call.
6538 */
6539static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6540{
6541 int rv = 0;
6542 int cnt = 0;
6543 int state = 0;
6544
6545
6546 if (mddev->bitmap && mddev->bitmap_info.offset)
6547 state |= (1 << MD_SB_BITMAP_PRESENT);
6548
6549 if (mddev->major_version != info->major_version ||
6550 mddev->minor_version != info->minor_version ||
6551
6552 mddev->ctime != info->ctime ||
6553 mddev->level != info->level ||
6554
6555 mddev->persistent != !info->not_persistent ||
6556 mddev->chunk_sectors != info->chunk_size >> 9 ||
6557
6558 ((state^info->state) & 0xfffffe00)
6559 )
6560 return -EINVAL;
6561
6562 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6563 cnt++;
6564 if (mddev->raid_disks != info->raid_disks)
6565 cnt++;
6566 if (mddev->layout != info->layout)
6567 cnt++;
6568 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6569 cnt++;
6570 if (cnt == 0)
6571 return 0;
6572 if (cnt > 1)
6573 return -EINVAL;
6574
6575 if (mddev->layout != info->layout) {
6576 /* Change layout
6577 * we don't need to do anything at the md level, the
6578 * personality will take care of it all.
6579 */
6580 if (mddev->pers->check_reshape == NULL)
6581 return -EINVAL;
6582 else {
6583 mddev->new_layout = info->layout;
6584 rv = mddev->pers->check_reshape(mddev);
6585 if (rv)
6586 mddev->new_layout = mddev->layout;
6587 return rv;
6588 }
6589 }
6590 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6591 rv = update_size(mddev, (sector_t)info->size * 2);
6592
6593 if (mddev->raid_disks != info->raid_disks)
6594 rv = update_raid_disks(mddev, info->raid_disks);
6595
6596 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6597 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6598 rv = -EINVAL;
6599 goto err;
6600 }
6601 if (mddev->recovery || mddev->sync_thread) {
6602 rv = -EBUSY;
6603 goto err;
6604 }
6605 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6606 struct bitmap *bitmap;
6607
6608 if (mddev->bitmap) {
6609 rv = -EEXIST;
6610 goto err;
6611 }
6612 if (mddev->bitmap_info.default_offset == 0) {
6613 rv = -EINVAL;
6614 goto err;
6615 }
6616 mddev->bitmap_info.offset =
6617 mddev->bitmap_info.default_offset;
6618 mddev->bitmap_info.space =
6619 mddev->bitmap_info.default_space;
6620 mddev->pers->quiesce(mddev, 1);
6621 bitmap = bitmap_create(mddev, -1);
6622 if (!IS_ERR(bitmap)) {
6623 mddev->bitmap = bitmap;
6624 rv = bitmap_load(mddev);
6625 } else
6626 rv = PTR_ERR(bitmap);
6627 if (rv)
6628 bitmap_destroy(mddev);
6629 mddev->pers->quiesce(mddev, 0);
6630 } else {
6631 /* remove the bitmap */
6632 if (!mddev->bitmap) {
6633 rv = -ENOENT;
6634 goto err;
6635 }
6636 if (mddev->bitmap->storage.file) {
6637 rv = -EINVAL;
6638 goto err;
6639 }
6640 if (mddev->bitmap_info.nodes) {
6641
6642 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6643 printk("md: can't change bitmap to none since the"
6644 " array is in use by more than one node\n");
6645 rv = -EPERM;
6646 md_cluster_ops->unlock_all_bitmaps(mddev);
6647 goto err;
6648 }
6649
6650 mddev->bitmap_info.nodes = 0;
6651 md_cluster_ops->leave(mddev);
6652 }
6653 mddev->pers->quiesce(mddev, 1);
6654 bitmap_destroy(mddev);
6655 mddev->pers->quiesce(mddev, 0);
6656 mddev->bitmap_info.offset = 0;
6657 }
6658 }
6659 md_update_sb(mddev, 1);
6660 return rv;
6661err:
6662 return rv;
6663}
6664
6665static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6666{
6667 struct md_rdev *rdev;
6668 int err = 0;
6669
6670 if (mddev->pers == NULL)
6671 return -ENODEV;
6672
6673 rcu_read_lock();
6674 rdev = find_rdev_rcu(mddev, dev);
6675 if (!rdev)
6676 err = -ENODEV;
6677 else {
6678 md_error(mddev, rdev);
6679 if (!test_bit(Faulty, &rdev->flags))
6680 err = -EBUSY;
6681 }
6682 rcu_read_unlock();
6683 return err;
6684}
6685
6686/*
6687 * We have a problem here : there is no easy way to give a CHS
6688 * virtual geometry. We currently pretend that we have a 2 heads
6689 * 4 sectors (with a BIG number of cylinders...). This drives
6690 * dosfs just mad... ;-)
6691 */
6692static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6693{
6694 struct mddev *mddev = bdev->bd_disk->private_data;
6695
6696 geo->heads = 2;
6697 geo->sectors = 4;
6698 geo->cylinders = mddev->array_sectors / 8;
6699 return 0;
6700}
6701
6702static inline bool md_ioctl_valid(unsigned int cmd)
6703{
6704 switch (cmd) {
6705 case ADD_NEW_DISK:
6706 case BLKROSET:
6707 case GET_ARRAY_INFO:
6708 case GET_BITMAP_FILE:
6709 case GET_DISK_INFO:
6710 case HOT_ADD_DISK:
6711 case HOT_REMOVE_DISK:
6712 case RAID_AUTORUN:
6713 case RAID_VERSION:
6714 case RESTART_ARRAY_RW:
6715 case RUN_ARRAY:
6716 case SET_ARRAY_INFO:
6717 case SET_BITMAP_FILE:
6718 case SET_DISK_FAULTY:
6719 case STOP_ARRAY:
6720 case STOP_ARRAY_RO:
6721 case CLUSTERED_DISK_NACK:
6722 return true;
6723 default:
6724 return false;
6725 }
6726}
6727
6728static int md_ioctl(struct block_device *bdev, fmode_t mode,
6729 unsigned int cmd, unsigned long arg)
6730{
6731 int err = 0;
6732 void __user *argp = (void __user *)arg;
6733 struct mddev *mddev = NULL;
6734 int ro;
6735
6736 if (!md_ioctl_valid(cmd))
6737 return -ENOTTY;
6738
6739 switch (cmd) {
6740 case RAID_VERSION:
6741 case GET_ARRAY_INFO:
6742 case GET_DISK_INFO:
6743 break;
6744 default:
6745 if (!capable(CAP_SYS_ADMIN))
6746 return -EACCES;
6747 }
6748
6749 /*
6750 * Commands dealing with the RAID driver but not any
6751 * particular array:
6752 */
6753 switch (cmd) {
6754 case RAID_VERSION:
6755 err = get_version(argp);
6756 goto out;
6757
6758#ifndef MODULE
6759 case RAID_AUTORUN:
6760 err = 0;
6761 autostart_arrays(arg);
6762 goto out;
6763#endif
6764 default:;
6765 }
6766
6767
6768 /*
6769 * Commands creating/starting a new array:
6770 */
6771 mddev = bdev->bd_disk->private_data;
6772
6773 if (!mddev) {
6774 BUG();
6775 goto out;
6776 }
6777
6778 /* Some actions do not require the mutex */
6779 switch (cmd) {
6780 case GET_ARRAY_INFO:
6781 if (!mddev->raid_disks && !mddev->external)
6782 err = -ENODEV;
6783 else
6784 err = get_array_info(mddev, argp);
6785 goto out;
6786
6787 case GET_DISK_INFO:
6788 if (!mddev->raid_disks && !mddev->external)
6789 err = -ENODEV;
6790 else
6791 err = get_disk_info(mddev, argp);
6792 goto out;
6793
6794 case SET_DISK_FAULTY:
6795 err = set_disk_faulty(mddev, new_decode_dev(arg));
6796 goto out;
6797
6798 case GET_BITMAP_FILE:
6799 err = get_bitmap_file(mddev, argp);
6800 goto out;
6801
6802 }
6803
6804 if (cmd == ADD_NEW_DISK)
6805 /* need to ensure md_delayed_delete() has completed */
6806 flush_workqueue(md_misc_wq);
6807
6808 if (cmd == HOT_REMOVE_DISK)
6809 /* need to ensure recovery thread has run */
6810 wait_event_interruptible_timeout(mddev->sb_wait,
6811 !test_bit(MD_RECOVERY_NEEDED,
6812 &mddev->flags),
6813 msecs_to_jiffies(5000));
6814 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6815 /* Need to flush page cache, and ensure no-one else opens
6816 * and writes
6817 */
6818 mutex_lock(&mddev->open_mutex);
6819 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6820 mutex_unlock(&mddev->open_mutex);
6821 err = -EBUSY;
6822 goto out;
6823 }
6824 set_bit(MD_STILL_CLOSED, &mddev->flags);
6825 mutex_unlock(&mddev->open_mutex);
6826 sync_blockdev(bdev);
6827 }
6828 err = mddev_lock(mddev);
6829 if (err) {
6830 printk(KERN_INFO
6831 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6832 err, cmd);
6833 goto out;
6834 }
6835
6836 if (cmd == SET_ARRAY_INFO) {
6837 mdu_array_info_t info;
6838 if (!arg)
6839 memset(&info, 0, sizeof(info));
6840 else if (copy_from_user(&info, argp, sizeof(info))) {
6841 err = -EFAULT;
6842 goto unlock;
6843 }
6844 if (mddev->pers) {
6845 err = update_array_info(mddev, &info);
6846 if (err) {
6847 printk(KERN_WARNING "md: couldn't update"
6848 " array info. %d\n", err);
6849 goto unlock;
6850 }
6851 goto unlock;
6852 }
6853 if (!list_empty(&mddev->disks)) {
6854 printk(KERN_WARNING
6855 "md: array %s already has disks!\n",
6856 mdname(mddev));
6857 err = -EBUSY;
6858 goto unlock;
6859 }
6860 if (mddev->raid_disks) {
6861 printk(KERN_WARNING
6862 "md: array %s already initialised!\n",
6863 mdname(mddev));
6864 err = -EBUSY;
6865 goto unlock;
6866 }
6867 err = set_array_info(mddev, &info);
6868 if (err) {
6869 printk(KERN_WARNING "md: couldn't set"
6870 " array info. %d\n", err);
6871 goto unlock;
6872 }
6873 goto unlock;
6874 }
6875
6876
6877
6878
6879
6880
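	/*
	 * If the array is not yet configured, only commands that can
	 * create or start it (and bitmap file handling) are allowed.
	 */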
6881 if ((!mddev->raid_disks && !mddev->external)
6882 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6883 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6884 && cmd != GET_BITMAP_FILE) {
6885 err = -ENODEV;
6886 goto unlock;
6887 }
6888
6889
6890
6891
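	/* Commands even a read-only array can execute: */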
6892 switch (cmd) {
6893 case RESTART_ARRAY_RW:
6894 err = restart_array(mddev);
6895 goto unlock;
6896
6897 case STOP_ARRAY:
6898 err = do_md_stop(mddev, 0, bdev);
6899 goto unlock;
6900
6901 case STOP_ARRAY_RO:
6902 err = md_set_readonly(mddev, bdev);
6903 goto unlock;
6904
6905 case HOT_REMOVE_DISK:
6906 err = hot_remove_disk(mddev, new_decode_dev(arg));
6907 goto unlock;
6908
6909 case ADD_NEW_DISK:
6910
6911
6912
6913
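		/*
		 * On an active array, ADD_NEW_DISK is handled here only when
		 * re-adding a device that is already marked in-sync (which is
		 * safe even while read-only); otherwise fall through to the
		 * read/write path below.
		 */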
6914 if (mddev->pers) {
6915 mdu_disk_info_t info;
6916 if (copy_from_user(&info, argp, sizeof(info)))
6917 err = -EFAULT;
6918 else if (!(info.state & (1<<MD_DISK_SYNC)))
6919
6920 break;
6921 else
6922 err = add_new_disk(mddev, &info);
6923 goto unlock;
6924 }
6925 break;
6926
6927 case BLKROSET:
6928 if (get_user(ro, (int __user *)(arg))) {
6929 err = -EFAULT;
6930 goto unlock;
6931 }
6932 err = -EINVAL;
6933
6934
6935
6936
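		/*
		 * If the bdev is going read-only the value of mddev->ro
		 * does not matter: no writes are coming.
		 */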
6937 if (ro)
6938 goto unlock;
6939
6940
6941 if (mddev->ro != 1)
6942 goto unlock;
6943
6944
6945
6946
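		/*
		 * Transitioning to read-auto only makes sense for arrays
		 * with a personality (those that call md_write_start).
		 */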
6947 if (mddev->pers) {
6948 err = restart_array(mddev);
6949 if (err == 0) {
6950 mddev->ro = 2;
6951 set_disk_ro(mddev->gendisk, 0);
6952 }
6953 }
6954 goto unlock;
6955 }
6956
6957
6958
6959
6960
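	/*
	 * The remaining commands need a writable array: auto-read-only
	 * arrays are switched to read/write here, truly read-only arrays
	 * get -EROFS.
	 */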
6961 if (mddev->ro && mddev->pers) {
6962 if (mddev->ro == 2) {
6963 mddev->ro = 0;
6964 sysfs_notify_dirent_safe(mddev->sysfs_state);
6965 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6966
6967
6968
6969
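			/*
			 * If a device failed while we were read-only, make
			 * sure the metadata update completes before going on.
			 */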
6970 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6971 mddev_unlock(mddev);
6972 wait_event(mddev->sb_wait,
6973 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6974 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6975 mddev_lock_nointr(mddev);
6976 }
6977 } else {
6978 err = -EROFS;
6979 goto unlock;
6980 }
6981 }
6982
6983 switch (cmd) {
6984 case ADD_NEW_DISK:
6985 {
6986 mdu_disk_info_t info;
6987 if (copy_from_user(&info, argp, sizeof(info)))
6988 err = -EFAULT;
6989 else
6990 err = add_new_disk(mddev, &info);
6991 goto unlock;
6992 }
6993
6994 case CLUSTERED_DISK_NACK:
6995 if (mddev_is_clustered(mddev))
6996 md_cluster_ops->new_disk_ack(mddev, false);
6997 else
6998 err = -EINVAL;
6999 goto unlock;
7000
7001 case HOT_ADD_DISK:
7002 err = hot_add_disk(mddev, new_decode_dev(arg));
7003 goto unlock;
7004
7005 case RUN_ARRAY:
7006 err = do_md_run(mddev);
7007 goto unlock;
7008
7009 case SET_BITMAP_FILE:
7010 err = set_bitmap_file(mddev, (int)arg);
7011 goto unlock;
7012
7013 default:
7014 err = -EINVAL;
7015 goto unlock;
7016 }
7017
7018unlock:
7019 if (mddev->hold_active == UNTIL_IOCTL &&
7020 err != -EINVAL)
7021 mddev->hold_active = 0;
7022 mddev_unlock(mddev);
7023out:
7024 return err;
7025}
7026#ifdef CONFIG_COMPAT
7027static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7028 unsigned int cmd, unsigned long arg)
7029{
7030 switch (cmd) {
7031 case HOT_REMOVE_DISK:
7032 case HOT_ADD_DISK:
7033 case SET_DISK_FAULTY:
7034 case SET_BITMAP_FILE:
7035
7036 break;
7037 default:
7038 arg = (unsigned long)compat_ptr(arg);
7039 break;
7040 }
7041
7042 return md_ioctl(bdev, mode, cmd, arg);
7043}
7044#endif
7045
7046static int md_open(struct block_device *bdev, fmode_t mode)
7047{
7048
7049
7050
7051
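	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */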
7052 struct mddev *mddev = mddev_find(bdev->bd_dev);
7053 int err;
7054
7055 if (!mddev)
7056 return -ENODEV;
7057
7058 if (mddev->gendisk != bdev->bd_disk) {
7059
7060
7061
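		/*
		 * We are racing with mddev_put() which is discarding this
		 * bd_disk: drop our reference, wait for the discard to
		 * finish, then ask the caller to retry the open.
		 */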
7062 mddev_put(mddev);
7063
7064 flush_workqueue(md_misc_wq);
7065
7066 return -ERESTARTSYS;
7067 }
7068 BUG_ON(mddev != bdev->bd_disk->private_data);
7069
7070 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7071 goto out;
7072
7073 err = 0;
7074 atomic_inc(&mddev->openers);
7075 clear_bit(MD_STILL_CLOSED, &mddev->flags);
7076 mutex_unlock(&mddev->open_mutex);
7077
7078 check_disk_change(bdev);
7079 out:
7080 return err;
7081}
7082
7083static void md_release(struct gendisk *disk, fmode_t mode)
7084{
7085 struct mddev *mddev = disk->private_data;
7086
7087 BUG_ON(!mddev);
7088 atomic_dec(&mddev->openers);
7089 mddev_put(mddev);
7090}
7091
7092static int md_media_changed(struct gendisk *disk)
7093{
7094 struct mddev *mddev = disk->private_data;
7095
7096 return mddev->changed;
7097}
7098
7099static int md_revalidate(struct gendisk *disk)
7100{
7101 struct mddev *mddev = disk->private_data;
7102
7103 mddev->changed = 0;
7104 return 0;
7105}
7106static const struct block_device_operations md_fops =
7107{
7108 .owner = THIS_MODULE,
7109 .open = md_open,
7110 .release = md_release,
7111 .ioctl = md_ioctl,
7112#ifdef CONFIG_COMPAT
7113 .compat_ioctl = md_compat_ioctl,
7114#endif
7115 .getgeo = md_getgeo,
7116 .media_changed = md_media_changed,
7117 .revalidate_disk= md_revalidate,
7118};
7119
7120static int md_thread(void *arg)
7121{
7122 struct md_thread *thread = arg;
7123
7124
7125
7126
7127
7128
7129
7130
7131
7132
7133
7134
7135
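	/*
	 * md threads should be very responsive: sleep until THREAD_WAKEUP
	 * is set or the timeout expires, run the per-array handler, and
	 * repeat until kthread_stop() is called.  SIGKILL is allowed but
	 * simply flushed below.
	 */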
7136 allow_signal(SIGKILL);
7137 while (!kthread_should_stop()) {
7138
7139
7140
7141
7142
7143
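		/*
		 * We need to wait INTERRUPTIBLE so that we don't add to
		 * the load-average; that means no signals may be pending
		 * when we go to sleep.
		 */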
7144 if (signal_pending(current))
7145 flush_signals(current);
7146
7147 wait_event_interruptible_timeout
7148 (thread->wqueue,
7149 test_bit(THREAD_WAKEUP, &thread->flags)
7150 || kthread_should_stop(),
7151 thread->timeout);
7152
7153 clear_bit(THREAD_WAKEUP, &thread->flags);
7154 if (!kthread_should_stop())
7155 thread->run(thread);
7156 }
7157
7158 return 0;
7159}
7160
7161void md_wakeup_thread(struct md_thread *thread)
7162{
7163 if (thread) {
7164 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7165 set_bit(THREAD_WAKEUP, &thread->flags);
7166 wake_up(&thread->wqueue);
7167 }
7168}
7169EXPORT_SYMBOL(md_wakeup_thread);
7170
7171struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7172 struct mddev *mddev, const char *name)
7173{
7174 struct md_thread *thread;
7175
7176 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7177 if (!thread)
7178 return NULL;
7179
7180 init_waitqueue_head(&thread->wqueue);
7181
7182 thread->run = run;
7183 thread->mddev = mddev;
7184 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7185 thread->tsk = kthread_run(md_thread, thread,
7186 "%s_%s",
7187 mdname(thread->mddev),
7188 name);
7189 if (IS_ERR(thread->tsk)) {
7190 kfree(thread);
7191 return NULL;
7192 }
7193 return thread;
7194}
7195EXPORT_SYMBOL(md_register_thread);
7196
7197void md_unregister_thread(struct md_thread **threadp)
7198{
7199 struct md_thread *thread = *threadp;
7200 if (!thread)
7201 return;
7202 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7203
7204
7205
7206 spin_lock(&pers_lock);
7207 *threadp = NULL;
7208 spin_unlock(&pers_lock);
7209
7210 kthread_stop(thread->tsk);
7211 kfree(thread);
7212}
7213EXPORT_SYMBOL(md_unregister_thread);
7214
7215void md_error(struct mddev *mddev, struct md_rdev *rdev)
7216{
7217 if (!rdev || test_bit(Faulty, &rdev->flags))
7218 return;
7219
7220 if (!mddev->pers || !mddev->pers->error_handler)
7221 return;
7222 mddev->pers->error_handler(mddev,rdev);
7223 if (mddev->degraded)
7224 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7225 sysfs_notify_dirent_safe(rdev->sysfs_state);
7226 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7227 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7228 md_wakeup_thread(mddev->thread);
7229 if (mddev->event_work.func)
7230 queue_work(md_misc_wq, &mddev->event_work);
7231 md_new_event(mddev);
7232}
7233EXPORT_SYMBOL(md_error);
7234
7235
7236
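/*
 * seq_file implementation for /proc/mdstat
 */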
7237static void status_unused(struct seq_file *seq)
7238{
7239 int i = 0;
7240 struct md_rdev *rdev;
7241
7242 seq_printf(seq, "unused devices: ");
7243
7244 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7245 char b[BDEVNAME_SIZE];
7246 i++;
7247 seq_printf(seq, "%s ",
7248 bdevname(rdev->bdev,b));
7249 }
7250 if (!i)
7251 seq_printf(seq, "<none>");
7252
7253 seq_printf(seq, "\n");
7254}
7255
7256static int status_resync(struct seq_file *seq, struct mddev *mddev)
7257{
7258 sector_t max_sectors, resync, res;
7259 unsigned long dt, db;
7260 sector_t rt;
7261 int scale;
7262 unsigned int per_milli;
7263
7264 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7265 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7266 max_sectors = mddev->resync_max_sectors;
7267 else
7268 max_sectors = mddev->dev_sectors;
7269
7270 resync = mddev->curr_resync;
7271 if (resync <= 3) {
7272 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7273
7274 resync = max_sectors;
7275 } else
7276 resync -= atomic_read(&mddev->recovery_active);
7277
7278 if (resync == 0) {
7279 if (mddev->recovery_cp < MaxSector) {
7280 seq_printf(seq, "\tresync=PENDING");
7281 return 1;
7282 }
7283 return 0;
7284 }
7285 if (resync < 3) {
7286 seq_printf(seq, "\tresync=DELAYED");
7287 return 1;
7288 }
7289
7290 WARN_ON(max_sectors == 0);
7291
7292
7293
7294
7295
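	/*
	 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t
	 * and (max_sectors>>scale) fits in a u32 for sector_div() below.
	 */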
7296 scale = 10;
7297 if (sizeof(sector_t) > sizeof(unsigned long)) {
7298 while ( max_sectors/2 > (1ULL<<(scale+32)))
7299 scale++;
7300 }
7301 res = (resync>>scale)*1000;
7302 sector_div(res, (u32)((max_sectors>>scale)+1));
7303
7304 per_milli = res;
7305 {
7306 int i, x = per_milli/50, y = 20-x;
7307 seq_printf(seq, "[");
7308 for (i = 0; i < x; i++)
7309 seq_printf(seq, "=");
7310 seq_printf(seq, ">");
7311 for (i = 0; i < y; i++)
7312 seq_printf(seq, ".");
7313 seq_printf(seq, "] ");
7314 }
7315 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7316 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7317 "reshape" :
7318 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7319 "check" :
7320 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7321 "resync" : "recovery"))),
7322 per_milli/10, per_milli % 10,
7323 (unsigned long long) resync/2,
7324 (unsigned long long) max_sectors/2);
7325
7326
7327
7328
7329
7330
7331
7332
7333
7334
7335
7336
7337
7338
7339
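	/*
	 * dt: time since the last mark, db: sectors done in that time,
	 * rt: estimated time remaining.  Divide before multiplying to
	 * avoid overflow; the divisor is reduced (db/32) and the result
	 * shifted back down (>>= 5) to keep precision.
	 */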
7340 dt = ((jiffies - mddev->resync_mark) / HZ);
7341 if (!dt) dt++;
7342 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7343 - mddev->resync_mark_cnt;
7344
7345 rt = max_sectors - resync;
7346 sector_div(rt, db/32+1);
7347 rt *= dt;
7348 rt >>= 5;
7349
7350 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7351 ((unsigned long)rt % 60)/6);
7352
7353 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7354 return 1;
7355}
7356
7357static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7358{
7359 struct list_head *tmp;
7360 loff_t l = *pos;
7361 struct mddev *mddev;
7362
7363 if (l >= 0x10000)
7364 return NULL;
7365 if (!l--)
7366
7367 return (void*)1;
7368
7369 spin_lock(&all_mddevs_lock);
7370 list_for_each(tmp,&all_mddevs)
7371 if (!l--) {
7372 mddev = list_entry(tmp, struct mddev, all_mddevs);
7373 mddev_get(mddev);
7374 spin_unlock(&all_mddevs_lock);
7375 return mddev;
7376 }
7377 spin_unlock(&all_mddevs_lock);
7378 if (!l--)
7379 return (void*)2;
7380 return NULL;
7381}
7382
7383static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7384{
7385 struct list_head *tmp;
7386 struct mddev *next_mddev, *mddev = v;
7387
7388 ++*pos;
7389 if (v == (void*)2)
7390 return NULL;
7391
7392 spin_lock(&all_mddevs_lock);
7393 if (v == (void*)1)
7394 tmp = all_mddevs.next;
7395 else
7396 tmp = mddev->all_mddevs.next;
7397 if (tmp != &all_mddevs)
7398 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7399 else {
7400 next_mddev = (void*)2;
7401 *pos = 0x10000;
7402 }
7403 spin_unlock(&all_mddevs_lock);
7404
7405 if (v != (void*)1)
7406 mddev_put(mddev);
7407 return next_mddev;
7408
7409}
7410
7411static void md_seq_stop(struct seq_file *seq, void *v)
7412{
7413 struct mddev *mddev = v;
7414
7415 if (mddev && v != (void*)1 && v != (void*)2)
7416 mddev_put(mddev);
7417}
7418
7419static int md_seq_show(struct seq_file *seq, void *v)
7420{
7421 struct mddev *mddev = v;
7422 sector_t sectors;
7423 struct md_rdev *rdev;
7424
7425 if (v == (void*)1) {
7426 struct md_personality *pers;
7427 seq_printf(seq, "Personalities : ");
7428 spin_lock(&pers_lock);
7429 list_for_each_entry(pers, &pers_list, list)
7430 seq_printf(seq, "[%s] ", pers->name);
7431
7432 spin_unlock(&pers_lock);
7433 seq_printf(seq, "\n");
7434 seq->poll_event = atomic_read(&md_event_count);
7435 return 0;
7436 }
7437 if (v == (void*)2) {
7438 status_unused(seq);
7439 return 0;
7440 }
7441
7442 spin_lock(&mddev->lock);
7443 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7444 seq_printf(seq, "%s : %sactive", mdname(mddev),
7445 mddev->pers ? "" : "in");
7446 if (mddev->pers) {
7447 if (mddev->ro==1)
7448 seq_printf(seq, " (read-only)");
7449 if (mddev->ro==2)
7450 seq_printf(seq, " (auto-read-only)");
7451 seq_printf(seq, " %s", mddev->pers->name);
7452 }
7453
7454 sectors = 0;
7455 rcu_read_lock();
7456 rdev_for_each_rcu(rdev, mddev) {
7457 char b[BDEVNAME_SIZE];
7458 seq_printf(seq, " %s[%d]",
7459 bdevname(rdev->bdev,b), rdev->desc_nr);
7460 if (test_bit(WriteMostly, &rdev->flags))
7461 seq_printf(seq, "(W)");
7462 if (test_bit(Journal, &rdev->flags))
7463 seq_printf(seq, "(J)");
7464 if (test_bit(Faulty, &rdev->flags)) {
7465 seq_printf(seq, "(F)");
7466 continue;
7467 }
7468 if (rdev->raid_disk < 0)
7469 seq_printf(seq, "(S)");
7470 if (test_bit(Replacement, &rdev->flags))
7471 seq_printf(seq, "(R)");
7472 sectors += rdev->sectors;
7473 }
7474 rcu_read_unlock();
7475
7476 if (!list_empty(&mddev->disks)) {
7477 if (mddev->pers)
7478 seq_printf(seq, "\n %llu blocks",
7479 (unsigned long long)
7480 mddev->array_sectors / 2);
7481 else
7482 seq_printf(seq, "\n %llu blocks",
7483 (unsigned long long)sectors / 2);
7484 }
7485 if (mddev->persistent) {
7486 if (mddev->major_version != 0 ||
7487 mddev->minor_version != 90) {
7488 seq_printf(seq," super %d.%d",
7489 mddev->major_version,
7490 mddev->minor_version);
7491 }
7492 } else if (mddev->external)
7493 seq_printf(seq, " super external:%s",
7494 mddev->metadata_type);
7495 else
7496 seq_printf(seq, " super non-persistent");
7497
7498 if (mddev->pers) {
7499 mddev->pers->status(seq, mddev);
7500 seq_printf(seq, "\n ");
7501 if (mddev->pers->sync_request) {
7502 if (status_resync(seq, mddev))
7503 seq_printf(seq, "\n ");
7504 }
7505 } else
7506 seq_printf(seq, "\n ");
7507
7508 bitmap_status(seq, mddev->bitmap);
7509
7510 seq_printf(seq, "\n");
7511 }
7512 spin_unlock(&mddev->lock);
7513
7514 return 0;
7515}
7516
7517static const struct seq_operations md_seq_ops = {
7518 .start = md_seq_start,
7519 .next = md_seq_next,
7520 .stop = md_seq_stop,
7521 .show = md_seq_show,
7522};
7523
7524static int md_seq_open(struct inode *inode, struct file *file)
7525{
7526 struct seq_file *seq;
7527 int error;
7528
7529 error = seq_open(file, &md_seq_ops);
7530 if (error)
7531 return error;
7532
7533 seq = file->private_data;
7534 seq->poll_event = atomic_read(&md_event_count);
7535 return error;
7536}
7537
7538static int md_unloading;
7539static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7540{
7541 struct seq_file *seq = filp->private_data;
7542 int mask;
7543
7544 if (md_unloading)
7545 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7546 poll_wait(filp, &md_event_waiters, wait);
7547
7548
7549 mask = POLLIN | POLLRDNORM;
7550
7551 if (seq->poll_event != atomic_read(&md_event_count))
7552 mask |= POLLERR | POLLPRI;
7553 return mask;
7554}
7555
7556static const struct file_operations md_seq_fops = {
7557 .owner = THIS_MODULE,
7558 .open = md_seq_open,
7559 .read = seq_read,
7560 .llseek = seq_lseek,
7561 .release = seq_release,
7562 .poll = mdstat_poll,
7563};
7564
7565int register_md_personality(struct md_personality *p)
7566{
7567 printk(KERN_INFO "md: %s personality registered for level %d\n",
7568 p->name, p->level);
7569 spin_lock(&pers_lock);
7570 list_add_tail(&p->list, &pers_list);
7571 spin_unlock(&pers_lock);
7572 return 0;
7573}
7574EXPORT_SYMBOL(register_md_personality);
7575
7576int unregister_md_personality(struct md_personality *p)
7577{
7578 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7579 spin_lock(&pers_lock);
7580 list_del_init(&p->list);
7581 spin_unlock(&pers_lock);
7582 return 0;
7583}
7584EXPORT_SYMBOL(unregister_md_personality);
7585
7586int register_md_cluster_operations(struct md_cluster_operations *ops,
7587 struct module *module)
7588{
7589 int ret = 0;
7590 spin_lock(&pers_lock);
7591 if (md_cluster_ops != NULL)
7592 ret = -EALREADY;
7593 else {
7594 md_cluster_ops = ops;
7595 md_cluster_mod = module;
7596 }
7597 spin_unlock(&pers_lock);
7598 return ret;
7599}
7600EXPORT_SYMBOL(register_md_cluster_operations);
7601
7602int unregister_md_cluster_operations(void)
7603{
7604 spin_lock(&pers_lock);
7605 md_cluster_ops = NULL;
7606 spin_unlock(&pers_lock);
7607 return 0;
7608}
7609EXPORT_SYMBOL(unregister_md_cluster_operations);
7610
7611int md_setup_cluster(struct mddev *mddev, int nodes)
7612{
7613 if (!md_cluster_ops)
7614 request_module("md-cluster");
7615 spin_lock(&pers_lock);
7616
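	/* ensure the md-cluster module won't be unloaded while we use it */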
7617 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7618 pr_err("can't find md-cluster module or get its reference.\n");
7619 spin_unlock(&pers_lock);
7620 return -ENOENT;
7621 }
7622 spin_unlock(&pers_lock);
7623
7624 return md_cluster_ops->join(mddev, nodes);
7625}
7626
7627void md_cluster_stop(struct mddev *mddev)
7628{
7629 if (!md_cluster_ops)
7630 return;
7631 md_cluster_ops->leave(mddev);
7632 module_put(md_cluster_mod);
7633}
7634
7635static int is_mddev_idle(struct mddev *mddev, int init)
7636{
7637 struct md_rdev *rdev;
7638 int idle;
7639 int curr_events;
7640
7641 idle = 1;
7642 rcu_read_lock();
7643 rdev_for_each_rcu(rdev, mddev) {
7644 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7645 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7646 (int)part_stat_read(&disk->part0, sectors[1]) -
7647 atomic_read(&disk->sync_io);
7648
7649
7650
7651
7652
7653
7654
7655
7656
7657
7658
7659
7660
7661
7662
7663
7664
7665
7666
7667
7668
7669
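		/*
		 * curr_events counts I/O submitted to this device minus I/O
		 * submitted for resync (sync_io).  If it has grown by more
		 * than 64 sectors since the last check, something other than
		 * the resync is using the device, so the array is not idle.
		 */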
7670 if (init || curr_events - rdev->last_events > 64) {
7671 rdev->last_events = curr_events;
7672 idle = 0;
7673 }
7674 }
7675 rcu_read_unlock();
7676 return idle;
7677}
7678
7679void md_done_sync(struct mddev *mddev, int blocks, int ok)
7680{
7681
7682 atomic_sub(blocks, &mddev->recovery_active);
7683 wake_up(&mddev->recovery_wait);
7684 if (!ok) {
7685 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7686 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7687 md_wakeup_thread(mddev->thread);
7688
7689 }
7690}
7691EXPORT_SYMBOL(md_done_sync);
7692
7693
7694
7695
7696
7697
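/*
 * md_write_start(mddev, bi): if the array has to be marked 'active'
 * (not in_sync) before this write can proceed, schedule a superblock
 * update and wait for it to complete.
 */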
7698void md_write_start(struct mddev *mddev, struct bio *bi)
7699{
7700 int did_change = 0;
7701 if (bio_data_dir(bi) != WRITE)
7702 return;
7703
7704 BUG_ON(mddev->ro == 1);
7705 if (mddev->ro == 2) {
7706
7707 mddev->ro = 0;
7708 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7709 md_wakeup_thread(mddev->thread);
7710 md_wakeup_thread(mddev->sync_thread);
7711 did_change = 1;
7712 }
7713 atomic_inc(&mddev->writes_pending);
7714 if (mddev->safemode == 1)
7715 mddev->safemode = 0;
7716 if (mddev->in_sync) {
7717 spin_lock(&mddev->lock);
7718 if (mddev->in_sync) {
7719 mddev->in_sync = 0;
7720 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7721 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7722 md_wakeup_thread(mddev->thread);
7723 did_change = 1;
7724 }
7725 spin_unlock(&mddev->lock);
7726 }
7727 if (did_change)
7728 sysfs_notify_dirent_safe(mddev->sysfs_state);
7729 wait_event(mddev->sb_wait,
7730 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7731}
7732EXPORT_SYMBOL(md_write_start);
7733
7734void md_write_end(struct mddev *mddev)
7735{
7736 if (atomic_dec_and_test(&mddev->writes_pending)) {
7737 if (mddev->safemode == 2)
7738 md_wakeup_thread(mddev->thread);
7739 else if (mddev->safemode_delay)
7740 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7741 }
7742}
7743EXPORT_SYMBOL(md_write_end);
7744
7745
7746
7747
7748
7749
7750
7751
7752
7753
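/*
 * md_allow_write(mddev): mark the array 'active' so that writes may
 * proceed without first blocking on a metadata update.  Returns 0 when
 * writes may proceed immediately, -EAGAIN when a metadata update is
 * still pending (external metadata) and must complete first.
 */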
7754int md_allow_write(struct mddev *mddev)
7755{
7756 if (!mddev->pers)
7757 return 0;
7758 if (mddev->ro)
7759 return 0;
7760 if (!mddev->pers->sync_request)
7761 return 0;
7762
7763 spin_lock(&mddev->lock);
7764 if (mddev->in_sync) {
7765 mddev->in_sync = 0;
7766 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7767 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7768 if (mddev->safemode_delay &&
7769 mddev->safemode == 0)
7770 mddev->safemode = 1;
7771 spin_unlock(&mddev->lock);
7772 md_update_sb(mddev, 0);
7773 sysfs_notify_dirent_safe(mddev->sysfs_state);
7774 } else
7775 spin_unlock(&mddev->lock);
7776
7777 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7778 return -EAGAIN;
7779 else
7780 return 0;
7781}
7782EXPORT_SYMBOL_GPL(md_allow_write);
7783
7784#define SYNC_MARKS 10
7785#define SYNC_MARK_STEP (3*HZ)
7786#define UPDATE_FREQUENCY (5*60*HZ)
7787void md_do_sync(struct md_thread *thread)
7788{
7789 struct mddev *mddev = thread->mddev;
7790 struct mddev *mddev2;
7791 unsigned int currspeed = 0,
7792 window;
7793 sector_t max_sectors,j, io_sectors, recovery_done;
7794 unsigned long mark[SYNC_MARKS];
7795 unsigned long update_time;
7796 sector_t mark_cnt[SYNC_MARKS];
7797 int last_mark,m;
7798 struct list_head *tmp;
7799 sector_t last_check;
7800 int skipped = 0;
7801 struct md_rdev *rdev;
7802 char *desc, *action = NULL;
7803 struct blk_plug plug;
7804 int ret;
7805
7806
7807 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7808 return;
7809 if (mddev->ro) {
7810 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7811 return;
7812 }
7813
7814 if (mddev_is_clustered(mddev)) {
7815 ret = md_cluster_ops->resync_start(mddev);
7816 if (ret)
7817 goto skip;
7818
7819 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
7820 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7821 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
7822 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
7823 && ((unsigned long long)mddev->curr_resync_completed
7824 < (unsigned long long)mddev->resync_max_sectors))
7825 goto skip;
7826 }
7827
7828 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7829 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7830 desc = "data-check";
7831 action = "check";
7832 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7833 desc = "requested-resync";
7834 action = "repair";
7835 } else
7836 desc = "resync";
7837 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7838 desc = "reshape";
7839 else
7840 desc = "recovery";
7841
7842 mddev->last_sync_action = action ?: desc;
7843
7844
7845
7846
7847
7848
7849
7850
7851
7852
7853
7854
7855
7856
7857
7858
7859
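	/*
	 * curr_resync is overloaded here:
	 *   0     = not engaged in resync at all
	 *   2     = checking that no conflicting sync is running
	 *   1     = like 2, but we have yielded to a conflicting sync
	 *   other = actively syncing, this many sectors done
	 * Before starting we set curr_resync to 2 and require every
	 * conflicting array to have a smaller value; otherwise we wait on
	 * resync_wait.  Ties are broken by mddev address to avoid deadlock.
	 */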
7860 do {
7861 int mddev2_minor = -1;
7862 mddev->curr_resync = 2;
7863
7864 try_again:
7865 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7866 goto skip;
7867 for_each_mddev(mddev2, tmp) {
7868 if (mddev2 == mddev)
7869 continue;
7870 if (!mddev->parallel_resync
7871 && mddev2->curr_resync
7872 && match_mddev_units(mddev, mddev2)) {
7873 DEFINE_WAIT(wq);
7874 if (mddev < mddev2 && mddev->curr_resync == 2) {
7875
7876 mddev->curr_resync = 1;
7877 wake_up(&resync_wait);
7878 }
7879 if (mddev > mddev2 && mddev->curr_resync == 1)
7880
7881
7882
7883 continue;
7884
7885
7886
7887
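				/*
				 * Wait TASK_INTERRUPTIBLE so the sleeping
				 * thread does not add to the load average.
				 */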
7888 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7889 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7890 mddev2->curr_resync >= mddev->curr_resync) {
7891 if (mddev2_minor != mddev2->md_minor) {
7892 mddev2_minor = mddev2->md_minor;
7893 printk(KERN_INFO "md: delaying %s of %s"
7894 " until %s has finished (they"
7895 " share one or more physical units)\n",
7896 desc, mdname(mddev),
7897 mdname(mddev2));
7898 }
7899 mddev_put(mddev2);
7900 if (signal_pending(current))
7901 flush_signals(current);
7902 schedule();
7903 finish_wait(&resync_wait, &wq);
7904 goto try_again;
7905 }
7906 finish_wait(&resync_wait, &wq);
7907 }
7908 }
7909 } while (mddev->curr_resync < 2);
7910
7911 j = 0;
7912 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7913
7914
7915
7916 max_sectors = mddev->resync_max_sectors;
7917 atomic64_set(&mddev->resync_mismatches, 0);
7918
7919 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7920 j = mddev->resync_min;
7921 else if (!mddev->bitmap)
7922 j = mddev->recovery_cp;
7923
7924 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7925 max_sectors = mddev->resync_max_sectors;
7926 else {
7927
7928 max_sectors = mddev->dev_sectors;
7929 j = MaxSector;
7930 rcu_read_lock();
7931 rdev_for_each_rcu(rdev, mddev)
7932 if (rdev->raid_disk >= 0 &&
7933 !test_bit(Journal, &rdev->flags) &&
7934 !test_bit(Faulty, &rdev->flags) &&
7935 !test_bit(In_sync, &rdev->flags) &&
7936 rdev->recovery_offset < j)
7937 j = rdev->recovery_offset;
7938 rcu_read_unlock();
7939
7940
7941
7942
7943
7944
7945
7946
7947
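		/*
		 * If there is a bitmap, make sure writes that started before
		 * a spare was added have completed before recovery starts;
		 * otherwise a late bitmap_endwrite() could set a bit in a
		 * region the recovery has already checked and skipped.
		 */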
7948 if (mddev->bitmap) {
7949 mddev->pers->quiesce(mddev, 1);
7950 mddev->pers->quiesce(mddev, 0);
7951 }
7952 }
7953
7954 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7955 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7956 " %d KB/sec/disk.\n", speed_min(mddev));
7957 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7958 "(but not more than %d KB/sec) for %s.\n",
7959 speed_max(mddev), desc);
7960
7961 is_mddev_idle(mddev, 1);
7962
7963 io_sectors = 0;
7964 for (m = 0; m < SYNC_MARKS; m++) {
7965 mark[m] = jiffies;
7966 mark_cnt[m] = io_sectors;
7967 }
7968 last_mark = 0;
7969 mddev->resync_mark = mark[last_mark];
7970 mddev->resync_mark_cnt = mark_cnt[last_mark];
7971
7972
7973
7974
7975 window = 32*(PAGE_SIZE/512);
7976 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7977 window/2, (unsigned long long)max_sectors/2);
7978
7979 atomic_set(&mddev->recovery_active, 0);
7980 last_check = 0;
7981
7982 if (j>2) {
7983 printk(KERN_INFO
7984 "md: resuming %s of %s from checkpoint.\n",
7985 desc, mdname(mddev));
7986 mddev->curr_resync = j;
7987 } else
7988 mddev->curr_resync = 3;
7989 mddev->curr_resync_completed = j;
7990 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7991 md_new_event(mddev);
7992 update_time = jiffies;
7993
7994 blk_start_plug(&plug);
7995 while (j < max_sectors) {
7996 sector_t sectors;
7997
7998 skipped = 0;
7999
8000 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8001 ((mddev->curr_resync > mddev->curr_resync_completed &&
8002 (mddev->curr_resync - mddev->curr_resync_completed)
8003 > (max_sectors >> 4)) ||
8004 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8005 (j - mddev->curr_resync_completed)*2
8006 >= mddev->resync_max - mddev->curr_resync_completed ||
8007 mddev->curr_resync_completed > mddev->resync_max
8008 )) {
8009
8010 wait_event(mddev->recovery_wait,
8011 atomic_read(&mddev->recovery_active) == 0);
8012 mddev->curr_resync_completed = j;
8013 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8014 j > mddev->recovery_cp)
8015 mddev->recovery_cp = j;
8016 update_time = jiffies;
8017 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8018 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8019 }
8020
8021 while (j >= mddev->resync_max &&
8022 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8023
8024
8025
8026
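			/*
			 * resync_max is controlled from user space, so we may
			 * be waiting here for a long time; flush any signal
			 * and wait interruptibly.
			 */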
8027 flush_signals(current);
8028 wait_event_interruptible(mddev->recovery_wait,
8029 mddev->resync_max > j
8030 || test_bit(MD_RECOVERY_INTR,
8031 &mddev->recovery));
8032 }
8033
8034 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8035 break;
8036
8037 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8038 if (sectors == 0) {
8039 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8040 break;
8041 }
8042
8043 if (!skipped) {
8044 io_sectors += sectors;
8045 atomic_add(sectors, &mddev->recovery_active);
8046 }
8047
8048 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8049 break;
8050
8051 j += sectors;
8052 if (j > max_sectors)
8053
8054 j = max_sectors;
8055 if (j > 2)
8056 mddev->curr_resync = j;
8057 mddev->curr_mark_cnt = io_sectors;
8058 if (last_check == 0)
8059
8060
8061
8062 md_new_event(mddev);
8063
8064 if (last_check + window > io_sectors || j == max_sectors)
8065 continue;
8066
8067 last_check = io_sectors;
8068 repeat:
8069 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8070
8071 int next = (last_mark+1) % SYNC_MARKS;
8072
8073 mddev->resync_mark = mark[next];
8074 mddev->resync_mark_cnt = mark_cnt[next];
8075 mark[next] = jiffies;
8076 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8077 last_mark = next;
8078 }
8079
8080 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8081 break;
8082
8083
8084
8085
8086
8087
8088
8089
8090
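		/*
		 * Throttle the resync: above speed_max we sleep and
		 * re-measure (goto repeat); above speed_min we additionally
		 * wait for outstanding resync I/O whenever the array is not
		 * idle.
		 */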
8091 cond_resched();
8092
8093 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8094 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8095 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8096
8097 if (currspeed > speed_min(mddev)) {
8098 if (currspeed > speed_max(mddev)) {
8099 msleep(500);
8100 goto repeat;
8101 }
8102 if (!is_mddev_idle(mddev, 0)) {
8103
8104
8105
8106
8107 wait_event(mddev->recovery_wait,
8108 !atomic_read(&mddev->recovery_active));
8109 }
8110 }
8111 }
8112 printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
8113 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8114 ? "interrupted" : "done");
8115
8116
8117
8118 blk_finish_plug(&plug);
8119 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8120
8121 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8122 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8123 mddev->curr_resync > 2) {
8124 mddev->curr_resync_completed = mddev->curr_resync;
8125 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8126 }
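	/* tell the personality that we are finished */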
8127 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8128
8129 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8130 mddev->curr_resync > 2) {
8131 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8132 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8133 if (mddev->curr_resync >= mddev->recovery_cp) {
8134 printk(KERN_INFO
8135 "md: checkpointing %s of %s.\n",
8136 desc, mdname(mddev));
8137 if (test_bit(MD_RECOVERY_ERROR,
8138 &mddev->recovery))
8139 mddev->recovery_cp =
8140 mddev->curr_resync_completed;
8141 else
8142 mddev->recovery_cp =
8143 mddev->curr_resync;
8144 }
8145 } else
8146 mddev->recovery_cp = MaxSector;
8147 } else {
8148 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8149 mddev->curr_resync = MaxSector;
8150 rcu_read_lock();
8151 rdev_for_each_rcu(rdev, mddev)
8152 if (rdev->raid_disk >= 0 &&
8153 mddev->delta_disks >= 0 &&
8154 !test_bit(Journal, &rdev->flags) &&
8155 !test_bit(Faulty, &rdev->flags) &&
8156 !test_bit(In_sync, &rdev->flags) &&
8157 rdev->recovery_offset < mddev->curr_resync)
8158 rdev->recovery_offset = mddev->curr_resync;
8159 rcu_read_unlock();
8160 }
8161 }
8162 skip:
8163
8164
8165
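	/*
	 * Flag that the superblock needs to be written out (and, for
	 * clustered arrays, that other nodes should be informed) now that
	 * the sync has ended.
	 */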
8166 set_mask_bits(&mddev->flags, 0,
8167 BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
8168
8169 spin_lock(&mddev->lock);
8170 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8171
8172 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8173 mddev->resync_min = 0;
8174 mddev->resync_max = MaxSector;
8175 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8176 mddev->resync_min = mddev->curr_resync_completed;
8177 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8178 mddev->curr_resync = 0;
8179 spin_unlock(&mddev->lock);
8180
8181 wake_up(&resync_wait);
8182 md_wakeup_thread(mddev->thread);
8183 return;
8184}
8185EXPORT_SYMBOL_GPL(md_do_sync);
8186
8187static int remove_and_add_spares(struct mddev *mddev,
8188 struct md_rdev *this)
8189{
8190 struct md_rdev *rdev;
8191 int spares = 0;
8192 int removed = 0;
8193 bool remove_some = false;
8194
8195 rdev_for_each(rdev, mddev) {
8196 if ((this == NULL || rdev == this) &&
8197 rdev->raid_disk >= 0 &&
8198 !test_bit(Blocked, &rdev->flags) &&
8199 test_bit(Faulty, &rdev->flags) &&
8200 atomic_read(&rdev->nr_pending)==0) {
8201
8202
8203
8204
8205
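			/*
			 * Faulty, non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented, Faulty cleared or
			 * Blocked set, so one synchronize_rcu() below covers
			 * them all.
			 */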
8206 remove_some = true;
8207 set_bit(RemoveSynchronized, &rdev->flags);
8208 }
8209 }
8210
8211 if (remove_some)
8212 synchronize_rcu();
8213 rdev_for_each(rdev, mddev) {
8214 if ((this == NULL || rdev == this) &&
8215 rdev->raid_disk >= 0 &&
8216 !test_bit(Blocked, &rdev->flags) &&
8217 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8218 (!test_bit(In_sync, &rdev->flags) &&
8219 !test_bit(Journal, &rdev->flags))) &&
8220 atomic_read(&rdev->nr_pending)==0)) {
8221 if (mddev->pers->hot_remove_disk(
8222 mddev, rdev) == 0) {
8223 sysfs_unlink_rdev(mddev, rdev);
8224 rdev->raid_disk = -1;
8225 removed++;
8226 }
8227 }
8228 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8229 clear_bit(RemoveSynchronized, &rdev->flags);
8230 }
8231
8232 if (removed && mddev->kobj.sd)
8233 sysfs_notify(&mddev->kobj, NULL, "degraded");
8234
8235 if (this && removed)
8236 goto no_add;
8237
8238 rdev_for_each(rdev, mddev) {
8239 if (this && this != rdev)
8240 continue;
8241 if (test_bit(Candidate, &rdev->flags))
8242 continue;
8243 if (rdev->raid_disk >= 0 &&
8244 !test_bit(In_sync, &rdev->flags) &&
8245 !test_bit(Journal, &rdev->flags) &&
8246 !test_bit(Faulty, &rdev->flags))
8247 spares++;
8248 if (rdev->raid_disk >= 0)
8249 continue;
8250 if (test_bit(Faulty, &rdev->flags))
8251 continue;
8252 if (!test_bit(Journal, &rdev->flags)) {
8253 if (mddev->ro &&
8254 ! (rdev->saved_raid_disk >= 0 &&
8255 !test_bit(Bitmap_sync, &rdev->flags)))
8256 continue;
8257
8258 rdev->recovery_offset = 0;
8259 }
8260 if (mddev->pers->
8261 hot_add_disk(mddev, rdev) == 0) {
8262 if (sysfs_link_rdev(mddev, rdev))
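				/* failure here is OK */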
8263 ;
8264 if (!test_bit(Journal, &rdev->flags))
8265 spares++;
8266 md_new_event(mddev);
8267 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8268 }
8269 }
8270no_add:
8271 if (removed)
8272 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8273 return spares;
8274}
8275
8276static void md_start_sync(struct work_struct *ws)
8277{
8278 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8279
8280 mddev->sync_thread = md_register_thread(md_do_sync,
8281 mddev,
8282 "resync");
8283 if (!mddev->sync_thread) {
8284 printk(KERN_ERR "%s: could not start resync thread...\n",
8285 mdname(mddev));
8286
8287 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8288 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8289 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8290 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8291 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8292 wake_up(&resync_wait);
8293 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8294 &mddev->recovery))
8295 if (mddev->sysfs_action)
8296 sysfs_notify_dirent_safe(mddev->sysfs_action);
8297 } else
8298 md_wakeup_thread(mddev->sync_thread);
8299 sysfs_notify_dirent_safe(mddev->sysfs_action);
8300 md_new_event(mddev);
8301}
8302
8303
8304
8305
8306
8307
8308
8309
8310
8311
8312
8313
8314
8315
8316
8317
8318
8319
8320
8321
8322
8323
8324
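/*
 * This routine is regularly called by all per-array threads to deal with
 * generic issues like resync and superblock updates.  It does no resync
 * itself; when one is needed it sets MD_RECOVERY_RUNNING and forks off a
 * sync_thread, which sets MD_RECOVERY_DONE when it finishes so that this
 * routine can reap it.  Faulty devices with no pending I/O are also
 * removed here.
 */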
8325void md_check_recovery(struct mddev *mddev)
8326{
8327 if (mddev->suspended)
8328 return;
8329
8330 if (mddev->bitmap)
8331 bitmap_daemon_work(mddev);
8332
8333 if (signal_pending(current)) {
8334 if (mddev->pers->sync_request && !mddev->external) {
8335 printk(KERN_INFO "md: %s in immediate safe mode\n",
8336 mdname(mddev));
8337 mddev->safemode = 2;
8338 }
8339 flush_signals(current);
8340 }
8341
8342 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8343 return;
8344 if ( ! (
8345 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
8346 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8347 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8348 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8349 (mddev->external == 0 && mddev->safemode == 1) ||
8350 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8351 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8352 ))
8353 return;
8354
8355 if (mddev_trylock(mddev)) {
8356 int spares = 0;
8357
8358 if (mddev->ro) {
8359 struct md_rdev *rdev;
8360 if (!mddev->external && mddev->in_sync)
8361
8362
8363
8364
8365
8366 rdev_for_each(rdev, mddev)
8367 clear_bit(Blocked, &rdev->flags);
8368
8369
8370
8371
8372
8373
8374
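			/*
			 * On a read-only array we can only remove failed
			 * devices and add devices that are already in-sync,
			 * so any added spares can be activated immediately.
			 */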
8375 remove_and_add_spares(mddev, NULL);
8376
8377
8378
8379 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8380 md_reap_sync_thread(mddev);
8381 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8382 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8383 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
8384 goto unlock;
8385 }
8386
8387 if (mddev_is_clustered(mddev)) {
8388 struct md_rdev *rdev;
8389
8390
8391
8392 rdev_for_each(rdev, mddev) {
8393 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8394 rdev->raid_disk < 0)
8395 md_kick_rdev_from_array(rdev);
8396 }
8397
8398 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8399 md_reload_sb(mddev, mddev->good_device_nr);
8400 }
8401
8402 if (!mddev->external) {
8403 int did_change = 0;
8404 spin_lock(&mddev->lock);
8405 if (mddev->safemode &&
8406 !atomic_read(&mddev->writes_pending) &&
8407 !mddev->in_sync &&
8408 mddev->recovery_cp == MaxSector) {
8409 mddev->in_sync = 1;
8410 did_change = 1;
8411 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8412 }
8413 if (mddev->safemode == 1)
8414 mddev->safemode = 0;
8415 spin_unlock(&mddev->lock);
8416 if (did_change)
8417 sysfs_notify_dirent_safe(mddev->sysfs_state);
8418 }
8419
8420 if (mddev->flags & MD_UPDATE_SB_FLAGS)
8421 md_update_sb(mddev, 0);
8422
8423 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8424 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
8425
8426 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8427 goto unlock;
8428 }
8429 if (mddev->sync_thread) {
8430 md_reap_sync_thread(mddev);
8431 goto unlock;
8432 }
8433
8434
8435
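		/*
		 * Set RUNNING before clearing NEEDED to avoid any transients
		 * in the value of "sync_action".
		 */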
8436 mddev->curr_resync_completed = 0;
8437 spin_lock(&mddev->lock);
8438 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8439 spin_unlock(&mddev->lock);
8440
8441
8442
8443 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8444 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8445
8446 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8447 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8448 goto not_running;
8449
8450
8451
8452
8453
8454
8455
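		/*
		 * No recovery is running: remove any failed drives, then add
		 * spares if possible.  Spares are also removed and re-added
		 * to allow the personality to fail the re-add.
		 */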
8456 if (mddev->reshape_position != MaxSector) {
8457 if (mddev->pers->check_reshape == NULL ||
8458 mddev->pers->check_reshape(mddev) != 0)
8459
8460 goto not_running;
8461 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8462 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8463 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8464 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8465 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8466 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8467 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8468 } else if (mddev->recovery_cp < MaxSector) {
8469 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8470 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8471 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
8472
8473 goto not_running;
8474
8475 if (mddev->pers->sync_request) {
8476 if (spares) {
8477
8478
8479
8480
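				/*
				 * We are adding devices to an array that keeps
				 * its bitmap on all members, so make sure every
				 * bitmap page gets written out.
				 */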
8481 bitmap_write_all(mddev->bitmap);
8482 }
8483 INIT_WORK(&mddev->del_work, md_start_sync);
8484 queue_work(md_misc_wq, &mddev->del_work);
8485 goto unlock;
8486 }
8487 not_running:
8488 if (!mddev->sync_thread) {
8489 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8490 wake_up(&resync_wait);
8491 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8492 &mddev->recovery))
8493 if (mddev->sysfs_action)
8494 sysfs_notify_dirent_safe(mddev->sysfs_action);
8495 }
8496 unlock:
8497 wake_up(&mddev->sb_wait);
8498 mddev_unlock(mddev);
8499 }
8500}
8501EXPORT_SYMBOL(md_check_recovery);
8502
8503void md_reap_sync_thread(struct mddev *mddev)
8504{
8505 struct md_rdev *rdev;
8506
8507
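	/* resync has finished: reap the thread and collect the result */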
8508 md_unregister_thread(&mddev->sync_thread);
8509 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8510 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8511
8512
8513 if (mddev->pers->spare_active(mddev)) {
8514 sysfs_notify(&mddev->kobj, NULL,
8515 "degraded");
8516 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8517 }
8518 }
8519 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8520 mddev->pers->finish_reshape)
8521 mddev->pers->finish_reshape(mddev);
8522
8523
8524
8525
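	/*
	 * If the array is no longer degraded, any saved_raid_disk
	 * information must be scrapped.
	 */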
8526 if (!mddev->degraded)
8527 rdev_for_each(rdev, mddev)
8528 rdev->saved_raid_disk = -1;
8529
8530 md_update_sb(mddev, 1);
8531
8532
8533
8534 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
8535 md_cluster_ops->resync_finish(mddev);
8536 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8537 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8538 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8539 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8540 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8541 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8542 wake_up(&resync_wait);
8543
8544 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8545 sysfs_notify_dirent_safe(mddev->sysfs_action);
8546 md_new_event(mddev);
8547 if (mddev->event_work.func)
8548 queue_work(md_misc_wq, &mddev->event_work);
8549}
8550EXPORT_SYMBOL(md_reap_sync_thread);
8551
8552void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8553{
8554 sysfs_notify_dirent_safe(rdev->sysfs_state);
8555 wait_event_timeout(rdev->blocked_wait,
8556 !test_bit(Blocked, &rdev->flags) &&
8557 !test_bit(BlockedBadBlocks, &rdev->flags),
8558 msecs_to_jiffies(5000));
8559 rdev_dec_pending(rdev, mddev);
8560}
8561EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8562
8563void md_finish_reshape(struct mddev *mddev)
8564{
8565
8566 struct md_rdev *rdev;
8567
8568 rdev_for_each(rdev, mddev) {
8569 if (rdev->data_offset > rdev->new_data_offset)
8570 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8571 else
8572 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8573 rdev->data_offset = rdev->new_data_offset;
8574 }
8575}
8576EXPORT_SYMBOL(md_finish_reshape);
8577
8578
8579
8580
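/* Bad block management: record or clear bad blocks on a member device */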
8581int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8582 int is_new)
8583{
8584 struct mddev *mddev = rdev->mddev;
8585 int rv;
8586 if (is_new)
8587 s += rdev->new_data_offset;
8588 else
8589 s += rdev->data_offset;
8590 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8591 if (rv == 0) {
8592
8593 sysfs_notify_dirent_safe(rdev->sysfs_state);
8594 set_mask_bits(&mddev->flags, 0,
8595 BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
8596 md_wakeup_thread(rdev->mddev->thread);
8597 return 1;
8598 } else
8599 return 0;
8600}
8601EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8602
8603int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8604 int is_new)
8605{
8606 if (is_new)
8607 s += rdev->new_data_offset;
8608 else
8609 s += rdev->data_offset;
8610 return badblocks_clear(&rdev->badblocks,
8611 s, sectors);
8612}
8613EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8614
8615static int md_notify_reboot(struct notifier_block *this,
8616 unsigned long code, void *x)
8617{
8618 struct list_head *tmp;
8619 struct mddev *mddev;
8620 int need_delay = 0;
8621
8622 for_each_mddev(mddev, tmp) {
8623 if (mddev_trylock(mddev)) {
8624 if (mddev->pers)
8625 __md_stop_writes(mddev);
8626 if (mddev->persistent)
8627 mddev->safemode = 2;
8628 mddev_unlock(mddev);
8629 }
8630 need_delay = 1;
8631 }
8632
8633
8634
8635
8636
8637
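	/*
	 * Some devices are known to lose very recent writes on an immediate
	 * reboot, so give them a moment to settle before continuing.
	 */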
8638 if (need_delay)
8639 mdelay(1000*1);
8640
8641 return NOTIFY_DONE;
8642}
8643
8644static struct notifier_block md_notifier = {
8645 .notifier_call = md_notify_reboot,
8646 .next = NULL,
8647 .priority = INT_MAX,
8648};
8649
8650static void md_geninit(void)
8651{
8652 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8653
8654 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8655}
8656
8657static int __init md_init(void)
8658{
8659 int ret = -ENOMEM;
8660
8661 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8662 if (!md_wq)
8663 goto err_wq;
8664
8665 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8666 if (!md_misc_wq)
8667 goto err_misc_wq;
8668
8669 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8670 goto err_md;
8671
8672 if ((ret = register_blkdev(0, "mdp")) < 0)
8673 goto err_mdp;
8674 mdp_major = ret;
8675
8676 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8677 md_probe, NULL, NULL);
8678 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8679 md_probe, NULL, NULL);
8680
8681 register_reboot_notifier(&md_notifier);
8682 raid_table_header = register_sysctl_table(raid_root_table);
8683
8684 md_geninit();
8685 return 0;
8686
8687err_mdp:
8688 unregister_blkdev(MD_MAJOR, "md");
8689err_md:
8690 destroy_workqueue(md_misc_wq);
8691err_misc_wq:
8692 destroy_workqueue(md_wq);
8693err_wq:
8694 return ret;
8695}
8696
8697static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8698{
8699 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8700 struct md_rdev *rdev2;
8701 int role, ret;
8702 char b[BDEVNAME_SIZE];
8703
8704
8705 rdev_for_each(rdev2, mddev) {
8706 if (test_bit(Faulty, &rdev2->flags))
8707 continue;
8708
8709
8710 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8711
8712 if (test_bit(Candidate, &rdev2->flags)) {
8713 if (role == 0xfffe) {
8714 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8715 md_kick_rdev_from_array(rdev2);
8716 continue;
8717 }
8718 else
8719 clear_bit(Candidate, &rdev2->flags);
8720 }
8721
8722 if (role != rdev2->raid_disk) {
8723
8724 if (rdev2->raid_disk == -1 && role != 0xffff) {
8725 rdev2->saved_raid_disk = role;
8726 ret = remove_and_add_spares(mddev, rdev2);
8727 pr_info("Activated spare: %s\n",
8728 bdevname(rdev2->bdev,b));
8729
8730
8731 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8732 md_wakeup_thread(mddev->thread);
8733
8734 }
8735
8736
8737
8738
8739
8740 if ((role == 0xfffe) || (role == 0xfffd)) {
8741 md_error(mddev, rdev2);
8742 clear_bit(Blocked, &rdev2->flags);
8743 }
8744 }
8745 }
8746
8747 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8748 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8749
8750
8751 mddev->events = le64_to_cpu(sb->events);
8752}
8753
8754static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8755{
8756 int err;
8757 struct page *swapout = rdev->sb_page;
8758 struct mdp_superblock_1 *sb;
8759
8760
8761
8762
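	/*
	 * Keep the old superblock page in 'swapout' so it can be restored
	 * if re-reading the superblock fails.
	 */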
8763 rdev->sb_page = NULL;
8764 alloc_disk_sb(rdev);
8765 ClearPageUptodate(rdev->sb_page);
8766 rdev->sb_loaded = 0;
8767 err = super_types[mddev->major_version].load_super(rdev, NULL, mddev->minor_version);
8768
8769 if (err < 0) {
8770 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8771 __func__, __LINE__, rdev->desc_nr, err);
8772 put_page(rdev->sb_page);
8773 rdev->sb_page = swapout;
8774 rdev->sb_loaded = 1;
8775 return err;
8776 }
8777
8778 sb = page_address(rdev->sb_page);
8779
8780
8781
8782
8783 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8784 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8785
8786
8787
8788
8789 if (rdev->recovery_offset == MaxSector &&
8790 !test_bit(In_sync, &rdev->flags) &&
8791 mddev->pers->spare_active(mddev))
8792 sysfs_notify(&mddev->kobj, NULL, "degraded");
8793
8794 put_page(swapout);
8795 return 0;
8796}
8797
8798void md_reload_sb(struct mddev *mddev, int nr)
8799{
8800 struct md_rdev *rdev;
8801 int err;
8802
8803
8804 rdev_for_each_rcu(rdev, mddev) {
8805 if (rdev->desc_nr == nr)
8806 break;
8807 }
8808
8809 if (!rdev || rdev->desc_nr != nr) {
8810 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8811 return;
8812 }
8813
8814 err = read_rdev(mddev, rdev);
8815 if (err < 0)
8816 return;
8817
8818 check_sb_changes(mddev, rdev);
8819
8820
8821 rdev_for_each_rcu(rdev, mddev)
8822 read_rdev(mddev, rdev);
8823}
8824EXPORT_SYMBOL(md_reload_sb);
8825
8826#ifndef MODULE
8827
8828
8829
8830
8831
8832
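/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */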
8833static DEFINE_MUTEX(detected_devices_mutex);
8834static LIST_HEAD(all_detected_devices);
8835struct detected_devices_node {
8836 struct list_head list;
8837 dev_t dev;
8838};
8839
8840void md_autodetect_dev(dev_t dev)
8841{
8842 struct detected_devices_node *node_detected_dev;
8843
8844 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8845 if (node_detected_dev) {
8846 node_detected_dev->dev = dev;
8847 mutex_lock(&detected_devices_mutex);
8848 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8849 mutex_unlock(&detected_devices_mutex);
8850 } else {
8851 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8852 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8853 }
8854}
8855
8856static void autostart_arrays(int part)
8857{
8858 struct md_rdev *rdev;
8859 struct detected_devices_node *node_detected_dev;
8860 dev_t dev;
8861 int i_scanned, i_passed;
8862
8863 i_scanned = 0;
8864 i_passed = 0;
8865
8866 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8867
8868 mutex_lock(&detected_devices_mutex);
8869 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8870 i_scanned++;
8871 node_detected_dev = list_entry(all_detected_devices.next,
8872 struct detected_devices_node, list);
8873 list_del(&node_detected_dev->list);
8874 dev = node_detected_dev->dev;
8875 kfree(node_detected_dev);
8876 rdev = md_import_device(dev,0, 90);
8877 if (IS_ERR(rdev))
8878 continue;
8879
8880 if (test_bit(Faulty, &rdev->flags))
8881 continue;
8882
8883 set_bit(AutoDetected, &rdev->flags);
8884 list_add(&rdev->same_set, &pending_raid_disks);
8885 i_passed++;
8886 }
8887 mutex_unlock(&detected_devices_mutex);
8888
8889 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8890 i_scanned, i_passed);
8891
8892 autorun_devices(part);
8893}
8894
8895#endif
8896
8897static __exit void md_exit(void)
8898{
8899 struct mddev *mddev;
8900 struct list_head *tmp;
8901 int delay = 1;
8902
8903 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8904 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8905
8906 unregister_blkdev(MD_MAJOR,"md");
8907 unregister_blkdev(mdp_major, "mdp");
8908 unregister_reboot_notifier(&md_notifier);
8909 unregister_sysctl_table(raid_table_header);
8910
8911
8912
8913
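	/*
	 * We cannot unload while some process is sleeping in poll() on
	 * /proc/mdstat; wake them all up first.
	 */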
8914 md_unloading = 1;
8915 while (waitqueue_active(&md_event_waiters)) {
8916
8917 wake_up(&md_event_waiters);
8918 msleep(delay);
8919 delay += delay;
8920 }
8921 remove_proc_entry("mdstat", NULL);
8922
8923 for_each_mddev(mddev, tmp) {
8924 export_array(mddev);
8925 mddev->hold_active = 0;
8926 }
8927 destroy_workqueue(md_misc_wq);
8928 destroy_workqueue(md_wq);
8929}
8930
8931subsys_initcall(md_init);
8932module_exit(md_exit)
8933
8934static int get_ro(char *buffer, struct kernel_param *kp)
8935{
8936 return sprintf(buffer, "%d", start_readonly);
8937}
8938static int set_ro(const char *val, struct kernel_param *kp)
8939{
8940 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8941}
8942
8943module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8944module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8945module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8946
8947MODULE_LICENSE("GPL");
8948MODULE_DESCRIPTION("MD RAID framework");
8949MODULE_ALIAS("md");
8950MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8951