/*
   md.c : Multiple Devices driver for Linux
     Copyright (C) 1998, 1999, 2000 Ingo Molnar

     completely rewritten, based on the MD driver code from Marc Zyngier

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/badblocks.h>
38#include <linux/sysctl.h>
39#include <linux/seq_file.h>
40#include <linux/fs.h>
41#include <linux/poll.h>
42#include <linux/ctype.h>
43#include <linux/string.h>
44#include <linux/hdreg.h>
45#include <linux/proc_fs.h>
46#include <linux/random.h>
47#include <linux/module.h>
48#include <linux/reboot.h>
49#include <linux/file.h>
50#include <linux/compat.h>
51#include <linux/delay.h>
52#include <linux/raid/md_p.h>
53#include <linux/raid/md_u.h>
54#include <linux/slab.h>
55#include "md.h"
56#include "bitmap.h"
57#include "md-cluster.h"
58
59#ifndef MODULE
60static void autostart_arrays(int part);
61#endif

/*
 * pers_list holds all registered RAID "personalities" (raid0, raid1, ...);
 * pers_lock protects additions to and removals from that list, and also
 * protects md_cluster_ops.
 */
68static LIST_HEAD(pers_list);
69static DEFINE_SPINLOCK(pers_lock);
70
71struct md_cluster_operations *md_cluster_ops;
72EXPORT_SYMBOL(md_cluster_ops);
73struct module *md_cluster_mod;
74EXPORT_SYMBOL(md_cluster_mod);
75
76static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
77static struct workqueue_struct *md_wq;
78static struct workqueue_struct *md_misc_wq;
79
80static int remove_and_add_spares(struct mddev *mddev,
81 struct md_rdev *this);
82static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
89#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more guaranteed speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change these limits via /proc/sys/dev/raid/speed_limit_min
 * and speed_limit_max, or per array via sysfs.
 */
103static int sysctl_speed_limit_min = 1000;
104static int sysctl_speed_limit_max = 200000;
105static inline int speed_min(struct mddev *mddev)
106{
107 return mddev->sync_speed_min ?
108 mddev->sync_speed_min : sysctl_speed_limit_min;
109}
110
111static inline int speed_max(struct mddev *mddev)
112{
113 return mddev->sync_speed_max ?
114 mddev->sync_speed_max : sysctl_speed_limit_max;
115}
116
117static struct ctl_table_header *raid_table_header;
118
119static struct ctl_table raid_table[] = {
120 {
121 .procname = "speed_limit_min",
122 .data = &sysctl_speed_limit_min,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 {
128 .procname = "speed_limit_max",
129 .data = &sysctl_speed_limit_max,
130 .maxlen = sizeof(int),
131 .mode = S_IRUGO|S_IWUSR,
132 .proc_handler = proc_dointvec,
133 },
134 { }
135};
136
137static struct ctl_table raid_dir_table[] = {
138 {
139 .procname = "raid",
140 .maxlen = 0,
141 .mode = S_IRUGO|S_IXUGO,
142 .child = raid_table,
143 },
144 { }
145};
146
147static struct ctl_table raid_root_table[] = {
148 {
149 .procname = "dev",
150 .maxlen = 0,
151 .mode = 0555,
152 .child = raid_dir_table,
153 },
154 { }
155};
156
157static const struct block_device_operations md_fops;
158
159static int start_readonly;

/* bio_alloc_mddev and bio_clone_mddev:
 * like bio_alloc/bio_clone, but use the array's private bio_set
 * when one has been allocated, falling back to the global pools.
 */
165struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
166 struct mddev *mddev)
167{
168 struct bio *b;
169
170 if (!mddev || !mddev->bio_set)
171 return bio_alloc(gfp_mask, nr_iovecs);
172
173 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
174 if (!b)
175 return NULL;
176 return b;
177}
178EXPORT_SYMBOL_GPL(bio_alloc_mddev);
179
180struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
181 struct mddev *mddev)
182{
183 if (!mddev || !mddev->bio_set)
184 return bio_clone(bio, gfp_mask);
185
186 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
187}
188EXPORT_SYMBOL_GPL(bio_clone_mddev);

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
200static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
201static atomic_t md_event_count;
202void md_new_event(struct mddev *mddev)
203{
204 atomic_inc(&md_event_count);
205 wake_up(&md_event_waiters);
206}
207EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables iterating over all existing md arrays;
 * all_mddevs_lock protects this list.
 */
213static LIST_HEAD(all_mddevs);
214static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * iterates through all used mddevs in the system.
 * We take care to hold all_mddevs_lock whenever navigating
 * the list, and to always hold a reference on the current
 * mddev while the lock is dropped.
 */
223#define for_each_mddev(_mddev,_tmp) \
224 \
225 for (({ spin_lock(&all_mddevs_lock); \
226 _tmp = all_mddevs.next; \
227 _mddev = NULL;}); \
228 ({ if (_tmp != &all_mddevs) \
229 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
230 spin_unlock(&all_mddevs_lock); \
231 if (_mddev) mddev_put(_mddev); \
232 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
233 _tmp != &all_mddevs;}); \
234 ({ spin_lock(&all_mddevs_lock); \
235 _tmp = _tmp->next;}) \
236 )

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
245static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
246{
247 const int rw = bio_data_dir(bio);
248 struct mddev *mddev = q->queuedata;
249 unsigned int sectors;
250 int cpu;
251
252 blk_queue_split(q, &bio, q->bio_split);
253
254 if (mddev == NULL || mddev->pers == NULL) {
255 bio_io_error(bio);
256 return BLK_QC_T_NONE;
257 }
258 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
259 if (bio_sectors(bio) != 0)
260 bio->bi_error = -EROFS;
261 bio_endio(bio);
262 return BLK_QC_T_NONE;
263 }
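	/* read barrier: observe an up-to-date ->suspended / array state below */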
264 smp_rmb();
265 rcu_read_lock();
266 if (mddev->suspended) {
267 DEFINE_WAIT(__wait);
268 for (;;) {
269 prepare_to_wait(&mddev->sb_wait, &__wait,
270 TASK_UNINTERRUPTIBLE);
271 if (!mddev->suspended)
272 break;
273 rcu_read_unlock();
274 schedule();
275 rcu_read_lock();
276 }
277 finish_wait(&mddev->sb_wait, &__wait);
278 }
279 atomic_inc(&mddev->active_io);
280 rcu_read_unlock();

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
286 sectors = bio_sectors(bio);
287
288 bio->bi_opf &= ~REQ_NOMERGE;
289 mddev->pers->make_request(mddev, bio);
290
291 cpu = part_stat_lock();
292 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
293 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
294 part_stat_unlock();
295
296 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
297 wake_up(&mddev->sb_wait);
298
299 return BLK_QC_T_NONE;
300}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled before it returns.
 */
308void mddev_suspend(struct mddev *mddev)
309{
310 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
311 if (mddev->suspended++)
312 return;
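	/* wait for in-flight md_make_request() RCU readers, so that they
	 * either see ->suspended or have already bumped active_io */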
313 synchronize_rcu();
314 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
315 mddev->pers->quiesce(mddev, 1);
316
317 del_timer_sync(&mddev->safemode_timer);
318}
319EXPORT_SYMBOL_GPL(mddev_suspend);
320
321void mddev_resume(struct mddev *mddev)
322{
323 if (--mddev->suspended)
324 return;
325 wake_up(&mddev->sb_wait);
326 mddev->pers->quiesce(mddev, 0);
327
328 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
329 md_wakeup_thread(mddev->thread);
330 md_wakeup_thread(mddev->sync_thread);
331}
332EXPORT_SYMBOL_GPL(mddev_resume);
333
334int mddev_congested(struct mddev *mddev, int bits)
335{
336 struct md_personality *pers = mddev->pers;
337 int ret = 0;
338
339 rcu_read_lock();
340 if (mddev->suspended)
341 ret = 1;
342 else if (pers && pers->congested)
343 ret = pers->congested(mddev, bits);
344 rcu_read_unlock();
345 return ret;
346}
347EXPORT_SYMBOL_GPL(mddev_congested);
348static int md_congested(void *data, int bits)
349{
350 struct mddev *mddev = data;
351 return mddev_congested(mddev, bits);
352}

/*
 * Generic flush handling for md
 */
358static void md_end_flush(struct bio *bio)
359{
360 struct md_rdev *rdev = bio->bi_private;
361 struct mddev *mddev = rdev->mddev;
362
363 rdev_dec_pending(rdev, mddev);
364
365 if (atomic_dec_and_test(&mddev->flush_pending)) {
366
367 queue_work(md_wq, &mddev->flush_work);
368 }
369 bio_put(bio);
370}
371
372static void md_submit_flush_data(struct work_struct *ws);
373
374static void submit_flushes(struct work_struct *ws)
375{
376 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
377 struct md_rdev *rdev;
378
379 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
380 atomic_set(&mddev->flush_pending, 1);
381 rcu_read_lock();
382 rdev_for_each_rcu(rdev, mddev)
383 if (rdev->raid_disk >= 0 &&
384 !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when the request finishes, one after
			 * we reclaim rcu_read_lock
			 */
389 struct bio *bi;
390 atomic_inc(&rdev->nr_pending);
391 atomic_inc(&rdev->nr_pending);
392 rcu_read_unlock();
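			/* send a zero-length flush bio to this member device */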
393 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
394 bi->bi_end_io = md_end_flush;
395 bi->bi_private = rdev;
396 bi->bi_bdev = rdev->bdev;
397 bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
398 atomic_inc(&mddev->flush_pending);
399 submit_bio(bi);
400 rcu_read_lock();
401 rdev_dec_pending(rdev, mddev);
402 }
403 rcu_read_unlock();
404 if (atomic_dec_and_test(&mddev->flush_pending))
405 queue_work(md_wq, &mddev->flush_work);
406}
407
408static void md_submit_flush_data(struct work_struct *ws)
409{
410 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
411 struct bio *bio = mddev->flush_bio;
412
413 if (bio->bi_iter.bi_size == 0)
414
415 bio_endio(bio);
416 else {
417 bio->bi_opf &= ~REQ_PREFLUSH;
418 mddev->pers->make_request(mddev, bio);
419 }
420
421 mddev->flush_bio = NULL;
422 wake_up(&mddev->sb_wait);
423}
424
425void md_flush_request(struct mddev *mddev, struct bio *bio)
426{
427 spin_lock_irq(&mddev->lock);
428 wait_event_lock_irq(mddev->sb_wait,
429 !mddev->flush_bio,
430 mddev->lock);
431 mddev->flush_bio = bio;
432 spin_unlock_irq(&mddev->lock);
433
434 INIT_WORK(&mddev->flush_work, submit_flushes);
435 queue_work(md_wq, &mddev->flush_work);
436}
437EXPORT_SYMBOL(md_flush_request);
438
439void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
440{
441 struct mddev *mddev = cb->data;
442 md_wakeup_thread(mddev->thread);
443 kfree(cb);
444}
445EXPORT_SYMBOL(md_unplug);
446
447static inline struct mddev *mddev_get(struct mddev *mddev)
448{
449 atomic_inc(&mddev->active);
450 return mddev;
451}
452
453static void mddev_delayed_delete(struct work_struct *ws);
454
455static void mddev_put(struct mddev *mddev)
456{
457 struct bio_set *bs = NULL;
458
459 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
460 return;
461 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
462 mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
465 list_del_init(&mddev->all_mddevs);
466 bs = mddev->bio_set;
467 mddev->bio_set = NULL;
468 if (mddev->gendisk) {
			/* We did a probe so need to clean up.
			 * Call queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
474 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
475 queue_work(md_misc_wq, &mddev->del_work);
476 } else
477 kfree(mddev);
478 }
479 spin_unlock(&all_mddevs_lock);
480 if (bs)
481 bioset_free(bs);
482}
483
484static void md_safemode_timeout(unsigned long data);
485
486void mddev_init(struct mddev *mddev)
487{
488 mutex_init(&mddev->open_mutex);
489 mutex_init(&mddev->reconfig_mutex);
490 mutex_init(&mddev->bitmap_info.mutex);
491 INIT_LIST_HEAD(&mddev->disks);
492 INIT_LIST_HEAD(&mddev->all_mddevs);
493 setup_timer(&mddev->safemode_timer, md_safemode_timeout,
494 (unsigned long) mddev);
495 atomic_set(&mddev->active, 1);
496 atomic_set(&mddev->openers, 0);
497 atomic_set(&mddev->active_io, 0);
498 spin_lock_init(&mddev->lock);
499 atomic_set(&mddev->flush_pending, 0);
500 init_waitqueue_head(&mddev->sb_wait);
501 init_waitqueue_head(&mddev->recovery_wait);
502 mddev->reshape_position = MaxSector;
503 mddev->reshape_backwards = 0;
504 mddev->last_sync_action = "none";
505 mddev->resync_min = 0;
506 mddev->resync_max = MaxSector;
507 mddev->level = LEVEL_NONE;
508}
509EXPORT_SYMBOL_GPL(mddev_init);
510
511static struct mddev *mddev_find(dev_t unit)
512{
513 struct mddev *mddev, *new = NULL;
514
515 if (unit && MAJOR(unit) != MD_MAJOR)
516 unit &= ~((1<<MdpMinorShift)-1);
517
518 retry:
519 spin_lock(&all_mddevs_lock);
520
521 if (unit) {
522 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
523 if (mddev->unit == unit) {
524 mddev_get(mddev);
525 spin_unlock(&all_mddevs_lock);
526 kfree(new);
527 return mddev;
528 }
529
530 if (new) {
531 list_add(&new->all_mddevs, &all_mddevs);
532 spin_unlock(&all_mddevs_lock);
533 new->hold_active = UNTIL_IOCTL;
534 return new;
535 }
536 } else if (new) {
537
538 static int next_minor = 512;
539 int start = next_minor;
540 int is_free = 0;
541 int dev = 0;
542 while (!is_free) {
543 dev = MKDEV(MD_MAJOR, next_minor);
544 next_minor++;
545 if (next_minor > MINORMASK)
546 next_minor = 0;
547 if (next_minor == start) {
548
549 spin_unlock(&all_mddevs_lock);
550 kfree(new);
551 return NULL;
552 }
553
554 is_free = 1;
555 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
556 if (mddev->unit == dev) {
557 is_free = 0;
558 break;
559 }
560 }
561 new->unit = dev;
562 new->md_minor = MINOR(dev);
563 new->hold_active = UNTIL_STOP;
564 list_add(&new->all_mddevs, &all_mddevs);
565 spin_unlock(&all_mddevs_lock);
566 return new;
567 }
568 spin_unlock(&all_mddevs_lock);
569
570 new = kzalloc(sizeof(*new), GFP_KERNEL);
571 if (!new)
572 return NULL;
573
574 new->unit = unit;
575 if (MAJOR(unit) == MD_MAJOR)
576 new->md_minor = MINOR(unit);
577 else
578 new->md_minor = MINOR(unit) >> MdpMinorShift;
579
580 mddev_init(new);
581
582 goto retry;
583}
584
585static struct attribute_group md_redundancy_group;
586
587void mddev_unlock(struct mddev *mddev)
588{
589 if (mddev->to_remove) {
		/* These files cannot be removed while reconfig_mutex is held,
		 * as an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the remove is in progress;
		 * anything else which might set ->to_remove or otherwise
		 * change the sysfs namespace will fail with -EBUSY while
		 * sysfs_active is set.
		 * We set sysfs_active under reconfig_mutex and test it
		 * under the same mutex elsewhere to ensure its correct
		 * value is seen.
		 */
602 struct attribute_group *to_remove = mddev->to_remove;
603 mddev->to_remove = NULL;
604 mddev->sysfs_active = 1;
605 mutex_unlock(&mddev->reconfig_mutex);
606
607 if (mddev->kobj.sd) {
608 if (to_remove != &md_redundancy_group)
609 sysfs_remove_group(&mddev->kobj, to_remove);
610 if (mddev->pers == NULL ||
611 mddev->pers->sync_request == NULL) {
612 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
613 if (mddev->sysfs_action)
614 sysfs_put(mddev->sysfs_action);
615 mddev->sysfs_action = NULL;
616 }
617 }
618 mddev->sysfs_active = 0;
619 } else
620 mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
625 spin_lock(&pers_lock);
626 md_wakeup_thread(mddev->thread);
627 spin_unlock(&pers_lock);
628}
629EXPORT_SYMBOL_GPL(mddev_unlock);
630
631struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
632{
633 struct md_rdev *rdev;
634
635 rdev_for_each_rcu(rdev, mddev)
636 if (rdev->desc_nr == nr)
637 return rdev;
638
639 return NULL;
640}
641EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
642
643static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
644{
645 struct md_rdev *rdev;
646
647 rdev_for_each(rdev, mddev)
648 if (rdev->bdev->bd_dev == dev)
649 return rdev;
650
651 return NULL;
652}
653
654static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
655{
656 struct md_rdev *rdev;
657
658 rdev_for_each_rcu(rdev, mddev)
659 if (rdev->bdev->bd_dev == dev)
660 return rdev;
661
662 return NULL;
663}
664
665static struct md_personality *find_pers(int level, char *clevel)
666{
667 struct md_personality *pers;
668 list_for_each_entry(pers, &pers_list, list) {
669 if (level != LEVEL_NONE && pers->level == level)
670 return pers;
671 if (strcmp(pers->name, clevel)==0)
672 return pers;
673 }
674 return NULL;
675}
676
677
678static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
679{
680 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
681 return MD_NEW_SIZE_SECTORS(num_sectors);
682}
683
684static int alloc_disk_sb(struct md_rdev *rdev)
685{
686 rdev->sb_page = alloc_page(GFP_KERNEL);
687 if (!rdev->sb_page) {
688 printk(KERN_ALERT "md: out of memory.\n");
689 return -ENOMEM;
690 }
691
692 return 0;
693}
694
695void md_rdev_clear(struct md_rdev *rdev)
696{
697 if (rdev->sb_page) {
698 put_page(rdev->sb_page);
699 rdev->sb_loaded = 0;
700 rdev->sb_page = NULL;
701 rdev->sb_start = 0;
702 rdev->sectors = 0;
703 }
704 if (rdev->bb_page) {
705 put_page(rdev->bb_page);
706 rdev->bb_page = NULL;
707 }
708 badblocks_exit(&rdev->badblocks);
709}
710EXPORT_SYMBOL_GPL(md_rdev_clear);
711
712static void super_written(struct bio *bio)
713{
714 struct md_rdev *rdev = bio->bi_private;
715 struct mddev *mddev = rdev->mddev;
716
717 if (bio->bi_error) {
718 printk("md: super_written gets error=%d\n", bio->bi_error);
719 md_error(mddev, rdev);
720 }
721
722 if (atomic_dec_and_test(&mddev->pending_writes))
723 wake_up(&mddev->sb_wait);
724 rdev_dec_pending(rdev, mddev);
725 bio_put(bio);
726}
727
728void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
729 sector_t sector, int size, struct page *page)
730{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
737 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
738
739 atomic_inc(&rdev->nr_pending);
740
741 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
742 bio->bi_iter.bi_sector = sector;
743 bio_add_page(bio, page, size, 0);
744 bio->bi_private = rdev;
745 bio->bi_end_io = super_written;
746 bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
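	/* flush + FUA: the superblock must be durable on media before
	 * md_super_wait() callers proceed */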
747
748 atomic_inc(&mddev->pending_writes);
749 submit_bio(bio);
750}
751
752void md_super_wait(struct mddev *mddev)
753{
	/* wait for all superblock writes that were scheduled to complete */
755 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
756}
757
758int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
759 struct page *page, int op, int op_flags, bool metadata_op)
760{
761 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
762 int ret;
763
764 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
765 rdev->meta_bdev : rdev->bdev;
766 bio_set_op_attrs(bio, op, op_flags);
767 if (metadata_op)
768 bio->bi_iter.bi_sector = sector + rdev->sb_start;
769 else if (rdev->mddev->reshape_position != MaxSector &&
770 (rdev->mddev->reshape_backwards ==
771 (sector >= rdev->mddev->reshape_position)))
772 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
773 else
774 bio->bi_iter.bi_sector = sector + rdev->data_offset;
775 bio_add_page(bio, page, size, 0);
776
777 submit_bio_wait(bio);
778
779 ret = !bio->bi_error;
780 bio_put(bio);
781 return ret;
782}
783EXPORT_SYMBOL_GPL(sync_page_io);
784
785static int read_disk_sb(struct md_rdev *rdev, int size)
786{
787 char b[BDEVNAME_SIZE];
788
789 if (rdev->sb_loaded)
790 return 0;
791
792 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
793 goto fail;
794 rdev->sb_loaded = 1;
795 return 0;
796
797fail:
798 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
799 bdevname(rdev->bdev,b));
800 return -EINVAL;
801}
802
803static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
804{
805 return sb1->set_uuid0 == sb2->set_uuid0 &&
806 sb1->set_uuid1 == sb2->set_uuid1 &&
807 sb1->set_uuid2 == sb2->set_uuid2 &&
808 sb1->set_uuid3 == sb2->set_uuid3;
809}
810
811static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
812{
813 int ret;
814 mdp_super_t *tmp1, *tmp2;
815
816 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
817 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
818
819 if (!tmp1 || !tmp2) {
820 ret = 0;
821 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
822 goto abort;
823 }
824
825 *tmp1 = *sb1;
826 *tmp2 = *sb2;
827
828
829
830
831 tmp1->nr_disks = 0;
832 tmp2->nr_disks = 0;
833
834 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
835abort:
836 kfree(tmp1);
837 kfree(tmp2);
838 return ret;
839}
840
841static u32 md_csum_fold(u32 csum)
842{
843 csum = (csum & 0xffff) + (csum >> 16);
844 return (csum & 0xffff) + (csum >> 16);
845}
846
847static unsigned int calc_sb_csum(mdp_super_t *sb)
848{
849 u64 newcsum = 0;
850 u32 *sb32 = (u32*)sb;
851 int i;
852 unsigned int disk_csum, csum;
853
854 disk_csum = sb->sb_csum;
855 sb->sb_csum = 0;
856
857 for (i = 0; i < MD_SB_BYTES/4 ; i++)
858 newcsum += sb32[i];
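	/* fold the 64-bit sum (with carries) back down to 32 bits */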
859 csum = (newcsum & 0xffffffff) + (newcsum>>32);
860
861#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
870 sb->sb_csum = md_csum_fold(disk_csum);
871#else
872 sb->sb_csum = disk_csum;
873#endif
874 return csum;
875}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a handler struct per format.
 * Each handler provides a set of operations:
 *
 *   load_super(rdev, refdev, minor_version)
 *      Read the superblock from the device and, if refdev is given,
 *      compare event counts to decide which device is most recent.
 *      Returns 0 or 1 on success (1 meaning "newer than refdev"),
 *      or a negative errno on failure.
 *
 *   validate_super(mddev, rdev)
 *      Sanity-check the superblock against the array and initialise
 *      per-device state (In_sync, raid_disk, ...) from it.
 *
 *   sync_super(mddev, rdev)
 *      Update the in-memory superblock image on rdev from the current
 *      array state, ready to be written out.
 *
 *   rdev_size_change(rdev, num_sectors)
 *      Relocate and rewrite the superblock when the device size changes.
 *
 *   allow_new_offset(rdev, new_offset)
 *      Check whether the data can safely be moved to a new offset.
 */
907struct super_type {
908 char *name;
909 struct module *owner;
910 int (*load_super)(struct md_rdev *rdev,
911 struct md_rdev *refdev,
912 int minor_version);
913 int (*validate_super)(struct mddev *mddev,
914 struct md_rdev *rdev);
915 void (*sync_super)(struct mddev *mddev,
916 struct md_rdev *rdev);
917 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
918 sector_t num_sectors);
919 int (*allow_new_offset)(struct md_rdev *rdev,
920 unsigned long long new_offset);
921};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 *
 */
931int md_check_no_bitmap(struct mddev *mddev)
932{
933 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
934 return 0;
935 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
936 mdname(mddev), mddev->pers->name);
937 return 1;
938}
939EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
944static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
945{
946 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
947 mdp_super_t *sb;
948 int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
956 rdev->sb_start = calc_dev_sboffset(rdev);
957
958 ret = read_disk_sb(rdev, MD_SB_BYTES);
959 if (ret) return ret;
960
961 ret = -EINVAL;
962
963 bdevname(rdev->bdev, b);
964 sb = page_address(rdev->sb_page);
965
966 if (sb->md_magic != MD_SB_MAGIC) {
967 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
968 b);
969 goto abort;
970 }
971
972 if (sb->major_version != 0 ||
973 sb->minor_version < 90 ||
974 sb->minor_version > 91) {
975 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
976 sb->major_version, sb->minor_version,
977 b);
978 goto abort;
979 }
980
981 if (sb->raid_disks <= 0)
982 goto abort;
983
984 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
985 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
986 b);
987 goto abort;
988 }
989
990 rdev->preferred_minor = sb->md_minor;
991 rdev->data_offset = 0;
992 rdev->new_data_offset = 0;
993 rdev->sb_size = MD_SB_BYTES;
994 rdev->badblocks.shift = -1;
995
996 if (sb->level == LEVEL_MULTIPATH)
997 rdev->desc_nr = -1;
998 else
999 rdev->desc_nr = sb->this_disk.number;
1000
1001 if (!refdev) {
1002 ret = 1;
1003 } else {
1004 __u64 ev1, ev2;
1005 mdp_super_t *refsb = page_address(refdev->sb_page);
1006 if (!uuid_equal(refsb, sb)) {
1007 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1008 b, bdevname(refdev->bdev,b2));
1009 goto abort;
1010 }
1011 if (!sb_equal(refsb, sb)) {
1012 printk(KERN_WARNING "md: %s has same UUID"
1013 " but different superblock to %s\n",
1014 b, bdevname(refdev->bdev, b2));
1015 goto abort;
1016 }
1017 ev1 = md_event(sb);
1018 ev2 = md_event(refsb);
1019 if (ev1 > ev2)
1020 ret = 1;
1021 else
1022 ret = 0;
1023 }
1024 rdev->sectors = rdev->sb_start;

	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
1029 if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
1030 sb->level >= 1)
1031 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1032
1033 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1034
1035 ret = -EINVAL;
1036
1037 abort:
1038 return ret;
1039}

/*
 * validate_super for 0.90.0
 */
1044static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1045{
1046 mdp_disk_t *desc;
1047 mdp_super_t *sb = page_address(rdev->sb_page);
1048 __u64 ev1 = md_event(sb);
1049
1050 rdev->raid_disk = -1;
1051 clear_bit(Faulty, &rdev->flags);
1052 clear_bit(In_sync, &rdev->flags);
1053 clear_bit(Bitmap_sync, &rdev->flags);
1054 clear_bit(WriteMostly, &rdev->flags);
1055
1056 if (mddev->raid_disks == 0) {
1057 mddev->major_version = 0;
1058 mddev->minor_version = sb->minor_version;
1059 mddev->patch_version = sb->patch_version;
1060 mddev->external = 0;
1061 mddev->chunk_sectors = sb->chunk_size >> 9;
1062 mddev->ctime = sb->ctime;
1063 mddev->utime = sb->utime;
1064 mddev->level = sb->level;
1065 mddev->clevel[0] = 0;
1066 mddev->layout = sb->layout;
1067 mddev->raid_disks = sb->raid_disks;
1068 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1069 mddev->events = ev1;
1070 mddev->bitmap_info.offset = 0;
1071 mddev->bitmap_info.space = 0;
1072
1073 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1074 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1075 mddev->reshape_backwards = 0;
1076
1077 if (mddev->minor_version >= 91) {
1078 mddev->reshape_position = sb->reshape_position;
1079 mddev->delta_disks = sb->delta_disks;
1080 mddev->new_level = sb->new_level;
1081 mddev->new_layout = sb->new_layout;
1082 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1083 if (mddev->delta_disks < 0)
1084 mddev->reshape_backwards = 1;
1085 } else {
1086 mddev->reshape_position = MaxSector;
1087 mddev->delta_disks = 0;
1088 mddev->new_level = mddev->level;
1089 mddev->new_layout = mddev->layout;
1090 mddev->new_chunk_sectors = mddev->chunk_sectors;
1091 }
1092
1093 if (sb->state & (1<<MD_SB_CLEAN))
1094 mddev->recovery_cp = MaxSector;
1095 else {
1096 if (sb->events_hi == sb->cp_events_hi &&
1097 sb->events_lo == sb->cp_events_lo) {
1098 mddev->recovery_cp = sb->recovery_cp;
1099 } else
1100 mddev->recovery_cp = 0;
1101 }
1102
1103 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1104 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1105 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1106 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1107
1108 mddev->max_disks = MD_SB_DISKS;
1109
1110 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1111 mddev->bitmap_info.file == NULL) {
1112 mddev->bitmap_info.offset =
1113 mddev->bitmap_info.default_offset;
1114 mddev->bitmap_info.space =
1115 mddev->bitmap_info.default_space;
1116 }
1117
1118 } else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
1121 ++ev1;
1122 if (sb->disks[rdev->desc_nr].state & (
1123 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1124 if (ev1 < mddev->events)
1125 return -EINVAL;
1126 } else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
1130 if (ev1 < mddev->bitmap->events_cleared)
1131 return 0;
1132 if (ev1 < mddev->events)
1133 set_bit(Bitmap_sync, &rdev->flags);
1134 } else {
1135 if (ev1 < mddev->events)
1136
1137 return 0;
1138 }
1139
1140 if (mddev->level != LEVEL_MULTIPATH) {
1141 desc = sb->disks + rdev->desc_nr;
1142
1143 if (desc->state & (1<<MD_DISK_FAULTY))
1144 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1147 set_bit(In_sync, &rdev->flags);
1148 rdev->raid_disk = desc->raid_disk;
1149 rdev->saved_raid_disk = desc->raid_disk;
1150 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
1154 if (mddev->minor_version >= 91) {
1155 rdev->recovery_offset = 0;
1156 rdev->raid_disk = desc->raid_disk;
1157 }
1158 }
1159 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1160 set_bit(WriteMostly, &rdev->flags);
1161 } else
1162 set_bit(In_sync, &rdev->flags);
1163 return 0;
1164}

/*
 * sync_super for 0.90.0
 */
1169static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1170{
1171 mdp_super_t *sb;
1172 struct md_rdev *rdev2;
1173 int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
1185 int i;
1186 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1187
1188 rdev->sb_size = MD_SB_BYTES;
1189
1190 sb = page_address(rdev->sb_page);
1191
1192 memset(sb, 0, sizeof(*sb));
1193
1194 sb->md_magic = MD_SB_MAGIC;
1195 sb->major_version = mddev->major_version;
1196 sb->patch_version = mddev->patch_version;
1197 sb->gvalid_words = 0;
1198 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1199 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1200 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1201 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1202
1203 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1204 sb->level = mddev->level;
1205 sb->size = mddev->dev_sectors / 2;
1206 sb->raid_disks = mddev->raid_disks;
1207 sb->md_minor = mddev->md_minor;
1208 sb->not_persistent = 0;
1209 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1210 sb->state = 0;
1211 sb->events_hi = (mddev->events>>32);
1212 sb->events_lo = (u32)mddev->events;
1213
1214 if (mddev->reshape_position == MaxSector)
1215 sb->minor_version = 90;
1216 else {
1217 sb->minor_version = 91;
1218 sb->reshape_position = mddev->reshape_position;
1219 sb->new_level = mddev->new_level;
1220 sb->delta_disks = mddev->delta_disks;
1221 sb->new_layout = mddev->new_layout;
1222 sb->new_chunk = mddev->new_chunk_sectors << 9;
1223 }
1224 mddev->minor_version = sb->minor_version;
1225 if (mddev->in_sync)
1226 {
1227 sb->recovery_cp = mddev->recovery_cp;
1228 sb->cp_events_hi = (mddev->events>>32);
1229 sb->cp_events_lo = (u32)mddev->events;
1230 if (mddev->recovery_cp == MaxSector)
1231 sb->state = (1<< MD_SB_CLEAN);
1232 } else
1233 sb->recovery_cp = 0;
1234
1235 sb->layout = mddev->layout;
1236 sb->chunk_size = mddev->chunk_sectors << 9;
1237
1238 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1239 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1240
1241 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1242 rdev_for_each(rdev2, mddev) {
1243 mdp_disk_t *d;
1244 int desc_nr;
1245 int is_active = test_bit(In_sync, &rdev2->flags);
1246
1247 if (rdev2->raid_disk >= 0 &&
1248 sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
1253 is_active = 1;
1254 if (rdev2->raid_disk < 0 ||
1255 test_bit(Faulty, &rdev2->flags))
1256 is_active = 0;
1257 if (is_active)
1258 desc_nr = rdev2->raid_disk;
1259 else
1260 desc_nr = next_spare++;
1261 rdev2->desc_nr = desc_nr;
1262 d = &sb->disks[rdev2->desc_nr];
1263 nr_disks++;
1264 d->number = rdev2->desc_nr;
1265 d->major = MAJOR(rdev2->bdev->bd_dev);
1266 d->minor = MINOR(rdev2->bdev->bd_dev);
1267 if (is_active)
1268 d->raid_disk = rdev2->raid_disk;
1269 else
1270 d->raid_disk = rdev2->desc_nr;
1271 if (test_bit(Faulty, &rdev2->flags))
1272 d->state = (1<<MD_DISK_FAULTY);
1273 else if (is_active) {
1274 d->state = (1<<MD_DISK_ACTIVE);
1275 if (test_bit(In_sync, &rdev2->flags))
1276 d->state |= (1<<MD_DISK_SYNC);
1277 active++;
1278 working++;
1279 } else {
1280 d->state = 0;
1281 spare++;
1282 working++;
1283 }
1284 if (test_bit(WriteMostly, &rdev2->flags))
1285 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1286 }
1287
1288 for (i=0 ; i < mddev->raid_disks ; i++) {
1289 mdp_disk_t *d = &sb->disks[i];
1290 if (d->state == 0 && d->number == 0) {
1291 d->number = i;
1292 d->raid_disk = i;
1293 d->state = (1<<MD_DISK_REMOVED);
1294 d->state |= (1<<MD_DISK_FAULTY);
1295 failed++;
1296 }
1297 }
1298 sb->nr_disks = nr_disks;
1299 sb->active_disks = active;
1300 sb->working_disks = working;
1301 sb->failed_disks = failed;
1302 sb->spare_disks = spare;
1303
1304 sb->this_disk = sb->disks[rdev->desc_nr];
1305 sb->sb_csum = calc_sb_csum(sb);
1306}

/*
 * rdev_size_change for 0.90.0
 */
1311static unsigned long long
1312super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1313{
1314 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1315 return 0;
1316 if (rdev->mddev->bitmap_info.offset)
1317 return 0;
1318 rdev->sb_start = calc_dev_sboffset(rdev);
1319 if (!num_sectors || num_sectors > rdev->sb_start)
1320 num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
1324 if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
1325 rdev->mddev->level >= 1)
1326 num_sectors = (sector_t)(2ULL << 32) - 2;
1327 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1328 rdev->sb_page);
1329 md_super_wait(rdev->mddev);
1330 return num_sectors;
1331}
1332
1333static int
1334super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1335{
	/* non-zero offset changes not possible with v0.90 */
1337 return new_offset == 0;
1338}

/*
 * version 1 superblock
 */
1344static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1345{
1346 __le32 disk_csum;
1347 u32 csum;
1348 unsigned long long newcsum;
1349 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1350 __le32 *isuper = (__le32*)sb;
1351
1352 disk_csum = sb->sb_csum;
1353 sb->sb_csum = 0;
1354 newcsum = 0;
1355 for (; size >= 4; size -= 4)
1356 newcsum += le32_to_cpu(*isuper++);
1357
1358 if (size == 2)
1359 newcsum += le16_to_cpu(*(__le16*) isuper);
1360
1361 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1362 sb->sb_csum = disk_csum;
1363 return cpu_to_le32(csum);
1364}
1365
1366static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1367{
1368 struct mdp_superblock_1 *sb;
1369 int ret;
1370 sector_t sb_start;
1371 sector_t sectors;
1372 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1373 int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
1383 switch(minor_version) {
1384 case 0:
1385 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1386 sb_start -= 8*2;
1387 sb_start &= ~(sector_t)(4*2-1);
1388 break;
1389 case 1:
1390 sb_start = 0;
1391 break;
1392 case 2:
1393 sb_start = 8;
1394 break;
1395 default:
1396 return -EINVAL;
1397 }
1398 rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
1403 ret = read_disk_sb(rdev, 4096);
1404 if (ret) return ret;
1405
1406 sb = page_address(rdev->sb_page);
1407
1408 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1409 sb->major_version != cpu_to_le32(1) ||
1410 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1411 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1412 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1413 return -EINVAL;
1414
1415 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1416 printk("md: invalid superblock checksum on %s\n",
1417 bdevname(rdev->bdev,b));
1418 return -EINVAL;
1419 }
1420 if (le64_to_cpu(sb->data_size) < 10) {
1421 printk("md: data_size too small on %s\n",
1422 bdevname(rdev->bdev,b));
1423 return -EINVAL;
1424 }
1425 if (sb->pad0 ||
1426 sb->pad3[0] ||
1427 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1428
1429 return -EINVAL;
1430
1431 rdev->preferred_minor = 0xffff;
1432 rdev->data_offset = le64_to_cpu(sb->data_offset);
1433 rdev->new_data_offset = rdev->data_offset;
1434 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1435 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1436 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1437 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1438
1439 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
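	/* round sb_size up to a multiple of the device's logical block size */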
1440 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1441 if (rdev->sb_size & bmask)
1442 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1443
1444 if (minor_version
1445 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1446 return -EINVAL;
1447 if (minor_version
1448 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1449 return -EINVAL;
1450
1451 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1452 rdev->desc_nr = -1;
1453 else
1454 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1455
1456 if (!rdev->bb_page) {
1457 rdev->bb_page = alloc_page(GFP_KERNEL);
1458 if (!rdev->bb_page)
1459 return -ENOMEM;
1460 }
1461 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1462 rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
1466 s32 offset;
1467 sector_t bb_sector;
1468 u64 *bbp;
1469 int i;
1470 int sectors = le16_to_cpu(sb->bblog_size);
1471 if (sectors > (PAGE_SIZE / 512))
1472 return -EINVAL;
1473 offset = le32_to_cpu(sb->bblog_offset);
1474 if (offset == 0)
1475 return -EINVAL;
1476 bb_sector = (long long)offset;
1477 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1478 rdev->bb_page, REQ_OP_READ, 0, true))
1479 return -EIO;
1480 bbp = (u64 *)page_address(rdev->bb_page);
1481 rdev->badblocks.shift = sb->bblog_shift;
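		/* each 64-bit record packs the start sector in the upper 54 bits
		 * and the length in the low 10 bits, both scaled by bblog_shift */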
1482 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1483 u64 bb = le64_to_cpu(*bbp);
1484 int count = bb & (0x3ff);
1485 u64 sector = bb >> 10;
1486 sector <<= sb->bblog_shift;
1487 count <<= sb->bblog_shift;
1488 if (bb + 1 == 0)
1489 break;
1490 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1491 return -EINVAL;
1492 }
1493 } else if (sb->bblog_offset != 0)
1494 rdev->badblocks.shift = 0;
1495
1496 if (!refdev) {
1497 ret = 1;
1498 } else {
1499 __u64 ev1, ev2;
1500 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1501
1502 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1503 sb->level != refsb->level ||
1504 sb->layout != refsb->layout ||
1505 sb->chunksize != refsb->chunksize) {
1506 printk(KERN_WARNING "md: %s has strangely different"
1507 " superblock to %s\n",
1508 bdevname(rdev->bdev,b),
1509 bdevname(refdev->bdev,b2));
1510 return -EINVAL;
1511 }
1512 ev1 = le64_to_cpu(sb->events);
1513 ev2 = le64_to_cpu(refsb->events);
1514
1515 if (ev1 > ev2)
1516 ret = 1;
1517 else
1518 ret = 0;
1519 }
1520 if (minor_version) {
1521 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1522 sectors -= rdev->data_offset;
1523 } else
1524 sectors = rdev->sb_start;
1525 if (sectors < le64_to_cpu(sb->data_size))
1526 return -EINVAL;
1527 rdev->sectors = le64_to_cpu(sb->data_size);
1528 return ret;
1529}
1530
1531static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1532{
1533 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1534 __u64 ev1 = le64_to_cpu(sb->events);
1535
1536 rdev->raid_disk = -1;
1537 clear_bit(Faulty, &rdev->flags);
1538 clear_bit(In_sync, &rdev->flags);
1539 clear_bit(Bitmap_sync, &rdev->flags);
1540 clear_bit(WriteMostly, &rdev->flags);
1541
1542 if (mddev->raid_disks == 0) {
1543 mddev->major_version = 1;
1544 mddev->patch_version = 0;
1545 mddev->external = 0;
1546 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1547 mddev->ctime = le64_to_cpu(sb->ctime);
1548 mddev->utime = le64_to_cpu(sb->utime);
1549 mddev->level = le32_to_cpu(sb->level);
1550 mddev->clevel[0] = 0;
1551 mddev->layout = le32_to_cpu(sb->layout);
1552 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1553 mddev->dev_sectors = le64_to_cpu(sb->size);
1554 mddev->events = ev1;
1555 mddev->bitmap_info.offset = 0;
1556 mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after the superblock,
		 * using 3K - total of 4K
		 */
1560 mddev->bitmap_info.default_offset = 1024 >> 9;
1561 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1562 mddev->reshape_backwards = 0;
1563
1564 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1565 memcpy(mddev->uuid, sb->set_uuid, 16);
1566
1567 mddev->max_disks = (4096-256)/2;
1568
1569 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1570 mddev->bitmap_info.file == NULL) {
1571 mddev->bitmap_info.offset =
1572 (__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
1578 if (mddev->minor_version > 0)
1579 mddev->bitmap_info.space = 0;
1580 else if (mddev->bitmap_info.offset > 0)
1581 mddev->bitmap_info.space =
1582 8 - mddev->bitmap_info.offset;
1583 else
1584 mddev->bitmap_info.space =
1585 -mddev->bitmap_info.offset;
1586 }
1587
1588 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1589 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1590 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1591 mddev->new_level = le32_to_cpu(sb->new_level);
1592 mddev->new_layout = le32_to_cpu(sb->new_layout);
1593 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1594 if (mddev->delta_disks < 0 ||
1595 (mddev->delta_disks == 0 &&
1596 (le32_to_cpu(sb->feature_map)
1597 & MD_FEATURE_RESHAPE_BACKWARDS)))
1598 mddev->reshape_backwards = 1;
1599 } else {
1600 mddev->reshape_position = MaxSector;
1601 mddev->delta_disks = 0;
1602 mddev->new_level = mddev->level;
1603 mddev->new_layout = mddev->layout;
1604 mddev->new_chunk_sectors = mddev->chunk_sectors;
1605 }
1606
1607 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1608 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1609 } else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
1612 ++ev1;
1613 if (rdev->desc_nr >= 0 &&
1614 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1615 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1616 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1617 if (ev1 < mddev->events)
1618 return -EINVAL;
1619 } else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
1623 if (ev1 < mddev->bitmap->events_cleared)
1624 return 0;
1625 if (ev1 < mddev->events)
1626 set_bit(Bitmap_sync, &rdev->flags);
1627 } else {
1628 if (ev1 < mddev->events)
1629
1630 return 0;
1631 }
1632 if (mddev->level != LEVEL_MULTIPATH) {
1633 int role;
1634 if (rdev->desc_nr < 0 ||
1635 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1636 role = MD_DISK_ROLE_SPARE;
1637 rdev->desc_nr = -1;
1638 } else
1639 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1640 switch(role) {
1641 case MD_DISK_ROLE_SPARE:
1642 break;
1643 case MD_DISK_ROLE_FAULTY:
1644 set_bit(Faulty, &rdev->flags);
1645 break;
1646 case MD_DISK_ROLE_JOURNAL:
1647 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1648
1649 printk(KERN_WARNING
1650 "md: journal device provided without journal feature, ignoring the device\n");
1651 return -EINVAL;
1652 }
1653 set_bit(Journal, &rdev->flags);
1654 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1655 rdev->raid_disk = 0;
1656 break;
1657 default:
1658 rdev->saved_raid_disk = role;
1659 if ((le32_to_cpu(sb->feature_map) &
1660 MD_FEATURE_RECOVERY_OFFSET)) {
1661 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1662 if (!(le32_to_cpu(sb->feature_map) &
1663 MD_FEATURE_RECOVERY_BITMAP))
1664 rdev->saved_raid_disk = -1;
1665 } else
1666 set_bit(In_sync, &rdev->flags);
1667 rdev->raid_disk = role;
1668 break;
1669 }
1670 if (sb->devflags & WriteMostly1)
1671 set_bit(WriteMostly, &rdev->flags);
1672 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1673 set_bit(Replacement, &rdev->flags);
1674 } else
1675 set_bit(In_sync, &rdev->flags);
1676
1677 return 0;
1678}
1679
1680static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1681{
1682 struct mdp_superblock_1 *sb;
1683 struct md_rdev *rdev2;
1684 int max_dev, i;
1685
1686
1687 sb = page_address(rdev->sb_page);
1688
1689 sb->feature_map = 0;
1690 sb->pad0 = 0;
1691 sb->recovery_offset = cpu_to_le64(0);
1692 memset(sb->pad3, 0, sizeof(sb->pad3));
1693
1694 sb->utime = cpu_to_le64((__u64)mddev->utime);
1695 sb->events = cpu_to_le64(mddev->events);
1696 if (mddev->in_sync)
1697 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1698 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1699 sb->resync_offset = cpu_to_le64(MaxSector);
1700 else
1701 sb->resync_offset = cpu_to_le64(0);
1702
1703 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1704
1705 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1706 sb->size = cpu_to_le64(mddev->dev_sectors);
1707 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1708 sb->level = cpu_to_le32(mddev->level);
1709 sb->layout = cpu_to_le32(mddev->layout);
1710
1711 if (test_bit(WriteMostly, &rdev->flags))
1712 sb->devflags |= WriteMostly1;
1713 else
1714 sb->devflags &= ~WriteMostly1;
1715 sb->data_offset = cpu_to_le64(rdev->data_offset);
1716 sb->data_size = cpu_to_le64(rdev->sectors);
1717
1718 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1719 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1720 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1721 }
1722
1723 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1724 !test_bit(In_sync, &rdev->flags)) {
1725 sb->feature_map |=
1726 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1727 sb->recovery_offset =
1728 cpu_to_le64(rdev->recovery_offset);
1729 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1730 sb->feature_map |=
1731 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1732 }
1733
1734 if (test_bit(Journal, &rdev->flags))
1735 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1736 if (test_bit(Replacement, &rdev->flags))
1737 sb->feature_map |=
1738 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1739
1740 if (mddev->reshape_position != MaxSector) {
1741 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1742 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1743 sb->new_layout = cpu_to_le32(mddev->new_layout);
1744 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1745 sb->new_level = cpu_to_le32(mddev->new_level);
1746 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1747 if (mddev->delta_disks == 0 &&
1748 mddev->reshape_backwards)
1749 sb->feature_map
1750 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1751 if (rdev->new_data_offset != rdev->data_offset) {
1752 sb->feature_map
1753 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1754 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1755 - rdev->data_offset));
1756 }
1757 }
1758
1759 if (mddev_is_clustered(mddev))
1760 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
1761
1762 if (rdev->badblocks.count == 0)
1763 ;
1764 else if (sb->bblog_offset == 0)
1765
1766 md_error(mddev, rdev);
1767 else {
1768 struct badblocks *bb = &rdev->badblocks;
1769 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1770 u64 *p = bb->page;
1771 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1772 if (bb->changed) {
1773 unsigned seq;
1774
1775retry:
1776 seq = read_seqbegin(&bb->lock);
1777
1778 memset(bbp, 0xff, PAGE_SIZE);
1779
1780 for (i = 0 ; i < bb->count ; i++) {
1781 u64 internal_bb = p[i];
1782 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1783 | BB_LEN(internal_bb));
1784 bbp[i] = cpu_to_le64(store_bb);
1785 }
1786 bb->changed = 0;
1787 if (read_seqretry(&bb->lock, seq))
1788 goto retry;
1789
1790 bb->sector = (rdev->sb_start +
1791 (int)le32_to_cpu(sb->bblog_offset));
1792 bb->size = le16_to_cpu(sb->bblog_size);
1793 }
1794 }
1795
1796 max_dev = 0;
1797 rdev_for_each(rdev2, mddev)
1798 if (rdev2->desc_nr+1 > max_dev)
1799 max_dev = rdev2->desc_nr+1;
1800
1801 if (max_dev > le32_to_cpu(sb->max_dev)) {
1802 int bmask;
1803 sb->max_dev = cpu_to_le32(max_dev);
1804 rdev->sb_size = max_dev * 2 + 256;
1805 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1806 if (rdev->sb_size & bmask)
1807 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1808 } else
1809 max_dev = le32_to_cpu(sb->max_dev);
1810
1811 for (i=0; i<max_dev;i++)
1812 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1813
1814 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
1815 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
1816
1817 rdev_for_each(rdev2, mddev) {
1818 i = rdev2->desc_nr;
1819 if (test_bit(Faulty, &rdev2->flags))
1820 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
1821 else if (test_bit(In_sync, &rdev2->flags))
1822 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1823 else if (test_bit(Journal, &rdev2->flags))
1824 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
1825 else if (rdev2->raid_disk >= 0)
1826 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1827 else
1828 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
1829 }
1830
1831 sb->sb_csum = calc_sb_1_csum(sb);
1832}
1833
1834static unsigned long long
1835super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1836{
1837 struct mdp_superblock_1 *sb;
1838 sector_t max_sectors;
1839 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1840 return 0;
1841 if (rdev->data_offset != rdev->new_data_offset)
1842 return 0;
1843 if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
1845 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1846 max_sectors -= rdev->data_offset;
1847 if (!num_sectors || num_sectors > max_sectors)
1848 num_sectors = max_sectors;
1849 } else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
1851 return 0;
1852 } else {
		/* minor version 0; superblock after data */
1854 sector_t sb_start;
1855 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1856 sb_start &= ~(sector_t)(4*2 - 1);
1857 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1858 if (!num_sectors || num_sectors > max_sectors)
1859 num_sectors = max_sectors;
1860 rdev->sb_start = sb_start;
1861 }
1862 sb = page_address(rdev->sb_page);
1863 sb->data_size = cpu_to_le64(num_sectors);
1864 sb->super_offset = rdev->sb_start;
1865 sb->sb_csum = calc_sb_1_csum(sb);
1866 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1867 rdev->sb_page);
1868 md_super_wait(rdev->mddev);
1869 return num_sectors;
1870
1871}
1872
1873static int
1874super_1_allow_new_offset(struct md_rdev *rdev,
1875 unsigned long long new_offset)
1876{
1877
1878 struct bitmap *bitmap;
1879 if (new_offset >= rdev->data_offset)
1880 return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
1884 if (rdev->mddev->minor_version == 0)
1885 return 1;

	/* with metadata before the data, shrinking data_offset risks
	 * stepping on the superblock, the bitmap or the bad-block list,
	 * so make sure the new offset leaves room for all of them.
	 */
1893 if (rdev->sb_start + (32+4)*2 > new_offset)
1894 return 0;
1895 bitmap = rdev->mddev->bitmap;
1896 if (bitmap && !rdev->mddev->bitmap_info.file &&
1897 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1898 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1899 return 0;
1900 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1901 return 0;
1902
1903 return 1;
1904}
1905
1906static struct super_type super_types[] = {
1907 [0] = {
1908 .name = "0.90.0",
1909 .owner = THIS_MODULE,
1910 .load_super = super_90_load,
1911 .validate_super = super_90_validate,
1912 .sync_super = super_90_sync,
1913 .rdev_size_change = super_90_rdev_size_change,
1914 .allow_new_offset = super_90_allow_new_offset,
1915 },
1916 [1] = {
1917 .name = "md-1",
1918 .owner = THIS_MODULE,
1919 .load_super = super_1_load,
1920 .validate_super = super_1_validate,
1921 .sync_super = super_1_sync,
1922 .rdev_size_change = super_1_rdev_size_change,
1923 .allow_new_offset = super_1_allow_new_offset,
1924 },
1925};
1926
1927static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1928{
1929 if (mddev->sync_super) {
1930 mddev->sync_super(mddev, rdev);
1931 return;
1932 }
1933
1934 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1935
1936 super_types[mddev->major_version].sync_super(mddev, rdev);
1937}
1938
1939static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1940{
1941 struct md_rdev *rdev, *rdev2;
1942
1943 rcu_read_lock();
1944 rdev_for_each_rcu(rdev, mddev1) {
1945 if (test_bit(Faulty, &rdev->flags) ||
1946 test_bit(Journal, &rdev->flags) ||
1947 rdev->raid_disk == -1)
1948 continue;
1949 rdev_for_each_rcu(rdev2, mddev2) {
1950 if (test_bit(Faulty, &rdev2->flags) ||
1951 test_bit(Journal, &rdev2->flags) ||
1952 rdev2->raid_disk == -1)
1953 continue;
1954 if (rdev->bdev->bd_contains ==
1955 rdev2->bdev->bd_contains) {
1956 rcu_read_unlock();
1957 return 1;
1958 }
1959 }
1960 }
1961 rcu_read_unlock();
1962 return 0;
1963}
1964
1965static LIST_HEAD(pending_raid_disks);

/*
 * Try to register data integrity profile for an mddev
 *
 * This is only possible if all component devices expose a common
 * integrity profile; that profile is then registered for the md
 * device itself so integrity-aware users can rely on it.
 */
1974int md_integrity_register(struct mddev *mddev)
1975{
1976 struct md_rdev *rdev, *reference = NULL;
1977
1978 if (list_empty(&mddev->disks))
1979 return 0;
1980 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1981 return 0;
1982 rdev_for_each(rdev, mddev) {
1983
1984 if (test_bit(Faulty, &rdev->flags))
1985 continue;
1986 if (rdev->raid_disk < 0)
1987 continue;
1988 if (!reference) {
1989
1990 reference = rdev;
1991 continue;
1992 }
1993
1994 if (blk_integrity_compare(reference->bdev->bd_disk,
1995 rdev->bdev->bd_disk) < 0)
1996 return -EINVAL;
1997 }
1998 if (!reference || !bdev_get_integrity(reference->bdev))
1999 return 0;

	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
2004 blk_integrity_register(mddev->gendisk,
2005 bdev_get_integrity(reference->bdev));
2006
2007 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2008 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2009 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2010 mdname(mddev));
2011 return -EINVAL;
2012 }
2013 return 0;
2014}
2015EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
2021int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2022{
2023 struct blk_integrity *bi_rdev;
2024 struct blk_integrity *bi_mddev;
2025 char name[BDEVNAME_SIZE];
2026
2027 if (!mddev->gendisk)
2028 return 0;
2029
2030 bi_rdev = bdev_get_integrity(rdev->bdev);
2031 bi_mddev = blk_get_integrity(mddev->gendisk);
2032
2033 if (!bi_mddev)
2034 return 0;
2035
2036 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2037 printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
2038 mdname(mddev), bdevname(rdev->bdev, name));
2039 return -ENXIO;
2040 }
2041
2042 return 0;
2043}
2044EXPORT_SYMBOL(md_integrity_add_rdev);
2045
2046static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2047{
2048 char b[BDEVNAME_SIZE];
2049 struct kobject *ko;
2050 int err;

	/* prevent duplicates */
2053 if (find_rdev(mddev, rdev->bdev->bd_dev))
2054 return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
2057 if (!test_bit(Journal, &rdev->flags) &&
2058 rdev->sectors &&
2059 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2060 if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
2065 if (mddev->level > 0)
2066 return -ENOSPC;
2067 } else
2068 mddev->dev_sectors = rdev->sectors;
2069 }

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
2075 rcu_read_lock();
2076 if (rdev->desc_nr < 0) {
2077 int choice = 0;
2078 if (mddev->pers)
2079 choice = mddev->raid_disks;
2080 while (md_find_rdev_nr_rcu(mddev, choice))
2081 choice++;
2082 rdev->desc_nr = choice;
2083 } else {
2084 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2085 rcu_read_unlock();
2086 return -EBUSY;
2087 }
2088 }
2089 rcu_read_unlock();
2090 if (!test_bit(Journal, &rdev->flags) &&
2091 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2092 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2093 mdname(mddev), mddev->max_disks);
2094 return -EBUSY;
2095 }
2096 bdevname(rdev->bdev,b);
2097 strreplace(b, '/', '!');
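	/* '/' is not allowed in a kobject/sysfs name, so use '!' instead */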
2098
2099 rdev->mddev = mddev;
2100 printk(KERN_INFO "md: bind<%s>\n", b);
2101
2102 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2103 goto fail;
2104
2105 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2106 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2107 ;
2108 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2109
2110 list_add_rcu(&rdev->same_set, &mddev->disks);
2111 bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
2114 mddev->recovery_disabled++;
2115
2116 return 0;
2117
2118 fail:
2119 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2120 b, mdname(mddev));
2121 return err;
2122}
2123
2124static void md_delayed_delete(struct work_struct *ws)
2125{
2126 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2127 kobject_del(&rdev->kobj);
2128 kobject_put(&rdev->kobj);
2129}
2130
2131static void unbind_rdev_from_array(struct md_rdev *rdev)
2132{
2133 char b[BDEVNAME_SIZE];
2134
2135 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2136 list_del_rcu(&rdev->same_set);
2137 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2138 rdev->mddev = NULL;
2139 sysfs_remove_link(&rdev->kobj, "block");
2140 sysfs_put(rdev->sysfs_state);
2141 rdev->sysfs_state = NULL;
2142 rdev->badblocks.count = 0;

	/*
	 * Wait for any RCU readers still walking ->same_set, then defer the
	 * kobject removal to a workqueue so it happens outside the current
	 * context (which may itself be a sysfs handler of this rdev).
	 */
2147 synchronize_rcu();
2148 INIT_WORK(&rdev->del_work, md_delayed_delete);
2149 kobject_get(&rdev->kobj);
2150 queue_work(md_misc_wq, &rdev->del_work);
2151}

/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2158static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2159{
2160 int err = 0;
2161 struct block_device *bdev;
2162 char b[BDEVNAME_SIZE];
2163
2164 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2165 shared ? (struct md_rdev *)lock_rdev : rdev);
2166 if (IS_ERR(bdev)) {
2167 printk(KERN_ERR "md: could not open %s.\n",
2168 __bdevname(dev, b));
2169 return PTR_ERR(bdev);
2170 }
2171 rdev->bdev = bdev;
2172 return err;
2173}
2174
2175static void unlock_rdev(struct md_rdev *rdev)
2176{
2177 struct block_device *bdev = rdev->bdev;
2178 rdev->bdev = NULL;
2179 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2180}
2181
2182void md_autodetect_dev(dev_t dev);
2183
2184static void export_rdev(struct md_rdev *rdev)
2185{
2186 char b[BDEVNAME_SIZE];
2187
2188 printk(KERN_INFO "md: export_rdev(%s)\n",
2189 bdevname(rdev->bdev,b));
2190 md_rdev_clear(rdev);
2191#ifndef MODULE
2192 if (test_bit(AutoDetected, &rdev->flags))
2193 md_autodetect_dev(rdev->bdev->bd_dev);
2194#endif
2195 unlock_rdev(rdev);
2196 kobject_put(&rdev->kobj);
2197}
2198
2199void md_kick_rdev_from_array(struct md_rdev *rdev)
2200{
2201 unbind_rdev_from_array(rdev);
2202 export_rdev(rdev);
2203}
2204EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2205
2206static void export_array(struct mddev *mddev)
2207{
2208 struct md_rdev *rdev;
2209
2210 while (!list_empty(&mddev->disks)) {
2211 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2212 same_set);
2213 md_kick_rdev_from_array(rdev);
2214 }
2215 mddev->raid_disks = 0;
2216 mddev->major_version = 0;
2217}
2218
2219static void sync_sbs(struct mddev *mddev, int nospares)
2220{
	/* Update each superblock (in memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2227 struct md_rdev *rdev;
2228 rdev_for_each(rdev, mddev) {
2229 if (rdev->sb_events == mddev->events ||
2230 (nospares &&
2231 rdev->raid_disk < 0 &&
2232 rdev->sb_events+1 == mddev->events)) {
2233
2234 rdev->sb_loaded = 2;
2235 } else {
2236 sync_super(mddev, rdev);
2237 rdev->sb_loaded = 1;
2238 }
2239 }
2240}
2241
2242static bool does_sb_need_changing(struct mddev *mddev)
2243{
2244 struct md_rdev *rdev;
2245 struct mdp_superblock_1 *sb;
2246 int role;
2247
2248
2249 rdev_for_each(rdev, mddev)
2250 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2251 break;
2252
2253
2254 if (!rdev)
2255 return false;
2256
2257 sb = page_address(rdev->sb_page);
2258
2259 rdev_for_each(rdev, mddev) {
2260 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2261
2262 if (role == 0xffff && rdev->raid_disk >=0 &&
2263 !test_bit(Faulty, &rdev->flags))
2264 return true;
2265
2266 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2267 return true;
2268 }
2269
2270
2271 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2272 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2273 (mddev->layout != le64_to_cpu(sb->layout)) ||
2274 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2275 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2276 return true;
2277
2278 return false;
2279}
2280
2281void md_update_sb(struct mddev *mddev, int force_change)
2282{
2283 struct md_rdev *rdev;
2284 int sync_req;
2285 int nospares = 0;
2286 int any_badblocks_changed = 0;
2287 int ret = -1;
2288
2289 if (mddev->ro) {
2290 if (force_change)
2291 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2292 return;
2293 }
2294
2295repeat:
2296 if (mddev_is_clustered(mddev)) {
2297 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2298 force_change = 1;
2299 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
2300 nospares = 1;
2301 ret = md_cluster_ops->metadata_update_start(mddev);
2302
2303 if (!does_sb_need_changing(mddev)) {
2304 if (ret == 0)
2305 md_cluster_ops->metadata_update_cancel(mddev);
2306 bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2307 BIT(MD_CHANGE_DEVS) |
2308 BIT(MD_CHANGE_CLEAN));
2309 return;
2310 }
2311 }
2312
	/* Bring each device's recovery_offset up to date with the resync
	 * progress before any superblock is written out. */
2314 rdev_for_each(rdev, mddev) {
2315 if (rdev->raid_disk >= 0 &&
2316 mddev->delta_disks >= 0 &&
2317 !test_bit(Journal, &rdev->flags) &&
2318 !test_bit(In_sync, &rdev->flags) &&
2319 mddev->curr_resync_completed > rdev->recovery_offset)
2320 rdev->recovery_offset = mddev->curr_resync_completed;
2321
2322 }
2323 if (!mddev->persistent) {
2324 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2325 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2326 if (!mddev->external) {
2327 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2328 rdev_for_each(rdev, mddev) {
2329 if (rdev->badblocks.changed) {
2330 rdev->badblocks.changed = 0;
2331 ack_all_badblocks(&rdev->badblocks);
2332 md_error(mddev, rdev);
2333 }
2334 clear_bit(Blocked, &rdev->flags);
2335 clear_bit(BlockedBadBlocks, &rdev->flags);
2336 wake_up(&rdev->blocked_wait);
2337 }
2338 }
2339 wake_up(&mddev->sb_wait);
2340 return;
2341 }
2342
2343 spin_lock(&mddev->lock);
2344
2345 mddev->utime = ktime_get_real_seconds();
2346
2347 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2348 force_change = 1;
2349 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only the clean/dirty state changed; remember that so spare
		 * superblocks (and possibly the event count bump) can be
		 * skipped below.
		 */
2354 nospares = 1;
2355 if (force_change)
2356 nospares = 0;
2357 if (mddev->degraded)
		/*
		 * Never skip spare superblocks on a degraded array: a spare
		 * whose event count were allowed to drift could later look
		 * fresh enough to be re-added without a resync, so every
		 * superblock must be rewritten.
		 */
2367 nospares = 0;
2368
2369 sync_req = mddev->in_sync;
2370
2371
2372
2373 if (nospares
2374 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2375 && mddev->can_decrease_events
2376 && mddev->events != 1) {
2377 mddev->events--;
2378 mddev->can_decrease_events = 0;
2379 } else {
2380
2381 mddev->events ++;
2382 mddev->can_decrease_events = nospares;
2383 }
2384
2385
2386
2387
2388
2389
2390 WARN_ON(mddev->events == 0);
2391
2392 rdev_for_each(rdev, mddev) {
2393 if (rdev->badblocks.changed)
2394 any_badblocks_changed++;
2395 if (test_bit(Faulty, &rdev->flags))
2396 set_bit(FaultRecorded, &rdev->flags);
2397 }
2398
2399 sync_sbs(mddev, nospares);
2400 spin_unlock(&mddev->lock);
2401
2402 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2403 mdname(mddev), mddev->in_sync);
2404
2405 bitmap_update_sb(mddev->bitmap);
2406 rdev_for_each(rdev, mddev) {
2407 char b[BDEVNAME_SIZE];
2408
2409 if (rdev->sb_loaded != 1)
2410 continue;
2411
2412 if (!test_bit(Faulty, &rdev->flags)) {
2413 md_super_write(mddev,rdev,
2414 rdev->sb_start, rdev->sb_size,
2415 rdev->sb_page);
2416 pr_debug("md: (write) %s's sb offset: %llu\n",
2417 bdevname(rdev->bdev, b),
2418 (unsigned long long)rdev->sb_start);
2419 rdev->sb_events = mddev->events;
2420 if (rdev->badblocks.size) {
2421 md_super_write(mddev, rdev,
2422 rdev->badblocks.sector,
2423 rdev->badblocks.size << 9,
2424 rdev->bb_page);
2425 rdev->badblocks.size = 0;
2426 }
2427
2428 } else
2429 pr_debug("md: %s (skipping faulty)\n",
2430 bdevname(rdev->bdev, b));
2431
2432 if (mddev->level == LEVEL_MULTIPATH)
2433
2434 break;
2435 }
2436 md_super_wait(mddev);
2437
2438
2439 if (mddev_is_clustered(mddev) && ret == 0)
2440 md_cluster_ops->metadata_update_finish(mddev);
2441
2442 if (mddev->in_sync != sync_req ||
2443 !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
2444 BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
2445
2446 goto repeat;
2447 wake_up(&mddev->sb_wait);
2448 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2449 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2450
2451 rdev_for_each(rdev, mddev) {
2452 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2453 clear_bit(Blocked, &rdev->flags);
2454
2455 if (any_badblocks_changed)
2456 ack_all_badblocks(&rdev->badblocks);
2457 clear_bit(BlockedBadBlocks, &rdev->flags);
2458 wake_up(&rdev->blocked_wait);
2459 }
2460}
2461EXPORT_SYMBOL(md_update_sb);
2462
2463static int add_bound_rdev(struct md_rdev *rdev)
2464{
2465 struct mddev *mddev = rdev->mddev;
2466 int err = 0;
2467 bool add_journal = test_bit(Journal, &rdev->flags);
2468
2469 if (!mddev->pers->hot_remove_disk || add_journal) {
		/*
		 * Personalities without hot_remove_disk expect a newly bound
		 * device to be validated and hot-added immediately; journal
		 * devices take the same path but need the array quiesced
		 * around the add.
		 */
2474 super_types[mddev->major_version].
2475 validate_super(mddev, rdev);
2476 if (add_journal)
2477 mddev_suspend(mddev);
2478 err = mddev->pers->hot_add_disk(mddev, rdev);
2479 if (add_journal)
2480 mddev_resume(mddev);
2481 if (err) {
2482 md_kick_rdev_from_array(rdev);
2483 return err;
2484 }
2485 }
2486 sysfs_notify_dirent_safe(rdev->sysfs_state);
2487
2488 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2489 if (mddev->degraded)
2490 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2491 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2492 md_new_event(mddev);
2493 md_wakeup_thread(mddev->thread);
2494 return 0;
2495}
2496
/*
 * Strings written to sysfs files may or may not be '\n' terminated.
 * cmd_match() returns true if 'cmd' matches 'str', ignoring at most one
 * trailing newline on 'cmd'.
 */
2500static int cmd_match(const char *cmd, const char *str)
2501{
2502
2503
2504
2505
2506 while (*cmd && *str && *cmd == *str) {
2507 cmd++;
2508 str++;
2509 }
2510 if (*cmd == '\n')
2511 cmd++;
2512 if (*str || *cmd)
2513 return 0;
2514 return 1;
2515}
2516
2517struct rdev_sysfs_entry {
2518 struct attribute attr;
2519 ssize_t (*show)(struct md_rdev *, char *);
2520 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2521};
2522
2523static ssize_t
2524state_show(struct md_rdev *rdev, char *page)
2525{
2526 char *sep = "";
2527 size_t len = 0;
2528 unsigned long flags = ACCESS_ONCE(rdev->flags);
2529
2530 if (test_bit(Faulty, &flags) ||
2531 rdev->badblocks.unacked_exist) {
2532 len+= sprintf(page+len, "%sfaulty",sep);
2533 sep = ",";
2534 }
2535 if (test_bit(In_sync, &flags)) {
2536 len += sprintf(page+len, "%sin_sync",sep);
2537 sep = ",";
2538 }
2539 if (test_bit(Journal, &flags)) {
2540 len += sprintf(page+len, "%sjournal",sep);
2541 sep = ",";
2542 }
2543 if (test_bit(WriteMostly, &flags)) {
2544 len += sprintf(page+len, "%swrite_mostly",sep);
2545 sep = ",";
2546 }
2547 if (test_bit(Blocked, &flags) ||
2548 (rdev->badblocks.unacked_exist
2549 && !test_bit(Faulty, &flags))) {
2550 len += sprintf(page+len, "%sblocked", sep);
2551 sep = ",";
2552 }
2553 if (!test_bit(Faulty, &flags) &&
2554 !test_bit(Journal, &flags) &&
2555 !test_bit(In_sync, &flags)) {
2556 len += sprintf(page+len, "%sspare", sep);
2557 sep = ",";
2558 }
2559 if (test_bit(WriteErrorSeen, &flags)) {
2560 len += sprintf(page+len, "%swrite_error", sep);
2561 sep = ",";
2562 }
2563 if (test_bit(WantReplacement, &flags)) {
2564 len += sprintf(page+len, "%swant_replacement", sep);
2565 sep = ",";
2566 }
2567 if (test_bit(Replacement, &flags)) {
2568 len += sprintf(page+len, "%sreplacement", sep);
2569 sep = ",";
2570 }
2571
2572 return len+sprintf(page+len, "\n");
2573}
2574
2575static ssize_t
2576state_store(struct md_rdev *rdev, const char *buf, size_t len)
2577{
	/*
	 * Commands accepted through the "state" file (a leading '-' clears
	 * the corresponding flag where that makes sense):
	 *  faulty           - simulate a failure on this device
	 *  remove           - detach the device from the array
	 *  writemostly      - set/clear WriteMostly
	 *  blocked          - set/clear Blocked; clearing while unacknowledged
	 *                     bad blocks exist fails the device instead
	 *  insync / -insync - mark in-sync, or revert to spare (the latter
	 *                     only while the array is not running)
	 *  write_error      - set/clear WriteErrorSeen
	 *  want_replacement - request (or cancel) a replacement device
	 *  replacement      - mark/unmark as a replacement (inactive array only)
	 *  re-add           - try to re-add a recently removed device
	 */
2591 int err = -EINVAL;
2592 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2593 md_error(rdev->mddev, rdev);
2594 if (test_bit(Faulty, &rdev->flags))
2595 err = 0;
2596 else
2597 err = -EBUSY;
2598 } else if (cmd_match(buf, "remove")) {
2599 if (rdev->mddev->pers) {
2600 clear_bit(Blocked, &rdev->flags);
2601 remove_and_add_spares(rdev->mddev, rdev);
2602 }
2603 if (rdev->raid_disk >= 0)
2604 err = -EBUSY;
2605 else {
2606 struct mddev *mddev = rdev->mddev;
2607 err = 0;
2608 if (mddev_is_clustered(mddev))
2609 err = md_cluster_ops->remove_disk(mddev, rdev);
2610
2611 if (err == 0) {
2612 md_kick_rdev_from_array(rdev);
2613 if (mddev->pers)
2614 md_update_sb(mddev, 1);
2615 md_new_event(mddev);
2616 }
2617 }
2618 } else if (cmd_match(buf, "writemostly")) {
2619 set_bit(WriteMostly, &rdev->flags);
2620 err = 0;
2621 } else if (cmd_match(buf, "-writemostly")) {
2622 clear_bit(WriteMostly, &rdev->flags);
2623 err = 0;
2624 } else if (cmd_match(buf, "blocked")) {
2625 set_bit(Blocked, &rdev->flags);
2626 err = 0;
2627 } else if (cmd_match(buf, "-blocked")) {
2628 if (!test_bit(Faulty, &rdev->flags) &&
2629 rdev->badblocks.unacked_exist) {
2630
2631
2632
2633 md_error(rdev->mddev, rdev);
2634 }
2635 clear_bit(Blocked, &rdev->flags);
2636 clear_bit(BlockedBadBlocks, &rdev->flags);
2637 wake_up(&rdev->blocked_wait);
2638 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2639 md_wakeup_thread(rdev->mddev->thread);
2640
2641 err = 0;
2642 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2643 set_bit(In_sync, &rdev->flags);
2644 err = 0;
2645 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2646 !test_bit(Journal, &rdev->flags)) {
2647 if (rdev->mddev->pers == NULL) {
2648 clear_bit(In_sync, &rdev->flags);
2649 rdev->saved_raid_disk = rdev->raid_disk;
2650 rdev->raid_disk = -1;
2651 err = 0;
2652 }
2653 } else if (cmd_match(buf, "write_error")) {
2654 set_bit(WriteErrorSeen, &rdev->flags);
2655 err = 0;
2656 } else if (cmd_match(buf, "-write_error")) {
2657 clear_bit(WriteErrorSeen, &rdev->flags);
2658 err = 0;
2659 } else if (cmd_match(buf, "want_replacement")) {
2660
2661
2662
2663
2664 if (rdev->raid_disk >= 0 &&
2665 !test_bit(Journal, &rdev->flags) &&
2666 !test_bit(Replacement, &rdev->flags))
2667 set_bit(WantReplacement, &rdev->flags);
2668 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2669 md_wakeup_thread(rdev->mddev->thread);
2670 err = 0;
2671 } else if (cmd_match(buf, "-want_replacement")) {
2672
2673
2674
2675 err = 0;
2676 clear_bit(WantReplacement, &rdev->flags);
2677 } else if (cmd_match(buf, "replacement")) {
2678
2679
2680
2681
2682 if (rdev->mddev->pers)
2683 err = -EBUSY;
2684 else {
2685 set_bit(Replacement, &rdev->flags);
2686 err = 0;
2687 }
2688 } else if (cmd_match(buf, "-replacement")) {
2689
2690 if (rdev->mddev->pers)
2691 err = -EBUSY;
2692 else {
2693 clear_bit(Replacement, &rdev->flags);
2694 err = 0;
2695 }
2696 } else if (cmd_match(buf, "re-add")) {
2697 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2698
2699
2700
2701
2702
2703
2704 if (!mddev_is_clustered(rdev->mddev) ||
2705 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2706 clear_bit(Faulty, &rdev->flags);
2707 err = add_bound_rdev(rdev);
2708 }
2709 } else
2710 err = -EBUSY;
2711 }
2712 if (!err)
2713 sysfs_notify_dirent_safe(rdev->sysfs_state);
2714 return err ? err : len;
2715}
2716static struct rdev_sysfs_entry rdev_state =
2717__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2718
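/*
 * "errors": number of corrected read errors recorded against this device.
 * Writing an integer replaces the current count.
 */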
2719static ssize_t
2720errors_show(struct md_rdev *rdev, char *page)
2721{
2722 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2723}
2724
2725static ssize_t
2726errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2727{
2728 unsigned int n;
2729 int rv;
2730
2731 rv = kstrtouint(buf, 10, &n);
2732 if (rv < 0)
2733 return rv;
2734 atomic_set(&rdev->corrected_errors, n);
2735 return len;
2736}
2737static struct rdev_sysfs_entry rdev_errors =
2738__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2739
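/*
 * "slot": the raid slot this device occupies, "none" for a spare and
 * "journal" for a journal device.  Writing a slot number hot-adds the
 * device into that slot; writing "none" removes it from a running array.
 */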
2740static ssize_t
2741slot_show(struct md_rdev *rdev, char *page)
2742{
2743 if (test_bit(Journal, &rdev->flags))
2744 return sprintf(page, "journal\n");
2745 else if (rdev->raid_disk < 0)
2746 return sprintf(page, "none\n");
2747 else
2748 return sprintf(page, "%d\n", rdev->raid_disk);
2749}
2750
2751static ssize_t
2752slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2753{
2754 int slot;
2755 int err;
2756
2757 if (test_bit(Journal, &rdev->flags))
2758 return -EBUSY;
2759 if (strncmp(buf, "none", 4)==0)
2760 slot = -1;
2761 else {
2762 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2763 if (err < 0)
2764 return err;
2765 }
2766 if (rdev->mddev->pers && slot == -1) {
2767
2768
2769
2770
2771
2772
2773
2774 if (rdev->raid_disk == -1)
2775 return -EEXIST;
2776
2777 if (rdev->mddev->pers->hot_remove_disk == NULL)
2778 return -EINVAL;
2779 clear_bit(Blocked, &rdev->flags);
2780 remove_and_add_spares(rdev->mddev, rdev);
2781 if (rdev->raid_disk >= 0)
2782 return -EBUSY;
2783 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2784 md_wakeup_thread(rdev->mddev->thread);
2785 } else if (rdev->mddev->pers) {
2786
2787
2788
2789 int err;
2790
2791 if (rdev->raid_disk != -1)
2792 return -EBUSY;
2793
2794 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2795 return -EBUSY;
2796
2797 if (rdev->mddev->pers->hot_add_disk == NULL)
2798 return -EINVAL;
2799
2800 if (slot >= rdev->mddev->raid_disks &&
2801 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2802 return -ENOSPC;
2803
2804 rdev->raid_disk = slot;
2805 if (test_bit(In_sync, &rdev->flags))
2806 rdev->saved_raid_disk = slot;
2807 else
2808 rdev->saved_raid_disk = -1;
2809 clear_bit(In_sync, &rdev->flags);
2810 clear_bit(Bitmap_sync, &rdev->flags);
2811 err = rdev->mddev->pers->
2812 hot_add_disk(rdev->mddev, rdev);
2813 if (err) {
2814 rdev->raid_disk = -1;
2815 return err;
2816 } else
2817 sysfs_notify_dirent_safe(rdev->sysfs_state);
2818 if (sysfs_link_rdev(rdev->mddev, rdev))
2819 ;
2820
2821 } else {
2822 if (slot >= rdev->mddev->raid_disks &&
2823 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2824 return -ENOSPC;
2825 rdev->raid_disk = slot;
2826
2827 clear_bit(Faulty, &rdev->flags);
2828 clear_bit(WriteMostly, &rdev->flags);
2829 set_bit(In_sync, &rdev->flags);
2830 sysfs_notify_dirent_safe(rdev->sysfs_state);
2831 }
2832 return len;
2833}
2834
2835static struct rdev_sysfs_entry rdev_slot =
2836__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2837
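/*
 * "offset": start of the data area on this device, in 512-byte sectors.
 * It can only be changed while the device is not an active member of a
 * running array.
 */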
2838static ssize_t
2839offset_show(struct md_rdev *rdev, char *page)
2840{
2841 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2842}
2843
2844static ssize_t
2845offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2846{
2847 unsigned long long offset;
2848 if (kstrtoull(buf, 10, &offset) < 0)
2849 return -EINVAL;
2850 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2851 return -EBUSY;
2852 if (rdev->sectors && rdev->mddev->external)
		/* The offset must be settled before a size is set, so that
		 * the overlap checks against other arrays stay meaningful. */
2855 return -EBUSY;
2856 rdev->data_offset = offset;
2857 rdev->new_data_offset = offset;
2858 return len;
2859}
2860
2861static struct rdev_sysfs_entry rdev_offset =
2862__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2863
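/*
 * "new_offset": the data offset this device will use after a reshape.
 * The direction of the change must match the array's reshape direction,
 * and it cannot be changed while a resync or reshape is running.
 */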
2864static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2865{
2866 return sprintf(page, "%llu\n",
2867 (unsigned long long)rdev->new_data_offset);
2868}
2869
2870static ssize_t new_offset_store(struct md_rdev *rdev,
2871 const char *buf, size_t len)
2872{
2873 unsigned long long new_offset;
2874 struct mddev *mddev = rdev->mddev;
2875
2876 if (kstrtoull(buf, 10, &new_offset) < 0)
2877 return -EINVAL;
2878
2879 if (mddev->sync_thread ||
2880 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2881 return -EBUSY;
2882 if (new_offset == rdev->data_offset)
2883
2884 ;
2885 else if (new_offset > rdev->data_offset) {
2886
2887 if (new_offset - rdev->data_offset
2888 + mddev->dev_sectors > rdev->sectors)
2889 return -E2BIG;
2890 }
2891
2892
2893
2894
2895
2896 if (new_offset < rdev->data_offset &&
2897 mddev->reshape_backwards)
2898 return -EINVAL;
2899
2900
2901
2902
2903 if (new_offset > rdev->data_offset &&
2904 !mddev->reshape_backwards)
2905 return -EINVAL;
2906
2907 if (mddev->pers && mddev->persistent &&
2908 !super_types[mddev->major_version]
2909 .allow_new_offset(rdev, new_offset))
2910 return -E2BIG;
2911 rdev->new_data_offset = new_offset;
2912 if (new_offset > rdev->data_offset)
2913 mddev->reshape_backwards = 1;
2914 else if (new_offset < rdev->data_offset)
2915 mddev->reshape_backwards = 0;
2916
2917 return len;
2918}
2919static struct rdev_sysfs_entry rdev_new_offset =
2920__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2921
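/*
 * "size": amount of this device available to the array, in 1K blocks
 * (i.e. sectors / 2).  It cannot be reduced below the space the array is
 * already using.
 */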
2922static ssize_t
2923rdev_size_show(struct md_rdev *rdev, char *page)
2924{
2925 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2926}
2927
2928static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2929{
	/* true if the ranges [s1, s1+l1) and [s2, s2+l2) intersect */
2931 if (s1+l1 <= s2)
2932 return 0;
2933 if (s2+l2 <= s1)
2934 return 0;
2935 return 1;
2936}
2937
2938static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2939{
2940 unsigned long long blocks;
2941 sector_t new;
2942
2943 if (kstrtoull(buf, 10, &blocks) < 0)
2944 return -EINVAL;
2945
2946 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2947 return -EINVAL;
2948
2949 new = blocks * 2;
2950 if (new != blocks * 2)
2951 return -EINVAL;
2952
2953 *sectors = new;
2954 return 0;
2955}
2956
2957static ssize_t
2958rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2959{
2960 struct mddev *my_mddev = rdev->mddev;
2961 sector_t oldsectors = rdev->sectors;
2962 sector_t sectors;
2963
2964 if (test_bit(Journal, &rdev->flags))
2965 return -EBUSY;
2966	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2967 return -EINVAL;
2968 if (rdev->data_offset != rdev->new_data_offset)
2969 return -EINVAL;
2970 if (my_mddev->pers && rdev->raid_disk >= 0) {
2971 if (my_mddev->persistent) {
2972 sectors = super_types[my_mddev->major_version].
2973 rdev_size_change(rdev, sectors);
2974 if (!sectors)
2975 return -EBUSY;
2976 } else if (!sectors)
2977 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2978 rdev->data_offset;
2979 if (!my_mddev->pers->resize)
2980
2981 return -EINVAL;
2982 }
2983 if (sectors < my_mddev->dev_sectors)
2984 return -EINVAL;
2985
2986 rdev->sectors = sectors;
2987 if (sectors > oldsectors && my_mddev->external) {
		/*
		 * With externally managed metadata nothing else polices device
		 * sizes, so growing this rdev is only allowed if the enlarged
		 * data range does not overlap the range used by any other
		 * array on the same block device.
		 */
2994 struct mddev *mddev;
2995 int overlap = 0;
2996 struct list_head *tmp;
2997
2998 rcu_read_lock();
2999 for_each_mddev(mddev, tmp) {
3000 struct md_rdev *rdev2;
3001
3002 rdev_for_each(rdev2, mddev)
3003 if (rdev->bdev == rdev2->bdev &&
3004 rdev != rdev2 &&
3005 overlaps(rdev->data_offset, rdev->sectors,
3006 rdev2->data_offset,
3007 rdev2->sectors)) {
3008 overlap = 1;
3009 break;
3010 }
3011 if (overlap) {
3012 mddev_put(mddev);
3013 break;
3014 }
3015 }
3016 rcu_read_unlock();
3017 if (overlap) {
3018
3019
3020
3021
3022
3023
3024 rdev->sectors = oldsectors;
3025 return -EBUSY;
3026 }
3027 }
3028 return len;
3029}
3030
3031static struct rdev_sysfs_entry rdev_size =
3032__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3033
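/*
 * "recovery_start": sector up to which this device has been recovered, or
 * "none" if it is fully in-sync.  It can only be set while the device is
 * not an active member of a running array.
 */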
3034static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3035{
3036 unsigned long long recovery_start = rdev->recovery_offset;
3037
3038 if (test_bit(In_sync, &rdev->flags) ||
3039 recovery_start == MaxSector)
3040 return sprintf(page, "none\n");
3041
3042 return sprintf(page, "%llu\n", recovery_start);
3043}
3044
3045static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3046{
3047 unsigned long long recovery_start;
3048
3049 if (cmd_match(buf, "none"))
3050 recovery_start = MaxSector;
3051 else if (kstrtoull(buf, 10, &recovery_start))
3052 return -EINVAL;
3053
3054 if (rdev->mddev->pers &&
3055 rdev->raid_disk >= 0)
3056 return -EBUSY;
3057
3058 rdev->recovery_offset = recovery_start;
3059 if (recovery_start == MaxSector)
3060 set_bit(In_sync, &rdev->flags);
3061 else
3062 clear_bit(In_sync, &rdev->flags);
3063 return len;
3064}
3065
3066static struct rdev_sysfs_entry rdev_recovery_start =
3067__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3068
/*
 * sysfs access to the bad-block list.
 *
 * 'bad_blocks' lists the ranges (start sector and length) that are recorded
 * as bad and acknowledged in the metadata; writing "sector length" to it
 * adds an acknowledged bad range.
 *
 * 'unacknowledged_bad_blocks' lists ranges that have been noticed but not
 * yet acknowledged; writing to it adds a range without acknowledging it.
 */
3080static ssize_t bb_show(struct md_rdev *rdev, char *page)
3081{
3082 return badblocks_show(&rdev->badblocks, page, 0);
3083}
3084static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3085{
3086 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3087
3088 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3089 wake_up(&rdev->blocked_wait);
3090 return rv;
3091}
3092static struct rdev_sysfs_entry rdev_bad_blocks =
3093__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3094
3095static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3096{
3097 return badblocks_show(&rdev->badblocks, page, 1);
3098}
3099static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3100{
3101 return badblocks_store(&rdev->badblocks, page, len, 1);
3102}
3103static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3104__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3105
3106static struct attribute *rdev_default_attrs[] = {
3107 &rdev_state.attr,
3108 &rdev_errors.attr,
3109 &rdev_slot.attr,
3110 &rdev_offset.attr,
3111 &rdev_new_offset.attr,
3112 &rdev_size.attr,
3113 &rdev_recovery_start.attr,
3114 &rdev_bad_blocks.attr,
3115 &rdev_unack_bad_blocks.attr,
3116 NULL,
3117};
3118static ssize_t
3119rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3120{
3121 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3122 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3123
3124 if (!entry->show)
3125 return -EIO;
3126 if (!rdev->mddev)
3127 return -EBUSY;
3128 return entry->show(rdev, page);
3129}
3130
3131static ssize_t
3132rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3133 const char *page, size_t length)
3134{
3135 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3136 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3137 ssize_t rv;
3138 struct mddev *mddev = rdev->mddev;
3139
3140 if (!entry->store)
3141 return -EIO;
3142 if (!capable(CAP_SYS_ADMIN))
3143 return -EACCES;
3144 rv = mddev ? mddev_lock(mddev): -EBUSY;
3145 if (!rv) {
3146 if (rdev->mddev == NULL)
3147 rv = -EBUSY;
3148 else
3149 rv = entry->store(rdev, page, length);
3150 mddev_unlock(mddev);
3151 }
3152 return rv;
3153}
3154
3155static void rdev_free(struct kobject *ko)
3156{
3157 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3158 kfree(rdev);
3159}
3160static const struct sysfs_ops rdev_sysfs_ops = {
3161 .show = rdev_attr_show,
3162 .store = rdev_attr_store,
3163};
3164static struct kobj_type rdev_ktype = {
3165 .release = rdev_free,
3166 .sysfs_ops = &rdev_sysfs_ops,
3167 .default_attrs = rdev_default_attrs,
3168};
3169
3170int md_rdev_init(struct md_rdev *rdev)
3171{
3172 rdev->desc_nr = -1;
3173 rdev->saved_raid_disk = -1;
3174 rdev->raid_disk = -1;
3175 rdev->flags = 0;
3176 rdev->data_offset = 0;
3177 rdev->new_data_offset = 0;
3178 rdev->sb_events = 0;
3179 rdev->last_read_error = 0;
3180 rdev->sb_loaded = 0;
3181 rdev->bb_page = NULL;
3182 atomic_set(&rdev->nr_pending, 0);
3183 atomic_set(&rdev->read_errors, 0);
3184 atomic_set(&rdev->corrected_errors, 0);
3185
3186 INIT_LIST_HEAD(&rdev->same_set);
3187 init_waitqueue_head(&rdev->blocked_wait);
3188
3189
3190
3191
3192
3193 return badblocks_init(&rdev->badblocks, 0);
3194}
3195EXPORT_SYMBOL_GPL(md_rdev_init);
3196
/*
 * Import a device: allocate and initialise an rdev, claim the block device
 * and, when super_format >= 0, load and sanity-check a superblock of the
 * requested format.
 *
 * Fails (releasing the device again) if the device has zero size or does
 * not contain a valid superblock.
 */
3206static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3207{
3208 char b[BDEVNAME_SIZE];
3209 int err;
3210 struct md_rdev *rdev;
3211 sector_t size;
3212
3213 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3214 if (!rdev) {
3215 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3216 return ERR_PTR(-ENOMEM);
3217 }
3218
3219 err = md_rdev_init(rdev);
3220 if (err)
3221 goto abort_free;
3222 err = alloc_disk_sb(rdev);
3223 if (err)
3224 goto abort_free;
3225
3226 err = lock_rdev(rdev, newdev, super_format == -2);
3227 if (err)
3228 goto abort_free;
3229
3230 kobject_init(&rdev->kobj, &rdev_ktype);
3231
3232 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3233 if (!size) {
3234 printk(KERN_WARNING
3235 "md: %s has zero or unknown size, marking faulty!\n",
3236 bdevname(rdev->bdev,b));
3237 err = -EINVAL;
3238 goto abort_free;
3239 }
3240
3241 if (super_format >= 0) {
3242 err = super_types[super_format].
3243 load_super(rdev, NULL, super_minor);
3244 if (err == -EINVAL) {
3245 printk(KERN_WARNING
3246 "md: %s does not have a valid v%d.%d "
3247 "superblock, not importing!\n",
3248 bdevname(rdev->bdev,b),
3249 super_format, super_minor);
3250 goto abort_free;
3251 }
3252 if (err < 0) {
3253 printk(KERN_WARNING
3254 "md: could not read %s's sb, not importing!\n",
3255 bdevname(rdev->bdev,b));
3256 goto abort_free;
3257 }
3258 }
3259
3260 return rdev;
3261
3262abort_free:
3263 if (rdev->bdev)
3264 unlock_rdev(rdev);
3265 md_rdev_clear(rdev);
3266 kfree(rdev);
3267 return ERR_PTR(err);
3268}
3269
/*
 * Check a full array for plausibility: load every superblock, pick the
 * freshest one, validate all other devices against it, and kick out
 * anything stale or inconsistent.
 */
3274static void analyze_sbs(struct mddev *mddev)
3275{
3276 int i;
3277 struct md_rdev *rdev, *freshest, *tmp;
3278 char b[BDEVNAME_SIZE];
3279
3280 freshest = NULL;
3281 rdev_for_each_safe(rdev, tmp, mddev)
3282 switch (super_types[mddev->major_version].
3283 load_super(rdev, freshest, mddev->minor_version)) {
3284 case 1:
3285 freshest = rdev;
3286 break;
3287 case 0:
3288 break;
3289 default:
3290 printk( KERN_ERR \
3291 "md: fatal superblock inconsistency in %s"
3292 " -- removing from array\n",
3293 bdevname(rdev->bdev,b));
3294 md_kick_rdev_from_array(rdev);
3295 }
3296
3297 super_types[mddev->major_version].
3298 validate_super(mddev, freshest);
3299
3300 i = 0;
3301 rdev_for_each_safe(rdev, tmp, mddev) {
3302 if (mddev->max_disks &&
3303 (rdev->desc_nr >= mddev->max_disks ||
3304 i > mddev->max_disks)) {
3305 printk(KERN_WARNING
3306 "md: %s: %s: only %d devices permitted\n",
3307 mdname(mddev), bdevname(rdev->bdev, b),
3308 mddev->max_disks);
3309 md_kick_rdev_from_array(rdev);
3310 continue;
3311 }
3312 if (rdev != freshest) {
3313 if (super_types[mddev->major_version].
3314 validate_super(mddev, rdev)) {
3315 printk(KERN_WARNING "md: kicking non-fresh %s"
3316 " from array!\n",
3317 bdevname(rdev->bdev,b));
3318 md_kick_rdev_from_array(rdev);
3319 continue;
3320 }
3321 }
3322 if (mddev->level == LEVEL_MULTIPATH) {
3323 rdev->desc_nr = i++;
3324 rdev->raid_disk = rdev->desc_nr;
3325 set_bit(In_sync, &rdev->flags);
3326 } else if (rdev->raid_disk >=
3327 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3328 !test_bit(Journal, &rdev->flags)) {
3329 rdev->raid_disk = -1;
3330 clear_bit(In_sync, &rdev->flags);
3331 }
3332 }
3333}
3334
/*
 * Parse a decimal number with an optional fractional part and return it
 * multiplied by 10^'scale'.  This lets sysfs attributes be written in
 * natural units (e.g. seconds) while the kernel stores a finer-grained
 * value internally.
 *
 * For example, "1.5" with scale == 3 yields 1500 and "2" yields 2000;
 * fractional digits beyond 'scale' are silently dropped.
 */
3345int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3346{
3347 unsigned long result = 0;
3348 long decimals = -1;
3349 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3350 if (*cp == '.')
3351 decimals = 0;
3352 else if (decimals < scale) {
3353 unsigned int value;
3354 value = *cp - '0';
3355 result = result * 10 + value;
3356 if (decimals >= 0)
3357 decimals++;
3358 }
3359 cp++;
3360 }
3361 if (*cp == '\n')
3362 cp++;
3363 if (*cp)
3364 return -EINVAL;
3365 if (decimals < 0)
3366 decimals = 0;
3367 while (decimals < scale) {
3368 result *= 10;
3369 decimals ++;
3370 }
3371 *res = result;
3372 return 0;
3373}
3374
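/*
 * "safe_mode_delay": how long, in seconds (with millisecond resolution),
 * the array waits after the last write before marking itself clean again.
 * Writing "0" disables the safe-mode timer.
 */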
3375static ssize_t
3376safe_delay_show(struct mddev *mddev, char *page)
3377{
3378 int msec = (mddev->safemode_delay*1000)/HZ;
3379 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3380}
3381static ssize_t
3382safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3383{
3384 unsigned long msec;
3385
3386 if (mddev_is_clustered(mddev)) {
3387 pr_info("md: Safemode is disabled for clustered mode\n");
3388 return -EINVAL;
3389 }
3390
3391 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3392 return -EINVAL;
3393 if (msec == 0)
3394 mddev->safemode_delay = 0;
3395 else {
3396 unsigned long old_delay = mddev->safemode_delay;
3397 unsigned long new_delay = (msec*HZ)/1000;
3398
3399 if (new_delay == 0)
3400 new_delay = 1;
3401 mddev->safemode_delay = new_delay;
3402 if (new_delay < old_delay || old_delay == 0)
3403 mod_timer(&mddev->safemode_timer, jiffies+1);
3404 }
3405 return len;
3406}
3407static struct md_sysfs_entry md_safe_delay =
3408__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3409
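/*
 * "level": the RAID personality of the array.  Writing a new level to a
 * running array attempts an on-line takeover through the new personality's
 * ->takeover method; on an inactive array it merely records the level to
 * use when the array is started.
 */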
3410static ssize_t
3411level_show(struct mddev *mddev, char *page)
3412{
3413 struct md_personality *p;
3414 int ret;
3415 spin_lock(&mddev->lock);
3416 p = mddev->pers;
3417 if (p)
3418 ret = sprintf(page, "%s\n", p->name);
3419 else if (mddev->clevel[0])
3420 ret = sprintf(page, "%s\n", mddev->clevel);
3421 else if (mddev->level != LEVEL_NONE)
3422 ret = sprintf(page, "%d\n", mddev->level);
3423 else
3424 ret = 0;
3425 spin_unlock(&mddev->lock);
3426 return ret;
3427}
3428
3429static ssize_t
3430level_store(struct mddev *mddev, const char *buf, size_t len)
3431{
3432 char clevel[16];
3433 ssize_t rv;
3434 size_t slen = len;
3435 struct md_personality *pers, *oldpers;
3436 long level;
3437 void *priv, *oldpriv;
3438 struct md_rdev *rdev;
3439
3440 if (slen == 0 || slen >= sizeof(clevel))
3441 return -EINVAL;
3442
3443 rv = mddev_lock(mddev);
3444 if (rv)
3445 return rv;
3446
3447 if (mddev->pers == NULL) {
3448 strncpy(mddev->clevel, buf, slen);
3449 if (mddev->clevel[slen-1] == '\n')
3450 slen--;
3451 mddev->clevel[slen] = 0;
3452 mddev->level = LEVEL_NONE;
3453 rv = len;
3454 goto out_unlock;
3455 }
3456 rv = -EROFS;
3457 if (mddev->ro)
3458 goto out_unlock;
3459
	/*
	 * Request to change the personality of a running array.  Refuse while
	 * a resync/recovery is active, while a reshape is pending, or while
	 * sysfs attributes of the old personality are still being accessed.
	 */
3466 rv = -EBUSY;
3467 if (mddev->sync_thread ||
3468 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3469 mddev->reshape_position != MaxSector ||
3470 mddev->sysfs_active)
3471 goto out_unlock;
3472
3473 rv = -EINVAL;
3474 if (!mddev->pers->quiesce) {
3475 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3476 mdname(mddev), mddev->pers->name);
3477 goto out_unlock;
3478 }
3479
3480
3481 strncpy(clevel, buf, slen);
3482 if (clevel[slen-1] == '\n')
3483 slen--;
3484 clevel[slen] = 0;
3485 if (kstrtol(clevel, 10, &level))
3486 level = LEVEL_NONE;
3487
3488 if (request_module("md-%s", clevel) != 0)
3489 request_module("md-level-%s", clevel);
3490 spin_lock(&pers_lock);
3491 pers = find_pers(level, clevel);
3492 if (!pers || !try_module_get(pers->owner)) {
3493 spin_unlock(&pers_lock);
3494 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3495 rv = -EINVAL;
3496 goto out_unlock;
3497 }
3498 spin_unlock(&pers_lock);
3499
3500 if (pers == mddev->pers) {
3501
3502 module_put(pers->owner);
3503 rv = len;
3504 goto out_unlock;
3505 }
3506 if (!pers->takeover) {
3507 module_put(pers->owner);
3508 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3509 mdname(mddev), clevel);
3510 rv = -EINVAL;
3511 goto out_unlock;
3512 }
3513
3514 rdev_for_each(rdev, mddev)
3515 rdev->new_raid_disk = rdev->raid_disk;
3516
3517
3518
3519
3520 priv = pers->takeover(mddev);
3521 if (IS_ERR(priv)) {
3522 mddev->new_level = mddev->level;
3523 mddev->new_layout = mddev->layout;
3524 mddev->new_chunk_sectors = mddev->chunk_sectors;
3525 mddev->raid_disks -= mddev->delta_disks;
3526 mddev->delta_disks = 0;
3527 mddev->reshape_backwards = 0;
3528 module_put(pers->owner);
3529 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3530 mdname(mddev), clevel);
3531 rv = PTR_ERR(priv);
3532 goto out_unlock;
3533 }
3534
3535
3536 mddev_suspend(mddev);
3537 mddev_detach(mddev);
3538
3539 spin_lock(&mddev->lock);
3540 oldpers = mddev->pers;
3541 oldpriv = mddev->private;
3542 mddev->pers = pers;
3543 mddev->private = priv;
3544 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3545 mddev->level = mddev->new_level;
3546 mddev->layout = mddev->new_layout;
3547 mddev->chunk_sectors = mddev->new_chunk_sectors;
3548 mddev->delta_disks = 0;
3549 mddev->reshape_backwards = 0;
3550 mddev->degraded = 0;
3551 spin_unlock(&mddev->lock);
3552
3553 if (oldpers->sync_request == NULL &&
3554 mddev->external) {
3555
3556
3557
3558
3559
3560
3561
3562 mddev->in_sync = 0;
3563 mddev->safemode_delay = 0;
3564 mddev->safemode = 0;
3565 }
3566
3567 oldpers->free(mddev, oldpriv);
3568
3569 if (oldpers->sync_request == NULL &&
3570 pers->sync_request != NULL) {
3571
3572 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3573 printk(KERN_WARNING
3574 "md: cannot register extra attributes for %s\n",
3575 mdname(mddev));
3576 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3577 }
3578 if (oldpers->sync_request != NULL &&
3579 pers->sync_request == NULL) {
3580
3581 if (mddev->to_remove == NULL)
3582 mddev->to_remove = &md_redundancy_group;
3583 }
3584
3585 module_put(oldpers->owner);
3586
3587 rdev_for_each(rdev, mddev) {
3588 if (rdev->raid_disk < 0)
3589 continue;
3590 if (rdev->new_raid_disk >= mddev->raid_disks)
3591 rdev->new_raid_disk = -1;
3592 if (rdev->new_raid_disk == rdev->raid_disk)
3593 continue;
3594 sysfs_unlink_rdev(mddev, rdev);
3595 }
3596 rdev_for_each(rdev, mddev) {
3597 if (rdev->raid_disk < 0)
3598 continue;
3599 if (rdev->new_raid_disk == rdev->raid_disk)
3600 continue;
3601 rdev->raid_disk = rdev->new_raid_disk;
3602 if (rdev->raid_disk < 0)
3603 clear_bit(In_sync, &rdev->flags);
3604 else {
3605 if (sysfs_link_rdev(mddev, rdev))
3606 printk(KERN_WARNING "md: cannot register rd%d"
3607 " for %s after level change\n",
3608 rdev->raid_disk, mdname(mddev));
3609 }
3610 }
3611
3612 if (pers->sync_request == NULL) {
3613
3614
3615
3616 mddev->in_sync = 1;
3617 del_timer_sync(&mddev->safemode_timer);
3618 }
3619 blk_set_stacking_limits(&mddev->queue->limits);
3620 pers->run(mddev);
3621 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3622 mddev_resume(mddev);
3623 if (!mddev->thread)
3624 md_update_sb(mddev, 1);
3625 sysfs_notify(&mddev->kobj, NULL, "level");
3626 md_new_event(mddev);
3627 rv = len;
3628out_unlock:
3629 mddev_unlock(mddev);
3630 return rv;
3631}
3632
3633static struct md_sysfs_entry md_level =
3634__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3635
3636static ssize_t
3637layout_show(struct mddev *mddev, char *page)
3638{
3639
3640 if (mddev->reshape_position != MaxSector &&
3641 mddev->layout != mddev->new_layout)
3642 return sprintf(page, "%d (%d)\n",
3643 mddev->new_layout, mddev->layout);
3644 return sprintf(page, "%d\n", mddev->layout);
3645}
3646
3647static ssize_t
3648layout_store(struct mddev *mddev, const char *buf, size_t len)
3649{
3650 unsigned int n;
3651 int err;
3652
3653 err = kstrtouint(buf, 10, &n);
3654 if (err < 0)
3655 return err;
3656 err = mddev_lock(mddev);
3657 if (err)
3658 return err;
3659
3660 if (mddev->pers) {
3661 if (mddev->pers->check_reshape == NULL)
3662 err = -EBUSY;
3663 else if (mddev->ro)
3664 err = -EROFS;
3665 else {
3666 mddev->new_layout = n;
3667 err = mddev->pers->check_reshape(mddev);
3668 if (err)
3669 mddev->new_layout = mddev->layout;
3670 }
3671 } else {
3672 mddev->new_layout = n;
3673 if (mddev->reshape_position == MaxSector)
3674 mddev->layout = n;
3675 }
3676 mddev_unlock(mddev);
3677 return err ?: len;
3678}
3679static struct md_sysfs_entry md_layout =
3680__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3681
3682static ssize_t
3683raid_disks_show(struct mddev *mddev, char *page)
3684{
3685 if (mddev->raid_disks == 0)
3686 return 0;
3687 if (mddev->reshape_position != MaxSector &&
3688 mddev->delta_disks != 0)
3689 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3690 mddev->raid_disks - mddev->delta_disks);
3691 return sprintf(page, "%d\n", mddev->raid_disks);
3692}
3693
3694static int update_raid_disks(struct mddev *mddev, int raid_disks);
3695
3696static ssize_t
3697raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3698{
3699 unsigned int n;
3700 int err;
3701
3702 err = kstrtouint(buf, 10, &n);
3703 if (err < 0)
3704 return err;
3705
3706 err = mddev_lock(mddev);
3707 if (err)
3708 return err;
3709 if (mddev->pers)
3710 err = update_raid_disks(mddev, n);
3711 else if (mddev->reshape_position != MaxSector) {
3712 struct md_rdev *rdev;
3713 int olddisks = mddev->raid_disks - mddev->delta_disks;
3714
3715 err = -EINVAL;
3716 rdev_for_each(rdev, mddev) {
3717 if (olddisks < n &&
3718 rdev->data_offset < rdev->new_data_offset)
3719 goto out_unlock;
3720 if (olddisks > n &&
3721 rdev->data_offset > rdev->new_data_offset)
3722 goto out_unlock;
3723 }
3724 err = 0;
3725 mddev->delta_disks = n - olddisks;
3726 mddev->raid_disks = n;
3727 mddev->reshape_backwards = (mddev->delta_disks < 0);
3728 } else
3729 mddev->raid_disks = n;
3730out_unlock:
3731 mddev_unlock(mddev);
3732 return err ? err : len;
3733}
3734static struct md_sysfs_entry md_raid_disks =
3735__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3736
3737static ssize_t
3738chunk_size_show(struct mddev *mddev, char *page)
3739{
3740 if (mddev->reshape_position != MaxSector &&
3741 mddev->chunk_sectors != mddev->new_chunk_sectors)
3742 return sprintf(page, "%d (%d)\n",
3743 mddev->new_chunk_sectors << 9,
3744 mddev->chunk_sectors << 9);
3745 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3746}
3747
3748static ssize_t
3749chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3750{
3751 unsigned long n;
3752 int err;
3753
3754 err = kstrtoul(buf, 10, &n);
3755 if (err < 0)
3756 return err;
3757
3758 err = mddev_lock(mddev);
3759 if (err)
3760 return err;
3761 if (mddev->pers) {
3762 if (mddev->pers->check_reshape == NULL)
3763 err = -EBUSY;
3764 else if (mddev->ro)
3765 err = -EROFS;
3766 else {
3767 mddev->new_chunk_sectors = n >> 9;
3768 err = mddev->pers->check_reshape(mddev);
3769 if (err)
3770 mddev->new_chunk_sectors = mddev->chunk_sectors;
3771 }
3772 } else {
3773 mddev->new_chunk_sectors = n >> 9;
3774 if (mddev->reshape_position == MaxSector)
3775 mddev->chunk_sectors = n >> 9;
3776 }
3777 mddev_unlock(mddev);
3778 return err ?: len;
3779}
3780static struct md_sysfs_entry md_chunk_size =
3781__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3782
3783static ssize_t
3784resync_start_show(struct mddev *mddev, char *page)
3785{
3786 if (mddev->recovery_cp == MaxSector)
3787 return sprintf(page, "none\n");
3788 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3789}
3790
3791static ssize_t
3792resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3793{
3794 unsigned long long n;
3795 int err;
3796
3797 if (cmd_match(buf, "none"))
3798 n = MaxSector;
3799 else {
3800 err = kstrtoull(buf, 10, &n);
3801 if (err < 0)
3802 return err;
3803 if (n != (sector_t)n)
3804 return -EINVAL;
3805 }
3806
3807 err = mddev_lock(mddev);
3808 if (err)
3809 return err;
3810 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3811 err = -EBUSY;
3812
3813 if (!err) {
3814 mddev->recovery_cp = n;
3815 if (mddev->pers)
3816 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3817 }
3818 mddev_unlock(mddev);
3819 return err ?: len;
3820}
3821static struct md_sysfs_entry md_resync_start =
3822__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3823 resync_start_show, resync_start_store);
3824
/*
 * Array states reported by, and accepted through, the "array_state" file:
 *
 * clear
 *     No devices, no size, no level.  Writing "clear" stops the array and
 *     tears its configuration down completely.
 * inactive
 *     Devices and settings may be present but the array is not running.
 *     Writing "inactive" stops a running array while keeping its
 *     configuration.
 * suspended
 *     Listed for completeness; this interface neither reports nor accepts
 *     it.
 * readonly
 *     The array is running but no writes and no superblock updates are
 *     allowed.  Writing "readonly" switches a running array to read-only
 *     or starts an inactive one in read-only mode.
 * read-auto
 *     Like readonly, but the array switches itself to read-write when the
 *     first write request arrives.
 * clean
 *     Running with no pending writes.  Writing "clean" marks a running
 *     array in-sync, provided no writes are outstanding (-EBUSY otherwise).
 * active
 *     Fully running; I/O and resync may be in progress.  Writing "active"
 *     releases writers blocked on a pending metadata update, or starts an
 *     inactive array read-write.
 * write-pending
 *     Clean, but a write is blocked until the superblock has been marked
 *     dirty.  Cannot be set by writing.
 * active-idle
 *     Like active, but no writes have been seen for the safe-mode delay.
 *     Cannot be set by writing.
 */
3861enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3862 write_pending, active_idle, bad_word};
3863static char *array_states[] = {
3864 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3865 "write-pending", "active-idle", NULL };
3866
3867static int match_word(const char *word, char **list)
3868{
3869 int n;
3870 for (n=0; list[n]; n++)
3871 if (cmd_match(word, list[n]))
3872 break;
3873 return n;
3874}
3875
3876static ssize_t
3877array_state_show(struct mddev *mddev, char *page)
3878{
3879 enum array_state st = inactive;
3880
3881 if (mddev->pers)
3882 switch(mddev->ro) {
3883 case 1:
3884 st = readonly;
3885 break;
3886 case 2:
3887 st = read_auto;
3888 break;
3889 case 0:
3890 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3891 st = write_pending;
3892 else if (mddev->in_sync)
3893 st = clean;
3894 else if (mddev->safemode)
3895 st = active_idle;
3896 else
3897 st = active;
3898 }
3899 else {
3900 if (list_empty(&mddev->disks) &&
3901 mddev->raid_disks == 0 &&
3902 mddev->dev_sectors == 0)
3903 st = clear;
3904 else
3905 st = inactive;
3906 }
3907 return sprintf(page, "%s\n", array_states[st]);
3908}
3909
3910static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3911static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3912static int do_md_run(struct mddev *mddev);
3913static int restart_array(struct mddev *mddev);
3914
3915static ssize_t
3916array_state_store(struct mddev *mddev, const char *buf, size_t len)
3917{
3918 int err;
3919 enum array_state st = match_word(buf, array_states);
3920
3921 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3922
3923
3924
3925 spin_lock(&mddev->lock);
3926 if (st == active) {
3927 restart_array(mddev);
3928 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3929 wake_up(&mddev->sb_wait);
3930 err = 0;
3931 } else {
3932 restart_array(mddev);
3933 if (atomic_read(&mddev->writes_pending) == 0) {
3934 if (mddev->in_sync == 0) {
3935 mddev->in_sync = 1;
3936 if (mddev->safemode == 1)
3937 mddev->safemode = 0;
3938 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3939 }
3940 err = 0;
3941 } else
3942 err = -EBUSY;
3943 }
3944 if (!err)
3945 sysfs_notify_dirent_safe(mddev->sysfs_state);
3946 spin_unlock(&mddev->lock);
3947 return err ?: len;
3948 }
3949 err = mddev_lock(mddev);
3950 if (err)
3951 return err;
3952 err = -EINVAL;
3953 switch(st) {
3954 case bad_word:
3955 break;
3956 case clear:
3957
3958 err = do_md_stop(mddev, 0, NULL);
3959 break;
3960 case inactive:
3961
3962 if (mddev->pers)
3963 err = do_md_stop(mddev, 2, NULL);
3964 else
3965 err = 0;
3966 break;
3967 case suspended:
3968 break;
3969 case readonly:
3970 if (mddev->pers)
3971 err = md_set_readonly(mddev, NULL);
3972 else {
3973 mddev->ro = 1;
3974 set_disk_ro(mddev->gendisk, 1);
3975 err = do_md_run(mddev);
3976 }
3977 break;
3978 case read_auto:
3979 if (mddev->pers) {
3980 if (mddev->ro == 0)
3981 err = md_set_readonly(mddev, NULL);
3982 else if (mddev->ro == 1)
3983 err = restart_array(mddev);
3984 if (err == 0) {
3985 mddev->ro = 2;
3986 set_disk_ro(mddev->gendisk, 0);
3987 }
3988 } else {
3989 mddev->ro = 2;
3990 err = do_md_run(mddev);
3991 }
3992 break;
3993 case clean:
3994 if (mddev->pers) {
3995 err = restart_array(mddev);
3996 if (err)
3997 break;
3998 spin_lock(&mddev->lock);
3999 if (atomic_read(&mddev->writes_pending) == 0) {
4000 if (mddev->in_sync == 0) {
4001 mddev->in_sync = 1;
4002 if (mddev->safemode == 1)
4003 mddev->safemode = 0;
4004 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
4005 }
4006 err = 0;
4007 } else
4008 err = -EBUSY;
4009 spin_unlock(&mddev->lock);
4010 } else
4011 err = -EINVAL;
4012 break;
4013 case active:
4014 if (mddev->pers) {
4015 err = restart_array(mddev);
4016 if (err)
4017 break;
4018 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
4019 wake_up(&mddev->sb_wait);
4020 err = 0;
4021 } else {
4022 mddev->ro = 0;
4023 set_disk_ro(mddev->gendisk, 0);
4024 err = do_md_run(mddev);
4025 }
4026 break;
4027 case write_pending:
4028 case active_idle:
4029
4030 break;
4031 }
4032
4033 if (!err) {
4034 if (mddev->hold_active == UNTIL_IOCTL)
4035 mddev->hold_active = 0;
4036 sysfs_notify_dirent_safe(mddev->sysfs_state);
4037 }
4038 mddev_unlock(mddev);
4039 return err ?: len;
4040}
4041static struct md_sysfs_entry md_array_state =
4042__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4043
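/*
 * "max_read_errors": number of corrected read errors a member device may
 * accumulate before it is failed out of the array.
 */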
4044static ssize_t
4045max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4046 return sprintf(page, "%d\n",
4047 atomic_read(&mddev->max_corr_read_errors));
4048}
4049
4050static ssize_t
4051max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4052{
4053 unsigned int n;
4054 int rv;
4055
4056 rv = kstrtouint(buf, 10, &n);
4057 if (rv < 0)
4058 return rv;
4059 atomic_set(&mddev->max_corr_read_errors, n);
4060 return len;
4061}
4062
4063static struct md_sysfs_entry max_corr_read_errors =
4064__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4065 max_corrected_read_errors_store);
4066
4067static ssize_t
4068null_show(struct mddev *mddev, char *page)
4069{
4070 return -EINVAL;
4071}
4072
4073static ssize_t
4074new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4075{
	/*
	 * buf must be "%d:%d" giving the major and minor numbers of the
	 * device to add.  The device is then bound to this array; if the
	 * array uses persistent superblocks, the new device's superblock is
	 * loaded and checked against an existing member first, otherwise
	 * only the basic checks in bind_rdev_to_array() apply.
	 */
4083 char *e;
4084 int major = simple_strtoul(buf, &e, 10);
4085 int minor;
4086 dev_t dev;
4087 struct md_rdev *rdev;
4088 int err;
4089
4090 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4091 return -EINVAL;
4092 minor = simple_strtoul(e+1, &e, 10);
4093 if (*e && *e != '\n')
4094 return -EINVAL;
4095 dev = MKDEV(major, minor);
4096 if (major != MAJOR(dev) ||
4097 minor != MINOR(dev))
4098 return -EOVERFLOW;
4099
4100 flush_workqueue(md_misc_wq);
4101
4102 err = mddev_lock(mddev);
4103 if (err)
4104 return err;
4105 if (mddev->persistent) {
4106 rdev = md_import_device(dev, mddev->major_version,
4107 mddev->minor_version);
4108 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4109 struct md_rdev *rdev0
4110 = list_entry(mddev->disks.next,
4111 struct md_rdev, same_set);
4112 err = super_types[mddev->major_version]
4113 .load_super(rdev, rdev0, mddev->minor_version);
4114 if (err < 0)
4115 goto out;
4116 }
4117 } else if (mddev->external)
4118 rdev = md_import_device(dev, -2, -1);
4119 else
4120 rdev = md_import_device(dev, -1, -1);
4121
4122 if (IS_ERR(rdev)) {
4123 mddev_unlock(mddev);
4124 return PTR_ERR(rdev);
4125 }
4126 err = bind_rdev_to_array(rdev, mddev);
4127 out:
4128 if (err)
4129 export_rdev(rdev);
4130 mddev_unlock(mddev);
4131 return err ? err : len;
4132}
4133
4134static struct md_sysfs_entry md_new_device =
4135__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4136
4137static ssize_t
4138bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4139{
4140 char *end;
4141 unsigned long chunk, end_chunk;
4142 int err;
4143
4144 err = mddev_lock(mddev);
4145 if (err)
4146 return err;
4147 if (!mddev->bitmap)
4148 goto out;
4149
4150 while (*buf) {
4151 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4152 if (buf == end) break;
4153 if (*end == '-') {
4154 buf = end + 1;
4155 end_chunk = simple_strtoul(buf, &end, 0);
4156 if (buf == end) break;
4157 }
4158 if (*end && !isspace(*end)) break;
4159 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4160 buf = skip_spaces(end);
4161 }
4162 bitmap_unplug(mddev->bitmap);
4163out:
4164 mddev_unlock(mddev);
4165 return len;
4166}
4167
4168static struct md_sysfs_entry md_bitmap =
4169__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4170
4171static ssize_t
4172size_show(struct mddev *mddev, char *page)
4173{
4174 return sprintf(page, "%llu\n",
4175 (unsigned long long)mddev->dev_sectors / 2);
4176}
4177
4178static int update_size(struct mddev *mddev, sector_t num_sectors);
4179
4180static ssize_t
4181size_store(struct mddev *mddev, const char *buf, size_t len)
4182{
	/*
	 * On an inactive array the component size may be set or reduced but
	 * not increased.  On a running array this attempts an on-line resize
	 * of every member device.
	 */
4187 sector_t sectors;
4188	int err = strict_blocks_to_sectors(buf, &sectors);
4189
4190 if (err < 0)
4191 return err;
4192 err = mddev_lock(mddev);
4193 if (err)
4194 return err;
4195 if (mddev->pers) {
4196 err = update_size(mddev, sectors);
4197 if (err == 0)
4198 md_update_sb(mddev, 1);
4199 } else {
4200 if (mddev->dev_sectors == 0 ||
4201 mddev->dev_sectors > sectors)
4202 mddev->dev_sectors = sectors;
4203 else
4204 err = -ENOSPC;
4205 }
4206 mddev_unlock(mddev);
4207 return err ? err : len;
4208}
4209
4210static struct md_sysfs_entry md_size =
4211__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4212
/*
 * Metadata version:
 *   "none"           - the array has no persistent metadata
 *   "external:<type>" - metadata is managed externally (by user space)
 *   "N.M"            - one of the internally supported superblock formats
 */
4219static ssize_t
4220metadata_show(struct mddev *mddev, char *page)
4221{
4222 if (mddev->persistent)
4223 return sprintf(page, "%d.%d\n",
4224 mddev->major_version, mddev->minor_version);
4225 else if (mddev->external)
4226 return sprintf(page, "external:%s\n", mddev->metadata_type);
4227 else
4228 return sprintf(page, "none\n");
4229}
4230
4231static ssize_t
4232metadata_store(struct mddev *mddev, const char *buf, size_t len)
4233{
4234 int major, minor;
4235 char *e;
4236 int err;
4237
4238
4239
4240
4241
4242 err = mddev_lock(mddev);
4243 if (err)
4244 return err;
4245 err = -EBUSY;
4246 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4247 ;
4248 else if (!list_empty(&mddev->disks))
4249 goto out_unlock;
4250
4251 err = 0;
4252 if (cmd_match(buf, "none")) {
4253 mddev->persistent = 0;
4254 mddev->external = 0;
4255 mddev->major_version = 0;
4256 mddev->minor_version = 90;
4257 goto out_unlock;
4258 }
4259 if (strncmp(buf, "external:", 9) == 0) {
4260 size_t namelen = len-9;
4261 if (namelen >= sizeof(mddev->metadata_type))
4262 namelen = sizeof(mddev->metadata_type)-1;
4263 strncpy(mddev->metadata_type, buf+9, namelen);
4264 mddev->metadata_type[namelen] = 0;
4265 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4266 mddev->metadata_type[--namelen] = 0;
4267 mddev->persistent = 0;
4268 mddev->external = 1;
4269 mddev->major_version = 0;
4270 mddev->minor_version = 90;
4271 goto out_unlock;
4272 }
4273 major = simple_strtoul(buf, &e, 10);
4274 err = -EINVAL;
4275 if (e==buf || *e != '.')
4276 goto out_unlock;
4277 buf = e+1;
4278 minor = simple_strtoul(buf, &e, 10);
4279 if (e==buf || (*e && *e != '\n') )
4280 goto out_unlock;
4281 err = -ENOENT;
4282 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4283 goto out_unlock;
4284 mddev->major_version = major;
4285 mddev->minor_version = minor;
4286 mddev->persistent = 1;
4287 mddev->external = 0;
4288 err = 0;
4289out_unlock:
4290 mddev_unlock(mddev);
4291 return err ?: len;
4292}
4293
4294static struct md_sysfs_entry md_metadata =
4295__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4296
4297static ssize_t
4298action_show(struct mddev *mddev, char *page)
4299{
4300 char *type = "idle";
4301 unsigned long recovery = mddev->recovery;
4302 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4303 type = "frozen";
4304 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4305 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4306 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4307 type = "reshape";
4308 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4309 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4310 type = "resync";
4311 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4312 type = "check";
4313 else
4314 type = "repair";
4315 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4316 type = "recover";
4317 else if (mddev->reshape_position != MaxSector)
4318 type = "reshape";
4319 }
4320 return sprintf(page, "%s\n", type);
4321}
4322
4323static ssize_t
4324action_store(struct mddev *mddev, const char *page, size_t len)
4325{
4326 if (!mddev->pers || !mddev->pers->sync_request)
4327 return -EINVAL;
4328
4329
4330 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4331 if (cmd_match(page, "frozen"))
4332 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4333 else
4334 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4335 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4336 mddev_lock(mddev) == 0) {
4337 flush_workqueue(md_misc_wq);
4338 if (mddev->sync_thread) {
4339 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4340 md_reap_sync_thread(mddev);
4341 }
4342 mddev_unlock(mddev);
4343 }
4344 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4345 return -EBUSY;
4346 else if (cmd_match(page, "resync"))
4347 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4348 else if (cmd_match(page, "recover")) {
4349 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4350 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4351 } else if (cmd_match(page, "reshape")) {
4352 int err;
4353 if (mddev->pers->start_reshape == NULL)
4354 return -EINVAL;
4355 err = mddev_lock(mddev);
4356 if (!err) {
4357 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4358 err = -EBUSY;
4359 else {
4360 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4361 err = mddev->pers->start_reshape(mddev);
4362 }
4363 mddev_unlock(mddev);
4364 }
4365 if (err)
4366 return err;
4367 sysfs_notify(&mddev->kobj, NULL, "degraded");
4368 } else {
4369 if (cmd_match(page, "check"))
4370 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4371 else if (!cmd_match(page, "repair"))
4372 return -EINVAL;
4373 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4374 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4375 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4376 }
4377 if (mddev->ro == 2) {
		/*
		 * An explicit request for a sync action is reason enough to
		 * drop read-auto mode and let the array go read-write.
		 */
4381 mddev->ro = 0;
4382 md_wakeup_thread(mddev->sync_thread);
4383 }
4384 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4385 md_wakeup_thread(mddev->thread);
4386 sysfs_notify_dirent_safe(mddev->sysfs_action);
4387 return len;
4388}
4389
4390static struct md_sysfs_entry md_scan_mode =
4391__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4392
4393static ssize_t
4394last_sync_action_show(struct mddev *mddev, char *page)
4395{
4396 return sprintf(page, "%s\n", mddev->last_sync_action);
4397}
4398
4399static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4400
4401static ssize_t
4402mismatch_cnt_show(struct mddev *mddev, char *page)
4403{
4404 return sprintf(page, "%llu\n",
4405 (unsigned long long)
4406 atomic64_read(&mddev->resync_mismatches));
4407}
4408
4409static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4410
4411static ssize_t
4412sync_min_show(struct mddev *mddev, char *page)
4413{
4414 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4415 mddev->sync_speed_min ? "local": "system");
4416}
4417
4418static ssize_t
4419sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4420{
4421 unsigned int min;
4422 int rv;
4423
4424 if (strncmp(buf, "system", 6)==0) {
4425 min = 0;
4426 } else {
4427 rv = kstrtouint(buf, 10, &min);
4428 if (rv < 0)
4429 return rv;
4430 if (min == 0)
4431 return -EINVAL;
4432 }
4433 mddev->sync_speed_min = min;
4434 return len;
4435}
4436
4437static struct md_sysfs_entry md_sync_min =
4438__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4439
4440static ssize_t
4441sync_max_show(struct mddev *mddev, char *page)
4442{
4443 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4444 mddev->sync_speed_max ? "local": "system");
4445}
4446
4447static ssize_t
4448sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4449{
4450 unsigned int max;
4451 int rv;
4452
4453 if (strncmp(buf, "system", 6)==0) {
4454 max = 0;
4455 } else {
4456 rv = kstrtouint(buf, 10, &max);
4457 if (rv < 0)
4458 return rv;
4459 if (max == 0)
4460 return -EINVAL;
4461 }
4462 mddev->sync_speed_max = max;
4463 return len;
4464}
4465
4466static struct md_sysfs_entry md_sync_max =
4467__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4468
4469static ssize_t
4470degraded_show(struct mddev *mddev, char *page)
4471{
4472 return sprintf(page, "%d\n", mddev->degraded);
4473}
4474static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4475
4476static ssize_t
4477sync_force_parallel_show(struct mddev *mddev, char *page)
4478{
4479 return sprintf(page, "%d\n", mddev->parallel_resync);
4480}
4481
4482static ssize_t
4483sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4484{
4485 long n;
4486
4487 if (kstrtol(buf, 10, &n))
4488 return -EINVAL;
4489
4490 if (n != 0 && n != 1)
4491 return -EINVAL;
4492
4493 mddev->parallel_resync = n;
4494
4495 if (mddev->sync_thread)
4496 wake_up(&resync_wait);
4497
4498 return len;
4499}
4500
4501
4502static struct md_sysfs_entry md_sync_force_parallel =
4503__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4504 sync_force_parallel_show, sync_force_parallel_store);
4505
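/*
 * "sync_speed": the current resync/recovery rate in KiB/sec, averaged over
 * the most recent marking interval.
 */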
4506static ssize_t
4507sync_speed_show(struct mddev *mddev, char *page)
4508{
4509 unsigned long resync, dt, db;
4510 if (mddev->curr_resync == 0)
4511 return sprintf(page, "none\n");
4512 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4513 dt = (jiffies - mddev->resync_mark) / HZ;
4514 if (!dt) dt++;
4515 db = resync - mddev->resync_mark_cnt;
4516 return sprintf(page, "%lu\n", db/dt/2);
4517}
4518
4519static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4520
4521static ssize_t
4522sync_completed_show(struct mddev *mddev, char *page)
4523{
4524 unsigned long long max_sectors, resync;
4525
4526 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4527 return sprintf(page, "none\n");
4528
4529 if (mddev->curr_resync == 1 ||
4530 mddev->curr_resync == 2)
4531 return sprintf(page, "delayed\n");
4532
4533 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4534 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4535 max_sectors = mddev->resync_max_sectors;
4536 else
4537 max_sectors = mddev->dev_sectors;
4538
4539 resync = mddev->curr_resync_completed;
4540 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4541}
4542
4543static struct md_sysfs_entry md_sync_completed =
4544 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4545
4546static ssize_t
4547min_sync_show(struct mddev *mddev, char *page)
4548{
4549 return sprintf(page, "%llu\n",
4550 (unsigned long long)mddev->resync_min);
4551}
4552static ssize_t
4553min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4554{
4555 unsigned long long min;
4556 int err;
4557
4558 if (kstrtoull(buf, 10, &min))
4559 return -EINVAL;
4560
4561 spin_lock(&mddev->lock);
4562 err = -EINVAL;
4563 if (min > mddev->resync_max)
4564 goto out_unlock;
4565
4566 err = -EBUSY;
4567 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4568 goto out_unlock;
4569
4570
4571 mddev->resync_min = round_down(min, 8);
4572 err = 0;
4573
4574out_unlock:
4575 spin_unlock(&mddev->lock);
4576 return err ?: len;
4577}
4578
4579static struct md_sysfs_entry md_min_sync =
4580__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
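/*
 * sync_min (mddev->resync_min) holds the lowest sector the next resync
 * pass needs to consider.  Writes are rounded down to a multiple of 8
 * sectors (4KiB), must not exceed sync_max, and are refused with -EBUSY
 * while a resync is already running.
 */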
4581
4582static ssize_t
4583max_sync_show(struct mddev *mddev, char *page)
4584{
4585 if (mddev->resync_max == MaxSector)
4586 return sprintf(page, "max\n");
4587 else
4588 return sprintf(page, "%llu\n",
4589 (unsigned long long)mddev->resync_max);
4590}
4591static ssize_t
4592max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4593{
4594 int err;
4595 spin_lock(&mddev->lock);
4596 if (strncmp(buf, "max", 3) == 0)
4597 mddev->resync_max = MaxSector;
4598 else {
4599 unsigned long long max;
4600 int chunk;
4601
4602 err = -EINVAL;
4603 if (kstrtoull(buf, 10, &max))
4604 goto out_unlock;
4605 if (max < mddev->resync_min)
4606 goto out_unlock;
4607
4608 err = -EBUSY;
4609 if (max < mddev->resync_max &&
4610 mddev->ro == 0 &&
4611 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4612 goto out_unlock;
4613
4614
4615 chunk = mddev->chunk_sectors;
4616 if (chunk) {
4617 sector_t temp = max;
4618
4619 err = -EINVAL;
4620 if (sector_div(temp, chunk))
4621 goto out_unlock;
4622 }
4623 mddev->resync_max = max;
4624 }
4625 wake_up(&mddev->recovery_wait);
4626 err = 0;
4627out_unlock:
4628 spin_unlock(&mddev->lock);
4629 return err ?: len;
4630}
4631
4632static struct md_sysfs_entry md_max_sync =
4633__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
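/*
 * sync_max (mddev->resync_max) bounds how far a resync may proceed;
 * writing "max" removes the bound (MaxSector).  A numeric value must be
 * at least sync_min, a multiple of the chunk size when one is set, and
 * can only be lowered on an idle or read-only array (-EBUSY otherwise).
 * Together with sync_min this can be used to resync a chosen window of
 * the array, e.g. assuming an array named md0:
 *
 *	echo 0        > /sys/block/md0/md/sync_min
 *	echo 41943040 > /sys/block/md0/md/sync_max
 *
 * (both values are in 512-byte sectors)
 */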
4634
4635static ssize_t
4636suspend_lo_show(struct mddev *mddev, char *page)
4637{
4638 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4639}
4640
4641static ssize_t
4642suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4643{
4644 unsigned long long old, new;
4645 int err;
4646
4647 err = kstrtoull(buf, 10, &new);
4648 if (err < 0)
4649 return err;
4650 if (new != (sector_t)new)
4651 return -EINVAL;
4652
4653 err = mddev_lock(mddev);
4654 if (err)
4655 return err;
4656 err = -EINVAL;
4657 if (mddev->pers == NULL ||
4658 mddev->pers->quiesce == NULL)
4659 goto unlock;
4660 old = mddev->suspend_lo;
4661 mddev->suspend_lo = new;
4662 if (new >= old)
4663 /* Shrinking suspended region */
4664 mddev->pers->quiesce(mddev, 2);
4665 else {
4666 /* Expanding suspended region - need to wait */
4667 mddev->pers->quiesce(mddev, 1);
4668 mddev->pers->quiesce(mddev, 0);
4669 }
4670 err = 0;
4671unlock:
4672 mddev_unlock(mddev);
4673 return err ?: len;
4674}
4675static struct md_sysfs_entry md_suspend_lo =
4676__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4677
4678static ssize_t
4679suspend_hi_show(struct mddev *mddev, char *page)
4680{
4681 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4682}
4683
4684static ssize_t
4685suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4686{
4687 unsigned long long old, new;
4688 int err;
4689
4690 err = kstrtoull(buf, 10, &new);
4691 if (err < 0)
4692 return err;
4693 if (new != (sector_t)new)
4694 return -EINVAL;
4695
4696 err = mddev_lock(mddev);
4697 if (err)
4698 return err;
4699 err = -EINVAL;
4700 if (mddev->pers == NULL ||
4701 mddev->pers->quiesce == NULL)
4702 goto unlock;
4703 old = mddev->suspend_hi;
4704 mddev->suspend_hi = new;
4705 if (new <= old)
4706 /* Shrinking suspended region */
4707 mddev->pers->quiesce(mddev, 2);
4708 else {
4709 /* Expanding suspended region - need to wait */
4710 mddev->pers->quiesce(mddev, 1);
4711 mddev->pers->quiesce(mddev, 0);
4712 }
4713 err = 0;
4714unlock:
4715 mddev_unlock(mddev);
4716 return err ?: len;
4717}
4718static struct md_sysfs_entry md_suspend_hi =
4719__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
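/*
 * suspend_lo/suspend_hi delimit a sector range in which the array holds
 * off new I/O, typically used by tools managing external metadata while
 * they update it.  When a write to either attribute only shrinks the
 * suspended range, a lightweight quiesce(mddev, 2) call suffices; when
 * the range grows, a full quiesce(1)/quiesce(0) cycle drains in-flight
 * requests first.
 */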
4720
4721static ssize_t
4722reshape_position_show(struct mddev *mddev, char *page)
4723{
4724 if (mddev->reshape_position != MaxSector)
4725 return sprintf(page, "%llu\n",
4726 (unsigned long long)mddev->reshape_position);
4727 strcpy(page, "none\n");
4728 return 5;
4729}
4730
4731static ssize_t
4732reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4733{
4734 struct md_rdev *rdev;
4735 unsigned long long new;
4736 int err;
4737
4738 err = kstrtoull(buf, 10, &new);
4739 if (err < 0)
4740 return err;
4741 if (new != (sector_t)new)
4742 return -EINVAL;
4743 err = mddev_lock(mddev);
4744 if (err)
4745 return err;
4746 err = -EBUSY;
4747 if (mddev->pers)
4748 goto unlock;
4749 mddev->reshape_position = new;
4750 mddev->delta_disks = 0;
4751 mddev->reshape_backwards = 0;
4752 mddev->new_level = mddev->level;
4753 mddev->new_layout = mddev->layout;
4754 mddev->new_chunk_sectors = mddev->chunk_sectors;
4755 rdev_for_each(rdev, mddev)
4756 rdev->new_data_offset = rdev->data_offset;
4757 err = 0;
4758unlock:
4759 mddev_unlock(mddev);
4760 return err ?: len;
4761}
4762
4763static struct md_sysfs_entry md_reshape_position =
4764__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4765 reshape_position_store);
4766
4767static ssize_t
4768reshape_direction_show(struct mddev *mddev, char *page)
4769{
4770 return sprintf(page, "%s\n",
4771 mddev->reshape_backwards ? "backwards" : "forwards");
4772}
4773
4774static ssize_t
4775reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4776{
4777 int backwards = 0;
4778 int err;
4779
4780 if (cmd_match(buf, "forwards"))
4781 backwards = 0;
4782 else if (cmd_match(buf, "backwards"))
4783 backwards = 1;
4784 else
4785 return -EINVAL;
4786 if (mddev->reshape_backwards == backwards)
4787 return len;
4788
4789 err = mddev_lock(mddev);
4790 if (err)
4791 return err;
4792
4793 if (mddev->delta_disks)
4794 err = -EBUSY;
4795 else if (mddev->persistent &&
4796 mddev->major_version == 0)
4797 err = -EINVAL;
4798 else
4799 mddev->reshape_backwards = backwards;
4800 mddev_unlock(mddev);
4801 return err ?: len;
4802}
4803
4804static struct md_sysfs_entry md_reshape_direction =
4805__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4806 reshape_direction_store);
4807
4808static ssize_t
4809array_size_show(struct mddev *mddev, char *page)
4810{
4811 if (mddev->external_size)
4812 return sprintf(page, "%llu\n",
4813 (unsigned long long)mddev->array_sectors/2);
4814 else
4815 return sprintf(page, "default\n");
4816}
4817
4818static ssize_t
4819array_size_store(struct mddev *mddev, const char *buf, size_t len)
4820{
4821 sector_t sectors;
4822 int err;
4823
4824 /* Changing array_sectors is not supported for clustered arrays. */
4825 if (mddev_is_clustered(mddev))
4826 return -EINVAL;
4827
4828 err = mddev_lock(mddev);
4829 if (err)
4830 return err;
4831
4832 if (strncmp(buf, "default", 7) == 0) {
4833 if (mddev->pers)
4834 sectors = mddev->pers->size(mddev, 0, 0);
4835 else
4836 sectors = mddev->array_sectors;
4837
4838 mddev->external_size = 0;
4839 } else {
4840 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4841 err = -EINVAL;
4842 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4843 err = -E2BIG;
4844 else
4845 mddev->external_size = 1;
4846 }
4847
4848 if (!err) {
4849 mddev->array_sectors = sectors;
4850 if (mddev->pers) {
4851 set_capacity(mddev->gendisk, mddev->array_sectors);
4852 revalidate_disk(mddev->gendisk);
4853 }
4854 }
4855 mddev_unlock(mddev);
4856 return err ?: len;
4857}
4858
4859static struct md_sysfs_entry md_array_size =
4860__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4861 array_size_store);
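/*
 * array_size accepts "default", which re-derives the exported size from
 * the personality (or keeps the current value on an inactive array), or
 * an explicit size in KiB which marks the size as externally managed.
 * A request larger than the personality can provide fails with -E2BIG.
 * Illustrative usage, assuming an array named md0:
 *
 *	echo default > /sys/block/md0/md/array_size
 */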
4862
4863static struct attribute *md_default_attrs[] = {
4864 &md_level.attr,
4865 &md_layout.attr,
4866 &md_raid_disks.attr,
4867 &md_chunk_size.attr,
4868 &md_size.attr,
4869 &md_resync_start.attr,
4870 &md_metadata.attr,
4871 &md_new_device.attr,
4872 &md_safe_delay.attr,
4873 &md_array_state.attr,
4874 &md_reshape_position.attr,
4875 &md_reshape_direction.attr,
4876 &md_array_size.attr,
4877 &max_corr_read_errors.attr,
4878 NULL,
4879};
4880
4881static struct attribute *md_redundancy_attrs[] = {
4882 &md_scan_mode.attr,
4883 &md_last_scan_mode.attr,
4884 &md_mismatches.attr,
4885 &md_sync_min.attr,
4886 &md_sync_max.attr,
4887 &md_sync_speed.attr,
4888 &md_sync_force_parallel.attr,
4889 &md_sync_completed.attr,
4890 &md_min_sync.attr,
4891 &md_max_sync.attr,
4892 &md_suspend_lo.attr,
4893 &md_suspend_hi.attr,
4894 &md_bitmap.attr,
4895 &md_degraded.attr,
4896 NULL,
4897};
4898static struct attribute_group md_redundancy_group = {
4899 .name = NULL,
4900 .attrs = md_redundancy_attrs,
4901};
4902
4903static ssize_t
4904md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4905{
4906 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4907 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4908 ssize_t rv;
4909
4910 if (!entry->show)
4911 return -EIO;
4912 spin_lock(&all_mddevs_lock);
4913 if (list_empty(&mddev->all_mddevs)) {
4914 spin_unlock(&all_mddevs_lock);
4915 return -EBUSY;
4916 }
4917 mddev_get(mddev);
4918 spin_unlock(&all_mddevs_lock);
4919
4920 rv = entry->show(mddev, page);
4921 mddev_put(mddev);
4922 return rv;
4923}
4924
4925static ssize_t
4926md_attr_store(struct kobject *kobj, struct attribute *attr,
4927 const char *page, size_t length)
4928{
4929 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4930 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4931 ssize_t rv;
4932
4933 if (!entry->store)
4934 return -EIO;
4935 if (!capable(CAP_SYS_ADMIN))
4936 return -EACCES;
4937 spin_lock(&all_mddevs_lock);
4938 if (list_empty(&mddev->all_mddevs)) {
4939 spin_unlock(&all_mddevs_lock);
4940 return -EBUSY;
4941 }
4942 mddev_get(mddev);
4943 spin_unlock(&all_mddevs_lock);
4944 rv = entry->store(mddev, page, length);
4945 mddev_put(mddev);
4946 return rv;
4947}
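/*
 * Both sysfs entry points pin the mddev with mddev_get() under
 * all_mddevs_lock before calling the per-attribute handler, and bail out
 * with -EBUSY if the device has already been removed from all_mddevs, so
 * a handler never runs against a freed array.
 */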
4948
4949static void md_free(struct kobject *ko)
4950{
4951 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4952
4953 if (mddev->sysfs_state)
4954 sysfs_put(mddev->sysfs_state);
4955
4956 if (mddev->queue)
4957 blk_cleanup_queue(mddev->queue);
4958 if (mddev->gendisk) {
4959 del_gendisk(mddev->gendisk);
4960 put_disk(mddev->gendisk);
4961 }
4962
4963 kfree(mddev);
4964}
4965
4966static const struct sysfs_ops md_sysfs_ops = {
4967 .show = md_attr_show,
4968 .store = md_attr_store,
4969};
4970static struct kobj_type md_ktype = {
4971 .release = md_free,
4972 .sysfs_ops = &md_sysfs_ops,
4973 .default_attrs = md_default_attrs,
4974};
4975
4976int mdp_major = 0;
4977
4978static void mddev_delayed_delete(struct work_struct *ws)
4979{
4980 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4981
4982 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4983 kobject_del(&mddev->kobj);
4984 kobject_put(&mddev->kobj);
4985}
4986
4987static int md_alloc(dev_t dev, char *name)
4988{
4989 static DEFINE_MUTEX(disks_mutex);
4990 struct mddev *mddev = mddev_find(dev);
4991 struct gendisk *disk;
4992 int partitioned;
4993 int shift;
4994 int unit;
4995 int error;
4996
4997 if (!mddev)
4998 return -ENODEV;
4999
5000 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5001 shift = partitioned ? MdpMinorShift : 0;
5002 unit = MINOR(mddev->unit) >> shift;
5003
5004
5005
5006
5007 flush_workqueue(md_misc_wq);
5008
5009 mutex_lock(&disks_mutex);
5010 error = -EEXIST;
5011 if (mddev->gendisk)
5012 goto abort;
5013
5014 if (name) {
5015
5016
5017 struct mddev *mddev2;
5018 spin_lock(&all_mddevs_lock);
5019
5020 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5021 if (mddev2->gendisk &&
5022 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5023 spin_unlock(&all_mddevs_lock);
5024 goto abort;
5025 }
5026 spin_unlock(&all_mddevs_lock);
5027 }
5028
5029 error = -ENOMEM;
5030 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5031 if (!mddev->queue)
5032 goto abort;
5033 mddev->queue->queuedata = mddev;
5034
5035 blk_queue_make_request(mddev->queue, md_make_request);
5036 blk_set_stacking_limits(&mddev->queue->limits);
5037
5038 disk = alloc_disk(1 << shift);
5039 if (!disk) {
5040 blk_cleanup_queue(mddev->queue);
5041 mddev->queue = NULL;
5042 goto abort;
5043 }
5044 disk->major = MAJOR(mddev->unit);
5045 disk->first_minor = unit << shift;
5046 if (name)
5047 strcpy(disk->disk_name, name);
5048 else if (partitioned)
5049 sprintf(disk->disk_name, "md_d%d", unit);
5050 else
5051 sprintf(disk->disk_name, "md%d", unit);
5052 disk->fops = &md_fops;
5053 disk->private_data = mddev;
5054 disk->queue = mddev->queue;
5055 blk_queue_write_cache(mddev->queue, true, true);
5056
5057
5058
5059
5060 disk->flags |= GENHD_FL_EXT_DEVT;
5061 mddev->gendisk = disk;
5062
5063
5064
5065 mutex_lock(&mddev->open_mutex);
5066 add_disk(disk);
5067
5068 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5069 &disk_to_dev(disk)->kobj, "%s", "md");
5070 if (error) {
5071
5072
5073
5074 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
5075 disk->disk_name);
5076 error = 0;
5077 }
5078 if (mddev->kobj.sd &&
5079 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5080 printk(KERN_DEBUG "pointless warning\n");
5081 mutex_unlock(&mddev->open_mutex);
5082 abort:
5083 mutex_unlock(&disks_mutex);
5084 if (!error && mddev->kobj.sd) {
5085 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5086 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5087 }
5088 mddev_put(mddev);
5089 return error;
5090}
5091
5092static struct kobject *md_probe(dev_t dev, int *part, void *data)
5093{
5094 md_alloc(dev, NULL);
5095 return NULL;
5096}
5097
5098static int add_named_array(const char *val, struct kernel_param *kp)
5099{
5100
5101
5102
5103
5104 int len = strlen(val);
5105 char buf[DISK_NAME_LEN];
5106
5107 while (len && val[len-1] == '\n')
5108 len--;
5109 if (len >= DISK_NAME_LEN)
5110 return -E2BIG;
5111 strlcpy(buf, val, len+1);
5112 if (strncmp(buf, "md_", 3) != 0)
5113 return -EINVAL;
5114 return md_alloc(0, buf);
5115}
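/*
 * add_named_array lets userspace create an array with a non-numeric name
 * before any device node exists.  In mainline md it is hooked up as the
 * writable module parameter "new_array", so (assuming that wiring) the
 * following asks md to allocate a disk named md_home:
 *
 *	echo md_home > /sys/module/md_mod/parameters/new_array
 *
 * The written value must begin with "md_" and fit within DISK_NAME_LEN.
 */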
5116
5117static void md_safemode_timeout(unsigned long data)
5118{
5119 struct mddev *mddev = (struct mddev *) data;
5120
5121 if (!atomic_read(&mddev->writes_pending)) {
5122 mddev->safemode = 1;
5123 if (mddev->external)
5124 sysfs_notify_dirent_safe(mddev->sysfs_state);
5125 }
5126 md_wakeup_thread(mddev->thread);
5127}
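/*
 * Safe-mode: when the timer fires and no writes are pending, ->safemode
 * is set so the array can transition back to a clean state; the main md
 * thread is woken to act on it, and externally managed arrays are
 * notified through the array_state sysfs dirent.
 */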
5128
5129static int start_dirty_degraded;
5130
5131int md_run(struct mddev *mddev)
5132{
5133 int err;
5134 struct md_rdev *rdev;
5135 struct md_personality *pers;
5136
5137 if (list_empty(&mddev->disks))
5138 /* cannot run an array with no devices */
5139 return -EINVAL;
5140
5141 if (mddev->pers)
5142 return -EBUSY;
5143
5144 if (mddev->sysfs_active)
5145 return -EBUSY;
5146
5147
5148
5149
5150 if (!mddev->raid_disks) {
5151 if (!mddev->persistent)
5152 return -EINVAL;
5153 analyze_sbs(mddev);
5154 }
5155
5156 if (mddev->level != LEVEL_NONE)
5157 request_module("md-level-%d", mddev->level);
5158 else if (mddev->clevel[0])
5159 request_module("md-%s", mddev->clevel);
5160
5161
5162
5163
5164
5165
5166 rdev_for_each(rdev, mddev) {
5167 if (test_bit(Faulty, &rdev->flags))
5168 continue;
5169 sync_blockdev(rdev->bdev);
5170 invalidate_bdev(rdev->bdev);
5171
5172
5173
5174
5175
5176 if (rdev->meta_bdev) {
5177 ;
5178 } else if (rdev->data_offset < rdev->sb_start) {
5179 if (mddev->dev_sectors &&
5180 rdev->data_offset + mddev->dev_sectors
5181 > rdev->sb_start) {
5182 printk("md: %s: data overlaps metadata\n",
5183 mdname(mddev));
5184 return -EINVAL;
5185 }
5186 } else {
5187 if (rdev->sb_start + rdev->sb_size/512
5188 > rdev->data_offset) {
5189 printk("md: %s: metadata overlaps data\n",
5190 mdname(mddev));
5191 return -EINVAL;
5192 }
5193 }
5194 sysfs_notify_dirent_safe(rdev->sysfs_state);
5195 }
5196
5197 if (mddev->bio_set == NULL)
5198 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5199
5200 spin_lock(&pers_lock);
5201 pers = find_pers(mddev->level, mddev->clevel);
5202 if (!pers || !try_module_get(pers->owner)) {
5203 spin_unlock(&pers_lock);
5204 if (mddev->level != LEVEL_NONE)
5205 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5206 mddev->level);
5207 else
5208 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5209 mddev->clevel);
5210 return -EINVAL;
5211 }
5212 spin_unlock(&pers_lock);
5213 if (mddev->level != pers->level) {
5214 mddev->level = pers->level;
5215 mddev->new_level = pers->level;
5216 }
5217 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5218
5219 if (mddev->reshape_position != MaxSector &&
5220 pers->start_reshape == NULL) {
5221
5222 module_put(pers->owner);
5223 return -EINVAL;
5224 }
5225
5226 if (pers->sync_request) {
5227
5228
5229
5230 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5231 struct md_rdev *rdev2;
5232 int warned = 0;
5233
5234 rdev_for_each(rdev, mddev)
5235 rdev_for_each(rdev2, mddev) {
5236 if (rdev < rdev2 &&
5237 rdev->bdev->bd_contains ==
5238 rdev2->bdev->bd_contains) {
5239 printk(KERN_WARNING
5240 "%s: WARNING: %s appears to be"
5241 " on the same physical disk as"
5242 " %s.\n",
5243 mdname(mddev),
5244 bdevname(rdev->bdev,b),
5245 bdevname(rdev2->bdev,b2));
5246 warned = 1;
5247 }
5248 }
5249
5250 if (warned)
5251 printk(KERN_WARNING
5252 "True protection against single-disk"
5253 " failure might be compromised.\n");
5254 }
5255
5256 mddev->recovery = 0;
5257
5258 mddev->resync_max_sectors = mddev->dev_sectors;
5259
5260 mddev->ok_start_degraded = start_dirty_degraded;
5261
5262 if (start_readonly && mddev->ro == 0)
5263 mddev->ro = 2;
5264
5265 err = pers->run(mddev);
5266 if (err)
5267 printk(KERN_ERR "md: pers->run() failed ...\n");
5268 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5269 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5270 " but 'external_size' not in effect?\n", __func__);
5271 printk(KERN_ERR
5272 "md: invalid array_size %llu > default size %llu\n",
5273 (unsigned long long)mddev->array_sectors / 2,
5274 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5275 err = -EINVAL;
5276 }
5277 if (err == 0 && pers->sync_request &&
5278 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5279 struct bitmap *bitmap;
5280
5281 bitmap = bitmap_create(mddev, -1);
5282 if (IS_ERR(bitmap)) {
5283 err = PTR_ERR(bitmap);
5284 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5285 mdname(mddev), err);
5286 } else
5287 mddev->bitmap = bitmap;
5288
5289 }
5290 if (err) {
5291 mddev_detach(mddev);
5292 if (mddev->private)
5293 pers->free(mddev, mddev->private);
5294 mddev->private = NULL;
5295 module_put(pers->owner);
5296 bitmap_destroy(mddev);
5297 return err;
5298 }
5299 if (mddev->queue) {
5300 bool nonrot = true;
5301
5302 rdev_for_each(rdev, mddev) {
5303 if (rdev->raid_disk >= 0 &&
5304 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5305 nonrot = false;
5306 break;
5307 }
5308 }
5309 if (mddev->degraded)
5310 nonrot = false;
5311 if (nonrot)
5312 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5313 else
5314 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5315 mddev->queue->backing_dev_info.congested_data = mddev;
5316 mddev->queue->backing_dev_info.congested_fn = md_congested;
5317 }
5318 if (pers->sync_request) {
5319 if (mddev->kobj.sd &&
5320 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5321 printk(KERN_WARNING
5322 "md: cannot register extra attributes for %s\n",
5323 mdname(mddev));
5324 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5325 } else if (mddev->ro == 2)
5326 mddev->ro = 0;
5327
5328 atomic_set(&mddev->writes_pending,0);
5329 atomic_set(&mddev->max_corr_read_errors,
5330 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5331 mddev->safemode = 0;
5332 if (mddev_is_clustered(mddev))
5333 mddev->safemode_delay = 0;
5334 else
5335 mddev->safemode_delay = (200 * HZ)/1000 +1;
5336 mddev->in_sync = 1;
5337 smp_wmb();
5338 spin_lock(&mddev->lock);
5339 mddev->pers = pers;
5340 spin_unlock(&mddev->lock);
5341 rdev_for_each(rdev, mddev)
5342 if (rdev->raid_disk >= 0)
5343 if (sysfs_link_rdev(mddev, rdev))
5344 /* failure here is OK */;
5345
5346 if (mddev->degraded && !mddev->ro)
5347 /* This ensures that recovering status is reported immediately
5348 * via sysfs - until a lack of spares is confirmed.
5349 */
5350 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5351 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5352
5353 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5354 md_update_sb(mddev, 0);
5355
5356 md_new_event(mddev);
5357 sysfs_notify_dirent_safe(mddev->sysfs_state);
5358 sysfs_notify_dirent_safe(mddev->sysfs_action);
5359 sysfs_notify(&mddev->kobj, NULL, "degraded");
5360 return 0;
5361}
5362EXPORT_SYMBOL_GPL(md_run);
5363
5364static int do_md_run(struct mddev *mddev)
5365{
5366 int err;
5367
5368 err = md_run(mddev);
5369 if (err)
5370 goto out;
5371 err = bitmap_load(mddev);
5372 if (err) {
5373 bitmap_destroy(mddev);
5374 goto out;
5375 }
5376
5377 if (mddev_is_clustered(mddev))
5378 md_allow_write(mddev);
5379
5380 md_wakeup_thread(mddev->thread);
5381 md_wakeup_thread(mddev->sync_thread);
5382
5383 set_capacity(mddev->gendisk, mddev->array_sectors);
5384 revalidate_disk(mddev->gendisk);
5385 mddev->changed = 1;
5386 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5387out:
5388 return err;
5389}
5390
5391static int restart_array(struct mddev *mddev)
5392{
5393 struct gendisk *disk = mddev->gendisk;
5394
5395
5396 if (list_empty(&mddev->disks))
5397 return -ENXIO;
5398 if (!mddev->pers)
5399 return -EINVAL;
5400 if (!mddev->ro)
5401 return -EBUSY;
5402 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5403 struct md_rdev *rdev;
5404 bool has_journal = false;
5405
5406 rcu_read_lock();
5407 rdev_for_each_rcu(rdev, mddev) {
5408 if (test_bit(Journal, &rdev->flags) &&
5409 !test_bit(Faulty, &rdev->flags)) {
5410 has_journal = true;
5411 break;
5412 }
5413 }
5414 rcu_read_unlock();
5415
5416
5417 if (!has_journal)
5418 return -EINVAL;
5419 }
5420
5421 mddev->safemode = 0;
5422 mddev->ro = 0;
5423 set_disk_ro(disk, 0);
5424 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5425 mdname(mddev));
5426
5427 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5428 md_wakeup_thread(mddev->thread);
5429 md_wakeup_thread(mddev->sync_thread);
5430 sysfs_notify_dirent_safe(mddev->sysfs_state);
5431 return 0;
5432}
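/*
 * restart_array flips a read-only (or auto-read-only) array back to
 * read-write.  Arrays flagged MD_HAS_JOURNAL additionally require a
 * working, non-faulty journal member before writes are allowed again.
 */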
5433
5434static void md_clean(struct mddev *mddev)
5435{
5436 mddev->array_sectors = 0;
5437 mddev->external_size = 0;
5438 mddev->dev_sectors = 0;
5439 mddev->raid_disks = 0;
5440 mddev->recovery_cp = 0;
5441 mddev->resync_min = 0;
5442 mddev->resync_max = MaxSector;
5443 mddev->reshape_position = MaxSector;
5444 mddev->external = 0;
5445 mddev->persistent = 0;
5446 mddev->level = LEVEL_NONE;
5447 mddev->clevel[0] = 0;
5448 mddev->flags = 0;
5449 mddev->ro = 0;
5450 mddev->metadata_type[0] = 0;
5451 mddev->chunk_sectors = 0;
5452 mddev->ctime = mddev->utime = 0;
5453 mddev->layout = 0;
5454 mddev->max_disks = 0;
5455 mddev->events = 0;
5456 mddev->can_decrease_events = 0;
5457 mddev->delta_disks = 0;
5458 mddev->reshape_backwards = 0;
5459 mddev->new_level = LEVEL_NONE;
5460 mddev->new_layout = 0;
5461 mddev->new_chunk_sectors = 0;
5462 mddev->curr_resync = 0;
5463 atomic64_set(&mddev->resync_mismatches, 0);
5464 mddev->suspend_lo = mddev->suspend_hi = 0;
5465 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5466 mddev->recovery = 0;
5467 mddev->in_sync = 0;
5468 mddev->changed = 0;
5469 mddev->degraded = 0;
5470 mddev->safemode = 0;
5471 mddev->private = NULL;
5472 mddev->cluster_info = NULL;
5473 mddev->bitmap_info.offset = 0;
5474 mddev->bitmap_info.default_offset = 0;
5475 mddev->bitmap_info.default_space = 0;
5476 mddev->bitmap_info.chunksize = 0;
5477 mddev->bitmap_info.daemon_sleep = 0;
5478 mddev->bitmap_info.max_write_behind = 0;
5479 mddev->bitmap_info.nodes = 0;
5480}
5481
5482static void __md_stop_writes(struct mddev *mddev)
5483{
5484 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5485 flush_workqueue(md_misc_wq);
5486 if (mddev->sync_thread) {
5487 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5488 md_reap_sync_thread(mddev);
5489 }
5490
5491 del_timer_sync(&mddev->safemode_timer);
5492
5493 bitmap_flush(mddev);
5494 md_super_wait(mddev);
5495
5496 if (mddev->ro == 0 &&
5497 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5498 (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5499
5500 if (!mddev_is_clustered(mddev))
5501 mddev->in_sync = 1;
5502 md_update_sb(mddev, 1);
5503 }
5504}
5505
5506void md_stop_writes(struct mddev *mddev)
5507{
5508 mddev_lock_nointr(mddev);
5509 __md_stop_writes(mddev);
5510 mddev_unlock(mddev);
5511}
5512EXPORT_SYMBOL_GPL(md_stop_writes);
5513
5514static void mddev_detach(struct mddev *mddev)
5515{
5516 struct bitmap *bitmap = mddev->bitmap;
5517
5518 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5519 printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n",
5520 mdname(mddev));
5521
5522 wait_event(bitmap->behind_wait,
5523 atomic_read(&bitmap->behind_writes) == 0);
5524 }
5525 if (mddev->pers && mddev->pers->quiesce) {
5526 mddev->pers->quiesce(mddev, 1);
5527 mddev->pers->quiesce(mddev, 0);
5528 }
5529 md_unregister_thread(&mddev->thread);
5530 if (mddev->queue)
5531 blk_sync_queue(mddev->queue);
5532}
5533
5534static void __md_stop(struct mddev *mddev)
5535{
5536 struct md_personality *pers = mddev->pers;
5537 mddev_detach(mddev);
5538
5539 flush_workqueue(md_misc_wq);
5540 spin_lock(&mddev->lock);
5541 mddev->pers = NULL;
5542 spin_unlock(&mddev->lock);
5543 pers->free(mddev, mddev->private);
5544 mddev->private = NULL;
5545 if (pers->sync_request && mddev->to_remove == NULL)
5546 mddev->to_remove = &md_redundancy_group;
5547 module_put(pers->owner);
5548 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5549}
5550
5551void md_stop(struct mddev *mddev)
5552{
5553
5554
5555
5556 __md_stop(mddev);
5557 bitmap_destroy(mddev);
5558 if (mddev->bio_set)
5559 bioset_free(mddev->bio_set);
5560}
5561
5562EXPORT_SYMBOL_GPL(md_stop);
5563
5564static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5565{
5566 int err = 0;
5567 int did_freeze = 0;
5568
5569 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5570 did_freeze = 1;
5571 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5572 md_wakeup_thread(mddev->thread);
5573 }
5574 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5575 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5576 if (mddev->sync_thread)
5577 /* Thread might be blocked waiting for metadata update
5578 * which will now never happen */
5579 wake_up_process(mddev->sync_thread->tsk);
5580
5581 if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
5582 return -EBUSY;
5583 mddev_unlock(mddev);
5584 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5585 &mddev->recovery));
5586 wait_event(mddev->sb_wait,
5587 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5588 mddev_lock_nointr(mddev);
5589
5590 mutex_lock(&mddev->open_mutex);
5591 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5592 mddev->sync_thread ||
5593 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5594 printk("md: %s still in use.\n",mdname(mddev));
5595 if (did_freeze) {
5596 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5597 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5598 md_wakeup_thread(mddev->thread);
5599 }
5600 err = -EBUSY;
5601 goto out;
5602 }
5603 if (mddev->pers) {
5604 __md_stop_writes(mddev);
5605
5606 err = -ENXIO;
5607 if (mddev->ro==1)
5608 goto out;
5609 mddev->ro = 1;
5610 set_disk_ro(mddev->gendisk, 1);
5611 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5612 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5613 md_wakeup_thread(mddev->thread);
5614 sysfs_notify_dirent_safe(mddev->sysfs_state);
5615 err = 0;
5616 }
5617out:
5618 mutex_unlock(&mddev->open_mutex);
5619 return err;
5620}
5621
5622
5623
5624
5625
5626static int do_md_stop(struct mddev *mddev, int mode,
5627 struct block_device *bdev)
5628{
5629 struct gendisk *disk = mddev->gendisk;
5630 struct md_rdev *rdev;
5631 int did_freeze = 0;
5632
5633 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5634 did_freeze = 1;
5635 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5636 md_wakeup_thread(mddev->thread);
5637 }
5638 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5639 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5640 if (mddev->sync_thread)
5641 /* Thread might be blocked waiting for metadata update
5642 * which will now never happen */
5643 wake_up_process(mddev->sync_thread->tsk);
5644
5645 mddev_unlock(mddev);
5646 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5647 !test_bit(MD_RECOVERY_RUNNING,
5648 &mddev->recovery)));
5649 mddev_lock_nointr(mddev);
5650
5651 mutex_lock(&mddev->open_mutex);
5652 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5653 mddev->sysfs_active ||
5654 mddev->sync_thread ||
5655 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5656 printk("md: %s still in use.\n",mdname(mddev));
5657 mutex_unlock(&mddev->open_mutex);
5658 if (did_freeze) {
5659 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5660 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5661 md_wakeup_thread(mddev->thread);
5662 }
5663 return -EBUSY;
5664 }
5665 if (mddev->pers) {
5666 if (mddev->ro)
5667 set_disk_ro(disk, 0);
5668
5669 __md_stop_writes(mddev);
5670 __md_stop(mddev);
5671 mddev->queue->backing_dev_info.congested_fn = NULL;
5672
5673
5674 sysfs_notify_dirent_safe(mddev->sysfs_state);
5675
5676 rdev_for_each(rdev, mddev)
5677 if (rdev->raid_disk >= 0)
5678 sysfs_unlink_rdev(mddev, rdev);
5679
5680 set_capacity(disk, 0);
5681 mutex_unlock(&mddev->open_mutex);
5682 mddev->changed = 1;
5683 revalidate_disk(disk);
5684
5685 if (mddev->ro)
5686 mddev->ro = 0;
5687 } else
5688 mutex_unlock(&mddev->open_mutex);
5689
5690
5691
5692 if (mode == 0) {
5693 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5694
5695 bitmap_destroy(mddev);
5696 if (mddev->bitmap_info.file) {
5697 struct file *f = mddev->bitmap_info.file;
5698 spin_lock(&mddev->lock);
5699 mddev->bitmap_info.file = NULL;
5700 spin_unlock(&mddev->lock);
5701 fput(f);
5702 }
5703 mddev->bitmap_info.offset = 0;
5704
5705 export_array(mddev);
5706
5707 md_clean(mddev);
5708 if (mddev->hold_active == UNTIL_STOP)
5709 mddev->hold_active = 0;
5710 }
5711 md_new_event(mddev);
5712 sysfs_notify_dirent_safe(mddev->sysfs_state);
5713 return 0;
5714}
5715
5716#ifndef MODULE
5717static void autorun_array(struct mddev *mddev)
5718{
5719 struct md_rdev *rdev;
5720 int err;
5721
5722 if (list_empty(&mddev->disks))
5723 return;
5724
5725 printk(KERN_INFO "md: running: ");
5726
5727 rdev_for_each(rdev, mddev) {
5728 char b[BDEVNAME_SIZE];
5729 printk("<%s>", bdevname(rdev->bdev,b));
5730 }
5731 printk("\n");
5732
5733 err = do_md_run(mddev);
5734 if (err) {
5735 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5736 do_md_stop(mddev, 0, NULL);
5737 }
5738}
5739
5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752static void autorun_devices(int part)
5753{
5754 struct md_rdev *rdev0, *rdev, *tmp;
5755 struct mddev *mddev;
5756 char b[BDEVNAME_SIZE];
5757
5758 printk(KERN_INFO "md: autorun ...\n");
5759 while (!list_empty(&pending_raid_disks)) {
5760 int unit;
5761 dev_t dev;
5762 LIST_HEAD(candidates);
5763 rdev0 = list_entry(pending_raid_disks.next,
5764 struct md_rdev, same_set);
5765
5766 printk(KERN_INFO "md: considering %s ...\n",
5767 bdevname(rdev0->bdev,b));
5768 INIT_LIST_HEAD(&candidates);
5769 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5770 if (super_90_load(rdev, rdev0, 0) >= 0) {
5771 printk(KERN_INFO "md: adding %s ...\n",
5772 bdevname(rdev->bdev,b));
5773 list_move(&rdev->same_set, &candidates);
5774 }
5775
5776
5777
5778
5779
5780 if (part) {
5781 dev = MKDEV(mdp_major,
5782 rdev0->preferred_minor << MdpMinorShift);
5783 unit = MINOR(dev) >> MdpMinorShift;
5784 } else {
5785 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5786 unit = MINOR(dev);
5787 }
5788 if (rdev0->preferred_minor != unit) {
5789 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5790 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5791 break;
5792 }
5793
5794 md_probe(dev, NULL, NULL);
5795 mddev = mddev_find(dev);
5796 if (!mddev || !mddev->gendisk) {
5797 if (mddev)
5798 mddev_put(mddev);
5799 printk(KERN_ERR
5800 "md: cannot allocate memory for md drive.\n");
5801 break;
5802 }
5803 if (mddev_lock(mddev))
5804 printk(KERN_WARNING "md: %s locked, cannot run\n",
5805 mdname(mddev));
5806 else if (mddev->raid_disks || mddev->major_version
5807 || !list_empty(&mddev->disks)) {
5808 printk(KERN_WARNING
5809 "md: %s already running, cannot run %s\n",
5810 mdname(mddev), bdevname(rdev0->bdev,b));
5811 mddev_unlock(mddev);
5812 } else {
5813 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5814 mddev->persistent = 1;
5815 rdev_for_each_list(rdev, tmp, &candidates) {
5816 list_del_init(&rdev->same_set);
5817 if (bind_rdev_to_array(rdev, mddev))
5818 export_rdev(rdev);
5819 }
5820 autorun_array(mddev);
5821 mddev_unlock(mddev);
5822 }
5823
5824
5825
5826 rdev_for_each_list(rdev, tmp, &candidates) {
5827 list_del_init(&rdev->same_set);
5828 export_rdev(rdev);
5829 }
5830 mddev_put(mddev);
5831 }
5832 printk(KERN_INFO "md: ... autorun DONE.\n");
5833}
5834#endif
5835
5836static int get_version(void __user *arg)
5837{
5838 mdu_version_t ver;
5839
5840 ver.major = MD_MAJOR_VERSION;
5841 ver.minor = MD_MINOR_VERSION;
5842 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5843
5844 if (copy_to_user(arg, &ver, sizeof(ver)))
5845 return -EFAULT;
5846
5847 return 0;
5848}
5849
5850static int get_array_info(struct mddev *mddev, void __user *arg)
5851{
5852 mdu_array_info_t info;
5853 int nr,working,insync,failed,spare;
5854 struct md_rdev *rdev;
5855
5856 nr = working = insync = failed = spare = 0;
5857 rcu_read_lock();
5858 rdev_for_each_rcu(rdev, mddev) {
5859 nr++;
5860 if (test_bit(Faulty, &rdev->flags))
5861 failed++;
5862 else {
5863 working++;
5864 if (test_bit(In_sync, &rdev->flags))
5865 insync++;
5866 else if (test_bit(Journal, &rdev->flags))
5867
5868 ;
5869 else
5870 spare++;
5871 }
5872 }
5873 rcu_read_unlock();
5874
5875 info.major_version = mddev->major_version;
5876 info.minor_version = mddev->minor_version;
5877 info.patch_version = MD_PATCHLEVEL_VERSION;
5878 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5879 info.level = mddev->level;
5880 info.size = mddev->dev_sectors / 2;
5881 if (info.size != mddev->dev_sectors / 2)
5882 info.size = -1;
5883 info.nr_disks = nr;
5884 info.raid_disks = mddev->raid_disks;
5885 info.md_minor = mddev->md_minor;
5886 info.not_persistent= !mddev->persistent;
5887
5888 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5889 info.state = 0;
5890 if (mddev->in_sync)
5891 info.state = (1<<MD_SB_CLEAN);
5892 if (mddev->bitmap && mddev->bitmap_info.offset)
5893 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5894 if (mddev_is_clustered(mddev))
5895 info.state |= (1<<MD_SB_CLUSTERED);
5896 info.active_disks = insync;
5897 info.working_disks = working;
5898 info.failed_disks = failed;
5899 info.spare_disks = spare;
5900
5901 info.layout = mddev->layout;
5902 info.chunk_size = mddev->chunk_sectors << 9;
5903
5904 if (copy_to_user(arg, &info, sizeof(info)))
5905 return -EFAULT;
5906
5907 return 0;
5908}
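/*
 * GET_ARRAY_INFO is the classic snapshot ioctl consumed by mdadm.  A
 * minimal userspace sketch (illustrative only: device name assumed,
 * error handling omitted):
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/major.h>
 *	#include <linux/raid/md_u.h>
 *
 *	int main(void)
 *	{
 *		mdu_array_info_t info;
 *		int fd = open("/dev/md0", O_RDONLY);
 *
 *		if (fd >= 0 && ioctl(fd, GET_ARRAY_INFO, &info) == 0)
 *			printf("level %d, %d raid disks, %d KiB/device\n",
 *			       info.level, info.raid_disks, info.size);
 *		return 0;
 *	}
 *
 * Note that info.size is reported in KiB and saturates to -1 when the
 * per-device size does not fit in an int.
 */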
5909
5910static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5911{
5912 mdu_bitmap_file_t *file = NULL;
5913 char *ptr;
5914 int err;
5915
5916 file = kzalloc(sizeof(*file), GFP_NOIO);
5917 if (!file)
5918 return -ENOMEM;
5919
5920 err = 0;
5921 spin_lock(&mddev->lock);
5922
5923 if (mddev->bitmap_info.file) {
5924 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5925 sizeof(file->pathname));
5926 if (IS_ERR(ptr))
5927 err = PTR_ERR(ptr);
5928 else
5929 memmove(file->pathname, ptr,
5930 sizeof(file->pathname)-(ptr-file->pathname));
5931 }
5932 spin_unlock(&mddev->lock);
5933
5934 if (err == 0 &&
5935 copy_to_user(arg, file, sizeof(*file)))
5936 err = -EFAULT;
5937
5938 kfree(file);
5939 return err;
5940}
5941
5942static int get_disk_info(struct mddev *mddev, void __user * arg)
5943{
5944 mdu_disk_info_t info;
5945 struct md_rdev *rdev;
5946
5947 if (copy_from_user(&info, arg, sizeof(info)))
5948 return -EFAULT;
5949
5950 rcu_read_lock();
5951 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5952 if (rdev) {
5953 info.major = MAJOR(rdev->bdev->bd_dev);
5954 info.minor = MINOR(rdev->bdev->bd_dev);
5955 info.raid_disk = rdev->raid_disk;
5956 info.state = 0;
5957 if (test_bit(Faulty, &rdev->flags))
5958 info.state |= (1<<MD_DISK_FAULTY);
5959 else if (test_bit(In_sync, &rdev->flags)) {
5960 info.state |= (1<<MD_DISK_ACTIVE);
5961 info.state |= (1<<MD_DISK_SYNC);
5962 }
5963 if (test_bit(Journal, &rdev->flags))
5964 info.state |= (1<<MD_DISK_JOURNAL);
5965 if (test_bit(WriteMostly, &rdev->flags))
5966 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5967 } else {
5968 info.major = info.minor = 0;
5969 info.raid_disk = -1;
5970 info.state = (1<<MD_DISK_REMOVED);
5971 }
5972 rcu_read_unlock();
5973
5974 if (copy_to_user(arg, &info, sizeof(info)))
5975 return -EFAULT;
5976
5977 return 0;
5978}
5979
5980static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
5981{
5982 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5983 struct md_rdev *rdev;
5984 dev_t dev = MKDEV(info->major,info->minor);
5985
5986 if (mddev_is_clustered(mddev) &&
5987 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
5988 pr_err("%s: Cannot add to clustered mddev.\n",
5989 mdname(mddev));
5990 return -EINVAL;
5991 }
5992
5993 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5994 return -EOVERFLOW;
5995
5996 if (!mddev->raid_disks) {
5997 int err;
5998
5999 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6000 if (IS_ERR(rdev)) {
6001 printk(KERN_WARNING
6002 "md: md_import_device returned %ld\n",
6003 PTR_ERR(rdev));
6004 return PTR_ERR(rdev);
6005 }
6006 if (!list_empty(&mddev->disks)) {
6007 struct md_rdev *rdev0
6008 = list_entry(mddev->disks.next,
6009 struct md_rdev, same_set);
6010 err = super_types[mddev->major_version]
6011 .load_super(rdev, rdev0, mddev->minor_version);
6012 if (err < 0) {
6013 printk(KERN_WARNING
6014 "md: %s has different UUID to %s\n",
6015 bdevname(rdev->bdev,b),
6016 bdevname(rdev0->bdev,b2));
6017 export_rdev(rdev);
6018 return -EINVAL;
6019 }
6020 }
6021 err = bind_rdev_to_array(rdev, mddev);
6022 if (err)
6023 export_rdev(rdev);
6024 return err;
6025 }
6026
6027
6028
6029
6030
6031
6032 if (mddev->pers) {
6033 int err;
6034 if (!mddev->pers->hot_add_disk) {
6035 printk(KERN_WARNING
6036 "%s: personality does not support diskops!\n",
6037 mdname(mddev));
6038 return -EINVAL;
6039 }
6040 if (mddev->persistent)
6041 rdev = md_import_device(dev, mddev->major_version,
6042 mddev->minor_version);
6043 else
6044 rdev = md_import_device(dev, -1, -1);
6045 if (IS_ERR(rdev)) {
6046 printk(KERN_WARNING
6047 "md: md_import_device returned %ld\n",
6048 PTR_ERR(rdev));
6049 return PTR_ERR(rdev);
6050 }
6051
6052 if (!mddev->persistent) {
6053 if (info->state & (1<<MD_DISK_SYNC) &&
6054 info->raid_disk < mddev->raid_disks) {
6055 rdev->raid_disk = info->raid_disk;
6056 set_bit(In_sync, &rdev->flags);
6057 clear_bit(Bitmap_sync, &rdev->flags);
6058 } else
6059 rdev->raid_disk = -1;
6060 rdev->saved_raid_disk = rdev->raid_disk;
6061 } else
6062 super_types[mddev->major_version].
6063 validate_super(mddev, rdev);
6064 if ((info->state & (1<<MD_DISK_SYNC)) &&
6065 rdev->raid_disk != info->raid_disk) {
6066
6067
6068
6069 export_rdev(rdev);
6070 return -EINVAL;
6071 }
6072
6073 clear_bit(In_sync, &rdev->flags);
6074 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6075 set_bit(WriteMostly, &rdev->flags);
6076 else
6077 clear_bit(WriteMostly, &rdev->flags);
6078
6079 if (info->state & (1<<MD_DISK_JOURNAL)) {
6080 struct md_rdev *rdev2;
6081 bool has_journal = false;
6082
6083
6084 rdev_for_each(rdev2, mddev) {
6085 if (test_bit(Journal, &rdev2->flags)) {
6086 has_journal = true;
6087 break;
6088 }
6089 }
6090 if (has_journal) {
6091 export_rdev(rdev);
6092 return -EBUSY;
6093 }
6094 set_bit(Journal, &rdev->flags);
6095 }
6096
6097
6098
6099 if (mddev_is_clustered(mddev)) {
6100 if (info->state & (1 << MD_DISK_CANDIDATE))
6101 set_bit(Candidate, &rdev->flags);
6102 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6103
6104 err = md_cluster_ops->add_new_disk(mddev, rdev);
6105 if (err) {
6106 export_rdev(rdev);
6107 return err;
6108 }
6109 }
6110 }
6111
6112 rdev->raid_disk = -1;
6113 err = bind_rdev_to_array(rdev, mddev);
6114
6115 if (err)
6116 export_rdev(rdev);
6117
6118 if (mddev_is_clustered(mddev)) {
6119 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6120 if (!err) {
6121 err = md_cluster_ops->new_disk_ack(mddev,
6122 err == 0);
6123 if (err)
6124 md_kick_rdev_from_array(rdev);
6125 }
6126 } else {
6127 if (err)
6128 md_cluster_ops->add_new_disk_cancel(mddev);
6129 else
6130 err = add_bound_rdev(rdev);
6131 }
6132
6133 } else if (!err)
6134 err = add_bound_rdev(rdev);
6135
6136 return err;
6137 }
6138
6139
6140
6141
6142 if (mddev->major_version != 0) {
6143 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
6144 mdname(mddev));
6145 return -EINVAL;
6146 }
6147
6148 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6149 int err;
6150 rdev = md_import_device(dev, -1, 0);
6151 if (IS_ERR(rdev)) {
6152 printk(KERN_WARNING
6153 "md: error, md_import_device() returned %ld\n",
6154 PTR_ERR(rdev));
6155 return PTR_ERR(rdev);
6156 }
6157 rdev->desc_nr = info->number;
6158 if (info->raid_disk < mddev->raid_disks)
6159 rdev->raid_disk = info->raid_disk;
6160 else
6161 rdev->raid_disk = -1;
6162
6163 if (rdev->raid_disk < mddev->raid_disks)
6164 if (info->state & (1<<MD_DISK_SYNC))
6165 set_bit(In_sync, &rdev->flags);
6166
6167 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6168 set_bit(WriteMostly, &rdev->flags);
6169
6170 if (!mddev->persistent) {
6171 printk(KERN_INFO "md: nonpersistent superblock ...\n");
6172 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6173 } else
6174 rdev->sb_start = calc_dev_sboffset(rdev);
6175 rdev->sectors = rdev->sb_start;
6176
6177 err = bind_rdev_to_array(rdev, mddev);
6178 if (err) {
6179 export_rdev(rdev);
6180 return err;
6181 }
6182 }
6183
6184 return 0;
6185}
6186
6187static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6188{
6189 char b[BDEVNAME_SIZE];
6190 struct md_rdev *rdev;
6191
6192 rdev = find_rdev(mddev, dev);
6193 if (!rdev)
6194 return -ENXIO;
6195
6196 if (rdev->raid_disk < 0)
6197 goto kick_rdev;
6198
6199 clear_bit(Blocked, &rdev->flags);
6200 remove_and_add_spares(mddev, rdev);
6201
6202 if (rdev->raid_disk >= 0)
6203 goto busy;
6204
6205kick_rdev:
6206 if (mddev_is_clustered(mddev))
6207 md_cluster_ops->remove_disk(mddev, rdev);
6208
6209 md_kick_rdev_from_array(rdev);
6210 md_update_sb(mddev, 1);
6211 md_new_event(mddev);
6212
6213 return 0;
6214busy:
6215 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
6216 bdevname(rdev->bdev,b), mdname(mddev));
6217 return -EBUSY;
6218}
6219
6220static int hot_add_disk(struct mddev *mddev, dev_t dev)
6221{
6222 char b[BDEVNAME_SIZE];
6223 int err;
6224 struct md_rdev *rdev;
6225
6226 if (!mddev->pers)
6227 return -ENODEV;
6228
6229 if (mddev->major_version != 0) {
6230 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
6231 " version-0 superblocks.\n",
6232 mdname(mddev));
6233 return -EINVAL;
6234 }
6235 if (!mddev->pers->hot_add_disk) {
6236 printk(KERN_WARNING
6237 "%s: personality does not support diskops!\n",
6238 mdname(mddev));
6239 return -EINVAL;
6240 }
6241
6242 rdev = md_import_device(dev, -1, 0);
6243 if (IS_ERR(rdev)) {
6244 printk(KERN_WARNING
6245 "md: error, md_import_device() returned %ld\n",
6246 PTR_ERR(rdev));
6247 return -EINVAL;
6248 }
6249
6250 if (mddev->persistent)
6251 rdev->sb_start = calc_dev_sboffset(rdev);
6252 else
6253 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6254
6255 rdev->sectors = rdev->sb_start;
6256
6257 if (test_bit(Faulty, &rdev->flags)) {
6258 printk(KERN_WARNING
6259 "md: can not hot-add faulty %s disk to %s!\n",
6260 bdevname(rdev->bdev,b), mdname(mddev));
6261 err = -EINVAL;
6262 goto abort_export;
6263 }
6264
6265 clear_bit(In_sync, &rdev->flags);
6266 rdev->desc_nr = -1;
6267 rdev->saved_raid_disk = -1;
6268 err = bind_rdev_to_array(rdev, mddev);
6269 if (err)
6270 goto abort_export;
6271
6272
6273
6274
6275
6276
6277 rdev->raid_disk = -1;
6278
6279 md_update_sb(mddev, 1);
6280
6281
6282
6283
6284 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6285 md_wakeup_thread(mddev->thread);
6286 md_new_event(mddev);
6287 return 0;
6288
6289abort_export:
6290 export_rdev(rdev);
6291 return err;
6292}
6293
6294static int set_bitmap_file(struct mddev *mddev, int fd)
6295{
6296 int err = 0;
6297
6298 if (mddev->pers) {
6299 if (!mddev->pers->quiesce || !mddev->thread)
6300 return -EBUSY;
6301 if (mddev->recovery || mddev->sync_thread)
6302 return -EBUSY;
6303
6304 }
6305
6306 if (fd >= 0) {
6307 struct inode *inode;
6308 struct file *f;
6309
6310 if (mddev->bitmap || mddev->bitmap_info.file)
6311 return -EEXIST;
6312 f = fget(fd);
6313
6314 if (f == NULL) {
6315 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
6316 mdname(mddev));
6317 return -EBADF;
6318 }
6319
6320 inode = f->f_mapping->host;
6321 if (!S_ISREG(inode->i_mode)) {
6322 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
6323 mdname(mddev));
6324 err = -EBADF;
6325 } else if (!(f->f_mode & FMODE_WRITE)) {
6326 printk(KERN_ERR "%s: error: bitmap file must open for write\n",
6327 mdname(mddev));
6328 err = -EBADF;
6329 } else if (atomic_read(&inode->i_writecount) != 1) {
6330 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6331 mdname(mddev));
6332 err = -EBUSY;
6333 }
6334 if (err) {
6335 fput(f);
6336 return err;
6337 }
6338 mddev->bitmap_info.file = f;
6339 mddev->bitmap_info.offset = 0;
6340 } else if (mddev->bitmap == NULL)
6341 return -ENOENT;
6342 err = 0;
6343 if (mddev->pers) {
6344 mddev->pers->quiesce(mddev, 1);
6345 if (fd >= 0) {
6346 struct bitmap *bitmap;
6347
6348 bitmap = bitmap_create(mddev, -1);
6349 if (!IS_ERR(bitmap)) {
6350 mddev->bitmap = bitmap;
6351 err = bitmap_load(mddev);
6352 } else
6353 err = PTR_ERR(bitmap);
6354 }
6355 if (fd < 0 || err) {
6356 bitmap_destroy(mddev);
6357 fd = -1;
6358 }
6359 mddev->pers->quiesce(mddev, 0);
6360 }
6361 if (fd < 0) {
6362 struct file *f = mddev->bitmap_info.file;
6363 if (f) {
6364 spin_lock(&mddev->lock);
6365 mddev->bitmap_info.file = NULL;
6366 spin_unlock(&mddev->lock);
6367 fput(f);
6368 }
6369 }
6370
6371 return err;
6372}
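/*
 * set_bitmap_file attaches (fd >= 0) or detaches (fd < 0) an external
 * bitmap file: the file must be a regular file, opened for write and not
 * otherwise in use, and the array is quiesced around bitmap creation or
 * destruction.  This path is normally reached via the SET_BITMAP_FILE
 * ioctl.
 */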
6373
6374
6375
6376
6377
6378
6379
6380
6381
6382
6383
6384
6385
6386
6387static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6388{
6389
6390 if (info->raid_disks == 0) {
6391
6392 if (info->major_version < 0 ||
6393 info->major_version >= ARRAY_SIZE(super_types) ||
6394 super_types[info->major_version].name == NULL) {
6395
6396 printk(KERN_INFO
6397 "md: superblock version %d not known\n",
6398 info->major_version);
6399 return -EINVAL;
6400 }
6401 mddev->major_version = info->major_version;
6402 mddev->minor_version = info->minor_version;
6403 mddev->patch_version = info->patch_version;
6404 mddev->persistent = !info->not_persistent;
6405
6406
6407
6408 mddev->ctime = ktime_get_real_seconds();
6409 return 0;
6410 }
6411 mddev->major_version = MD_MAJOR_VERSION;
6412 mddev->minor_version = MD_MINOR_VERSION;
6413 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6414 mddev->ctime = ktime_get_real_seconds();
6415
6416 mddev->level = info->level;
6417 mddev->clevel[0] = 0;
6418 mddev->dev_sectors = 2 * (sector_t)info->size;
6419 mddev->raid_disks = info->raid_disks;
6420
6421
6422
6423 if (info->state & (1<<MD_SB_CLEAN))
6424 mddev->recovery_cp = MaxSector;
6425 else
6426 mddev->recovery_cp = 0;
6427 mddev->persistent = ! info->not_persistent;
6428 mddev->external = 0;
6429
6430 mddev->layout = info->layout;
6431 mddev->chunk_sectors = info->chunk_size >> 9;
6432
6433 mddev->max_disks = MD_SB_DISKS;
6434
6435 if (mddev->persistent)
6436 mddev->flags = 0;
6437 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6438
6439 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6440 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6441 mddev->bitmap_info.offset = 0;
6442
6443 mddev->reshape_position = MaxSector;
6444
6445
6446
6447
6448 get_random_bytes(mddev->uuid, 16);
6449
6450 mddev->new_level = mddev->level;
6451 mddev->new_chunk_sectors = mddev->chunk_sectors;
6452 mddev->new_layout = mddev->layout;
6453 mddev->delta_disks = 0;
6454 mddev->reshape_backwards = 0;
6455
6456 return 0;
6457}
6458
6459void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6460{
6461 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6462
6463 if (mddev->external_size)
6464 return;
6465
6466 mddev->array_sectors = array_sectors;
6467}
6468EXPORT_SYMBOL(md_set_array_sectors);
6469
6470static int update_size(struct mddev *mddev, sector_t num_sectors)
6471{
6472 struct md_rdev *rdev;
6473 int rv;
6474 int fit = (num_sectors == 0);
6475
6476
6477 if (mddev_is_clustered(mddev))
6478 return -EINVAL;
6479
6480 if (mddev->pers->resize == NULL)
6481 return -EINVAL;
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6492 mddev->sync_thread)
6493 return -EBUSY;
6494 if (mddev->ro)
6495 return -EROFS;
6496
6497 rdev_for_each(rdev, mddev) {
6498 sector_t avail = rdev->sectors;
6499
6500 if (fit && (num_sectors == 0 || num_sectors > avail))
6501 num_sectors = avail;
6502 if (avail < num_sectors)
6503 return -ENOSPC;
6504 }
6505 rv = mddev->pers->resize(mddev, num_sectors);
6506 if (!rv)
6507 revalidate_disk(mddev->gendisk);
6508 return rv;
6509}
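/*
 * update_size: num_sectors is the requested per-device data size; 0 means
 * "fit", i.e. shrink the request to the smallest member device.  Resizing
 * is refused while a resync/reshape is running (-EBUSY), on read-only
 * arrays (-EROFS) and on clustered arrays (-EINVAL), and fails with
 * -ENOSPC if any member is smaller than the requested size.
 */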
6510
6511static int update_raid_disks(struct mddev *mddev, int raid_disks)
6512{
6513 int rv;
6514 struct md_rdev *rdev;
6515
6516 if (mddev->pers->check_reshape == NULL)
6517 return -EINVAL;
6518 if (mddev->ro)
6519 return -EROFS;
6520 if (raid_disks <= 0 ||
6521 (mddev->max_disks && raid_disks >= mddev->max_disks))
6522 return -EINVAL;
6523 if (mddev->sync_thread ||
6524 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6525 mddev->reshape_position != MaxSector)
6526 return -EBUSY;
6527
6528 rdev_for_each(rdev, mddev) {
6529 if (mddev->raid_disks < raid_disks &&
6530 rdev->data_offset < rdev->new_data_offset)
6531 return -EINVAL;
6532 if (mddev->raid_disks > raid_disks &&
6533 rdev->data_offset > rdev->new_data_offset)
6534 return -EINVAL;
6535 }
6536
6537 mddev->delta_disks = raid_disks - mddev->raid_disks;
6538 if (mddev->delta_disks < 0)
6539 mddev->reshape_backwards = 1;
6540 else if (mddev->delta_disks > 0)
6541 mddev->reshape_backwards = 0;
6542
6543 rv = mddev->pers->check_reshape(mddev);
6544 if (rv < 0) {
6545 mddev->delta_disks = 0;
6546 mddev->reshape_backwards = 0;
6547 }
6548 return rv;
6549}
6550
6551
6552
6553
6554
6555
6556
6557
6558
6559static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6560{
6561 int rv = 0;
6562 int cnt = 0;
6563 int state = 0;
6564
6565
6566 if (mddev->bitmap && mddev->bitmap_info.offset)
6567 state |= (1 << MD_SB_BITMAP_PRESENT);
6568
6569 if (mddev->major_version != info->major_version ||
6570 mddev->minor_version != info->minor_version ||
6571
6572 mddev->ctime != info->ctime ||
6573 mddev->level != info->level ||
6574
6575 mddev->persistent != !info->not_persistent ||
6576 mddev->chunk_sectors != info->chunk_size >> 9 ||
6577
6578 ((state^info->state) & 0xfffffe00)
6579 )
6580 return -EINVAL;
6581
6582 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6583 cnt++;
6584 if (mddev->raid_disks != info->raid_disks)
6585 cnt++;
6586 if (mddev->layout != info->layout)
6587 cnt++;
6588 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6589 cnt++;
6590 if (cnt == 0)
6591 return 0;
6592 if (cnt > 1)
6593 return -EINVAL;
6594
6595 if (mddev->layout != info->layout) {
6596
6597
6598
6599
6600 if (mddev->pers->check_reshape == NULL)
6601 return -EINVAL;
6602 else {
6603 mddev->new_layout = info->layout;
6604 rv = mddev->pers->check_reshape(mddev);
6605 if (rv)
6606 mddev->new_layout = mddev->layout;
6607 return rv;
6608 }
6609 }
6610 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6611 rv = update_size(mddev, (sector_t)info->size * 2);
6612
6613 if (mddev->raid_disks != info->raid_disks)
6614 rv = update_raid_disks(mddev, info->raid_disks);
6615
6616 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6617 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6618 rv = -EINVAL;
6619 goto err;
6620 }
6621 if (mddev->recovery || mddev->sync_thread) {
6622 rv = -EBUSY;
6623 goto err;
6624 }
6625 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6626 struct bitmap *bitmap;
6627
6628 if (mddev->bitmap) {
6629 rv = -EEXIST;
6630 goto err;
6631 }
6632 if (mddev->bitmap_info.default_offset == 0) {
6633 rv = -EINVAL;
6634 goto err;
6635 }
6636 mddev->bitmap_info.offset =
6637 mddev->bitmap_info.default_offset;
6638 mddev->bitmap_info.space =
6639 mddev->bitmap_info.default_space;
6640 mddev->pers->quiesce(mddev, 1);
6641 bitmap = bitmap_create(mddev, -1);
6642 if (!IS_ERR(bitmap)) {
6643 mddev->bitmap = bitmap;
6644 rv = bitmap_load(mddev);
6645 } else
6646 rv = PTR_ERR(bitmap);
6647 if (rv)
6648 bitmap_destroy(mddev);
6649 mddev->pers->quiesce(mddev, 0);
6650 } else {
6651
6652 if (!mddev->bitmap) {
6653 rv = -ENOENT;
6654 goto err;
6655 }
6656 if (mddev->bitmap->storage.file) {
6657 rv = -EINVAL;
6658 goto err;
6659 }
6660 if (mddev->bitmap_info.nodes) {
6661
6662 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6663 printk("md: can't change bitmap to none since the"
6664 " array is in use by more than one node\n");
6665 rv = -EPERM;
6666 md_cluster_ops->unlock_all_bitmaps(mddev);
6667 goto err;
6668 }
6669
6670 mddev->bitmap_info.nodes = 0;
6671 md_cluster_ops->leave(mddev);
6672 }
6673 mddev->pers->quiesce(mddev, 1);
6674 bitmap_destroy(mddev);
6675 mddev->pers->quiesce(mddev, 0);
6676 mddev->bitmap_info.offset = 0;
6677 }
6678 }
6679 md_update_sb(mddev, 1);
6680 return rv;
6681err:
6682 return rv;
6683}
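/*
 * update_array_info tolerates exactly one change per call: size,
 * raid_disks, layout, or adding/removing an internal bitmap.  Fields that
 * can never change (versions, ctime, level, persistence, chunk size) are
 * compared first and any mismatch yields -EINVAL.
 */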
6684
6685static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6686{
6687 struct md_rdev *rdev;
6688 int err = 0;
6689
6690 if (mddev->pers == NULL)
6691 return -ENODEV;
6692
6693 rcu_read_lock();
6694 rdev = find_rdev_rcu(mddev, dev);
6695 if (!rdev)
6696 err = -ENODEV;
6697 else {
6698 md_error(mddev, rdev);
6699 if (!test_bit(Faulty, &rdev->flags))
6700 err = -EBUSY;
6701 }
6702 rcu_read_unlock();
6703 return err;
6704}
6705
6706
6707
6708
6709
6710
6711
6712static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6713{
6714 struct mddev *mddev = bdev->bd_disk->private_data;
6715
6716 geo->heads = 2;
6717 geo->sectors = 4;
6718 geo->cylinders = mddev->array_sectors / 8;
6719 return 0;
6720}
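/*
 * md has no real geometry; report a fixed 2 heads x 4 sectors so legacy
 * partitioning tools that still ask for CHS get a cylinder count that
 * fits the array size (array_sectors / 8).
 */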
6721
6722static inline bool md_ioctl_valid(unsigned int cmd)
6723{
6724 switch (cmd) {
6725 case ADD_NEW_DISK:
6726 case BLKROSET:
6727 case GET_ARRAY_INFO:
6728 case GET_BITMAP_FILE:
6729 case GET_DISK_INFO:
6730 case HOT_ADD_DISK:
6731 case HOT_REMOVE_DISK:
6732 case RAID_AUTORUN:
6733 case RAID_VERSION:
6734 case RESTART_ARRAY_RW:
6735 case RUN_ARRAY:
6736 case SET_ARRAY_INFO:
6737 case SET_BITMAP_FILE:
6738 case SET_DISK_FAULTY:
6739 case STOP_ARRAY:
6740 case STOP_ARRAY_RO:
6741 case CLUSTERED_DISK_NACK:
6742 return true;
6743 default:
6744 return false;
6745 }
6746}
6747
6748static int md_ioctl(struct block_device *bdev, fmode_t mode,
6749 unsigned int cmd, unsigned long arg)
6750{
6751 int err = 0;
6752 void __user *argp = (void __user *)arg;
6753 struct mddev *mddev = NULL;
6754 int ro;
6755
6756 if (!md_ioctl_valid(cmd))
6757 return -ENOTTY;
6758
6759 switch (cmd) {
6760 case RAID_VERSION:
6761 case GET_ARRAY_INFO:
6762 case GET_DISK_INFO:
6763 break;
6764 default:
6765 if (!capable(CAP_SYS_ADMIN))
6766 return -EACCES;
6767 }
6768
6769
6770
6771
6772
6773 switch (cmd) {
6774 case RAID_VERSION:
6775 err = get_version(argp);
6776 goto out;
6777
6778#ifndef MODULE
6779 case RAID_AUTORUN:
6780 err = 0;
6781 autostart_arrays(arg);
6782 goto out;
6783#endif
6784 default:;
6785 }
6786
6787
6788
6789
6790
6791 mddev = bdev->bd_disk->private_data;
6792
6793 if (!mddev) {
6794 BUG();
6795 goto out;
6796 }
6797
6798
6799 switch (cmd) {
6800 case GET_ARRAY_INFO:
6801 if (!mddev->raid_disks && !mddev->external)
6802 err = -ENODEV;
6803 else
6804 err = get_array_info(mddev, argp);
6805 goto out;
6806
6807 case GET_DISK_INFO:
6808 if (!mddev->raid_disks && !mddev->external)
6809 err = -ENODEV;
6810 else
6811 err = get_disk_info(mddev, argp);
6812 goto out;
6813
6814 case SET_DISK_FAULTY:
6815 err = set_disk_faulty(mddev, new_decode_dev(arg));
6816 goto out;
6817
6818 case GET_BITMAP_FILE:
6819 err = get_bitmap_file(mddev, argp);
6820 goto out;
6821
6822 }
6823
6824 if (cmd == ADD_NEW_DISK)
6825
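/* make sure any device deletion still queued on md_misc_wq has
 * completed before a new device is added
 */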
6826 flush_workqueue(md_misc_wq);
6827
6828 if (cmd == HOT_REMOVE_DISK)
6829
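/* give any pending recovery handling a chance to run (bounded by a
 * 5 second timeout) before the device is removed
 */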
6830 wait_event_interruptible_timeout(mddev->sb_wait,
6831 !test_bit(MD_RECOVERY_NEEDED,
6832 &mddev->recovery),
6833 msecs_to_jiffies(5000));
6834 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6835
6836
6837
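/* Make sure nobody else has the array open, mark it as closing and
 * flush the page cache before it is stopped.
 */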
6838 mutex_lock(&mddev->open_mutex);
6839 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6840 mutex_unlock(&mddev->open_mutex);
6841 err = -EBUSY;
6842 goto out;
6843 }
6844 set_bit(MD_CLOSING, &mddev->flags);
6845 mutex_unlock(&mddev->open_mutex);
6846 sync_blockdev(bdev);
6847 }
6848 err = mddev_lock(mddev);
6849 if (err) {
6850 printk(KERN_INFO
6851 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6852 err, cmd);
6853 goto out;
6854 }
6855
6856 if (cmd == SET_ARRAY_INFO) {
6857 mdu_array_info_t info;
6858 if (!arg)
6859 memset(&info, 0, sizeof(info));
6860 else if (copy_from_user(&info, argp, sizeof(info))) {
6861 err = -EFAULT;
6862 goto unlock;
6863 }
6864 if (mddev->pers) {
6865 err = update_array_info(mddev, &info);
6866 if (err) {
6867 printk(KERN_WARNING "md: couldn't update array info. %d\n", err);
6869 goto unlock;
6870 }
6871 goto unlock;
6872 }
6873 if (!list_empty(&mddev->disks)) {
6874 printk(KERN_WARNING
6875 "md: array %s already has disks!\n",
6876 mdname(mddev));
6877 err = -EBUSY;
6878 goto unlock;
6879 }
6880 if (mddev->raid_disks) {
6881 printk(KERN_WARNING
6882 "md: array %s already initialised!\n",
6883 mdname(mddev));
6884 err = -EBUSY;
6885 goto unlock;
6886 }
6887 err = set_array_info(mddev, &info);
6888 if (err) {
6889 printk(KERN_WARNING "md: couldn't set array info. %d\n", err);
6891 goto unlock;
6892 }
6893 goto unlock;
6894 }
6895
6896
6897
6898
6899
6900
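/* If the array is not yet initialised, only ADD_NEW_DISK, STOP_ARRAY,
 * RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE make sense.
 */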
6901 if ((!mddev->raid_disks && !mddev->external)
6902 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6903 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6904 && cmd != GET_BITMAP_FILE) {
6905 err = -ENODEV;
6906 goto unlock;
6907 }
6908
6909
6910
6911
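/* Commands even a read-only array can execute: */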
6912 switch (cmd) {
6913 case RESTART_ARRAY_RW:
6914 err = restart_array(mddev);
6915 goto unlock;
6916
6917 case STOP_ARRAY:
6918 err = do_md_stop(mddev, 0, bdev);
6919 goto unlock;
6920
6921 case STOP_ARRAY_RO:
6922 err = md_set_readonly(mddev, bdev);
6923 goto unlock;
6924
6925 case HOT_REMOVE_DISK:
6926 err = hot_remove_disk(mddev, new_decode_dev(arg));
6927 goto unlock;
6928
6929 case ADD_NEW_DISK:
6930
6931
6932
6933
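/* On an active array, only a re-add of a device that is already in
 * sync (MD_DISK_SYNC) is handled here; a genuinely new disk is handled
 * further down once the array has been made writable.
 */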
6934 if (mddev->pers) {
6935 mdu_disk_info_t info;
6936 if (copy_from_user(&info, argp, sizeof(info)))
6937 err = -EFAULT;
6938 else if (!(info.state & (1<<MD_DISK_SYNC)))
6939
6940 break;
6941 else
6942 err = add_new_disk(mddev, &info);
6943 goto unlock;
6944 }
6945 break;
6946
6947 case BLKROSET:
6948 if (get_user(ro, (int __user *)(arg))) {
6949 err = -EFAULT;
6950 goto unlock;
6951 }
6952 err = -EINVAL;
6953
6954
6955
6956
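/* If the bdev is going read-only the value of mddev->ro does not
 * matter; no writes will be coming.
 */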
6957 if (ro)
6958 goto unlock;
6959
6960
6961 if (mddev->ro != 1)
6962 goto unlock;
6963
6964
6965
6966
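/* Restart the array in auto-read-only mode (ro == 2); the first write
 * will switch it fully to read-write.
 */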
6967 if (mddev->pers) {
6968 err = restart_array(mddev);
6969 if (err == 0) {
6970 mddev->ro = 2;
6971 set_disk_ro(mddev->gendisk, 0);
6972 }
6973 }
6974 goto unlock;
6975 }
6976
6977
6978
6979
6980
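/*
 * The remaining ioctls change the state of the array, so a read-only
 * array must first be switched to read-write (from auto-read-only) or
 * the request refused with -EROFS.
 */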
6981 if (mddev->ro && mddev->pers) {
6982 if (mddev->ro == 2) {
6983 mddev->ro = 0;
6984 sysfs_notify_dirent_safe(mddev->sysfs_state);
6985 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6986
6987
6988
6989
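/* If a device failed while the array was read-only, make sure the
 * resulting metadata update has completed before continuing.
 */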
6990 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6991 mddev_unlock(mddev);
6992 wait_event(mddev->sb_wait,
6993 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6994 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6995 mddev_lock_nointr(mddev);
6996 }
6997 } else {
6998 err = -EROFS;
6999 goto unlock;
7000 }
7001 }
7002
7003 switch (cmd) {
7004 case ADD_NEW_DISK:
7005 {
7006 mdu_disk_info_t info;
7007 if (copy_from_user(&info, argp, sizeof(info)))
7008 err = -EFAULT;
7009 else
7010 err = add_new_disk(mddev, &info);
7011 goto unlock;
7012 }
7013
7014 case CLUSTERED_DISK_NACK:
7015 if (mddev_is_clustered(mddev))
7016 md_cluster_ops->new_disk_ack(mddev, false);
7017 else
7018 err = -EINVAL;
7019 goto unlock;
7020
7021 case HOT_ADD_DISK:
7022 err = hot_add_disk(mddev, new_decode_dev(arg));
7023 goto unlock;
7024
7025 case RUN_ARRAY:
7026 err = do_md_run(mddev);
7027 goto unlock;
7028
7029 case SET_BITMAP_FILE:
7030 err = set_bitmap_file(mddev, (int)arg);
7031 goto unlock;
7032
7033 default:
7034 err = -EINVAL;
7035 goto unlock;
7036 }
7037
7038unlock:
7039 if (mddev->hold_active == UNTIL_IOCTL &&
7040 err != -EINVAL)
7041 mddev->hold_active = 0;
7042 mddev_unlock(mddev);
7043out:
7044 return err;
7045}
7046#ifdef CONFIG_COMPAT
7047static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7048 unsigned int cmd, unsigned long arg)
7049{
7050 switch (cmd) {
7051 case HOT_REMOVE_DISK:
7052 case HOT_ADD_DISK:
7053 case SET_DISK_FAULTY:
7054 case SET_BITMAP_FILE:
7055
7056 break;
7057 default:
7058 arg = (unsigned long)compat_ptr(arg);
7059 break;
7060 }
7061
7062 return md_ioctl(bdev, mode, cmd, arg);
7063}
7064#endif
7065
7066static int md_open(struct block_device *bdev, fmode_t mode)
7067{
7068
7069
7070
7071
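/*
 * Succeed only if we can find the mddev and it is not in the middle of
 * being stopped.
 */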
7072 struct mddev *mddev = mddev_find(bdev->bd_dev);
7073 int err;
7074
7075 if (!mddev)
7076 return -ENODEV;
7077
7078 if (mddev->gendisk != bdev->bd_disk) {
7079
7080
7081
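/* We raced with a concurrent mddev_put() which is discarding this
 * bd_disk: drop our reference, wait for the stale gendisk to go away,
 * then ask the caller to retry the open from the top.
 */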
7082 mddev_put(mddev);
7083
7084 flush_workqueue(md_misc_wq);
7085
7086 return -ERESTARTSYS;
7087 }
7088 BUG_ON(mddev != bdev->bd_disk->private_data);
7089
7090 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7091 goto out;
7092
7093 if (test_bit(MD_CLOSING, &mddev->flags)) {
7094 mutex_unlock(&mddev->open_mutex);
7095 return -ENODEV;
7096 }
7097
7098 err = 0;
7099 atomic_inc(&mddev->openers);
7100 mutex_unlock(&mddev->open_mutex);
7101
7102 check_disk_change(bdev);
7103 out:
7104 return err;
7105}
7106
7107static void md_release(struct gendisk *disk, fmode_t mode)
7108{
7109 struct mddev *mddev = disk->private_data;
7110
7111 BUG_ON(!mddev);
7112 atomic_dec(&mddev->openers);
7113 mddev_put(mddev);
7114}
7115
7116static int md_media_changed(struct gendisk *disk)
7117{
7118 struct mddev *mddev = disk->private_data;
7119
7120 return mddev->changed;
7121}
7122
7123static int md_revalidate(struct gendisk *disk)
7124{
7125 struct mddev *mddev = disk->private_data;
7126
7127 mddev->changed = 0;
7128 return 0;
7129}
7130static const struct block_device_operations md_fops =
7131{
7132 .owner = THIS_MODULE,
7133 .open = md_open,
7134 .release = md_release,
7135 .ioctl = md_ioctl,
7136#ifdef CONFIG_COMPAT
7137 .compat_ioctl = md_compat_ioctl,
7138#endif
7139 .getgeo = md_getgeo,
7140 .media_changed = md_media_changed,
7141 .revalidate_disk= md_revalidate,
7142};
7143
7144static int md_thread(void *arg)
7145{
7146 struct md_thread *thread = arg;
7147
7148
7149
7150
7151
7152
7153
7154
7155
7156
7157
7158
7159
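/*
 * md_thread is the generic per-array service loop: sleep until
 * THREAD_WAKEUP is set (or the optional timeout expires), then call
 * the registered run handler.
 */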
7160 allow_signal(SIGKILL);
7161 while (!kthread_should_stop()) {
7162
7163
7164
7165
7166
7167
7168 if (signal_pending(current))
7169 flush_signals(current);
7170
7171 wait_event_interruptible_timeout
7172 (thread->wqueue,
7173 test_bit(THREAD_WAKEUP, &thread->flags)
7174 || kthread_should_stop(),
7175 thread->timeout);
7176
7177 clear_bit(THREAD_WAKEUP, &thread->flags);
7178 if (!kthread_should_stop())
7179 thread->run(thread);
7180 }
7181
7182 return 0;
7183}
7184
7185void md_wakeup_thread(struct md_thread *thread)
7186{
7187 if (thread) {
7188 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7189 set_bit(THREAD_WAKEUP, &thread->flags);
7190 wake_up(&thread->wqueue);
7191 }
7192}
7193EXPORT_SYMBOL(md_wakeup_thread);
7194
7195struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7196 struct mddev *mddev, const char *name)
7197{
7198 struct md_thread *thread;
7199
7200 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7201 if (!thread)
7202 return NULL;
7203
7204 init_waitqueue_head(&thread->wqueue);
7205
7206 thread->run = run;
7207 thread->mddev = mddev;
7208 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7209 thread->tsk = kthread_run(md_thread, thread,
7210 "%s_%s",
7211 mdname(thread->mddev),
7212 name);
7213 if (IS_ERR(thread->tsk)) {
7214 kfree(thread);
7215 return NULL;
7216 }
7217 return thread;
7218}
7219EXPORT_SYMBOL(md_register_thread);
7220
7221void md_unregister_thread(struct md_thread **threadp)
7222{
7223 struct md_thread *thread = *threadp;
7224 if (!thread)
7225 return;
7226 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7227
7228
7229
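/* Locking ensures that mddev_unlock does not wake up a
 * non-existent thread
 */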
7230 spin_lock(&pers_lock);
7231 *threadp = NULL;
7232 spin_unlock(&pers_lock);
7233
7234 kthread_stop(thread->tsk);
7235 kfree(thread);
7236}
7237EXPORT_SYMBOL(md_unregister_thread);
7238
7239void md_error(struct mddev *mddev, struct md_rdev *rdev)
7240{
7241 if (!rdev || test_bit(Faulty, &rdev->flags))
7242 return;
7243
7244 if (!mddev->pers || !mddev->pers->error_handler)
7245 return;
7246 mddev->pers->error_handler(mddev, rdev);
7247 if (mddev->degraded)
7248 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7249 sysfs_notify_dirent_safe(rdev->sysfs_state);
7250 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7251 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7252 md_wakeup_thread(mddev->thread);
7253 if (mddev->event_work.func)
7254 queue_work(md_misc_wq, &mddev->event_work);
7255 md_new_event(mddev);
7256}
7257EXPORT_SYMBOL(md_error);
7258
7259
7260
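/* seq_file implementation for /proc/mdstat */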
7261static void status_unused(struct seq_file *seq)
7262{
7263 int i = 0;
7264 struct md_rdev *rdev;
7265
7266 seq_printf(seq, "unused devices: ");
7267
7268 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7269 char b[BDEVNAME_SIZE];
7270 i++;
7271 seq_printf(seq, "%s ",
7272 bdevname(rdev->bdev,b));
7273 }
7274 if (!i)
7275 seq_printf(seq, "<none>");
7276
7277 seq_printf(seq, "\n");
7278}
7279
7280static int status_resync(struct seq_file *seq, struct mddev *mddev)
7281{
7282 sector_t max_sectors, resync, res;
7283 unsigned long dt, db;
7284 sector_t rt;
7285 int scale;
7286 unsigned int per_milli;
7287
7288 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7289 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7290 max_sectors = mddev->resync_max_sectors;
7291 else
7292 max_sectors = mddev->dev_sectors;
7293
7294 resync = mddev->curr_resync;
7295 if (resync <= 3) {
7296 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7297
7298 resync = max_sectors;
7299 } else
7300 resync -= atomic_read(&mddev->recovery_active);
7301
7302 if (resync == 0) {
7303 if (mddev->recovery_cp < MaxSector) {
7304 seq_printf(seq, "\tresync=PENDING");
7305 return 1;
7306 }
7307 return 0;
7308 }
7309 if (resync < 3) {
7310 seq_printf(seq, "\tresync=DELAYED");
7311 return 1;
7312 }
7313
7314 WARN_ON(max_sectors == 0);
7315
7316
7317
7318
7319
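/* Pick a shift 'scale' so that (resync>>scale)*1000 cannot overflow;
 * completion is then reported in units of 0.1%.
 */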
7320 scale = 10;
7321 if (sizeof(sector_t) > sizeof(unsigned long)) {
7322 while ( max_sectors/2 > (1ULL<<(scale+32)))
7323 scale++;
7324 }
7325 res = (resync>>scale)*1000;
7326 sector_div(res, (u32)((max_sectors>>scale)+1));
7327
7328 per_milli = res;
7329 {
7330 int i, x = per_milli/50, y = 20-x;
7331 seq_printf(seq, "[");
7332 for (i = 0; i < x; i++)
7333 seq_printf(seq, "=");
7334 seq_printf(seq, ">");
7335 for (i = 0; i < y; i++)
7336 seq_printf(seq, ".");
7337 seq_printf(seq, "] ");
7338 }
7339 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7340 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7341 "reshape" :
7342 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7343 "check" :
7344 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7345 "resync" : "recovery"))),
7346 per_milli/10, per_milli % 10,
7347 (unsigned long long) resync/2,
7348 (unsigned long long) max_sectors/2);
7349
7350
7351
7352
7353
7354
7355
7356
7357
7358
7359
7360
7361
7362
7363
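/*
 * Estimate the remaining time from the most recent rate mark:
 * dt is seconds since the mark, db is sectors completed since the
 * mark, rt is the projected time remaining.  Divide before
 * multiplying, scaling db down by 32 and shifting the result back,
 * so intermediate values cannot overflow a 32-bit sector_t.
 */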
7364 dt = ((jiffies - mddev->resync_mark) / HZ);
7365 if (!dt) dt++;
7366 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7367 - mddev->resync_mark_cnt;
7368
7369 rt = max_sectors - resync;
7370 sector_div(rt, db/32+1);
7371 rt *= dt;
7372 rt >>= 5;
7373
7374 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7375 ((unsigned long)rt % 60)/6);
7376
7377 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7378 return 1;
7379}
7380
7381static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7382{
7383 struct list_head *tmp;
7384 loff_t l = *pos;
7385 struct mddev *mddev;
7386
7387 if (l >= 0x10000)
7388 return NULL;
7389 if (!l--)
7390
7391 return (void*)1;
7392
7393 spin_lock(&all_mddevs_lock);
7394 list_for_each(tmp,&all_mddevs)
7395 if (!l--) {
7396 mddev = list_entry(tmp, struct mddev, all_mddevs);
7397 mddev_get(mddev);
7398 spin_unlock(&all_mddevs_lock);
7399 return mddev;
7400 }
7401 spin_unlock(&all_mddevs_lock);
7402 if (!l--)
7403 return (void*)2;
7404 return NULL;
7405}
7406
7407static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7408{
7409 struct list_head *tmp;
7410 struct mddev *next_mddev, *mddev = v;
7411
7412 ++*pos;
7413 if (v == (void*)2)
7414 return NULL;
7415
7416 spin_lock(&all_mddevs_lock);
7417 if (v == (void*)1)
7418 tmp = all_mddevs.next;
7419 else
7420 tmp = mddev->all_mddevs.next;
7421 if (tmp != &all_mddevs)
7422 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7423 else {
7424 next_mddev = (void*)2;
7425 *pos = 0x10000;
7426 }
7427 spin_unlock(&all_mddevs_lock);
7428
7429 if (v != (void*)1)
7430 mddev_put(mddev);
7431 return next_mddev;
7432
7433}
7434
7435static void md_seq_stop(struct seq_file *seq, void *v)
7436{
7437 struct mddev *mddev = v;
7438
7439 if (mddev && v != (void*)1 && v != (void*)2)
7440 mddev_put(mddev);
7441}
7442
7443static int md_seq_show(struct seq_file *seq, void *v)
7444{
7445 struct mddev *mddev = v;
7446 sector_t sectors;
7447 struct md_rdev *rdev;
7448
7449 if (v == (void*)1) {
7450 struct md_personality *pers;
7451 seq_printf(seq, "Personalities : ");
7452 spin_lock(&pers_lock);
7453 list_for_each_entry(pers, &pers_list, list)
7454 seq_printf(seq, "[%s] ", pers->name);
7455
7456 spin_unlock(&pers_lock);
7457 seq_printf(seq, "\n");
7458 seq->poll_event = atomic_read(&md_event_count);
7459 return 0;
7460 }
7461 if (v == (void*)2) {
7462 status_unused(seq);
7463 return 0;
7464 }
7465
7466 spin_lock(&mddev->lock);
7467 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7468 seq_printf(seq, "%s : %sactive", mdname(mddev),
7469 mddev->pers ? "" : "in");
7470 if (mddev->pers) {
7471 if (mddev->ro==1)
7472 seq_printf(seq, " (read-only)");
7473 if (mddev->ro==2)
7474 seq_printf(seq, " (auto-read-only)");
7475 seq_printf(seq, " %s", mddev->pers->name);
7476 }
7477
7478 sectors = 0;
7479 rcu_read_lock();
7480 rdev_for_each_rcu(rdev, mddev) {
7481 char b[BDEVNAME_SIZE];
7482 seq_printf(seq, " %s[%d]",
7483 bdevname(rdev->bdev,b), rdev->desc_nr);
7484 if (test_bit(WriteMostly, &rdev->flags))
7485 seq_printf(seq, "(W)");
7486 if (test_bit(Journal, &rdev->flags))
7487 seq_printf(seq, "(J)");
7488 if (test_bit(Faulty, &rdev->flags)) {
7489 seq_printf(seq, "(F)");
7490 continue;
7491 }
7492 if (rdev->raid_disk < 0)
7493 seq_printf(seq, "(S)");
7494 if (test_bit(Replacement, &rdev->flags))
7495 seq_printf(seq, "(R)");
7496 sectors += rdev->sectors;
7497 }
7498 rcu_read_unlock();
7499
7500 if (!list_empty(&mddev->disks)) {
7501 if (mddev->pers)
7502 seq_printf(seq, "\n %llu blocks",
7503 (unsigned long long)
7504 mddev->array_sectors / 2);
7505 else
7506 seq_printf(seq, "\n %llu blocks",
7507 (unsigned long long)sectors / 2);
7508 }
7509 if (mddev->persistent) {
7510 if (mddev->major_version != 0 ||
7511 mddev->minor_version != 90) {
7512 seq_printf(seq," super %d.%d",
7513 mddev->major_version,
7514 mddev->minor_version);
7515 }
7516 } else if (mddev->external)
7517 seq_printf(seq, " super external:%s",
7518 mddev->metadata_type);
7519 else
7520 seq_printf(seq, " super non-persistent");
7521
7522 if (mddev->pers) {
7523 mddev->pers->status(seq, mddev);
7524 seq_printf(seq, "\n ");
7525 if (mddev->pers->sync_request) {
7526 if (status_resync(seq, mddev))
7527 seq_printf(seq, "\n ");
7528 }
7529 } else
7530 seq_printf(seq, "\n ");
7531
7532 bitmap_status(seq, mddev->bitmap);
7533
7534 seq_printf(seq, "\n");
7535 }
7536 spin_unlock(&mddev->lock);
7537
7538 return 0;
7539}
7540
7541static const struct seq_operations md_seq_ops = {
7542 .start = md_seq_start,
7543 .next = md_seq_next,
7544 .stop = md_seq_stop,
7545 .show = md_seq_show,
7546};
7547
7548static int md_seq_open(struct inode *inode, struct file *file)
7549{
7550 struct seq_file *seq;
7551 int error;
7552
7553 error = seq_open(file, &md_seq_ops);
7554 if (error)
7555 return error;
7556
7557 seq = file->private_data;
7558 seq->poll_event = atomic_read(&md_event_count);
7559 return error;
7560}
7561
7562static int md_unloading;
7563static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7564{
7565 struct seq_file *seq = filp->private_data;
7566 int mask;
7567
7568 if (md_unloading)
7569 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7570 poll_wait(filp, &md_event_waiters, wait);
7571
7572
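/* always allow read */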
7573 mask = POLLIN | POLLRDNORM;
7574
7575 if (seq->poll_event != atomic_read(&md_event_count))
7576 mask |= POLLERR | POLLPRI;
7577 return mask;
7578}
7579
7580static const struct file_operations md_seq_fops = {
7581 .owner = THIS_MODULE,
7582 .open = md_seq_open,
7583 .read = seq_read,
7584 .llseek = seq_lseek,
7585 .release = seq_release_private,
7586 .poll = mdstat_poll,
7587};
7588
7589int register_md_personality(struct md_personality *p)
7590{
7591 printk(KERN_INFO "md: %s personality registered for level %d\n",
7592 p->name, p->level);
7593 spin_lock(&pers_lock);
7594 list_add_tail(&p->list, &pers_list);
7595 spin_unlock(&pers_lock);
7596 return 0;
7597}
7598EXPORT_SYMBOL(register_md_personality);
7599
7600int unregister_md_personality(struct md_personality *p)
7601{
7602 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7603 spin_lock(&pers_lock);
7604 list_del_init(&p->list);
7605 spin_unlock(&pers_lock);
7606 return 0;
7607}
7608EXPORT_SYMBOL(unregister_md_personality);
7609
7610int register_md_cluster_operations(struct md_cluster_operations *ops,
7611 struct module *module)
7612{
7613 int ret = 0;
7614 spin_lock(&pers_lock);
7615 if (md_cluster_ops != NULL)
7616 ret = -EALREADY;
7617 else {
7618 md_cluster_ops = ops;
7619 md_cluster_mod = module;
7620 }
7621 spin_unlock(&pers_lock);
7622 return ret;
7623}
7624EXPORT_SYMBOL(register_md_cluster_operations);
7625
7626int unregister_md_cluster_operations(void)
7627{
7628 spin_lock(&pers_lock);
7629 md_cluster_ops = NULL;
7630 spin_unlock(&pers_lock);
7631 return 0;
7632}
7633EXPORT_SYMBOL(unregister_md_cluster_operations);
7634
7635int md_setup_cluster(struct mddev *mddev, int nodes)
7636{
7637 if (!md_cluster_ops)
7638 request_module("md-cluster");
7639 spin_lock(&pers_lock);
7640
7641 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7642 pr_err("can't find md-cluster module or get its reference.\n");
7643 spin_unlock(&pers_lock);
7644 return -ENOENT;
7645 }
7646 spin_unlock(&pers_lock);
7647
7648 return md_cluster_ops->join(mddev, nodes);
7649}
7650
7651void md_cluster_stop(struct mddev *mddev)
7652{
7653 if (!md_cluster_ops)
7654 return;
7655 md_cluster_ops->leave(mddev);
7656 module_put(md_cluster_mod);
7657}
7658
7659static int is_mddev_idle(struct mddev *mddev, int init)
7660{
7661 struct md_rdev *rdev;
7662 int idle;
7663 int curr_events;
7664
7665 idle = 1;
7666 rcu_read_lock();
7667 rdev_for_each_rcu(rdev, mddev) {
7668 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7669 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7670 (int)part_stat_read(&disk->part0, sectors[1]) -
7671 atomic_read(&disk->sync_io);
7672
7673
7674
7675
7676
7677
7678
7679
7680
7681
7682
7683
7684
7685
7686
7687
7688
7689
7690
7691
7692
7693
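/* curr_events approximates I/O that did not come from the resync
 * itself (all sectors accounted to the disk minus those submitted as
 * sync I/O).  If it has grown by more than a small margin since the
 * last check, other I/O is competing with the resync, so the array is
 * reported as not idle and the resync is throttled.
 */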
7694 if (init || curr_events - rdev->last_events > 64) {
7695 rdev->last_events = curr_events;
7696 idle = 0;
7697 }
7698 }
7699 rcu_read_unlock();
7700 return idle;
7701}
7702
7703void md_done_sync(struct mddev *mddev, int blocks, int ok)
7704{
7705
7706 atomic_sub(blocks, &mddev->recovery_active);
7707 wake_up(&mddev->recovery_wait);
7708 if (!ok) {
7709 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7710 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7711 md_wakeup_thread(mddev->thread);
7712
7713 }
7714}
7715EXPORT_SYMBOL(md_done_sync);
7716
7717
7718
7719
7720
7721
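/* md_write_start(mddev, bi)
 * If array metadata (e.g. the 'active' flag in the superblock) needs
 * updating before the write can proceed, schedule the update and wait
 * for it to complete.
 */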
7722void md_write_start(struct mddev *mddev, struct bio *bi)
7723{
7724 int did_change = 0;
7725 if (bio_data_dir(bi) != WRITE)
7726 return;
7727
7728 BUG_ON(mddev->ro == 1);
7729 if (mddev->ro == 2) {
7730
7731 mddev->ro = 0;
7732 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7733 md_wakeup_thread(mddev->thread);
7734 md_wakeup_thread(mddev->sync_thread);
7735 did_change = 1;
7736 }
7737 atomic_inc(&mddev->writes_pending);
7738 if (mddev->safemode == 1)
7739 mddev->safemode = 0;
7740 if (mddev->in_sync) {
7741 spin_lock(&mddev->lock);
7742 if (mddev->in_sync) {
7743 mddev->in_sync = 0;
7744 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7745 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7746 md_wakeup_thread(mddev->thread);
7747 did_change = 1;
7748 }
7749 spin_unlock(&mddev->lock);
7750 }
7751 if (did_change)
7752 sysfs_notify_dirent_safe(mddev->sysfs_state);
7753 wait_event(mddev->sb_wait,
7754 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7755}
7756EXPORT_SYMBOL(md_write_start);
7757
7758void md_write_end(struct mddev *mddev)
7759{
7760 if (atomic_dec_and_test(&mddev->writes_pending)) {
7761 if (mddev->safemode == 2)
7762 md_wakeup_thread(mddev->thread);
7763 else if (mddev->safemode_delay)
7764 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7765 }
7766}
7767EXPORT_SYMBOL(md_write_end);
7768
7769
7770
7771
7772
7773
7774
7775
7776
7777
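/* md_allow_write(mddev)
 * Ensure the array is marked 'active' so writes may proceed without
 * blocking; call it before an action (such as a GFP_KERNEL allocation)
 * that could otherwise deadlock behind a superblock write.  Returns
 * -EAGAIN if the metadata update is still pending (e.g. externally
 * managed metadata), 0 otherwise.
 */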
7778int md_allow_write(struct mddev *mddev)
7779{
7780 if (!mddev->pers)
7781 return 0;
7782 if (mddev->ro)
7783 return 0;
7784 if (!mddev->pers->sync_request)
7785 return 0;
7786
7787 spin_lock(&mddev->lock);
7788 if (mddev->in_sync) {
7789 mddev->in_sync = 0;
7790 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7791 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7792 if (mddev->safemode_delay &&
7793 mddev->safemode == 0)
7794 mddev->safemode = 1;
7795 spin_unlock(&mddev->lock);
7796 md_update_sb(mddev, 0);
7797 sysfs_notify_dirent_safe(mddev->sysfs_state);
7798 } else
7799 spin_unlock(&mddev->lock);
7800
7801 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7802 return -EAGAIN;
7803 else
7804 return 0;
7805}
7806EXPORT_SYMBOL_GPL(md_allow_write);
7807
7808#define SYNC_MARKS 10
7809#define SYNC_MARK_STEP (3*HZ)
7810#define UPDATE_FREQUENCY (5*60*HZ)
7811void md_do_sync(struct md_thread *thread)
7812{
7813 struct mddev *mddev = thread->mddev;
7814 struct mddev *mddev2;
7815 unsigned int currspeed = 0,
7816 window;
7817 sector_t max_sectors,j, io_sectors, recovery_done;
7818 unsigned long mark[SYNC_MARKS];
7819 unsigned long update_time;
7820 sector_t mark_cnt[SYNC_MARKS];
7821 int last_mark,m;
7822 struct list_head *tmp;
7823 sector_t last_check;
7824 int skipped = 0;
7825 struct md_rdev *rdev;
7826 char *desc, *action = NULL;
7827 struct blk_plug plug;
7828 int ret;
7829
7830
7831 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7832 return;
7833 if (mddev->ro) {
7834 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7835 return;
7836 }
7837
7838 if (mddev_is_clustered(mddev)) {
7839 ret = md_cluster_ops->resync_start(mddev);
7840 if (ret)
7841 goto skip;
7842
7843 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
7844 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7845 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
7846 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
7847 && ((unsigned long long)mddev->curr_resync_completed
7848 < (unsigned long long)mddev->resync_max_sectors))
7849 goto skip;
7850 }
7851
7852 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7853 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7854 desc = "data-check";
7855 action = "check";
7856 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7857 desc = "requested-resync";
7858 action = "repair";
7859 } else
7860 desc = "resync";
7861 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7862 desc = "reshape";
7863 else
7864 desc = "recovery";
7865
7866 mddev->last_sync_action = action ?: desc;
7867
7868
7869
7870
7871
7872
7873
7874
7875
7876
7877
7878
7879
7880
7881
7882
7883
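/* curr_resync is overloaded to arbitrate between arrays that share
 * component devices:
 *   0 - no resync in progress
 *   2 - checking whether we conflict with another resync
 *   1 - like 2, but we have yielded to let a conflicting resync run
 *   3 or more - resync is active at (roughly) that sector
 */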
7884 do {
7885 int mddev2_minor = -1;
7886 mddev->curr_resync = 2;
7887
7888 try_again:
7889 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7890 goto skip;
7891 for_each_mddev(mddev2, tmp) {
7892 if (mddev2 == mddev)
7893 continue;
7894 if (!mddev->parallel_resync
7895 && mddev2->curr_resync
7896 && match_mddev_units(mddev, mddev2)) {
7897 DEFINE_WAIT(wq);
7898 if (mddev < mddev2 && mddev->curr_resync == 2) {
7899
7900 mddev->curr_resync = 1;
7901 wake_up(&resync_wait);
7902 }
7903 if (mddev > mddev2 && mddev->curr_resync == 1)
7904
7905
7906
7907 continue;
7908
7909
7910
7911
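/* Wait interruptibly so we neither inflate the load average nor
 * trigger soft-lockup warnings.
 */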
7912 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7913 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7914 mddev2->curr_resync >= mddev->curr_resync) {
7915 if (mddev2_minor != mddev2->md_minor) {
7916 mddev2_minor = mddev2->md_minor;
7917 printk(KERN_INFO "md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
7920 desc, mdname(mddev), mdname(mddev2));
7922 }
7923 mddev_put(mddev2);
7924 if (signal_pending(current))
7925 flush_signals(current);
7926 schedule();
7927 finish_wait(&resync_wait, &wq);
7928 goto try_again;
7929 }
7930 finish_wait(&resync_wait, &wq);
7931 }
7932 }
7933 } while (mddev->curr_resync < 2);
7934
7935 j = 0;
7936 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7937
7938
7939
7940 max_sectors = mddev->resync_max_sectors;
7941 atomic64_set(&mddev->resync_mismatches, 0);
7942
7943 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7944 j = mddev->resync_min;
7945 else if (!mddev->bitmap)
7946 j = mddev->recovery_cp;
7947
7948 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7949 max_sectors = mddev->resync_max_sectors;
7950 else {
7951
7952 max_sectors = mddev->dev_sectors;
7953 j = MaxSector;
7954 rcu_read_lock();
7955 rdev_for_each_rcu(rdev, mddev)
7956 if (rdev->raid_disk >= 0 &&
7957 !test_bit(Journal, &rdev->flags) &&
7958 !test_bit(Faulty, &rdev->flags) &&
7959 !test_bit(In_sync, &rdev->flags) &&
7960 rdev->recovery_offset < j)
7961 j = rdev->recovery_offset;
7962 rcu_read_unlock();
7963
7964
7965
7966
7967
7968
7969
7970
7971
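/* Quiesce the array once so that any write which started before a
 * spare was added has completed; otherwise such a write could set a
 * bitmap bit after recovery has already checked and skipped that
 * region.
 */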
7972 if (mddev->bitmap) {
7973 mddev->pers->quiesce(mddev, 1);
7974 mddev->pers->quiesce(mddev, 0);
7975 }
7976 }
7977
7978 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7979 printk(KERN_INFO "md: minimum _guaranteed_ speed: %d KB/sec/disk.\n",
7980 speed_min(mddev));
7981 printk(KERN_INFO "md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
7983 speed_max(mddev), desc);
7984
7985 is_mddev_idle(mddev, 1);
7986
7987 io_sectors = 0;
7988 for (m = 0; m < SYNC_MARKS; m++) {
7989 mark[m] = jiffies;
7990 mark_cnt[m] = io_sectors;
7991 }
7992 last_mark = 0;
7993 mddev->resync_mark = mark[last_mark];
7994 mddev->resync_mark_cnt = mark_cnt[last_mark];
7995
7996
7997
7998
7999 window = 32*(PAGE_SIZE/512);
8000 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
8001 window/2, (unsigned long long)max_sectors/2);
8002
8003 atomic_set(&mddev->recovery_active, 0);
8004 last_check = 0;
8005
8006 if (j>2) {
8007 printk(KERN_INFO
8008 "md: resuming %s of %s from checkpoint.\n",
8009 desc, mdname(mddev));
8010 mddev->curr_resync = j;
8011 } else
8012 mddev->curr_resync = 3;
8013 mddev->curr_resync_completed = j;
8014 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8015 md_new_event(mddev);
8016 update_time = jiffies;
8017
8018 blk_start_plug(&plug);
8019 while (j < max_sectors) {
8020 sector_t sectors;
8021
8022 skipped = 0;
8023
8024 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8025 ((mddev->curr_resync > mddev->curr_resync_completed &&
8026 (mddev->curr_resync - mddev->curr_resync_completed)
8027 > (max_sectors >> 4)) ||
8028 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8029 (j - mddev->curr_resync_completed)*2
8030 >= mddev->resync_max - mddev->curr_resync_completed ||
8031 mddev->curr_resync_completed > mddev->resync_max
8032 )) {
8033
8034 wait_event(mddev->recovery_wait,
8035 atomic_read(&mddev->recovery_active) == 0);
8036 mddev->curr_resync_completed = j;
8037 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8038 j > mddev->recovery_cp)
8039 mddev->recovery_cp = j;
8040 update_time = jiffies;
8041 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8042 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8043 }
8044
8045 while (j >= mddev->resync_max &&
8046 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8047
8048
8049
8050
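/* resync_max is under user-space control, so we may block here
 * indefinitely; wait interruptibly and discard pending signals first.
 */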
8051 flush_signals(current);
8052 wait_event_interruptible(mddev->recovery_wait,
8053 mddev->resync_max > j
8054 || test_bit(MD_RECOVERY_INTR,
8055 &mddev->recovery));
8056 }
8057
8058 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8059 break;
8060
8061 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8062 if (sectors == 0) {
8063 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8064 break;
8065 }
8066
8067 if (!skipped) {
8068 io_sectors += sectors;
8069 atomic_add(sectors, &mddev->recovery_active);
8070 }
8071
8072 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8073 break;
8074
8075 j += sectors;
8076 if (j > max_sectors)
8077
8078 j = max_sectors;
8079 if (j > 2)
8080 mddev->curr_resync = j;
8081 mddev->curr_mark_cnt = io_sectors;
8082 if (last_check == 0)
8083
8084
8085
8086 md_new_event(mddev);
8087
8088 if (last_check + window > io_sectors || j == max_sectors)
8089 continue;
8090
8091 last_check = io_sectors;
8092 repeat:
8093 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8094
8095 int next = (last_mark+1) % SYNC_MARKS;
8096
8097 mddev->resync_mark = mark[next];
8098 mddev->resync_mark_cnt = mark_cnt[next];
8099 mark[next] = jiffies;
8100 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8101 last_mark = next;
8102 }
8103
8104 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8105 break;
8106
8107
8108
8109
8110
8111
8112
8113
8114
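/*
 * Throttle only to avoid overloading the I/O subsystem: when above
 * the minimum speed, back off if we exceed the maximum speed or if
 * other I/O is active on the component devices.
 */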
8115 cond_resched();
8116
8117 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8118 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8119 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8120
8121 if (currspeed > speed_min(mddev)) {
8122 if (currspeed > speed_max(mddev)) {
8123 msleep(500);
8124 goto repeat;
8125 }
8126 if (!is_mddev_idle(mddev, 0)) {
8127
8128
8129
8130
8131 wait_event(mddev->recovery_wait,
8132 !atomic_read(&mddev->recovery_active));
8133 }
8134 }
8135 }
8136 printk(KERN_INFO "md: %s: %s %s.\n", mdname(mddev), desc,
8137 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8138 ? "interrupted" : "done");
8139
8140
8141
8142 blk_finish_plug(&plug);
8143 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8144
8145 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8146 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8147 mddev->curr_resync > 3) {
8148 mddev->curr_resync_completed = mddev->curr_resync;
8149 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8150 }
8151 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8152
8153 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8154 mddev->curr_resync > 3) {
8155 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8156 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8157 if (mddev->curr_resync >= mddev->recovery_cp) {
8158 printk(KERN_INFO
8159 "md: checkpointing %s of %s.\n",
8160 desc, mdname(mddev));
8161 if (test_bit(MD_RECOVERY_ERROR,
8162 &mddev->recovery))
8163 mddev->recovery_cp =
8164 mddev->curr_resync_completed;
8165 else
8166 mddev->recovery_cp =
8167 mddev->curr_resync;
8168 }
8169 } else
8170 mddev->recovery_cp = MaxSector;
8171 } else {
8172 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8173 mddev->curr_resync = MaxSector;
8174 rcu_read_lock();
8175 rdev_for_each_rcu(rdev, mddev)
8176 if (rdev->raid_disk >= 0 &&
8177 mddev->delta_disks >= 0 &&
8178 !test_bit(Journal, &rdev->flags) &&
8179 !test_bit(Faulty, &rdev->flags) &&
8180 !test_bit(In_sync, &rdev->flags) &&
8181 rdev->recovery_offset < mddev->curr_resync)
8182 rdev->recovery_offset = mddev->curr_resync;
8183 rcu_read_unlock();
8184 }
8185 }
8186 skip:
8187
8188
8189
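/* Set CHANGE_PENDING and CHANGE_DEVS so the metadata is written out
 * and, on a clustered array, the other nodes are informed.
 */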
8190 set_mask_bits(&mddev->flags, 0,
8191 BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
8192
8193 spin_lock(&mddev->lock);
8194 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8195
8196 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8197 mddev->resync_min = 0;
8198 mddev->resync_max = MaxSector;
8199 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8200 mddev->resync_min = mddev->curr_resync_completed;
8201 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8202 mddev->curr_resync = 0;
8203 spin_unlock(&mddev->lock);
8204
8205 wake_up(&resync_wait);
8206 md_wakeup_thread(mddev->thread);
8207 return;
8208}
8209EXPORT_SYMBOL_GPL(md_do_sync);
8210
8211static int remove_and_add_spares(struct mddev *mddev,
8212 struct md_rdev *this)
8213{
8214 struct md_rdev *rdev;
8215 int spares = 0;
8216 int removed = 0;
8217 bool remove_some = false;
8218
8219 rdev_for_each(rdev, mddev) {
8220 if ((this == NULL || rdev == this) &&
8221 rdev->raid_disk >= 0 &&
8222 !test_bit(Blocked, &rdev->flags) &&
8223 test_bit(Faulty, &rdev->flags) &&
8224 atomic_read(&rdev->nr_pending)==0) {
8225
8226
8227
8228
8229
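/* This device can safely be removed: mark it and do a single
 * synchronize_rcu() below so that any reader still referencing it has
 * finished before the personality drops it.
 */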
8230 remove_some = true;
8231 set_bit(RemoveSynchronized, &rdev->flags);
8232 }
8233 }
8234
8235 if (remove_some)
8236 synchronize_rcu();
8237 rdev_for_each(rdev, mddev) {
8238 if ((this == NULL || rdev == this) &&
8239 rdev->raid_disk >= 0 &&
8240 !test_bit(Blocked, &rdev->flags) &&
8241 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8242 (!test_bit(In_sync, &rdev->flags) &&
8243 !test_bit(Journal, &rdev->flags))) &&
8244 atomic_read(&rdev->nr_pending)==0)) {
8245 if (mddev->pers->hot_remove_disk(
8246 mddev, rdev) == 0) {
8247 sysfs_unlink_rdev(mddev, rdev);
8248 rdev->raid_disk = -1;
8249 removed++;
8250 }
8251 }
8252 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8253 clear_bit(RemoveSynchronized, &rdev->flags);
8254 }
8255
8256 if (removed && mddev->kobj.sd)
8257 sysfs_notify(&mddev->kobj, NULL, "degraded");
8258
8259 if (this && removed)
8260 goto no_add;
8261
8262 rdev_for_each(rdev, mddev) {
8263 if (this && this != rdev)
8264 continue;
8265 if (test_bit(Candidate, &rdev->flags))
8266 continue;
8267 if (rdev->raid_disk >= 0 &&
8268 !test_bit(In_sync, &rdev->flags) &&
8269 !test_bit(Journal, &rdev->flags) &&
8270 !test_bit(Faulty, &rdev->flags))
8271 spares++;
8272 if (rdev->raid_disk >= 0)
8273 continue;
8274 if (test_bit(Faulty, &rdev->flags))
8275 continue;
8276 if (!test_bit(Journal, &rdev->flags)) {
8277 if (mddev->ro &&
8278 ! (rdev->saved_raid_disk >= 0 &&
8279 !test_bit(Bitmap_sync, &rdev->flags)))
8280 continue;
8281
8282 rdev->recovery_offset = 0;
8283 }
8284 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
8286 if (sysfs_link_rdev(mddev, rdev))
8287 /* failure to create the sysfs link is not fatal */;
8288 if (!test_bit(Journal, &rdev->flags))
8289 spares++;
8290 md_new_event(mddev);
8291 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8292 }
8293 }
8294no_add:
8295 if (removed)
8296 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8297 return spares;
8298}
8299
8300static void md_start_sync(struct work_struct *ws)
8301{
8302 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8303
8304 mddev->sync_thread = md_register_thread(md_do_sync,
8305 mddev,
8306 "resync");
8307 if (!mddev->sync_thread) {
8308 printk(KERN_ERR "%s: could not start resync thread...\n",
8309 mdname(mddev));
8310
8311 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8312 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8313 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8314 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8315 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8316 wake_up(&resync_wait);
8317 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8318 &mddev->recovery))
8319 if (mddev->sysfs_action)
8320 sysfs_notify_dirent_safe(mddev->sysfs_action);
8321 } else
8322 md_wakeup_thread(mddev->sync_thread);
8323 sysfs_notify_dirent_safe(mddev->sysfs_action);
8324 md_new_event(mddev);
8325}
8326
8327
8328
8329
8330
8331
8332
8333
8334
8335
8336
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347
8348
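/*
 * md_check_recovery is called regularly by each array's service thread
 * to handle generic housekeeping: superblock updates, reaping a
 * finished sync thread, removing failed devices, adding spares and,
 * when needed, kicking off a new resync/recovery/reshape thread.  It
 * never performs any resync itself.
 */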
8349void md_check_recovery(struct mddev *mddev)
8350{
8351 if (mddev->suspended)
8352 return;
8353
8354 if (mddev->bitmap)
8355 bitmap_daemon_work(mddev);
8356
8357 if (signal_pending(current)) {
8358 if (mddev->pers->sync_request && !mddev->external) {
8359 printk(KERN_INFO "md: %s in immediate safe mode\n",
8360 mdname(mddev));
8361 mddev->safemode = 2;
8362 }
8363 flush_signals(current);
8364 }
8365
8366 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8367 return;
8368 if ( ! (
8369 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
8370 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8371 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8372 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8373 (mddev->external == 0 && mddev->safemode == 1) ||
8374 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8375 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8376 ))
8377 return;
8378
8379 if (mddev_trylock(mddev)) {
8380 int spares = 0;
8381
8382 if (mddev->ro) {
8383 struct md_rdev *rdev;
8384 if (!mddev->external && mddev->in_sync)
8385
8386
8387
8388
8389
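/* 'Blocked' is not needed on a clean read-only array: any failure is
 * recorded when the array is switched back to read-write, and leaving
 * the flag set would prevent the device from being removed.
 */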
8390 rdev_for_each(rdev, mddev)
8391 clear_bit(Blocked, &rdev->flags);
8392
8393
8394
8395
8396
8397
8398
8399 remove_and_add_spares(mddev, NULL);
8400
8401
8402
8403 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8404 md_reap_sync_thread(mddev);
8405 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8406 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8407 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
8408 goto unlock;
8409 }
8410
8411 if (mddev_is_clustered(mddev)) {
8412 struct md_rdev *rdev;
8413
8414
8415
8416 rdev_for_each(rdev, mddev) {
8417 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8418 rdev->raid_disk < 0)
8419 md_kick_rdev_from_array(rdev);
8420 }
8421
8422 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8423 md_reload_sb(mddev, mddev->good_device_nr);
8424 }
8425
8426 if (!mddev->external) {
8427 int did_change = 0;
8428 spin_lock(&mddev->lock);
8429 if (mddev->safemode &&
8430 !atomic_read(&mddev->writes_pending) &&
8431 !mddev->in_sync &&
8432 mddev->recovery_cp == MaxSector) {
8433 mddev->in_sync = 1;
8434 did_change = 1;
8435 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
8436 }
8437 if (mddev->safemode == 1)
8438 mddev->safemode = 0;
8439 spin_unlock(&mddev->lock);
8440 if (did_change)
8441 sysfs_notify_dirent_safe(mddev->sysfs_state);
8442 }
8443
8444 if (mddev->flags & MD_UPDATE_SB_FLAGS)
8445 md_update_sb(mddev, 0);
8446
8447 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8448 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
8449
8450 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8451 goto unlock;
8452 }
8453 if (mddev->sync_thread) {
8454 md_reap_sync_thread(mddev);
8455 goto unlock;
8456 }
8457
8458
8459
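/* Set RUNNING before clearing NEEDED to avoid any transients in the
 * value of "sync_action".
 */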
8460 mddev->curr_resync_completed = 0;
8461 spin_lock(&mddev->lock);
8462 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8463 spin_unlock(&mddev->lock);
8464
8465
8466
8467 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8468 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8469
8470 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8471 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8472 goto not_running;
8473
8474
8475
8476
8477
8478
8479
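/* No recovery is running.  Decide what to start next: resume a
 * pending reshape, rebuild onto spares, or resync if the array is not
 * clean.
 */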
8480 if (mddev->reshape_position != MaxSector) {
8481 if (mddev->pers->check_reshape == NULL ||
8482 mddev->pers->check_reshape(mddev) != 0)
8483
8484 goto not_running;
8485 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8486 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8487 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8488 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8489 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8490 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8491 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8492 } else if (mddev->recovery_cp < MaxSector) {
8493 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8494 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8495 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
8496
8497 goto not_running;
8498
8499 if (mddev->pers->sync_request) {
8500 if (spares) {
8501
8502
8503
8504
8505 bitmap_write_all(mddev->bitmap);
8506 }
8507 INIT_WORK(&mddev->del_work, md_start_sync);
8508 queue_work(md_misc_wq, &mddev->del_work);
8509 goto unlock;
8510 }
8511 not_running:
8512 if (!mddev->sync_thread) {
8513 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8514 wake_up(&resync_wait);
8515 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8516 &mddev->recovery))
8517 if (mddev->sysfs_action)
8518 sysfs_notify_dirent_safe(mddev->sysfs_action);
8519 }
8520 unlock:
8521 wake_up(&mddev->sb_wait);
8522 mddev_unlock(mddev);
8523 }
8524}
8525EXPORT_SYMBOL(md_check_recovery);
8526
8527void md_reap_sync_thread(struct mddev *mddev)
8528{
8529 struct md_rdev *rdev;
8530
8531
8532 md_unregister_thread(&mddev->sync_thread);
8533 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8534 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8535
8536
8537 if (mddev->pers->spare_active(mddev)) {
8538 sysfs_notify(&mddev->kobj, NULL,
8539 "degraded");
8540 set_bit(MD_CHANGE_DEVS, &mddev->flags);
8541 }
8542 }
8543 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8544 mddev->pers->finish_reshape)
8545 mddev->pers->finish_reshape(mddev);
8546
8547
8548
8549
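/* If the array is no longer degraded, any saved_raid_disk information
 * is stale and must be discarded.
 */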
8550 if (!mddev->degraded)
8551 rdev_for_each(rdev, mddev)
8552 rdev->saved_raid_disk = -1;
8553
8554 md_update_sb(mddev, 1);
8555
8556
8557
8558 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
8559 md_cluster_ops->resync_finish(mddev);
8560 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8561 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8562 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8563 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8564 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8565 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8566 wake_up(&resync_wait);
8567
8568 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8569 sysfs_notify_dirent_safe(mddev->sysfs_action);
8570 md_new_event(mddev);
8571 if (mddev->event_work.func)
8572 queue_work(md_misc_wq, &mddev->event_work);
8573}
8574EXPORT_SYMBOL(md_reap_sync_thread);
8575
8576void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8577{
8578 sysfs_notify_dirent_safe(rdev->sysfs_state);
8579 wait_event_timeout(rdev->blocked_wait,
8580 !test_bit(Blocked, &rdev->flags) &&
8581 !test_bit(BlockedBadBlocks, &rdev->flags),
8582 msecs_to_jiffies(5000));
8583 rdev_dec_pending(rdev, mddev);
8584}
8585EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8586
8587void md_finish_reshape(struct mddev *mddev)
8588{
8589
8590 struct md_rdev *rdev;
8591
8592 rdev_for_each(rdev, mddev) {
8593 if (rdev->data_offset > rdev->new_data_offset)
8594 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8595 else
8596 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8597 rdev->data_offset = rdev->new_data_offset;
8598 }
8599}
8600EXPORT_SYMBOL(md_finish_reshape);
8601
8602
8603
8604
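/* Bad-block management: record a range of bad sectors on an rdev.
 * Returns 1 if the range was recorded (and a metadata update
 * scheduled), 0 if the bad-block list is full and the device has to
 * be failed instead.
 */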
8605int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8606 int is_new)
8607{
8608 struct mddev *mddev = rdev->mddev;
8609 int rv;
8610 if (is_new)
8611 s += rdev->new_data_offset;
8612 else
8613 s += rdev->data_offset;
8614 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8615 if (rv == 0) {
8616
8617 sysfs_notify_dirent_safe(rdev->sysfs_state);
8618 set_mask_bits(&mddev->flags, 0,
8619 BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
8620 md_wakeup_thread(rdev->mddev->thread);
8621 return 1;
8622 } else
8623 return 0;
8624}
8625EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8626
8627int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8628 int is_new)
8629{
8630 if (is_new)
8631 s += rdev->new_data_offset;
8632 else
8633 s += rdev->data_offset;
8634 return badblocks_clear(&rdev->badblocks,
8635 s, sectors);
8636}
8637EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8638
8639static int md_notify_reboot(struct notifier_block *this,
8640 unsigned long code, void *x)
8641{
8642 struct list_head *tmp;
8643 struct mddev *mddev;
8644 int need_delay = 0;
8645
8646 for_each_mddev(mddev, tmp) {
8647 if (mddev_trylock(mddev)) {
8648 if (mddev->pers)
8649 __md_stop_writes(mddev);
8650 if (mddev->persistent)
8651 mddev->safemode = 2;
8652 mddev_unlock(mddev);
8653 }
8654 need_delay = 1;
8655 }
8656
8657
8658
8659
8660
8661
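/* Some devices are known to lose data if the system reboots too soon
 * after they are told to stop; give stopped arrays a moment to settle
 * before the reboot proceeds.
 */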
8662 if (need_delay)
8663 mdelay(1000);
8664
8665 return NOTIFY_DONE;
8666}
8667
8668static struct notifier_block md_notifier = {
8669 .notifier_call = md_notify_reboot,
8670 .next = NULL,
8671 .priority = INT_MAX,
8672};
8673
8674static void md_geninit(void)
8675{
8676 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8677
8678 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8679}
8680
8681static int __init md_init(void)
8682{
8683 int ret = -ENOMEM;
8684
8685 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8686 if (!md_wq)
8687 goto err_wq;
8688
8689 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8690 if (!md_misc_wq)
8691 goto err_misc_wq;
8692
8693 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8694 goto err_md;
8695
8696 if ((ret = register_blkdev(0, "mdp")) < 0)
8697 goto err_mdp;
8698 mdp_major = ret;
8699
8700 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8701 md_probe, NULL, NULL);
8702 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8703 md_probe, NULL, NULL);
8704
8705 register_reboot_notifier(&md_notifier);
8706 raid_table_header = register_sysctl_table(raid_root_table);
8707
8708 md_geninit();
8709 return 0;
8710
8711err_mdp:
8712 unregister_blkdev(MD_MAJOR, "md");
8713err_md:
8714 destroy_workqueue(md_misc_wq);
8715err_misc_wq:
8716 destroy_workqueue(md_wq);
8717err_wq:
8718 return ret;
8719}
8720
8721static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8722{
8723 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8724 struct md_rdev *rdev2;
8725 int role, ret;
8726 char b[BDEVNAME_SIZE];
8727
8728
8729 rdev_for_each(rdev2, mddev) {
8730 if (test_bit(Faulty, &rdev2->flags))
8731 continue;
8732
8733
8734 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8735
8736 if (test_bit(Candidate, &rdev2->flags)) {
8737 if (role == 0xfffe) {
8738 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8739 md_kick_rdev_from_array(rdev2);
8740 continue;
8741 }
8742 else
8743 clear_bit(Candidate, &rdev2->flags);
8744 }
8745
8746 if (role != rdev2->raid_disk) {
8747
8748 if (rdev2->raid_disk == -1 && role != 0xffff) {
8749 rdev2->saved_raid_disk = role;
8750 ret = remove_and_add_spares(mddev, rdev2);
8751 pr_info("Activated spare: %s\n",
8752 bdevname(rdev2->bdev,b));
8753
8754
8755 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8756 md_wakeup_thread(mddev->thread);
8757
8758 }
8759
8760
8761
8762
8763
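/* The device was failed on another node; do only the minimum needed
 * to mark it faulty locally, since the node that initiated the error
 * performs the recovery.
 */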
8764 if ((role == 0xfffe) || (role == 0xfffd)) {
8765 md_error(mddev, rdev2);
8766 clear_bit(Blocked, &rdev2->flags);
8767 }
8768 }
8769 }
8770
8771 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8772 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8773
8774
8775 mddev->events = le64_to_cpu(sb->events);
8776}
8777
8778static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8779{
8780 int err;
8781 struct page *swapout = rdev->sb_page;
8782 struct mdp_superblock_1 *sb;
8783
8784
8785
8786
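/* Keep the old superblock page in 'swapout' so it can be restored if
 * re-reading the superblock fails.
 */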
8787 rdev->sb_page = NULL;
8788 alloc_disk_sb(rdev);
8789 ClearPageUptodate(rdev->sb_page);
8790 rdev->sb_loaded = 0;
8791 err = super_types[mddev->major_version].load_super(rdev, NULL, mddev->minor_version);
8792
8793 if (err < 0) {
8794 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8795 __func__, __LINE__, rdev->desc_nr, err);
8796 put_page(rdev->sb_page);
8797 rdev->sb_page = swapout;
8798 rdev->sb_loaded = 1;
8799 return err;
8800 }
8801
8802 sb = page_address(rdev->sb_page);
8803
8804
8805
8806
8807 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8808 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8809
8810
8811
8812
8813 if (rdev->recovery_offset == MaxSector &&
8814 !test_bit(In_sync, &rdev->flags) &&
8815 mddev->pers->spare_active(mddev))
8816 sysfs_notify(&mddev->kobj, NULL, "degraded");
8817
8818 put_page(swapout);
8819 return 0;
8820}
8821
8822void md_reload_sb(struct mddev *mddev, int nr)
8823{
8824 struct md_rdev *rdev;
8825 int err;
8826
8827
8828 rdev_for_each_rcu(rdev, mddev) {
8829 if (rdev->desc_nr == nr)
8830 break;
8831 }
8832
8833 if (!rdev || rdev->desc_nr != nr) {
8834 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8835 return;
8836 }
8837
8838 err = read_rdev(mddev, rdev);
8839 if (err < 0)
8840 return;
8841
8842 check_sb_changes(mddev, rdev);
8843
8844
8845 rdev_for_each_rcu(rdev, mddev)
8846 read_rdev(mddev, rdev);
8847}
8848EXPORT_SYMBOL(md_reload_sb);
8849
8850#ifndef MODULE
8851
8852
8853
8854
8855
8856
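/*
 * Devices noted as possible RAID members during boot are queued on
 * this list and later assembled by autostart_arrays().
 */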
8857static DEFINE_MUTEX(detected_devices_mutex);
8858static LIST_HEAD(all_detected_devices);
8859struct detected_devices_node {
8860 struct list_head list;
8861 dev_t dev;
8862};
8863
8864void md_autodetect_dev(dev_t dev)
8865{
8866 struct detected_devices_node *node_detected_dev;
8867
8868 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8869 if (node_detected_dev) {
8870 node_detected_dev->dev = dev;
8871 mutex_lock(&detected_devices_mutex);
8872 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8873 mutex_unlock(&detected_devices_mutex);
8874 } else {
8875 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed, skipping dev(%d,%d)\n",
8876 MAJOR(dev), MINOR(dev));
8877 }
8878}
8879
8880static void autostart_arrays(int part)
8881{
8882 struct md_rdev *rdev;
8883 struct detected_devices_node *node_detected_dev;
8884 dev_t dev;
8885 int i_scanned, i_passed;
8886
8887 i_scanned = 0;
8888 i_passed = 0;
8889
8890 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8891
8892 mutex_lock(&detected_devices_mutex);
8893 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8894 i_scanned++;
8895 node_detected_dev = list_entry(all_detected_devices.next,
8896 struct detected_devices_node, list);
8897 list_del(&node_detected_dev->list);
8898 dev = node_detected_dev->dev;
8899 kfree(node_detected_dev);
8900 mutex_unlock(&detected_devices_mutex);
8901 rdev = md_import_device(dev, 0, 90);
8902 mutex_lock(&detected_devices_mutex);
8903 if (IS_ERR(rdev))
8904 continue;
8905
8906 if (test_bit(Faulty, &rdev->flags))
8907 continue;
8908
8909 set_bit(AutoDetected, &rdev->flags);
8910 list_add(&rdev->same_set, &pending_raid_disks);
8911 i_passed++;
8912 }
8913 mutex_unlock(&detected_devices_mutex);
8914
8915 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8916 i_scanned, i_passed);
8917
8918 autorun_devices(part);
8919}
8920
8921#endif
8922
8923static __exit void md_exit(void)
8924{
8925 struct mddev *mddev;
8926 struct list_head *tmp;
8927 int delay = 1;
8928
8929 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8930 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8931
8932 unregister_blkdev(MD_MAJOR,"md");
8933 unregister_blkdev(mdp_major, "mdp");
8934 unregister_reboot_notifier(&md_notifier);
8935 unregister_sysctl_table(raid_table_header);
8936
8937
8938
8939
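/* We cannot unload while some process is waiting in poll() on
 * /proc/mdstat: wake any such waiters, backing off exponentially,
 * until they have all gone away.
 */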
8940 md_unloading = 1;
8941 while (waitqueue_active(&md_event_waiters)) {
8942
8943 wake_up(&md_event_waiters);
8944 msleep(delay);
8945 delay += delay;
8946 }
8947 remove_proc_entry("mdstat", NULL);
8948
8949 for_each_mddev(mddev, tmp) {
8950 export_array(mddev);
8951 mddev->hold_active = 0;
8952 }
8953 destroy_workqueue(md_misc_wq);
8954 destroy_workqueue(md_wq);
8955}
8956
8957subsys_initcall(md_init);
8958module_exit(md_exit)
8959
8960static int get_ro(char *buffer, struct kernel_param *kp)
8961{
8962 return sprintf(buffer, "%d", start_readonly);
8963}
8964static int set_ro(const char *val, struct kernel_param *kp)
8965{
8966 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8967}
8968
8969module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8970module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8971module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8972
8973MODULE_LICENSE("GPL");
8974MODULE_DESCRIPTION("MD RAID framework");
8975MODULE_ALIAS("md");
8976MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8977