#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif
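
/*
 * pers_list holds the registered personalities (raid levels) and is
 * protected by pers_lock.  pers_lock is also taken around waking
 * mddev->thread in mddev_unlock(), so the thread cannot disappear
 * while being woken.
 */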
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
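
/*
 * Default maximum number of corrected read errors tolerated on an
 * rdev before it is ejected from the array.
 */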
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
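
/*
 * Resync/reconstruction speed throttling, in KB/sec per device.
 * speed_limit_min is the guaranteed floor even when the array is busy;
 * speed_limit_max is the ceiling used when the array is otherwise idle.
 * Both are tunable via /proc/sys/dev/raid/ (see raid_table below) and
 * can be overridden per-array through sync_speed_{min,max}.
 */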
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;
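
/*
 * Like bio_alloc(), but allocates from the array's private bio_set
 * when one exists, falling back to the global pool otherwise.
 */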
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	struct bio *b;

	if (!mddev || !mddev->bio_set)
		return bio_alloc(gfp_mask, nr_iovecs);

	b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
	if (!b)
		return NULL;
	return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
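
/*
 * A system-wide event counter: md_new_event() bumps it and wakes any
 * pollers of /proc/mdstat so user space can notice interesting events
 * (device failures, spares activated, resync start/stop, ...).
 */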
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/* All active md arrays in the system, protected by all_mddevs_lock. */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
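
/*
 * Iterate over all mddevs in the system.  The list lock is dropped
 * while the loop body runs, so a reference is held on the current
 * mddev (and dropped as we advance), making it safe for the body
 * to sleep.
 */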
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
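
/*
 * Entry point for all array I/O.  Rather than calling directly into
 * the personality's make_request, requests come through here first so
 * we can check whether the device is fully suspended, which must be
 * honoured before any I/O is allowed to proceed.
 */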
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;
	int cpu;

	blk_queue_split(q, &bio, q->bio_split);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_error = -EROFS;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}
	smp_rmb(); /* Ensure implications of 'suspended' are visible */
	rcu_read_lock();
	if (mddev->suspended) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!mddev->suspended)
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	/*
	 * Save the sector count now, since the bio can go away inside
	 * make_request and we need it for the accounting below.
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlying device */
	bio->bi_opf &= ~REQ_NOMERGE;
	mddev->pers->make_request(mddev, bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
	part_stat_unlock();

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);

	return BLK_QC_T_NONE;
}
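
/*
 * mddev_suspend() makes sure no new requests are submitted to the
 * device, and that any requests already submitted are completely
 * handled before it returns; mddev_resume() undoes it.
 */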
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}
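
/*
 * Generic flush handling for md: a PREFLUSH is broadcast to every
 * active component device, and the data (if any) is only passed down
 * once all of those flushes have completed.
 */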
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bi->bi_bdev = rdev->bdev;
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	if (bio->bi_iter.bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio);
	else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		mddev->pers->make_request(mddev, bio);
	}

	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio,
			    mddev->lock);
	mddev->flush_bio = bio;
	spin_unlock_irq(&mddev->lock);

	INIT_WORK(&mddev->flush_work, submit_flushes);
	queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	struct bio_set *bs = NULL;

	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);
		bs = mddev->bio_set;
		mddev->bio_set = NULL;
		if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
	spin_unlock(&all_mddevs_lock);
	if (bs)
		bioset_free(bs);
}

static void md_safemode_timeout(unsigned long data);

void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
		    (unsigned long) mddev);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These sysfs groups cannot be removed while holding
		 * reconfig_mutex, as that could deadlock against sysfs
		 * accesses that take the mutex.  So drop the mutex first,
		 * with sysfs_active set to keep other reconfiguration
		 * away while the groups are being removed.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel) == 0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_error) {
		pr_err("md: super_written gets error=%d\n", bio->bi_error);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);

	atomic_inc(&rdev->nr_pending);

	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}

int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
	int ret;

	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
		rdev->meta_bdev : rdev->bdev;
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_error;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev, b));
	return -EINVAL;
}

static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1), GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2), GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32 *)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which gives architecture-
	 * dependent results, so it was replaced by the plain sum above.
	 * For compatibility with superblocks written by older kernels,
	 * Alpha keeps the old behaviour of storing the folded checksum.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}
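
/*
 * Handle superblock details.
 * Each on-disk superblock format provides this common set of methods,
 * so new formats can be added as further entries in super_types[]:
 *
 *   load_super:       read the superblock from the device and, when a
 *                     refdev is given, check it belongs to the same
 *                     array; returns 1 if this device's copy is newest.
 *   validate_super:   sanity-check the superblock and copy its contents
 *                     into the mddev (or just this rdev when the array
 *                     is already set up).
 *   sync_super:       regenerate the in-memory superblock image from
 *                     current mddev/rdev state.
 *   rdev_size_change: adjust the usable size of the device, relocating
 *                     the superblock if the format stores it at the end.
 *   allow_new_offset: check whether the data may move to new_offset.
 */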
struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);
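
/*
 * load_super for 0.90.0: the superblock is a 4K-aligned block at the
 * end of the device.
 */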
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		if (!sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
	    sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}
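
/*
 * validate_super for 0.90.0: fill the mddev from a freshly loaded
 * superblock, or just (re)validate this rdev against a running array.
 */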
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}
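
/*
 * sync_super for 0.90.0: regenerate rdev->sb from current mddev state.
 */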
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0, failed=0, spare=0, nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words  = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}
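
/*
 * rdev_size_change for 0.90.0: the superblock sits at the end of the
 * device, so growing the data area means relocating the superblock.
 */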
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
	    rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}
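
/*
 * version 1 superblock
 *
 * All numeric fields are little-endian.  The checksum covers the whole
 * superblock, including the device role table, with sb_csum zeroed.
 */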
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32 *)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16 *) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;
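
	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */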
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* The superblock (plus device role table) fits in 4K at most,
	 * so read the whole block.
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new format */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;

	/* make rdev->sb match mddev and rdev data. */
	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks*/ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;

}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata:
	 * the superblock (plus some slack), the bitmap, and the
	 * bad-block log must all still fit below the new data offset.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};

static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

/* Return 1 if the two arrays share a component device, i.e. both have
 * an active, non-journal rdev on the same underlying whole disk.
 */
static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);
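
/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */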
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
		pr_err("md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_rdev;
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_rdev = bdev_get_integrity(rdev->bdev);
	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		pr_err("%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
	rcu_read_lock();
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers)
			choice = mddev->raid_disks;
		while (md_find_rdev_nr_rcu(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
			rcu_read_unlock();
			return -EBUSY;
		}
	}
	rcu_read_unlock();
	if (!test_bit(Journal, &rdev->flags) &&
	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		pr_warn("md: %s: array is limited to %d devices\n",
			mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev,b);
	strreplace(b, '/', '!');

	rdev->mddev = mddev;
	pr_debug("md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled++;

	return 0;

 fail:
	pr_warn("md: failed to register dev-%s for %s\n",
		b, mdname(mddev));
	return err;
}

static void md_delayed_delete(struct work_struct *ws)
{
	struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
	kobject_del(&rdev->kobj);
	kobject_put(&rdev->kobj);
}

static void unbind_rdev_from_array(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	rdev->badblocks.count = 0;
	/* We need to delay the kobject removal, otherwise we can
	 * deadlock when writing "remove" to dev/state.  We also need
	 * to delay it due to rcu usage.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}
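
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */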
static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
	int err = 0;
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 shared ? (struct md_rdev *)lock_rdev : rdev);
	if (IS_ERR(bdev)) {
		pr_warn("md: could not open %s.\n", __bdevname(dev, b));
		return PTR_ERR(bdev);
	}
	rdev->bdev = bdev;
	return err;
}

static void unlock_rdev(struct md_rdev *rdev)
{
	struct block_device *bdev = rdev->bdev;
	rdev->bdev = NULL;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void md_autodetect_dev(dev_t dev);

static void export_rdev(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
	md_rdev_clear(rdev);
#ifndef MODULE
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}

void md_kick_rdev_from_array(struct md_rdev *rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);

static void export_array(struct mddev *mddev)
{
	struct md_rdev *rdev;

	while (!list_empty(&mddev->disks)) {
		rdev = list_first_entry(&mddev->disks, struct md_rdev,
					same_set);
		md_kick_rdev_from_array(rdev);
	}
	mddev->raid_disks = 0;
	mddev->major_version = 0;
}

static void sync_sbs(struct mddev *mddev, int nospares)
{
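	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */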
	struct md_rdev *rdev;
	rdev_for_each(rdev, mddev) {
		if (rdev->sb_events == mddev->events ||
		    (nospares &&
		     rdev->raid_disk < 0 &&
		     rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
			rdev->sb_loaded = 2;
		} else {
			sync_super(mddev, rdev);
			rdev->sb_loaded = 1;
		}
	}
}

static bool does_sb_need_changing(struct mddev *mddev)
{
	struct md_rdev *rdev;
	struct mdp_superblock_1 *sb;
	int role;

	/* Find a good rdev */
	rdev_for_each(rdev, mddev)
		if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
			break;

	/* No good device found. */
	if (!rdev)
		return false;

	sb = page_address(rdev->sb_page);
	/* Check if a device has become faulty or a spare become active */
	rdev_for_each(rdev, mddev) {
		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
		if (role == 0xffff && rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags))
			return true;
		/* Device turned faulty? */
		if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
			return true;
	}

	/* Check if any mddev parameters have changed */
	if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
	    (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
	    (mddev->layout != le32_to_cpu(sb->layout)) ||
	    (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
	    (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
		return true;

	return false;
}
2297
2298void md_update_sb(struct mddev *mddev, int force_change)
2299{
2300 struct md_rdev *rdev;
2301 int sync_req;
2302 int nospares = 0;
2303 int any_badblocks_changed = 0;
2304 int ret = -1;
2305
2306 if (mddev->ro) {
2307 if (force_change)
2308 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2309 return;
2310 }
2311
2312repeat:
2313 if (mddev_is_clustered(mddev)) {
2314 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2315 force_change = 1;
2316 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2317 nospares = 1;
		ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has someone else updated the sb? */
		if (!does_sb_need_changing(mddev)) {
2321 if (ret == 0)
2322 md_cluster_ops->metadata_update_cancel(mddev);
2323 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2324 BIT(MD_SB_CHANGE_DEVS) |
2325 BIT(MD_SB_CHANGE_CLEAN));
2326 return;
2327 }
	}

	/* First make sure individual recovery_offsets are correct */
	rdev_for_each(rdev, mddev) {
2332 if (rdev->raid_disk >= 0 &&
2333 mddev->delta_disks >= 0 &&
2334 !test_bit(Journal, &rdev->flags) &&
2335 !test_bit(In_sync, &rdev->flags) &&
2336 mddev->curr_resync_completed > rdev->recovery_offset)
2337 rdev->recovery_offset = mddev->curr_resync_completed;
2338
2339 }
2340 if (!mddev->persistent) {
2341 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2342 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2343 if (!mddev->external) {
2344 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2345 rdev_for_each(rdev, mddev) {
2346 if (rdev->badblocks.changed) {
2347 rdev->badblocks.changed = 0;
2348 ack_all_badblocks(&rdev->badblocks);
2349 md_error(mddev, rdev);
2350 }
2351 clear_bit(Blocked, &rdev->flags);
2352 clear_bit(BlockedBadBlocks, &rdev->flags);
2353 wake_up(&rdev->blocked_wait);
2354 }
2355 }
2356 wake_up(&mddev->sb_wait);
2357 return;
2358 }
2359
2360 spin_lock(&mddev->lock);
2361
2362 mddev->utime = ktime_get_real_seconds();
2363
2364 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2365 force_change = 1;
	if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
		nospares = 1;
2372 if (force_change)
2373 nospares = 0;
	if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event_count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Pointless because if there are any spares to skip,
		 * then a recovery will happen and soon that array won't
		 * be degraded any more and the spare can go back to sleep then.
		 */
		nospares = 0;
2385
	sync_req = mddev->in_sync;

	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
	if (nospares
2391 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2392 && mddev->can_decrease_events
2393 && mddev->events != 1) {
2394 mddev->events--;
2395 mddev->can_decrease_events = 0;
	} else {
		/* otherwise we have to go forward and ... */
		mddev->events++;
2399 mddev->can_decrease_events = nospares;
	}

	/*
	 * This 64-bit counter should never wrap.
	 * Either we are in around ~1 trillion A.C., assuming
	 * 1 reboot per second, or we have a bug...
	 */
	WARN_ON(mddev->events == 0);
2408
2409 rdev_for_each(rdev, mddev) {
2410 if (rdev->badblocks.changed)
2411 any_badblocks_changed++;
2412 if (test_bit(Faulty, &rdev->flags))
2413 set_bit(FaultRecorded, &rdev->flags);
2414 }
2415
2416 sync_sbs(mddev, nospares);
2417 spin_unlock(&mddev->lock);
2418
2419 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2420 mdname(mddev), mddev->in_sync);
2421
2422 if (mddev->queue)
2423 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2424rewrite:
2425 bitmap_update_sb(mddev->bitmap);
2426 rdev_for_each(rdev, mddev) {
2427 char b[BDEVNAME_SIZE];
2428
2429 if (rdev->sb_loaded != 1)
2430 continue;
2431
2432 if (!test_bit(Faulty, &rdev->flags)) {
2433 md_super_write(mddev,rdev,
2434 rdev->sb_start, rdev->sb_size,
2435 rdev->sb_page);
2436 pr_debug("md: (write) %s's sb offset: %llu\n",
2437 bdevname(rdev->bdev, b),
2438 (unsigned long long)rdev->sb_start);
2439 rdev->sb_events = mddev->events;
2440 if (rdev->badblocks.size) {
2441 md_super_write(mddev, rdev,
2442 rdev->badblocks.sector,
2443 rdev->badblocks.size << 9,
2444 rdev->bb_page);
2445 rdev->badblocks.size = 0;
2446 }
2447
2448 } else
2449 pr_debug("md: %s (skipping faulty)\n",
2450 bdevname(rdev->bdev, b));
2451
		if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
			break;
2455 }
	if (md_super_wait(mddev) < 0)
		goto rewrite;
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */

2460 if (mddev_is_clustered(mddev) && ret == 0)
2461 md_cluster_ops->metadata_update_finish(mddev);
2462
2463 if (mddev->in_sync != sync_req ||
2464 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
			       BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* have to write it out again */
		goto repeat;
2468 wake_up(&mddev->sb_wait);
2469 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2470 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2471
2472 rdev_for_each(rdev, mddev) {
2473 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2474 clear_bit(Blocked, &rdev->flags);
2475
2476 if (any_badblocks_changed)
2477 ack_all_badblocks(&rdev->badblocks);
2478 clear_bit(BlockedBadBlocks, &rdev->flags);
2479 wake_up(&rdev->blocked_wait);
2480 }
2481}
2482EXPORT_SYMBOL(md_update_sb);
2483
2484static int add_bound_rdev(struct md_rdev *rdev)
2485{
2486 struct mddev *mddev = rdev->mddev;
2487 int err = 0;
2488 bool add_journal = test_bit(Journal, &rdev->flags);
2489
	if (!mddev->pers->hot_remove_disk || add_journal) {
		/* If there is hot_add_disk but no hot_remove_disk
		 * then added disks for geometry changes,
		 * and should be added immediately.
		 */
		super_types[mddev->major_version].
2496 validate_super(mddev, rdev);
2497 if (add_journal)
2498 mddev_suspend(mddev);
2499 err = mddev->pers->hot_add_disk(mddev, rdev);
2500 if (add_journal)
2501 mddev_resume(mddev);
2502 if (err) {
2503 md_kick_rdev_from_array(rdev);
2504 return err;
2505 }
2506 }
2507 sysfs_notify_dirent_safe(rdev->sysfs_state);
2508
2509 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2510 if (mddev->degraded)
2511 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2512 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2513 md_new_event(mddev);
2514 md_wakeup_thread(mddev->thread);
2515 return 0;
}

/* Words written to sysfs files may, or may not, be \n terminated.
 * We want to accept either; for this we use cmd_match().
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline.
	 */
2527 while (*cmd && *str && *cmd == *str) {
2528 cmd++;
2529 str++;
2530 }
2531 if (*cmd == '\n')
2532 cmd++;
2533 if (*str || *cmd)
2534 return 0;
2535 return 1;
2536}
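
/*
 * Illustrative behaviour of cmd_match() (examples added for clarity,
 * not part of the original source):
 *   cmd_match("check\n", "check") -> 1  (trailing newline accepted)
 *   cmd_match("check", "check")   -> 1
 *   cmd_match("checked", "check") -> 0  (trailing characters rejected)
 */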
2537
2538struct rdev_sysfs_entry {
2539 struct attribute attr;
2540 ssize_t (*show)(struct md_rdev *, char *);
2541 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2542};
2543
2544static ssize_t
2545state_show(struct md_rdev *rdev, char *page)
2546{
2547 char *sep = ",";
2548 size_t len = 0;
2549 unsigned long flags = ACCESS_ONCE(rdev->flags);
2550
2551 if (test_bit(Faulty, &flags) ||
2552 (!test_bit(ExternalBbl, &flags) &&
2553 rdev->badblocks.unacked_exist))
2554 len += sprintf(page+len, "faulty%s", sep);
2555 if (test_bit(In_sync, &flags))
2556 len += sprintf(page+len, "in_sync%s", sep);
2557 if (test_bit(Journal, &flags))
2558 len += sprintf(page+len, "journal%s", sep);
2559 if (test_bit(WriteMostly, &flags))
2560 len += sprintf(page+len, "write_mostly%s", sep);
2561 if (test_bit(Blocked, &flags) ||
2562 (rdev->badblocks.unacked_exist
2563 && !test_bit(Faulty, &flags)))
2564 len += sprintf(page+len, "blocked%s", sep);
2565 if (!test_bit(Faulty, &flags) &&
2566 !test_bit(Journal, &flags) &&
2567 !test_bit(In_sync, &flags))
2568 len += sprintf(page+len, "spare%s", sep);
2569 if (test_bit(WriteErrorSeen, &flags))
2570 len += sprintf(page+len, "write_error%s", sep);
2571 if (test_bit(WantReplacement, &flags))
2572 len += sprintf(page+len, "want_replacement%s", sep);
2573 if (test_bit(Replacement, &flags))
2574 len += sprintf(page+len, "replacement%s", sep);
2575 if (test_bit(ExternalBbl, &flags))
2576 len += sprintf(page+len, "external_bbl%s", sep);
2577 if (test_bit(FailFast, &flags))
2578 len += sprintf(page+len, "failfast%s", sep);
2579
2580 if (len)
2581 len -= strlen(sep);
2582
2583 return len+sprintf(page+len, "\n");
2584}
2585
2586static ssize_t
2587state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	/* can write
	 *  faulty  - simulates an error on the device
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flags
	 *  -blocked - clears the Blocked and possibly simulates an error
	 *  insync - sets Insync providing device isn't active
	 *  -insync - clear Insync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 *  {,-}failfast - set/clear FailFast
	 */
	int err = -EINVAL;
2604 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2605 md_error(rdev->mddev, rdev);
2606 if (test_bit(Faulty, &rdev->flags))
2607 err = 0;
2608 else
2609 err = -EBUSY;
2610 } else if (cmd_match(buf, "remove")) {
2611 if (rdev->mddev->pers) {
2612 clear_bit(Blocked, &rdev->flags);
2613 remove_and_add_spares(rdev->mddev, rdev);
2614 }
2615 if (rdev->raid_disk >= 0)
2616 err = -EBUSY;
2617 else {
2618 struct mddev *mddev = rdev->mddev;
2619 err = 0;
2620 if (mddev_is_clustered(mddev))
2621 err = md_cluster_ops->remove_disk(mddev, rdev);
2622
2623 if (err == 0) {
2624 md_kick_rdev_from_array(rdev);
2625 if (mddev->pers) {
2626 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2627 md_wakeup_thread(mddev->thread);
2628 }
2629 md_new_event(mddev);
2630 }
2631 }
2632 } else if (cmd_match(buf, "writemostly")) {
2633 set_bit(WriteMostly, &rdev->flags);
2634 err = 0;
2635 } else if (cmd_match(buf, "-writemostly")) {
2636 clear_bit(WriteMostly, &rdev->flags);
2637 err = 0;
2638 } else if (cmd_match(buf, "blocked")) {
2639 set_bit(Blocked, &rdev->flags);
2640 err = 0;
2641 } else if (cmd_match(buf, "-blocked")) {
2642 if (!test_bit(Faulty, &rdev->flags) &&
2643 !test_bit(ExternalBbl, &rdev->flags) &&
		    rdev->badblocks.unacked_exist) {
			/* metadata handler doesn't understand badblocks,
			 * so we need to fail the device
			 */
			md_error(rdev->mddev, rdev);
2649 }
2650 clear_bit(Blocked, &rdev->flags);
2651 clear_bit(BlockedBadBlocks, &rdev->flags);
2652 wake_up(&rdev->blocked_wait);
2653 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2654 md_wakeup_thread(rdev->mddev->thread);
2655
2656 err = 0;
2657 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2658 set_bit(In_sync, &rdev->flags);
2659 err = 0;
2660 } else if (cmd_match(buf, "failfast")) {
2661 set_bit(FailFast, &rdev->flags);
2662 err = 0;
2663 } else if (cmd_match(buf, "-failfast")) {
2664 clear_bit(FailFast, &rdev->flags);
2665 err = 0;
2666 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2667 !test_bit(Journal, &rdev->flags)) {
2668 if (rdev->mddev->pers == NULL) {
2669 clear_bit(In_sync, &rdev->flags);
2670 rdev->saved_raid_disk = rdev->raid_disk;
2671 rdev->raid_disk = -1;
2672 err = 0;
2673 }
2674 } else if (cmd_match(buf, "write_error")) {
2675 set_bit(WriteErrorSeen, &rdev->flags);
2676 err = 0;
2677 } else if (cmd_match(buf, "-write_error")) {
2678 clear_bit(WriteErrorSeen, &rdev->flags);
2679 err = 0;
	} else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
		if (rdev->raid_disk >= 0 &&
2686 !test_bit(Journal, &rdev->flags) &&
2687 !test_bit(Replacement, &rdev->flags))
2688 set_bit(WantReplacement, &rdev->flags);
2689 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2690 md_wakeup_thread(rdev->mddev->thread);
2691 err = 0;
	} else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacement starts it is too late though.
		 */
		err = 0;
2697 clear_bit(WantReplacement, &rdev->flags);
	} else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, or by assigning 'slot'.
		 */
		if (rdev->mddev->pers)
2704 err = -EBUSY;
2705 else {
2706 set_bit(Replacement, &rdev->flags);
2707 err = 0;
2708 }
	} else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
		if (rdev->mddev->pers)
2712 err = -EBUSY;
2713 else {
2714 clear_bit(Replacement, &rdev->flags);
2715 err = 0;
2716 }
2717 } else if (cmd_match(buf, "re-add")) {
		if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
			/* clear_bit is performed _after_ all the devices
			 * have their local Faulty bit cleared. If any writes
			 * happen in the meantime in the local node, they
			 * will land in the local bitmap, which will be synced
			 * by this node eventually
			 */
			if (!mddev_is_clustered(rdev->mddev) ||
2726 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2727 clear_bit(Faulty, &rdev->flags);
2728 err = add_bound_rdev(rdev);
2729 }
2730 } else
2731 err = -EBUSY;
2732 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2733 set_bit(ExternalBbl, &rdev->flags);
2734 rdev->badblocks.shift = 0;
2735 err = 0;
2736 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2737 clear_bit(ExternalBbl, &rdev->flags);
2738 err = 0;
2739 }
2740 if (!err)
2741 sysfs_notify_dirent_safe(rdev->sysfs_state);
2742 return err ? err : len;
2743}
2744static struct rdev_sysfs_entry rdev_state =
2745__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
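
/*
 * Illustrative usage from userspace (md0 and dev-sda1 are placeholder
 * names; rdev attributes appear under /sys/block/<md>/md/dev-<name>/):
 *   cat /sys/block/md0/md/dev-sda1/state           -> e.g. "in_sync"
 *   echo faulty > /sys/block/md0/md/dev-sda1/state
 *   echo remove > /sys/block/md0/md/dev-sda1/state
 */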
2746
2747static ssize_t
2748errors_show(struct md_rdev *rdev, char *page)
2749{
2750 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2751}
2752
2753static ssize_t
2754errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2755{
2756 unsigned int n;
2757 int rv;
2758
2759 rv = kstrtouint(buf, 10, &n);
2760 if (rv < 0)
2761 return rv;
2762 atomic_set(&rdev->corrected_errors, n);
2763 return len;
2764}
2765static struct rdev_sysfs_entry rdev_errors =
2766__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2767
2768static ssize_t
2769slot_show(struct md_rdev *rdev, char *page)
2770{
2771 if (test_bit(Journal, &rdev->flags))
2772 return sprintf(page, "journal\n");
2773 else if (rdev->raid_disk < 0)
2774 return sprintf(page, "none\n");
2775 else
2776 return sprintf(page, "%d\n", rdev->raid_disk);
2777}
2778
2779static ssize_t
2780slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2781{
2782 int slot;
2783 int err;
2784
2785 if (test_bit(Journal, &rdev->flags))
2786 return -EBUSY;
2787 if (strncmp(buf, "none", 4)==0)
2788 slot = -1;
2789 else {
2790 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2791 if (err < 0)
2792 return err;
2793 }
	if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
		if (rdev->raid_disk == -1)
			return -EEXIST;
		/* personality does all needed checks */
		if (rdev->mddev->pers->hot_remove_disk == NULL)
			return -EINVAL;
2807 clear_bit(Blocked, &rdev->flags);
2808 remove_and_add_spares(rdev->mddev, rdev);
2809 if (rdev->raid_disk >= 0)
2810 return -EBUSY;
2811 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2812 md_wakeup_thread(rdev->mddev->thread);
	} else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
		int err;
2818
2819 if (rdev->raid_disk != -1)
2820 return -EBUSY;
2821
2822 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2823 return -EBUSY;
2824
2825 if (rdev->mddev->pers->hot_add_disk == NULL)
2826 return -EINVAL;
2827
2828 if (slot >= rdev->mddev->raid_disks &&
2829 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2830 return -ENOSPC;
2831
2832 rdev->raid_disk = slot;
2833 if (test_bit(In_sync, &rdev->flags))
2834 rdev->saved_raid_disk = slot;
2835 else
2836 rdev->saved_raid_disk = -1;
2837 clear_bit(In_sync, &rdev->flags);
2838 clear_bit(Bitmap_sync, &rdev->flags);
2839 err = rdev->mddev->pers->
2840 hot_add_disk(rdev->mddev, rdev);
2841 if (err) {
2842 rdev->raid_disk = -1;
2843 return err;
2844 } else
2845 sysfs_notify_dirent_safe(rdev->sysfs_state);
		if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
2849 } else {
2850 if (slot >= rdev->mddev->raid_disks &&
2851 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2852 return -ENOSPC;
		rdev->raid_disk = slot;
		/* assume it is working */
		clear_bit(Faulty, &rdev->flags);
2856 clear_bit(WriteMostly, &rdev->flags);
2857 set_bit(In_sync, &rdev->flags);
2858 sysfs_notify_dirent_safe(rdev->sysfs_state);
2859 }
2860 return len;
2861}
2862
2863static struct rdev_sysfs_entry rdev_slot =
2864__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
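
/*
 * Illustrative usage (placeholder names): assign a device to slot 2 of
 * an inactive array, or detach a failed/spare member of a running one:
 *   echo 2    > /sys/block/md0/md/dev-sdb1/slot
 *   echo none > /sys/block/md0/md/dev-sdb1/slot
 */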
2865
2866static ssize_t
2867offset_show(struct md_rdev *rdev, char *page)
2868{
2869 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2870}
2871
2872static ssize_t
2873offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2874{
2875 unsigned long long offset;
2876 if (kstrtoull(buf, 10, &offset) < 0)
2877 return -EINVAL;
2878 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2879 return -EBUSY;
	if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
		return -EBUSY;
2884 rdev->data_offset = offset;
2885 rdev->new_data_offset = offset;
2886 return len;
2887}
2888
2889static struct rdev_sysfs_entry rdev_offset =
2890__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2891
2892static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2893{
2894 return sprintf(page, "%llu\n",
2895 (unsigned long long)rdev->new_data_offset);
2896}
2897
2898static ssize_t new_offset_store(struct md_rdev *rdev,
2899 const char *buf, size_t len)
2900{
2901 unsigned long long new_offset;
2902 struct mddev *mddev = rdev->mddev;
2903
2904 if (kstrtoull(buf, 10, &new_offset) < 0)
2905 return -EINVAL;
2906
2907 if (mddev->sync_thread ||
2908 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2909 return -EBUSY;
	if (new_offset == rdev->data_offset)
		/* reset is always permitted */
		;
	else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev_sectors */
		if (new_offset - rdev->data_offset
		    + mddev->dev_sectors > rdev->sectors)
			return -E2BIG;
	}
	/* Metadata worries about other space details. */

	/* decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
	if (new_offset < rdev->data_offset &&
	    mddev->reshape_backwards)
		return -EINVAL;
	/* Increasing the offset is inconsistent with a forwards
	 * reshape.  reshape_direction should be set to
	 * 'backwards' first.
	 */
	if (new_offset > rdev->data_offset &&
	    !mddev->reshape_backwards)
		return -EINVAL;
2934
2935 if (mddev->pers && mddev->persistent &&
2936 !super_types[mddev->major_version]
2937 .allow_new_offset(rdev, new_offset))
2938 return -E2BIG;
2939 rdev->new_data_offset = new_offset;
2940 if (new_offset > rdev->data_offset)
2941 mddev->reshape_backwards = 1;
2942 else if (new_offset < rdev->data_offset)
2943 mddev->reshape_backwards = 0;
2944
2945 return len;
2946}
2947static struct rdev_sysfs_entry rdev_new_offset =
2948__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2949
2950static ssize_t
2951rdev_size_show(struct md_rdev *rdev, char *page)
2952{
2953 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2954}
2955
static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
{
	/* check if two start/length pairs overlap */
	if (s1+l1 <= s2)
2960 return 0;
2961 if (s2+l2 <= s1)
2962 return 0;
2963 return 1;
2964}
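
/*
 * Illustrative cases for overlaps() (not in the original source):
 *   overlaps(0, 100, 50, 10) -> 1  ([0,100) contains [50,60))
 *   overlaps(0, 100, 100, 8) -> 0  (ranges merely touch)
 */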
2965
2966static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2967{
2968 unsigned long long blocks;
2969 sector_t new;
2970
2971 if (kstrtoull(buf, 10, &blocks) < 0)
2972 return -EINVAL;
2973
	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL; /* sector conversion overflow */

	new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL; /* unsigned long long to sector_t overflow */
2980
2981 *sectors = new;
2982 return 0;
2983}
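
/*
 * Illustrative conversion (blocks are 1KiB units, sectors 512 bytes;
 * example added for clarity):
 *   "1024" -> *sectors = 2048
 *   a value with the top bit set -> -EINVAL (would overflow)
 */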
2984
2985static ssize_t
2986rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2987{
2988 struct mddev *my_mddev = rdev->mddev;
2989 sector_t oldsectors = rdev->sectors;
2990 sector_t sectors;
2991
2992 if (test_bit(Journal, &rdev->flags))
2993 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2995 return -EINVAL;
2996 if (rdev->data_offset != rdev->new_data_offset)
2997 return -EINVAL;
2998 if (my_mddev->pers && rdev->raid_disk >= 0) {
2999 if (my_mddev->persistent) {
3000 sectors = super_types[my_mddev->major_version].
3001 rdev_size_change(rdev, sectors);
3002 if (!sectors)
3003 return -EBUSY;
3004 } else if (!sectors)
3005 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3006 rdev->data_offset;
		if (!my_mddev->pers->resize)
			/* Cannot change size for RAID0 or Linear etc */
			return -EINVAL;
	}
	if (sectors < my_mddev->dev_sectors)
		return -EINVAL; /* component must fit device */
3013
3014 rdev->sectors = sectors;
	if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same
		 * ->bdev do not overlap.  'rcu' is sufficient to walk
		 * the rdev lists safely.
		 * This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
3022 struct mddev *mddev;
3023 int overlap = 0;
3024 struct list_head *tmp;
3025
3026 rcu_read_lock();
3027 for_each_mddev(mddev, tmp) {
3028 struct md_rdev *rdev2;
3029
3030 rdev_for_each(rdev2, mddev)
3031 if (rdev->bdev == rdev2->bdev &&
3032 rdev != rdev2 &&
3033 overlaps(rdev->data_offset, rdev->sectors,
3034 rdev2->data_offset,
3035 rdev2->sectors)) {
3036 overlap = 1;
3037 break;
3038 }
3039 if (overlap) {
3040 mddev_put(mddev);
3041 break;
3042 }
3043 }
3044 rcu_read_unlock();
		if (overlap) {
			/* Someone else could have slipped in a size
			 * change here, but doing so is just silly.
			 * We put oldsectors back because we *know* it is
			 * safe, and trust everything else.
			 */
			rdev->sectors = oldsectors;
3053 return -EBUSY;
3054 }
3055 }
3056 return len;
3057}
3058
3059static struct rdev_sysfs_entry rdev_size =
3060__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3061
3062static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3063{
3064 unsigned long long recovery_start = rdev->recovery_offset;
3065
3066 if (test_bit(In_sync, &rdev->flags) ||
3067 recovery_start == MaxSector)
3068 return sprintf(page, "none\n");
3069
3070 return sprintf(page, "%llu\n", recovery_start);
3071}
3072
3073static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3074{
3075 unsigned long long recovery_start;
3076
3077 if (cmd_match(buf, "none"))
3078 recovery_start = MaxSector;
3079 else if (kstrtoull(buf, 10, &recovery_start))
3080 return -EINVAL;
3081
3082 if (rdev->mddev->pers &&
3083 rdev->raid_disk >= 0)
3084 return -EBUSY;
3085
3086 rdev->recovery_offset = recovery_start;
3087 if (recovery_start == MaxSector)
3088 set_bit(In_sync, &rdev->flags);
3089 else
3090 clear_bit(In_sync, &rdev->flags);
3091 return len;
3092}
3093
3094static struct rdev_sysfs_entry rdev_recovery_start =
__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);

/* sysfs access to the bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad.  The list is truncated to fit within
 *    the one-page limit of sysfs.
 *    Writing "sector length" to this file adds an acknowledged
 *    bad block.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged.  Writing to this file adds bad blocks
 *    without acknowledging them.  This is largely for testing.
 */
static ssize_t bb_show(struct md_rdev *rdev, char *page)
3109{
3110 return badblocks_show(&rdev->badblocks, page, 0);
3111}
3112static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3113{
3114 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3115
3116 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3117 wake_up(&rdev->blocked_wait);
3118 return rv;
3119}
3120static struct rdev_sysfs_entry rdev_bad_blocks =
3121__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3122
3123static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3124{
3125 return badblocks_show(&rdev->badblocks, page, 1);
3126}
3127static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3128{
3129 return badblocks_store(&rdev->badblocks, page, len, 1);
3130}
3131static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3132__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
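
/*
 * Illustrative usage (placeholder names): record an acknowledged bad
 * range of 8 sectors starting at sector 1000, then list it:
 *   echo "1000 8" > /sys/block/md0/md/dev-sda1/bad_blocks
 *   cat /sys/block/md0/md/dev-sda1/bad_blocks
 */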
3133
3134static struct attribute *rdev_default_attrs[] = {
3135 &rdev_state.attr,
3136 &rdev_errors.attr,
3137 &rdev_slot.attr,
3138 &rdev_offset.attr,
3139 &rdev_new_offset.attr,
3140 &rdev_size.attr,
3141 &rdev_recovery_start.attr,
3142 &rdev_bad_blocks.attr,
3143 &rdev_unack_bad_blocks.attr,
3144 NULL,
3145};
3146static ssize_t
3147rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3148{
3149 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3150 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3151
3152 if (!entry->show)
3153 return -EIO;
3154 if (!rdev->mddev)
3155 return -EBUSY;
3156 return entry->show(rdev, page);
3157}
3158
3159static ssize_t
3160rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3161 const char *page, size_t length)
3162{
3163 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3164 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3165 ssize_t rv;
3166 struct mddev *mddev = rdev->mddev;
3167
3168 if (!entry->store)
3169 return -EIO;
3170 if (!capable(CAP_SYS_ADMIN))
3171 return -EACCES;
3172 rv = mddev ? mddev_lock(mddev): -EBUSY;
3173 if (!rv) {
3174 if (rdev->mddev == NULL)
3175 rv = -EBUSY;
3176 else
3177 rv = entry->store(rdev, page, length);
3178 mddev_unlock(mddev);
3179 }
3180 return rv;
3181}
3182
3183static void rdev_free(struct kobject *ko)
3184{
3185 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3186 kfree(rdev);
3187}
3188static const struct sysfs_ops rdev_sysfs_ops = {
3189 .show = rdev_attr_show,
3190 .store = rdev_attr_store,
3191};
3192static struct kobj_type rdev_ktype = {
3193 .release = rdev_free,
3194 .sysfs_ops = &rdev_sysfs_ops,
3195 .default_attrs = rdev_default_attrs,
3196};
3197
3198int md_rdev_init(struct md_rdev *rdev)
3199{
3200 rdev->desc_nr = -1;
3201 rdev->saved_raid_disk = -1;
3202 rdev->raid_disk = -1;
3203 rdev->flags = 0;
3204 rdev->data_offset = 0;
3205 rdev->new_data_offset = 0;
3206 rdev->sb_events = 0;
3207 rdev->last_read_error = 0;
3208 rdev->sb_loaded = 0;
3209 rdev->bb_page = NULL;
3210 atomic_set(&rdev->nr_pending, 0);
3211 atomic_set(&rdev->read_errors, 0);
	atomic_set(&rdev->corrected_errors, 0);

	INIT_LIST_HEAD(&rdev->same_set);
	init_waitqueue_head(&rdev->blocked_wait);

	/* Add space to store the bad block list.
	 * This reserves space even on arrays where it cannot
	 * be used, so every rdev is set up the same way.
	 */
	return badblocks_init(&rdev->badblocks, 0);
3222}
EXPORT_SYMBOL_GPL(md_rdev_init);

/*
 * Import a device.  If 'super_format' >= 0, then sanity check the
 * superblock.
 *
 * The import fails (and an ERR_PTR is returned) if:
 *
 *   - the device is nonexistent (zero size), or
 *   - the device has no valid superblock.
 *
 * super_format == -2 claims the device with a shared holder, for
 * arrays whose metadata is managed externally.
 */
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3235{
3236 char b[BDEVNAME_SIZE];
3237 int err;
3238 struct md_rdev *rdev;
3239 sector_t size;
3240
3241 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3242 if (!rdev)
3243 return ERR_PTR(-ENOMEM);
3244
3245 err = md_rdev_init(rdev);
3246 if (err)
3247 goto abort_free;
3248 err = alloc_disk_sb(rdev);
3249 if (err)
3250 goto abort_free;
3251
3252 err = lock_rdev(rdev, newdev, super_format == -2);
3253 if (err)
3254 goto abort_free;
3255
3256 kobject_init(&rdev->kobj, &rdev_ktype);
3257
3258 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3259 if (!size) {
3260 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3261 bdevname(rdev->bdev,b));
3262 err = -EINVAL;
3263 goto abort_free;
3264 }
3265
3266 if (super_format >= 0) {
3267 err = super_types[super_format].
3268 load_super(rdev, NULL, super_minor);
3269 if (err == -EINVAL) {
3270 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3271 bdevname(rdev->bdev,b),
3272 super_format, super_minor);
3273 goto abort_free;
3274 }
3275 if (err < 0) {
3276 pr_warn("md: could not read %s's sb, not importing!\n",
3277 bdevname(rdev->bdev,b));
3278 goto abort_free;
3279 }
3280 }
3281
3282 return rdev;
3283
3284abort_free:
3285 if (rdev->bdev)
3286 unlock_rdev(rdev);
3287 md_rdev_clear(rdev);
3288 kfree(rdev);
3289 return ERR_PTR(err);
}

/*
 * Check a full RAID array for plausibility
 */
static void analyze_sbs(struct mddev *mddev)
3297{
3298 int i;
3299 struct md_rdev *rdev, *freshest, *tmp;
3300 char b[BDEVNAME_SIZE];
3301
3302 freshest = NULL;
3303 rdev_for_each_safe(rdev, tmp, mddev)
3304 switch (super_types[mddev->major_version].
3305 load_super(rdev, freshest, mddev->minor_version)) {
3306 case 1:
3307 freshest = rdev;
3308 break;
3309 case 0:
3310 break;
3311 default:
3312 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3313 bdevname(rdev->bdev,b));
3314 md_kick_rdev_from_array(rdev);
3315 }
3316
3317 super_types[mddev->major_version].
3318 validate_super(mddev, freshest);
3319
3320 i = 0;
3321 rdev_for_each_safe(rdev, tmp, mddev) {
3322 if (mddev->max_disks &&
3323 (rdev->desc_nr >= mddev->max_disks ||
3324 i > mddev->max_disks)) {
3325 pr_warn("md: %s: %s: only %d devices permitted\n",
3326 mdname(mddev), bdevname(rdev->bdev, b),
3327 mddev->max_disks);
3328 md_kick_rdev_from_array(rdev);
3329 continue;
3330 }
3331 if (rdev != freshest) {
3332 if (super_types[mddev->major_version].
3333 validate_super(mddev, rdev)) {
3334 pr_warn("md: kicking non-fresh %s from array!\n",
3335 bdevname(rdev->bdev,b));
3336 md_kick_rdev_from_array(rdev);
3337 continue;
3338 }
3339 }
3340 if (mddev->level == LEVEL_MULTIPATH) {
3341 rdev->desc_nr = i++;
3342 rdev->raid_disk = rdev->desc_nr;
3343 set_bit(In_sync, &rdev->flags);
3344 } else if (rdev->raid_disk >=
3345 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3346 !test_bit(Journal, &rdev->flags)) {
3347 rdev->raid_disk = -1;
3348 clear_bit(In_sync, &rdev->flags);
3349 }
3350 }
}

/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * All without any floating-point arithmetic.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3364{
3365 unsigned long result = 0;
3366 long decimals = -1;
3367 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3368 if (*cp == '.')
3369 decimals = 0;
3370 else if (decimals < scale) {
3371 unsigned int value;
3372 value = *cp - '0';
3373 result = result * 10 + value;
3374 if (decimals >= 0)
3375 decimals++;
3376 }
3377 cp++;
3378 }
3379 if (*cp == '\n')
3380 cp++;
3381 if (*cp)
3382 return -EINVAL;
3383 if (decimals < 0)
3384 decimals = 0;
3385 while (decimals < scale) {
3386 result *= 10;
		decimals++;
3388 }
3389 *res = result;
3390 return 0;
3391}
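
/*
 * Illustrative results (examples added for clarity):
 *   strict_strtoul_scaled("1.25", &res, 3)  -> res = 1250
 *   strict_strtoul_scaled("20", &res, 3)    -> res = 20000
 *   strict_strtoul_scaled("1.2.3", &res, 3) -> -EINVAL
 */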
3392
3393static ssize_t
3394safe_delay_show(struct mddev *mddev, char *page)
3395{
3396 int msec = (mddev->safemode_delay*1000)/HZ;
3397 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3398}
3399static ssize_t
3400safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3401{
3402 unsigned long msec;
3403
3404 if (mddev_is_clustered(mddev)) {
3405 pr_warn("md: Safemode is disabled for clustered mode\n");
3406 return -EINVAL;
3407 }
3408
3409 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3410 return -EINVAL;
3411 if (msec == 0)
3412 mddev->safemode_delay = 0;
3413 else {
3414 unsigned long old_delay = mddev->safemode_delay;
3415 unsigned long new_delay = (msec*HZ)/1000;
3416
3417 if (new_delay == 0)
3418 new_delay = 1;
3419 mddev->safemode_delay = new_delay;
3420 if (new_delay < old_delay || old_delay == 0)
3421 mod_timer(&mddev->safemode_timer, jiffies+1);
3422 }
3423 return len;
3424}
3425static struct md_sysfs_entry md_safe_delay =
3426__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
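
/*
 * Illustrative usage (md0 is a placeholder name): set a 200ms
 * safe-mode delay, or disable safe mode entirely with 0:
 *   echo 0.200 > /sys/block/md0/md/safe_mode_delay
 *   echo 0     > /sys/block/md0/md/safe_mode_delay
 */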
3427
3428static ssize_t
3429level_show(struct mddev *mddev, char *page)
3430{
3431 struct md_personality *p;
3432 int ret;
3433 spin_lock(&mddev->lock);
3434 p = mddev->pers;
3435 if (p)
3436 ret = sprintf(page, "%s\n", p->name);
3437 else if (mddev->clevel[0])
3438 ret = sprintf(page, "%s\n", mddev->clevel);
3439 else if (mddev->level != LEVEL_NONE)
3440 ret = sprintf(page, "%d\n", mddev->level);
3441 else
3442 ret = 0;
3443 spin_unlock(&mddev->lock);
3444 return ret;
3445}
3446
3447static ssize_t
3448level_store(struct mddev *mddev, const char *buf, size_t len)
3449{
3450 char clevel[16];
3451 ssize_t rv;
3452 size_t slen = len;
3453 struct md_personality *pers, *oldpers;
3454 long level;
3455 void *priv, *oldpriv;
3456 struct md_rdev *rdev;
3457
3458 if (slen == 0 || slen >= sizeof(clevel))
3459 return -EINVAL;
3460
3461 rv = mddev_lock(mddev);
3462 if (rv)
3463 return rv;
3464
3465 if (mddev->pers == NULL) {
3466 strncpy(mddev->clevel, buf, slen);
3467 if (mddev->clevel[slen-1] == '\n')
3468 slen--;
3469 mddev->clevel[slen] = 0;
3470 mddev->level = LEVEL_NONE;
3471 rv = len;
3472 goto out_unlock;
3473 }
3474 rv = -EROFS;
3475 if (mddev->ro)
		goto out_unlock;

	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality can take over the array
	 */
	rv = -EBUSY;
3485 if (mddev->sync_thread ||
3486 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3487 mddev->reshape_position != MaxSector ||
3488 mddev->sysfs_active)
3489 goto out_unlock;
3490
3491 rv = -EINVAL;
3492 if (!mddev->pers->quiesce) {
3493 pr_warn("md: %s: %s does not support online personality change\n",
3494 mdname(mddev), mddev->pers->name);
3495 goto out_unlock;
	}

	/* Now find the new personality */
	strncpy(clevel, buf, slen);
3500 if (clevel[slen-1] == '\n')
3501 slen--;
3502 clevel[slen] = 0;
3503 if (kstrtol(clevel, 10, &level))
3504 level = LEVEL_NONE;
3505
3506 if (request_module("md-%s", clevel) != 0)
3507 request_module("md-level-%s", clevel);
3508 spin_lock(&pers_lock);
3509 pers = find_pers(level, clevel);
3510 if (!pers || !try_module_get(pers->owner)) {
3511 spin_unlock(&pers_lock);
3512 pr_warn("md: personality %s not loaded\n", clevel);
3513 rv = -EINVAL;
3514 goto out_unlock;
3515 }
3516 spin_unlock(&pers_lock);
3517
	if (pers == mddev->pers) {
		/* Nothing to do! */
		module_put(pers->owner);
3521 rv = len;
3522 goto out_unlock;
3523 }
3524 if (!pers->takeover) {
3525 module_put(pers->owner);
3526 pr_warn("md: %s: %s does not support personality takeover\n",
3527 mdname(mddev), clevel);
3528 rv = -EINVAL;
3529 goto out_unlock;
3530 }
3531
3532 rdev_for_each(rdev, mddev)
		rdev->new_raid_disk = rdev->raid_disk;

	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may leave them set on failure.
	 */
	priv = pers->takeover(mddev);
3539 if (IS_ERR(priv)) {
3540 mddev->new_level = mddev->level;
3541 mddev->new_layout = mddev->layout;
3542 mddev->new_chunk_sectors = mddev->chunk_sectors;
3543 mddev->raid_disks -= mddev->delta_disks;
3544 mddev->delta_disks = 0;
3545 mddev->reshape_backwards = 0;
3546 module_put(pers->owner);
3547 pr_warn("md: %s: %s would not accept array\n",
3548 mdname(mddev), clevel);
3549 rv = PTR_ERR(priv);
3550 goto out_unlock;
	}

	/* Looks like we have a winner */
	mddev_suspend(mddev);
3555 mddev_detach(mddev);
3556
3557 spin_lock(&mddev->lock);
3558 oldpers = mddev->pers;
3559 oldpriv = mddev->private;
3560 mddev->pers = pers;
3561 mddev->private = priv;
3562 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3563 mddev->level = mddev->new_level;
3564 mddev->layout = mddev->new_layout;
3565 mddev->chunk_sectors = mddev->new_chunk_sectors;
3566 mddev->delta_disks = 0;
3567 mddev->reshape_backwards = 0;
3568 mddev->degraded = 0;
3569 spin_unlock(&mddev->lock);
3570
3571 if (oldpers->sync_request == NULL &&
	    mddev->external) {
		/* We are converting from a no-redundancy array
		 * to a redundancy array and metadata is managed
		 * externally so we need to be sure that writes
		 * won't block due to a need to update the metadata
		 * until external management is started.
		 */
		mddev->in_sync = 0;
3581 mddev->safemode_delay = 0;
3582 mddev->safemode = 0;
3583 }
3584
3585 oldpers->free(mddev, oldpriv);
3586
3587 if (oldpers->sync_request == NULL &&
	    pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3591 pr_warn("md: cannot register extra attributes for %s\n",
3592 mdname(mddev));
3593 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3594 }
3595 if (oldpers->sync_request != NULL &&
	    pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
		if (mddev->to_remove == NULL)
3599 mddev->to_remove = &md_redundancy_group;
3600 }
3601
3602 module_put(oldpers->owner);
3603
3604 rdev_for_each(rdev, mddev) {
3605 if (rdev->raid_disk < 0)
3606 continue;
3607 if (rdev->new_raid_disk >= mddev->raid_disks)
3608 rdev->new_raid_disk = -1;
3609 if (rdev->new_raid_disk == rdev->raid_disk)
3610 continue;
3611 sysfs_unlink_rdev(mddev, rdev);
3612 }
3613 rdev_for_each(rdev, mddev) {
3614 if (rdev->raid_disk < 0)
3615 continue;
3616 if (rdev->new_raid_disk == rdev->raid_disk)
3617 continue;
3618 rdev->raid_disk = rdev->new_raid_disk;
3619 if (rdev->raid_disk < 0)
3620 clear_bit(In_sync, &rdev->flags);
3621 else {
3622 if (sysfs_link_rdev(mddev, rdev))
3623 pr_warn("md: cannot register rd%d for %s after level change\n",
3624 rdev->raid_disk, mdname(mddev));
3625 }
3626 }
3627
	if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
		mddev->in_sync = 1;
3633 del_timer_sync(&mddev->safemode_timer);
3634 }
3635 blk_set_stacking_limits(&mddev->queue->limits);
3636 pers->run(mddev);
3637 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3638 mddev_resume(mddev);
3639 if (!mddev->thread)
3640 md_update_sb(mddev, 1);
3641 sysfs_notify(&mddev->kobj, NULL, "level");
3642 md_new_event(mddev);
3643 rv = len;
3644out_unlock:
3645 mddev_unlock(mddev);
3646 return rv;
3647}
3648
3649static struct md_sysfs_entry md_level =
3650__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
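
/*
 * Illustrative usage (md0 is a placeholder name): convert a running
 * array to RAID5 via personality takeover, provided a personality
 * implementing the takeover is available:
 *   echo raid5 > /sys/block/md0/md/level
 */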
3651
3652static ssize_t
3653layout_show(struct mddev *mddev, char *page)
{
	/* just a number, not meaningful for all levels */
	if (mddev->reshape_position != MaxSector &&
3657 mddev->layout != mddev->new_layout)
3658 return sprintf(page, "%d (%d)\n",
3659 mddev->new_layout, mddev->layout);
3660 return sprintf(page, "%d\n", mddev->layout);
3661}
3662
3663static ssize_t
3664layout_store(struct mddev *mddev, const char *buf, size_t len)
3665{
3666 unsigned int n;
3667 int err;
3668
3669 err = kstrtouint(buf, 10, &n);
3670 if (err < 0)
3671 return err;
3672 err = mddev_lock(mddev);
3673 if (err)
3674 return err;
3675
3676 if (mddev->pers) {
3677 if (mddev->pers->check_reshape == NULL)
3678 err = -EBUSY;
3679 else if (mddev->ro)
3680 err = -EROFS;
3681 else {
3682 mddev->new_layout = n;
3683 err = mddev->pers->check_reshape(mddev);
3684 if (err)
3685 mddev->new_layout = mddev->layout;
3686 }
3687 } else {
3688 mddev->new_layout = n;
3689 if (mddev->reshape_position == MaxSector)
3690 mddev->layout = n;
3691 }
3692 mddev_unlock(mddev);
3693 return err ?: len;
3694}
3695static struct md_sysfs_entry md_layout =
3696__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3697
3698static ssize_t
3699raid_disks_show(struct mddev *mddev, char *page)
3700{
3701 if (mddev->raid_disks == 0)
3702 return 0;
3703 if (mddev->reshape_position != MaxSector &&
3704 mddev->delta_disks != 0)
3705 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3706 mddev->raid_disks - mddev->delta_disks);
3707 return sprintf(page, "%d\n", mddev->raid_disks);
3708}
3709
3710static int update_raid_disks(struct mddev *mddev, int raid_disks);
3711
3712static ssize_t
3713raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3714{
3715 unsigned int n;
3716 int err;
3717
3718 err = kstrtouint(buf, 10, &n);
3719 if (err < 0)
3720 return err;
3721
3722 err = mddev_lock(mddev);
3723 if (err)
3724 return err;
3725 if (mddev->pers)
3726 err = update_raid_disks(mddev, n);
3727 else if (mddev->reshape_position != MaxSector) {
3728 struct md_rdev *rdev;
3729 int olddisks = mddev->raid_disks - mddev->delta_disks;
3730
3731 err = -EINVAL;
3732 rdev_for_each(rdev, mddev) {
3733 if (olddisks < n &&
3734 rdev->data_offset < rdev->new_data_offset)
3735 goto out_unlock;
3736 if (olddisks > n &&
3737 rdev->data_offset > rdev->new_data_offset)
3738 goto out_unlock;
3739 }
3740 err = 0;
3741 mddev->delta_disks = n - olddisks;
3742 mddev->raid_disks = n;
3743 mddev->reshape_backwards = (mddev->delta_disks < 0);
3744 } else
3745 mddev->raid_disks = n;
3746out_unlock:
3747 mddev_unlock(mddev);
3748 return err ? err : len;
3749}
3750static struct md_sysfs_entry md_raid_disks =
3751__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3752
3753static ssize_t
3754chunk_size_show(struct mddev *mddev, char *page)
3755{
3756 if (mddev->reshape_position != MaxSector &&
3757 mddev->chunk_sectors != mddev->new_chunk_sectors)
3758 return sprintf(page, "%d (%d)\n",
3759 mddev->new_chunk_sectors << 9,
3760 mddev->chunk_sectors << 9);
3761 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3762}
3763
3764static ssize_t
3765chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3766{
3767 unsigned long n;
3768 int err;
3769
3770 err = kstrtoul(buf, 10, &n);
3771 if (err < 0)
3772 return err;
3773
3774 err = mddev_lock(mddev);
3775 if (err)
3776 return err;
3777 if (mddev->pers) {
3778 if (mddev->pers->check_reshape == NULL)
3779 err = -EBUSY;
3780 else if (mddev->ro)
3781 err = -EROFS;
3782 else {
3783 mddev->new_chunk_sectors = n >> 9;
3784 err = mddev->pers->check_reshape(mddev);
3785 if (err)
3786 mddev->new_chunk_sectors = mddev->chunk_sectors;
3787 }
3788 } else {
3789 mddev->new_chunk_sectors = n >> 9;
3790 if (mddev->reshape_position == MaxSector)
3791 mddev->chunk_sectors = n >> 9;
3792 }
3793 mddev_unlock(mddev);
3794 return err ?: len;
3795}
3796static struct md_sysfs_entry md_chunk_size =
3797__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3798
3799static ssize_t
3800resync_start_show(struct mddev *mddev, char *page)
3801{
3802 if (mddev->recovery_cp == MaxSector)
3803 return sprintf(page, "none\n");
3804 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3805}
3806
3807static ssize_t
3808resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3809{
3810 unsigned long long n;
3811 int err;
3812
3813 if (cmd_match(buf, "none"))
3814 n = MaxSector;
3815 else {
3816 err = kstrtoull(buf, 10, &n);
3817 if (err < 0)
3818 return err;
3819 if (n != (sector_t)n)
3820 return -EINVAL;
3821 }
3822
3823 err = mddev_lock(mddev);
3824 if (err)
3825 return err;
3826 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3827 err = -EBUSY;
3828
3829 if (!err) {
3830 mddev->recovery_cp = n;
3831 if (mddev->pers)
3832 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
3833 }
3834 mddev_unlock(mddev);
3835 return err ?: len;
3836}
3837static struct md_sysfs_entry md_resync_start =
3838__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
		resync_start_show, resync_start_store);

/*
 * The array state can be:
 *
 * clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 * inactive
 *     May have some settings, but array is not active
 *        all IO results in error
 *     When written, doesn't tear down array, but just stops it
 * suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 * readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 * read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 * clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 * active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 * write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 * active-idle
 *     like active, but no writes have been seen for a while (safe_mode_delay).
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3878 write_pending, active_idle, bad_word};
3879static char *array_states[] = {
3880 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3881 "write-pending", "active-idle", NULL };
3882
3883static int match_word(const char *word, char **list)
3884{
3885 int n;
3886 for (n=0; list[n]; n++)
3887 if (cmd_match(word, list[n]))
3888 break;
3889 return n;
3890}
3891
3892static ssize_t
3893array_state_show(struct mddev *mddev, char *page)
3894{
3895 enum array_state st = inactive;
3896
3897 if (mddev->pers)
3898 switch(mddev->ro) {
3899 case 1:
3900 st = readonly;
3901 break;
3902 case 2:
3903 st = read_auto;
3904 break;
3905 case 0:
3906 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
3907 st = write_pending;
3908 else if (mddev->in_sync)
3909 st = clean;
3910 else if (mddev->safemode)
3911 st = active_idle;
3912 else
3913 st = active;
3914 }
3915 else {
3916 if (list_empty(&mddev->disks) &&
3917 mddev->raid_disks == 0 &&
3918 mddev->dev_sectors == 0)
3919 st = clear;
3920 else
3921 st = inactive;
3922 }
3923 return sprintf(page, "%s\n", array_states[st]);
3924}
3925
3926static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3927static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3928static int do_md_run(struct mddev *mddev);
3929static int restart_array(struct mddev *mddev);
3930
3931static ssize_t
3932array_state_store(struct mddev *mddev, const char *buf, size_t len)
3933{
3934 int err;
3935 enum array_state st = match_word(buf, array_states);
3936
	if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
		spin_lock(&mddev->lock);
3942 if (st == active) {
3943 restart_array(mddev);
3944 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
3945 md_wakeup_thread(mddev->thread);
3946 wake_up(&mddev->sb_wait);
3947 err = 0;
3948 } else {
3949 restart_array(mddev);
3950 if (atomic_read(&mddev->writes_pending) == 0) {
3951 if (mddev->in_sync == 0) {
3952 mddev->in_sync = 1;
3953 if (mddev->safemode == 1)
3954 mddev->safemode = 0;
3955 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
3956 }
3957 err = 0;
3958 } else
3959 err = -EBUSY;
3960 }
3961 if (!err)
3962 sysfs_notify_dirent_safe(mddev->sysfs_state);
3963 spin_unlock(&mddev->lock);
3964 return err ?: len;
3965 }
3966 err = mddev_lock(mddev);
3967 if (err)
3968 return err;
3969 err = -EINVAL;
3970 switch(st) {
3971 case bad_word:
3972 break;
	case clear:
		/* stopping an active array */
		err = do_md_stop(mddev, 0, NULL);
		break;
	case inactive:
		/* stopping an active array */
		if (mddev->pers)
			err = do_md_stop(mddev, 2, NULL);
		else
			err = 0; /* already inactive */
3983 break;
3984 case suspended:
3985 break;
3986 case readonly:
3987 if (mddev->pers)
3988 err = md_set_readonly(mddev, NULL);
3989 else {
3990 mddev->ro = 1;
3991 set_disk_ro(mddev->gendisk, 1);
3992 err = do_md_run(mddev);
3993 }
3994 break;
3995 case read_auto:
3996 if (mddev->pers) {
3997 if (mddev->ro == 0)
3998 err = md_set_readonly(mddev, NULL);
3999 else if (mddev->ro == 1)
4000 err = restart_array(mddev);
4001 if (err == 0) {
4002 mddev->ro = 2;
4003 set_disk_ro(mddev->gendisk, 0);
4004 }
4005 } else {
4006 mddev->ro = 2;
4007 err = do_md_run(mddev);
4008 }
4009 break;
4010 case clean:
4011 if (mddev->pers) {
4012 err = restart_array(mddev);
4013 if (err)
4014 break;
4015 spin_lock(&mddev->lock);
4016 if (atomic_read(&mddev->writes_pending) == 0) {
4017 if (mddev->in_sync == 0) {
4018 mddev->in_sync = 1;
4019 if (mddev->safemode == 1)
4020 mddev->safemode = 0;
4021 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4022 }
4023 err = 0;
4024 } else
4025 err = -EBUSY;
4026 spin_unlock(&mddev->lock);
4027 } else
4028 err = -EINVAL;
4029 break;
4030 case active:
4031 if (mddev->pers) {
4032 err = restart_array(mddev);
4033 if (err)
4034 break;
4035 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4036 wake_up(&mddev->sb_wait);
4037 err = 0;
4038 } else {
4039 mddev->ro = 0;
4040 set_disk_ro(mddev->gendisk, 0);
4041 err = do_md_run(mddev);
4042 }
4043 break;
	case write_pending:
	case active_idle:
		/* these cannot be set */
		break;
4048 }
4049
4050 if (!err) {
4051 if (mddev->hold_active == UNTIL_IOCTL)
4052 mddev->hold_active = 0;
4053 sysfs_notify_dirent_safe(mddev->sysfs_state);
4054 }
4055 mddev_unlock(mddev);
4056 return err ?: len;
4057}
4058static struct md_sysfs_entry md_array_state =
4059__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
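
/*
 * Illustrative usage (md0 is a placeholder name): mark an array
 * read-only, then make it fully active again:
 *   echo readonly > /sys/block/md0/md/array_state
 *   echo active   > /sys/block/md0/md/array_state
 */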
4060
4061static ssize_t
max_corrected_read_errors_show(struct mddev *mddev, char *page)
{
4063 return sprintf(page, "%d\n",
4064 atomic_read(&mddev->max_corr_read_errors));
4065}
4066
4067static ssize_t
4068max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4069{
4070 unsigned int n;
4071 int rv;
4072
4073 rv = kstrtouint(buf, 10, &n);
4074 if (rv < 0)
4075 return rv;
4076 atomic_set(&mddev->max_corr_read_errors, n);
4077 return len;
4078}
4079
4080static struct md_sysfs_entry max_corr_read_errors =
4081__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4082 max_corrected_read_errors_store);
4083
4084static ssize_t
4085null_show(struct mddev *mddev, char *page)
4086{
4087 return -EINVAL;
4088}
4089
4090static ssize_t
4091new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
	char *e;
4101 int major = simple_strtoul(buf, &e, 10);
4102 int minor;
4103 dev_t dev;
4104 struct md_rdev *rdev;
4105 int err;
4106
4107 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4108 return -EINVAL;
4109 minor = simple_strtoul(e+1, &e, 10);
4110 if (*e && *e != '\n')
4111 return -EINVAL;
4112 dev = MKDEV(major, minor);
4113 if (major != MAJOR(dev) ||
4114 minor != MINOR(dev))
4115 return -EOVERFLOW;
4116
4117 flush_workqueue(md_misc_wq);
4118
4119 err = mddev_lock(mddev);
4120 if (err)
4121 return err;
4122 if (mddev->persistent) {
4123 rdev = md_import_device(dev, mddev->major_version,
4124 mddev->minor_version);
4125 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4126 struct md_rdev *rdev0
4127 = list_entry(mddev->disks.next,
4128 struct md_rdev, same_set);
4129 err = super_types[mddev->major_version]
4130 .load_super(rdev, rdev0, mddev->minor_version);
4131 if (err < 0)
4132 goto out;
4133 }
4134 } else if (mddev->external)
4135 rdev = md_import_device(dev, -2, -1);
4136 else
4137 rdev = md_import_device(dev, -1, -1);
4138
4139 if (IS_ERR(rdev)) {
4140 mddev_unlock(mddev);
4141 return PTR_ERR(rdev);
4142 }
4143 err = bind_rdev_to_array(rdev, mddev);
4144 out:
4145 if (err)
4146 export_rdev(rdev);
4147 mddev_unlock(mddev);
4148 return err ? err : len;
4149}
4150
4151static struct md_sysfs_entry md_new_device =
4152__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
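
/*
 * Illustrative usage (8:16 is a placeholder major:minor, e.g. /dev/sdb
 * on many systems):
 *   echo 8:16 > /sys/block/md0/md/new_dev
 */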
4153
4154static ssize_t
4155bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4156{
4157 char *end;
4158 unsigned long chunk, end_chunk;
4159 int err;
4160
4161 err = mddev_lock(mddev);
4162 if (err)
4163 return err;
4164 if (!mddev->bitmap)
4165 goto out;
4166
4167 while (*buf) {
4168 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4169 if (buf == end) break;
4170 if (*end == '-') {
4171 buf = end + 1;
4172 end_chunk = simple_strtoul(buf, &end, 0);
4173 if (buf == end) break;
4174 }
4175 if (*end && !isspace(*end)) break;
4176 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4177 buf = skip_spaces(end);
4178 }
4179 bitmap_unplug(mddev->bitmap);
4180out:
4181 mddev_unlock(mddev);
4182 return len;
4183}
4184
4185static struct md_sysfs_entry md_bitmap =
4186__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4187
4188static ssize_t
4189size_show(struct mddev *mddev, char *page)
4190{
4191 return sprintf(page, "%llu\n",
4192 (unsigned long long)mddev->dev_sectors / 2);
4193}
4194
4195static int update_size(struct mddev *mddev, sector_t num_sectors);
4196
4197static ssize_t
4198size_store(struct mddev *mddev, const char *buf, size_t len)
{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
	sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4206
4207 if (err < 0)
4208 return err;
4209 err = mddev_lock(mddev);
4210 if (err)
4211 return err;
4212 if (mddev->pers) {
4213 err = update_size(mddev, sectors);
4214 if (err == 0)
4215 md_update_sb(mddev, 1);
4216 } else {
4217 if (mddev->dev_sectors == 0 ||
4218 mddev->dev_sectors > sectors)
4219 mddev->dev_sectors = sectors;
4220 else
4221 err = -ENOSPC;
4222 }
4223 mddev_unlock(mddev);
4224 return err ? err : len;
4225}
4226
4227static struct md_sysfs_entry md_size =
__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);

/* Metadata version.
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
static ssize_t
4237metadata_show(struct mddev *mddev, char *page)
4238{
4239 if (mddev->persistent)
4240 return sprintf(page, "%d.%d\n",
4241 mddev->major_version, mddev->minor_version);
4242 else if (mddev->external)
4243 return sprintf(page, "external:%s\n", mddev->metadata_type);
4244 else
4245 return sprintf(page, "none\n");
4246}
4247
4248static ssize_t
4249metadata_store(struct mddev *mddev, const char *buf, size_t len)
4250{
4251 int major, minor;
4252 char *e;
	int err;
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */

	err = mddev_lock(mddev);
4260 if (err)
4261 return err;
4262 err = -EBUSY;
4263 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4264 ;
4265 else if (!list_empty(&mddev->disks))
4266 goto out_unlock;
4267
4268 err = 0;
4269 if (cmd_match(buf, "none")) {
4270 mddev->persistent = 0;
4271 mddev->external = 0;
4272 mddev->major_version = 0;
4273 mddev->minor_version = 90;
4274 goto out_unlock;
4275 }
4276 if (strncmp(buf, "external:", 9) == 0) {
4277 size_t namelen = len-9;
4278 if (namelen >= sizeof(mddev->metadata_type))
4279 namelen = sizeof(mddev->metadata_type)-1;
4280 strncpy(mddev->metadata_type, buf+9, namelen);
4281 mddev->metadata_type[namelen] = 0;
4282 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4283 mddev->metadata_type[--namelen] = 0;
4284 mddev->persistent = 0;
4285 mddev->external = 1;
4286 mddev->major_version = 0;
4287 mddev->minor_version = 90;
4288 goto out_unlock;
4289 }
4290 major = simple_strtoul(buf, &e, 10);
4291 err = -EINVAL;
4292 if (e==buf || *e != '.')
4293 goto out_unlock;
4294 buf = e+1;
4295 minor = simple_strtoul(buf, &e, 10);
4296 if (e==buf || (*e && *e != '\n') )
4297 goto out_unlock;
4298 err = -ENOENT;
4299 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4300 goto out_unlock;
4301 mddev->major_version = major;
4302 mddev->minor_version = minor;
4303 mddev->persistent = 1;
4304 mddev->external = 0;
4305 err = 0;
4306out_unlock:
4307 mddev_unlock(mddev);
4308 return err ?: len;
4309}
4310
4311static struct md_sysfs_entry md_metadata =
4312__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4313
4314static ssize_t
4315action_show(struct mddev *mddev, char *page)
4316{
4317 char *type = "idle";
4318 unsigned long recovery = mddev->recovery;
4319 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4320 type = "frozen";
4321 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4322 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4323 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4324 type = "reshape";
4325 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4326 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4327 type = "resync";
4328 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4329 type = "check";
4330 else
4331 type = "repair";
4332 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4333 type = "recover";
4334 else if (mddev->reshape_position != MaxSector)
4335 type = "reshape";
4336 }
4337 return sprintf(page, "%s\n", type);
4338}
4339
4340static ssize_t
4341action_store(struct mddev *mddev, const char *page, size_t len)
4342{
4343 if (!mddev->pers || !mddev->pers->sync_request)
		return -EINVAL;

	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4348 if (cmd_match(page, "frozen"))
4349 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4350 else
4351 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4352 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4353 mddev_lock(mddev) == 0) {
4354 flush_workqueue(md_misc_wq);
4355 if (mddev->sync_thread) {
4356 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4357 md_reap_sync_thread(mddev);
4358 }
4359 mddev_unlock(mddev);
4360 }
4361 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4362 return -EBUSY;
4363 else if (cmd_match(page, "resync"))
4364 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4365 else if (cmd_match(page, "recover")) {
4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4367 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4368 } else if (cmd_match(page, "reshape")) {
4369 int err;
4370 if (mddev->pers->start_reshape == NULL)
4371 return -EINVAL;
4372 err = mddev_lock(mddev);
4373 if (!err) {
4374 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4375 err = -EBUSY;
4376 else {
4377 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4378 err = mddev->pers->start_reshape(mddev);
4379 }
4380 mddev_unlock(mddev);
4381 }
4382 if (err)
4383 return err;
4384 sysfs_notify(&mddev->kobj, NULL, "degraded");
4385 } else {
4386 if (cmd_match(page, "check"))
4387 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4388 else if (!cmd_match(page, "repair"))
4389 return -EINVAL;
4390 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4391 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4392 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4393 }
	if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
		mddev->ro = 0;
4399 md_wakeup_thread(mddev->sync_thread);
4400 }
4401 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4402 md_wakeup_thread(mddev->thread);
4403 sysfs_notify_dirent_safe(mddev->sysfs_action);
4404 return len;
4405}
4406
4407static struct md_sysfs_entry md_scan_mode =
4408__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
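
/*
 * Illustrative usage (md0 is a placeholder name): run a read-only
 * consistency check, then report the mismatch count:
 *   echo check > /sys/block/md0/md/sync_action
 *   cat /sys/block/md0/md/mismatch_cnt
 */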
4409
4410static ssize_t
4411last_sync_action_show(struct mddev *mddev, char *page)
4412{
4413 return sprintf(page, "%s\n", mddev->last_sync_action);
4414}
4415
4416static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4417
4418static ssize_t
4419mismatch_cnt_show(struct mddev *mddev, char *page)
4420{
4421 return sprintf(page, "%llu\n",
4422 (unsigned long long)
4423 atomic64_read(&mddev->resync_mismatches));
4424}
4425
4426static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4427
4428static ssize_t
4429sync_min_show(struct mddev *mddev, char *page)
4430{
4431 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4432 mddev->sync_speed_min ? "local": "system");
4433}
4434
4435static ssize_t
4436sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4437{
4438 unsigned int min;
4439 int rv;
4440
4441 if (strncmp(buf, "system", 6)==0) {
4442 min = 0;
4443 } else {
4444 rv = kstrtouint(buf, 10, &min);
4445 if (rv < 0)
4446 return rv;
4447 if (min == 0)
4448 return -EINVAL;
4449 }
4450 mddev->sync_speed_min = min;
4451 return len;
4452}
4453
4454static struct md_sysfs_entry md_sync_min =
4455__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4456
4457static ssize_t
4458sync_max_show(struct mddev *mddev, char *page)
4459{
4460 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4461 mddev->sync_speed_max ? "local": "system");
4462}
4463
4464static ssize_t
4465sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4466{
4467 unsigned int max;
4468 int rv;
4469
4470 if (strncmp(buf, "system", 6)==0) {
4471 max = 0;
4472 } else {
4473 rv = kstrtouint(buf, 10, &max);
4474 if (rv < 0)
4475 return rv;
4476 if (max == 0)
4477 return -EINVAL;
4478 }
4479 mddev->sync_speed_max = max;
4480 return len;
4481}
4482
4483static struct md_sysfs_entry md_sync_max =
4484__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
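
/*
 * Illustrative usage (md0 is a placeholder name): cap resync
 * throughput for one array, then fall back to the system-wide limit:
 *   echo 50000  > /sys/block/md0/md/sync_speed_max
 *   echo system > /sys/block/md0/md/sync_speed_max
 */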
4485
4486static ssize_t
4487degraded_show(struct mddev *mddev, char *page)
4488{
4489 return sprintf(page, "%d\n", mddev->degraded);
4490}
4491static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4492
4493static ssize_t
4494sync_force_parallel_show(struct mddev *mddev, char *page)
4495{
4496 return sprintf(page, "%d\n", mddev->parallel_resync);
4497}
4498
4499static ssize_t
4500sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4501{
4502 long n;
4503
4504 if (kstrtol(buf, 10, &n))
4505 return -EINVAL;
4506
4507 if (n != 0 && n != 1)
4508 return -EINVAL;
4509
4510 mddev->parallel_resync = n;
4511
4512 if (mddev->sync_thread)
4513 wake_up(&resync_wait);
4514
4515 return len;
}

static struct md_sysfs_entry md_sync_force_parallel =
4520__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4521 sync_force_parallel_show, sync_force_parallel_store);
4522
4523static ssize_t
4524sync_speed_show(struct mddev *mddev, char *page)
4525{
4526 unsigned long resync, dt, db;
4527 if (mddev->curr_resync == 0)
4528 return sprintf(page, "none\n");
4529 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4530 dt = (jiffies - mddev->resync_mark) / HZ;
4531 if (!dt) dt++;
4532 db = resync - mddev->resync_mark_cnt;
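	/* db sectors completed over dt seconds; the "/2" converts 512-byte
	 * sectors to KB/sec, the same unit as sync_speed_{min,max}.
	 */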
4533 return sprintf(page, "%lu\n", db/dt/2);
4534}
4535
4536static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4537
4538static ssize_t
4539sync_completed_show(struct mddev *mddev, char *page)
4540{
4541 unsigned long long max_sectors, resync;
4542
4543 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4544 return sprintf(page, "none\n");
4545
4546 if (mddev->curr_resync == 1 ||
4547 mddev->curr_resync == 2)
4548 return sprintf(page, "delayed\n");
4549
4550 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4551 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4552 max_sectors = mddev->resync_max_sectors;
4553 else
4554 max_sectors = mddev->dev_sectors;
4555
4556 resync = mddev->curr_resync_completed;
4557 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4558}
4559
4560static struct md_sysfs_entry md_sync_completed =
4561 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4562
4563static ssize_t
4564min_sync_show(struct mddev *mddev, char *page)
4565{
4566 return sprintf(page, "%llu\n",
4567 (unsigned long long)mddev->resync_min);
4568}
4569static ssize_t
4570min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4571{
4572 unsigned long long min;
4573 int err;
4574
4575 if (kstrtoull(buf, 10, &min))
4576 return -EINVAL;
4577
4578 spin_lock(&mddev->lock);
4579 err = -EINVAL;
4580 if (min > mddev->resync_max)
4581 goto out_unlock;
4582
4583 err = -EBUSY;
4584 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4585 goto out_unlock;

	/* Round down to multiple of 4K for safety */
	mddev->resync_min = round_down(min, 8);
4589 err = 0;
4590
4591out_unlock:
4592 spin_unlock(&mddev->lock);
4593 return err ?: len;
4594}
4595
4596static struct md_sysfs_entry md_min_sync =
4597__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4598
4599static ssize_t
4600max_sync_show(struct mddev *mddev, char *page)
4601{
4602 if (mddev->resync_max == MaxSector)
4603 return sprintf(page, "max\n");
4604 else
4605 return sprintf(page, "%llu\n",
4606 (unsigned long long)mddev->resync_max);
4607}
4608static ssize_t
4609max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4610{
4611 int err;
4612 spin_lock(&mddev->lock);
4613 if (strncmp(buf, "max", 3) == 0)
4614 mddev->resync_max = MaxSector;
4615 else {
4616 unsigned long long max;
4617 int chunk;
4618
4619 err = -EINVAL;
4620 if (kstrtoull(buf, 10, &max))
4621 goto out_unlock;
4622 if (max < mddev->resync_min)
4623 goto out_unlock;
4624
4625 err = -EBUSY;
4626 if (max < mddev->resync_max &&
4627 mddev->ro == 0 &&
4628 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4629 goto out_unlock;

		/* Must be a multiple of chunk_size */
		chunk = mddev->chunk_sectors;
4633 if (chunk) {
4634 sector_t temp = max;
4635
4636 err = -EINVAL;
4637 if (sector_div(temp, chunk))
4638 goto out_unlock;
4639 }
4640 mddev->resync_max = max;
4641 }
4642 wake_up(&mddev->recovery_wait);
4643 err = 0;
4644out_unlock:
4645 spin_unlock(&mddev->lock);
4646 return err ?: len;
4647}
4648
4649static struct md_sysfs_entry md_max_sync =
4650__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
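/* Usage example (hypothetical device name): `echo 4194304 >
 * /sys/block/md0/md/sync_max` stops a resync after the first 2 GiB
 * (units are 512-byte sectors); writing "max" removes the limit.
 */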
4651
4652static ssize_t
4653suspend_lo_show(struct mddev *mddev, char *page)
4654{
4655 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4656}
4657
4658static ssize_t
4659suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4660{
4661 unsigned long long old, new;
4662 int err;
4663
4664 err = kstrtoull(buf, 10, &new);
4665 if (err < 0)
4666 return err;
4667 if (new != (sector_t)new)
4668 return -EINVAL;
4669
4670 err = mddev_lock(mddev);
4671 if (err)
4672 return err;
4673 err = -EINVAL;
4674 if (mddev->pers == NULL ||
4675 mddev->pers->quiesce == NULL)
4676 goto unlock;
4677 old = mddev->suspend_lo;
4678 mddev->suspend_lo = new;
	if (new >= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4687 err = 0;
4688unlock:
4689 mddev_unlock(mddev);
4690 return err ?: len;
4691}
4692static struct md_sysfs_entry md_suspend_lo =
4693__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4694
4695static ssize_t
4696suspend_hi_show(struct mddev *mddev, char *page)
4697{
4698 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4699}
4700
4701static ssize_t
4702suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4703{
4704 unsigned long long old, new;
4705 int err;
4706
4707 err = kstrtoull(buf, 10, &new);
4708 if (err < 0)
4709 return err;
4710 if (new != (sector_t)new)
4711 return -EINVAL;
4712
4713 err = mddev_lock(mddev);
4714 if (err)
4715 return err;
4716 err = -EINVAL;
4717 if (mddev->pers == NULL ||
4718 mddev->pers->quiesce == NULL)
4719 goto unlock;
4720 old = mddev->suspend_hi;
4721 mddev->suspend_hi = new;
	if (new <= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4730 err = 0;
4731unlock:
4732 mddev_unlock(mddev);
4733 return err ?: len;
4734}
4735static struct md_sysfs_entry md_suspend_hi =
4736__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
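/* suspend_lo/suspend_hi delimit a sector range over which the personality
 * suspends I/O, e.g. so user space can safely rewrite that window during
 * an externally managed reshape.
 */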
4737
4738static ssize_t
4739reshape_position_show(struct mddev *mddev, char *page)
4740{
4741 if (mddev->reshape_position != MaxSector)
4742 return sprintf(page, "%llu\n",
4743 (unsigned long long)mddev->reshape_position);
4744 strcpy(page, "none\n");
4745 return 5;
4746}
4747
4748static ssize_t
4749reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4750{
4751 struct md_rdev *rdev;
4752 unsigned long long new;
4753 int err;
4754
4755 err = kstrtoull(buf, 10, &new);
4756 if (err < 0)
4757 return err;
4758 if (new != (sector_t)new)
4759 return -EINVAL;
4760 err = mddev_lock(mddev);
4761 if (err)
4762 return err;
4763 err = -EBUSY;
4764 if (mddev->pers)
4765 goto unlock;
4766 mddev->reshape_position = new;
4767 mddev->delta_disks = 0;
4768 mddev->reshape_backwards = 0;
4769 mddev->new_level = mddev->level;
4770 mddev->new_layout = mddev->layout;
4771 mddev->new_chunk_sectors = mddev->chunk_sectors;
4772 rdev_for_each(rdev, mddev)
4773 rdev->new_data_offset = rdev->data_offset;
4774 err = 0;
4775unlock:
4776 mddev_unlock(mddev);
4777 return err ?: len;
4778}
4779
4780static struct md_sysfs_entry md_reshape_position =
4781__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4782 reshape_position_store);
4783
4784static ssize_t
4785reshape_direction_show(struct mddev *mddev, char *page)
4786{
4787 return sprintf(page, "%s\n",
4788 mddev->reshape_backwards ? "backwards" : "forwards");
4789}
4790
4791static ssize_t
4792reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4793{
4794 int backwards = 0;
4795 int err;
4796
4797 if (cmd_match(buf, "forwards"))
4798 backwards = 0;
4799 else if (cmd_match(buf, "backwards"))
4800 backwards = 1;
4801 else
4802 return -EINVAL;
4803 if (mddev->reshape_backwards == backwards)
4804 return len;
4805
4806 err = mddev_lock(mddev);
4807 if (err)
4808 return err;
4809
4810 if (mddev->delta_disks)
4811 err = -EBUSY;
4812 else if (mddev->persistent &&
4813 mddev->major_version == 0)
4814 err = -EINVAL;
4815 else
4816 mddev->reshape_backwards = backwards;
4817 mddev_unlock(mddev);
4818 return err ?: len;
4819}
4820
4821static struct md_sysfs_entry md_reshape_direction =
4822__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4823 reshape_direction_store);
4824
4825static ssize_t
4826array_size_show(struct mddev *mddev, char *page)
4827{
4828 if (mddev->external_size)
4829 return sprintf(page, "%llu\n",
4830 (unsigned long long)mddev->array_sectors/2);
4831 else
4832 return sprintf(page, "default\n");
4833}
4834
4835static ssize_t
4836array_size_store(struct mddev *mddev, const char *buf, size_t len)
4837{
4838 sector_t sectors;
4839 int err;
4840
4841 err = mddev_lock(mddev);
4842 if (err)
4843 return err;

	/* cluster raid doesn't support changing array_sectors; the
	 * reconfig mutex taken above must be dropped before returning
	 */
	if (mddev_is_clustered(mddev)) {
		mddev_unlock(mddev);
		return -EINVAL;
	}
4848
4849 if (strncmp(buf, "default", 7) == 0) {
4850 if (mddev->pers)
4851 sectors = mddev->pers->size(mddev, 0, 0);
4852 else
4853 sectors = mddev->array_sectors;
4854
4855 mddev->external_size = 0;
4856 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4858 err = -EINVAL;
4859 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4860 err = -E2BIG;
4861 else
4862 mddev->external_size = 1;
4863 }
4864
4865 if (!err) {
4866 mddev->array_sectors = sectors;
4867 if (mddev->pers) {
4868 set_capacity(mddev->gendisk, mddev->array_sectors);
4869 revalidate_disk(mddev->gendisk);
4870 }
4871 }
4872 mddev_unlock(mddev);
4873 return err ?: len;
4874}
4875
4876static struct md_sysfs_entry md_array_size =
4877__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4878 array_size_store);
4879
4880static struct attribute *md_default_attrs[] = {
4881 &md_level.attr,
4882 &md_layout.attr,
4883 &md_raid_disks.attr,
4884 &md_chunk_size.attr,
4885 &md_size.attr,
4886 &md_resync_start.attr,
4887 &md_metadata.attr,
4888 &md_new_device.attr,
4889 &md_safe_delay.attr,
4890 &md_array_state.attr,
4891 &md_reshape_position.attr,
4892 &md_reshape_direction.attr,
4893 &md_array_size.attr,
4894 &max_corr_read_errors.attr,
4895 NULL,
4896};
4897
4898static struct attribute *md_redundancy_attrs[] = {
4899 &md_scan_mode.attr,
4900 &md_last_scan_mode.attr,
4901 &md_mismatches.attr,
4902 &md_sync_min.attr,
4903 &md_sync_max.attr,
4904 &md_sync_speed.attr,
4905 &md_sync_force_parallel.attr,
4906 &md_sync_completed.attr,
4907 &md_min_sync.attr,
4908 &md_max_sync.attr,
4909 &md_suspend_lo.attr,
4910 &md_suspend_hi.attr,
4911 &md_bitmap.attr,
4912 &md_degraded.attr,
4913 NULL,
4914};
4915static struct attribute_group md_redundancy_group = {
4916 .name = NULL,
4917 .attrs = md_redundancy_attrs,
4918};
4919
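/* The sysfs ->show()/->store() entry points below pin the mddev with a
 * reference (taken under all_mddevs_lock) so it cannot be freed while an
 * attribute handler is running.
 */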
4920static ssize_t
4921md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4922{
4923 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4924 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4925 ssize_t rv;
4926
4927 if (!entry->show)
4928 return -EIO;
4929 spin_lock(&all_mddevs_lock);
4930 if (list_empty(&mddev->all_mddevs)) {
4931 spin_unlock(&all_mddevs_lock);
4932 return -EBUSY;
4933 }
4934 mddev_get(mddev);
4935 spin_unlock(&all_mddevs_lock);
4936
4937 rv = entry->show(mddev, page);
4938 mddev_put(mddev);
4939 return rv;
4940}
4941
4942static ssize_t
4943md_attr_store(struct kobject *kobj, struct attribute *attr,
4944 const char *page, size_t length)
4945{
4946 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4947 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4948 ssize_t rv;
4949
4950 if (!entry->store)
4951 return -EIO;
4952 if (!capable(CAP_SYS_ADMIN))
4953 return -EACCES;
4954 spin_lock(&all_mddevs_lock);
4955 if (list_empty(&mddev->all_mddevs)) {
4956 spin_unlock(&all_mddevs_lock);
4957 return -EBUSY;
4958 }
4959 mddev_get(mddev);
4960 spin_unlock(&all_mddevs_lock);
4961 rv = entry->store(mddev, page, length);
4962 mddev_put(mddev);
4963 return rv;
4964}
4965
4966static void md_free(struct kobject *ko)
4967{
4968 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4969
4970 if (mddev->sysfs_state)
4971 sysfs_put(mddev->sysfs_state);
4972
4973 if (mddev->queue)
4974 blk_cleanup_queue(mddev->queue);
4975 if (mddev->gendisk) {
4976 del_gendisk(mddev->gendisk);
4977 put_disk(mddev->gendisk);
4978 }
4979
4980 kfree(mddev);
4981}
4982
4983static const struct sysfs_ops md_sysfs_ops = {
4984 .show = md_attr_show,
4985 .store = md_attr_store,
4986};
4987static struct kobj_type md_ktype = {
4988 .release = md_free,
4989 .sysfs_ops = &md_sysfs_ops,
4990 .default_attrs = md_default_attrs,
4991};
4992
4993int mdp_major = 0;
4994
4995static void mddev_delayed_delete(struct work_struct *ws)
4996{
4997 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4998
4999 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5000 kobject_del(&mddev->kobj);
5001 kobject_put(&mddev->kobj);
5002}
5003
5004static int md_alloc(dev_t dev, char *name)
5005{
5006 static DEFINE_MUTEX(disks_mutex);
5007 struct mddev *mddev = mddev_find(dev);
5008 struct gendisk *disk;
5009 int partitioned;
5010 int shift;
5011 int unit;
5012 int error;
5013
5014 if (!mddev)
5015 return -ENODEV;
5016
5017 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5018 shift = partitioned ? MdpMinorShift : 0;
5019 unit = MINOR(mddev->unit) >> shift;
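	/* Partitionable ('mdp') arrays shift the minor number so each
	 * unit owns a block of 2^MdpMinorShift minors for partitions.
	 */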
5020
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
5025
5026 mutex_lock(&disks_mutex);
5027 error = -EEXIST;
5028 if (mddev->gendisk)
5029 goto abort;
5030
	if (name) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
		struct mddev *mddev2;
5035 spin_lock(&all_mddevs_lock);
5036
5037 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5038 if (mddev2->gendisk &&
5039 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5040 spin_unlock(&all_mddevs_lock);
5041 goto abort;
5042 }
5043 spin_unlock(&all_mddevs_lock);
5044 }
5045
5046 error = -ENOMEM;
5047 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5048 if (!mddev->queue)
5049 goto abort;
5050 mddev->queue->queuedata = mddev;
5051
5052 blk_queue_make_request(mddev->queue, md_make_request);
5053 blk_set_stacking_limits(&mddev->queue->limits);
5054
5055 disk = alloc_disk(1 << shift);
5056 if (!disk) {
5057 blk_cleanup_queue(mddev->queue);
5058 mddev->queue = NULL;
5059 goto abort;
5060 }
5061 disk->major = MAJOR(mddev->unit);
5062 disk->first_minor = unit << shift;
5063 if (name)
5064 strcpy(disk->disk_name, name);
5065 else if (partitioned)
5066 sprintf(disk->disk_name, "md_d%d", unit);
5067 else
5068 sprintf(disk->disk_name, "md%d", unit);
5069 disk->fops = &md_fops;
5070 disk->private_data = mddev;
5071 disk->queue = mddev->queue;
5072 blk_queue_write_cache(mddev->queue, true, true);
5073
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
	disk->flags |= GENHD_FL_EXT_DEVT;
5078 mddev->gendisk = disk;
5079
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
	mutex_lock(&mddev->open_mutex);
5083 add_disk(disk);
5084
5085 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5086 &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/* This isn't possible, but as kobject_init_and_add is marked
		 * __must_check, we must do something with the result
		 */
		pr_debug("md: cannot register %s/md - name in use\n",
			 disk->disk_name);
		error = 0;
	}
5095 if (mddev->kobj.sd &&
5096 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5097 pr_debug("pointless warning\n");
5098 mutex_unlock(&mddev->open_mutex);
5099 abort:
5100 mutex_unlock(&disks_mutex);
5101 if (!error && mddev->kobj.sd) {
5102 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5103 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5104 }
5105 mddev_put(mddev);
5106 return error;
5107}
5108
5109static struct kobject *md_probe(dev_t dev, int *part, void *data)
5110{
5111 md_alloc(dev, NULL);
5112 return NULL;
5113}
5114
5115static int add_named_array(const char *val, struct kernel_param *kp)
5116{
	/* val must be "md_*" where * is not all digits.
	 * We allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 */
5121 int len = strlen(val);
5122 char buf[DISK_NAME_LEN];
5123
5124 while (len && val[len-1] == '\n')
5125 len--;
5126 if (len >= DISK_NAME_LEN)
5127 return -E2BIG;
5128 strlcpy(buf, val, len+1);
5129 if (strncmp(buf, "md_", 3) != 0)
5130 return -EINVAL;
5131 return md_alloc(0, buf);
5132}
5133
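/* Safe-mode timer: fires once safemode_delay has elapsed with no writes
 * pending, allowing the array to be marked 'clean' in the superblock.
 */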
5134static void md_safemode_timeout(unsigned long data)
5135{
5136 struct mddev *mddev = (struct mddev *) data;
5137
5138 if (!atomic_read(&mddev->writes_pending)) {
5139 mddev->safemode = 1;
5140 if (mddev->external)
5141 sysfs_notify_dirent_safe(mddev->sysfs_state);
5142 }
5143 md_wakeup_thread(mddev->thread);
5144}
5145
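/* Set through the start_dirty_degraded module parameter: allow starting
 * an array that is both dirty and degraded, which risks undetected data
 * corruption on parity RAID.
 */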
5146static int start_dirty_degraded;
5147
5148int md_run(struct mddev *mddev)
5149{
5150 int err;
5151 struct md_rdev *rdev;
5152 struct md_personality *pers;
5153
	if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
		return -EINVAL;
5157
5158 if (mddev->pers)
5159 return -EBUSY;
5160
5161 if (mddev->sysfs_active)
5162 return -EBUSY;
5163
	/*
	 * Analyze all RAID superblock(s)
	 */
5167 if (!mddev->raid_disks) {
5168 if (!mddev->persistent)
5169 return -EINVAL;
5170 analyze_sbs(mddev);
5171 }
5172
5173 if (mddev->level != LEVEL_NONE)
5174 request_module("md-level-%d", mddev->level);
5175 else if (mddev->clevel[0])
5176 request_module("md-%s", mddev->clevel);
5177
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5183 rdev_for_each(rdev, mddev) {
5184 if (test_bit(Faulty, &rdev->flags))
5185 continue;
5186 sync_blockdev(rdev->bdev);
5187 invalidate_bdev(rdev->bdev);

		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to do */;
5195 } else if (rdev->data_offset < rdev->sb_start) {
5196 if (mddev->dev_sectors &&
5197 rdev->data_offset + mddev->dev_sectors
5198 > rdev->sb_start) {
5199 pr_warn("md: %s: data overlaps metadata\n",
5200 mdname(mddev));
5201 return -EINVAL;
5202 }
5203 } else {
5204 if (rdev->sb_start + rdev->sb_size/512
5205 > rdev->data_offset) {
5206 pr_warn("md: %s: metadata overlaps data\n",
5207 mdname(mddev));
5208 return -EINVAL;
5209 }
5210 }
5211 sysfs_notify_dirent_safe(rdev->sysfs_state);
5212 }
5213
5214 if (mddev->bio_set == NULL) {
5215 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5216 if (!mddev->bio_set)
5217 return -ENOMEM;
5218 }
5219
5220 spin_lock(&pers_lock);
5221 pers = find_pers(mddev->level, mddev->clevel);
5222 if (!pers || !try_module_get(pers->owner)) {
5223 spin_unlock(&pers_lock);
5224 if (mddev->level != LEVEL_NONE)
5225 pr_warn("md: personality for level %d is not loaded!\n",
5226 mddev->level);
5227 else
5228 pr_warn("md: personality for level %s is not loaded!\n",
5229 mddev->clevel);
5230 return -EINVAL;
5231 }
5232 spin_unlock(&pers_lock);
5233 if (mddev->level != pers->level) {
5234 mddev->level = pers->level;
5235 mddev->new_level = pers->level;
5236 }
5237 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5238
5239 if (mddev->reshape_position != MaxSector &&
5240 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5242 module_put(pers->owner);
5243 return -EINVAL;
5244 }
5245
5246 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
5250 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5251 struct md_rdev *rdev2;
5252 int warned = 0;
5253
5254 rdev_for_each(rdev, mddev)
5255 rdev_for_each(rdev2, mddev) {
5256 if (rdev < rdev2 &&
5257 rdev->bdev->bd_contains ==
5258 rdev2->bdev->bd_contains) {
5259 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5260 mdname(mddev),
5261 bdevname(rdev->bdev,b),
5262 bdevname(rdev2->bdev,b2));
5263 warned = 1;
5264 }
5265 }
5266
5267 if (warned)
5268 pr_warn("True protection against single-disk failure might be compromised.\n");
5269 }
5270
5271 mddev->recovery = 0;
5272
5273 mddev->resync_max_sectors = mddev->dev_sectors;
5274
5275 mddev->ok_start_degraded = start_dirty_degraded;
5276
5277 if (start_readonly && mddev->ro == 0)
5278 mddev->ro = 2;
5279
	/*
	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
	 * up mddev->thread. It is important to initialize critical
	 * resources for mddev->thread BEFORE calling pers->run().
	 */
5285 err = pers->run(mddev);
5286 if (err)
5287 pr_warn("md: pers->run() failed ...\n");
5288 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5289 WARN_ONCE(!mddev->external_size,
5290 "%s: default size too small, but 'external_size' not in effect?\n",
5291 __func__);
5292 pr_warn("md: invalid array_size %llu > default size %llu\n",
5293 (unsigned long long)mddev->array_sectors / 2,
5294 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5295 err = -EINVAL;
5296 }
5297 if (err == 0 && pers->sync_request &&
5298 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5299 struct bitmap *bitmap;
5300
5301 bitmap = bitmap_create(mddev, -1);
5302 if (IS_ERR(bitmap)) {
5303 err = PTR_ERR(bitmap);
5304 pr_warn("%s: failed to create bitmap (%d)\n",
5305 mdname(mddev), err);
5306 } else
5307 mddev->bitmap = bitmap;
5308
5309 }
5310 if (err) {
5311 mddev_detach(mddev);
5312 if (mddev->private)
5313 pers->free(mddev, mddev->private);
5314 mddev->private = NULL;
5315 module_put(pers->owner);
5316 bitmap_destroy(mddev);
5317 return err;
5318 }
5319 if (mddev->queue) {
5320 bool nonrot = true;
5321
5322 rdev_for_each(rdev, mddev) {
5323 if (rdev->raid_disk >= 0 &&
5324 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5325 nonrot = false;
5326 break;
5327 }
5328 }
5329 if (mddev->degraded)
5330 nonrot = false;
5331 if (nonrot)
5332 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5333 else
5334 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5335 mddev->queue->backing_dev_info->congested_data = mddev;
5336 mddev->queue->backing_dev_info->congested_fn = md_congested;
5337 }
5338 if (pers->sync_request) {
5339 if (mddev->kobj.sd &&
5340 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5341 pr_warn("md: cannot register extra attributes for %s\n",
5342 mdname(mddev));
5343 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
5345 mddev->ro = 0;
5346
5347 atomic_set(&mddev->writes_pending,0);
5348 atomic_set(&mddev->max_corr_read_errors,
5349 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5350 mddev->safemode = 0;
5351 if (mddev_is_clustered(mddev))
5352 mddev->safemode_delay = 0;
5353 else
5354 mddev->safemode_delay = (200 * HZ)/1000 +1;
5355 mddev->in_sync = 1;
5356 smp_wmb();
5357 spin_lock(&mddev->lock);
5358 mddev->pers = pers;
5359 spin_unlock(&mddev->lock);
5360 rdev_for_each(rdev, mddev)
5361 if (rdev->raid_disk >= 0)
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
5364
	if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5370 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5371
5372 if (mddev->sb_flags)
5373 md_update_sb(mddev, 0);
5374
5375 md_new_event(mddev);
5376 sysfs_notify_dirent_safe(mddev->sysfs_state);
5377 sysfs_notify_dirent_safe(mddev->sysfs_action);
5378 sysfs_notify(&mddev->kobj, NULL, "degraded");
5379 return 0;
5380}
5381EXPORT_SYMBOL_GPL(md_run);
5382
5383static int do_md_run(struct mddev *mddev)
5384{
5385 int err;
5386
5387 err = md_run(mddev);
5388 if (err)
5389 goto out;
5390 err = bitmap_load(mddev);
5391 if (err) {
5392 bitmap_destroy(mddev);
5393 goto out;
5394 }
5395
5396 if (mddev_is_clustered(mddev))
5397 md_allow_write(mddev);
5398
5399 md_wakeup_thread(mddev->thread);
5400 md_wakeup_thread(mddev->sync_thread);
5401
5402 set_capacity(mddev->gendisk, mddev->array_sectors);
5403 revalidate_disk(mddev->gendisk);
5404 mddev->changed = 1;
5405 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5406out:
5407 return err;
5408}
5409
5410static int restart_array(struct mddev *mddev)
5411{
5412 struct gendisk *disk = mddev->gendisk;
5413
	/* Complain if it has no devices */
5415 if (list_empty(&mddev->disks))
5416 return -ENXIO;
5417 if (!mddev->pers)
5418 return -EINVAL;
5419 if (!mddev->ro)
5420 return -EBUSY;
5421 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5422 struct md_rdev *rdev;
5423 bool has_journal = false;
5424
5425 rcu_read_lock();
5426 rdev_for_each_rcu(rdev, mddev) {
5427 if (test_bit(Journal, &rdev->flags) &&
5428 !test_bit(Faulty, &rdev->flags)) {
5429 has_journal = true;
5430 break;
5431 }
5432 }
5433 rcu_read_unlock();
5434
		/* Don't restart rw with journal missing/faulty */
		if (!has_journal)
5437 return -EINVAL;
5438 }
5439
5440 mddev->safemode = 0;
5441 mddev->ro = 0;
5442 set_disk_ro(disk, 0);
5443 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5444
5445 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5446 md_wakeup_thread(mddev->thread);
5447 md_wakeup_thread(mddev->sync_thread);
5448 sysfs_notify_dirent_safe(mddev->sysfs_state);
5449 return 0;
5450}
5451
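/* Reset all configuration fields to their defaults so the mddev can be
 * reused for a new array after a full stop.
 */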
5452static void md_clean(struct mddev *mddev)
5453{
5454 mddev->array_sectors = 0;
5455 mddev->external_size = 0;
5456 mddev->dev_sectors = 0;
5457 mddev->raid_disks = 0;
5458 mddev->recovery_cp = 0;
5459 mddev->resync_min = 0;
5460 mddev->resync_max = MaxSector;
5461 mddev->reshape_position = MaxSector;
5462 mddev->external = 0;
5463 mddev->persistent = 0;
5464 mddev->level = LEVEL_NONE;
5465 mddev->clevel[0] = 0;
5466 mddev->flags = 0;
5467 mddev->sb_flags = 0;
5468 mddev->ro = 0;
5469 mddev->metadata_type[0] = 0;
5470 mddev->chunk_sectors = 0;
5471 mddev->ctime = mddev->utime = 0;
5472 mddev->layout = 0;
5473 mddev->max_disks = 0;
5474 mddev->events = 0;
5475 mddev->can_decrease_events = 0;
5476 mddev->delta_disks = 0;
5477 mddev->reshape_backwards = 0;
5478 mddev->new_level = LEVEL_NONE;
5479 mddev->new_layout = 0;
5480 mddev->new_chunk_sectors = 0;
5481 mddev->curr_resync = 0;
5482 atomic64_set(&mddev->resync_mismatches, 0);
5483 mddev->suspend_lo = mddev->suspend_hi = 0;
5484 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5485 mddev->recovery = 0;
5486 mddev->in_sync = 0;
5487 mddev->changed = 0;
5488 mddev->degraded = 0;
5489 mddev->safemode = 0;
5490 mddev->private = NULL;
5491 mddev->cluster_info = NULL;
5492 mddev->bitmap_info.offset = 0;
5493 mddev->bitmap_info.default_offset = 0;
5494 mddev->bitmap_info.default_space = 0;
5495 mddev->bitmap_info.chunksize = 0;
5496 mddev->bitmap_info.daemon_sleep = 0;
5497 mddev->bitmap_info.max_write_behind = 0;
5498 mddev->bitmap_info.nodes = 0;
5499}
5500
5501static void __md_stop_writes(struct mddev *mddev)
5502{
5503 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5504 flush_workqueue(md_misc_wq);
5505 if (mddev->sync_thread) {
5506 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5507 md_reap_sync_thread(mddev);
5508 }
5509
5510 del_timer_sync(&mddev->safemode_timer);
5511
5512 if (mddev->pers && mddev->pers->quiesce) {
5513 mddev->pers->quiesce(mddev, 1);
5514 mddev->pers->quiesce(mddev, 0);
5515 }
5516 bitmap_flush(mddev);
5517
5518 if (mddev->ro == 0 &&
5519 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5520 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
5522 if (!mddev_is_clustered(mddev))
5523 mddev->in_sync = 1;
5524 md_update_sb(mddev, 1);
5525 }
5526}
5527
5528void md_stop_writes(struct mddev *mddev)
5529{
5530 mddev_lock_nointr(mddev);
5531 __md_stop_writes(mddev);
5532 mddev_unlock(mddev);
5533}
5534EXPORT_SYMBOL_GPL(md_stop_writes);
5535
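/* Detach the array from incoming I/O: wait out behind-writes, quiesce the
 * personality and stop the managing thread.
 */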
5536static void mddev_detach(struct mddev *mddev)
5537{
5538 struct bitmap *bitmap = mddev->bitmap;
5539
5540 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5541 pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
5542 mdname(mddev));
		/* need to kick something here to make sure I/O goes? */
5544 wait_event(bitmap->behind_wait,
5545 atomic_read(&bitmap->behind_writes) == 0);
5546 }
5547 if (mddev->pers && mddev->pers->quiesce) {
5548 mddev->pers->quiesce(mddev, 1);
5549 mddev->pers->quiesce(mddev, 0);
5550 }
5551 md_unregister_thread(&mddev->thread);
5552 if (mddev->queue)
5553 blk_sync_queue(mddev->queue);
5554}
5555
5556static void __md_stop(struct mddev *mddev)
5557{
5558 struct md_personality *pers = mddev->pers;
5559 mddev_detach(mddev);
5560
5561 flush_workqueue(md_misc_wq);
5562 spin_lock(&mddev->lock);
5563 mddev->pers = NULL;
5564 spin_unlock(&mddev->lock);
5565 pers->free(mddev, mddev->private);
5566 mddev->private = NULL;
5567 if (pers->sync_request && mddev->to_remove == NULL)
5568 mddev->to_remove = &md_redundancy_group;
5569 module_put(pers->owner);
5570 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5571}
5572
5573void md_stop(struct mddev *mddev)
5574{
	/* stop the array and free an attached data structures.
	 * This is called from dm-raid
	 */
5578 __md_stop(mddev);
5579 bitmap_destroy(mddev);
5580 if (mddev->bio_set)
5581 bioset_free(mddev->bio_set);
5582}
5583
5584EXPORT_SYMBOL_GPL(md_stop);
5585
5586static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5587{
5588 int err = 0;
5589 int did_freeze = 0;
5590
5591 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5592 did_freeze = 1;
5593 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5594 md_wakeup_thread(mddev->thread);
5595 }
5596 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5597 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5598 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5601 wake_up_process(mddev->sync_thread->tsk);
5602
5603 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5604 return -EBUSY;
5605 mddev_unlock(mddev);
5606 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5607 &mddev->recovery));
5608 wait_event(mddev->sb_wait,
5609 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
5610 mddev_lock_nointr(mddev);
5611
5612 mutex_lock(&mddev->open_mutex);
5613 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5614 mddev->sync_thread ||
5615 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5616 pr_warn("md: %s still in use.\n",mdname(mddev));
5617 if (did_freeze) {
5618 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5619 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5620 md_wakeup_thread(mddev->thread);
5621 }
5622 err = -EBUSY;
5623 goto out;
5624 }
5625 if (mddev->pers) {
5626 __md_stop_writes(mddev);
5627
5628 err = -ENXIO;
5629 if (mddev->ro==1)
5630 goto out;
5631 mddev->ro = 1;
5632 set_disk_ro(mddev->gendisk, 1);
5633 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5634 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5635 md_wakeup_thread(mddev->thread);
5636 sysfs_notify_dirent_safe(mddev->sysfs_state);
5637 err = 0;
5638 }
5639out:
5640 mutex_unlock(&mddev->open_mutex);
5641 return err;
5642}
5643
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
5648static int do_md_stop(struct mddev *mddev, int mode,
5649 struct block_device *bdev)
5650{
5651 struct gendisk *disk = mddev->gendisk;
5652 struct md_rdev *rdev;
5653 int did_freeze = 0;
5654
5655 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5656 did_freeze = 1;
5657 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5658 md_wakeup_thread(mddev->thread);
5659 }
5660 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5661 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5662 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5665 wake_up_process(mddev->sync_thread->tsk);
5666
5667 mddev_unlock(mddev);
5668 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5669 !test_bit(MD_RECOVERY_RUNNING,
5670 &mddev->recovery)));
5671 mddev_lock_nointr(mddev);
5672
5673 mutex_lock(&mddev->open_mutex);
5674 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5675 mddev->sysfs_active ||
5676 mddev->sync_thread ||
5677 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5678 pr_warn("md: %s still in use.\n",mdname(mddev));
5679 mutex_unlock(&mddev->open_mutex);
5680 if (did_freeze) {
5681 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5682 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5683 md_wakeup_thread(mddev->thread);
5684 }
5685 return -EBUSY;
5686 }
5687 if (mddev->pers) {
5688 if (mddev->ro)
5689 set_disk_ro(disk, 0);
5690
5691 __md_stop_writes(mddev);
5692 __md_stop(mddev);
5693 mddev->queue->backing_dev_info->congested_fn = NULL;

		/* tell userspace to handle 'inactive' */
5696 sysfs_notify_dirent_safe(mddev->sysfs_state);
5697
5698 rdev_for_each(rdev, mddev)
5699 if (rdev->raid_disk >= 0)
5700 sysfs_unlink_rdev(mddev, rdev);
5701
5702 set_capacity(disk, 0);
5703 mutex_unlock(&mddev->open_mutex);
5704 mddev->changed = 1;
5705 revalidate_disk(disk);
5706
5707 if (mddev->ro)
5708 mddev->ro = 0;
5709 } else
5710 mutex_unlock(&mddev->open_mutex);
5711
	/*
	 * Free resources if final stop
	 */
5714 if (mode == 0) {
5715 pr_info("md: %s stopped.\n", mdname(mddev));
5716
5717 bitmap_destroy(mddev);
5718 if (mddev->bitmap_info.file) {
5719 struct file *f = mddev->bitmap_info.file;
5720 spin_lock(&mddev->lock);
5721 mddev->bitmap_info.file = NULL;
5722 spin_unlock(&mddev->lock);
5723 fput(f);
5724 }
5725 mddev->bitmap_info.offset = 0;
5726
5727 export_array(mddev);
5728
5729 md_clean(mddev);
5730 if (mddev->hold_active == UNTIL_STOP)
5731 mddev->hold_active = 0;
5732 }
5733 md_new_event(mddev);
5734 sysfs_notify_dirent_safe(mddev->sysfs_state);
5735 return 0;
5736}
5737
5738#ifndef MODULE
5739static void autorun_array(struct mddev *mddev)
5740{
5741 struct md_rdev *rdev;
5742 int err;
5743
5744 if (list_empty(&mddev->disks))
5745 return;
5746
5747 pr_info("md: running: ");
5748
5749 rdev_for_each(rdev, mddev) {
5750 char b[BDEVNAME_SIZE];
5751 pr_cont("<%s>", bdevname(rdev->bdev,b));
5752 }
5753 pr_cont("\n");
5754
5755 err = do_md_run(mddev);
5756 if (err) {
5757 pr_warn("md: do_md_run() returned %d\n", err);
5758 do_md_stop(mddev, 0, NULL);
5759 }
5760}
5761
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks list)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
5774static void autorun_devices(int part)
5775{
5776 struct md_rdev *rdev0, *rdev, *tmp;
5777 struct mddev *mddev;
5778 char b[BDEVNAME_SIZE];
5779
5780 pr_info("md: autorun ...\n");
5781 while (!list_empty(&pending_raid_disks)) {
5782 int unit;
5783 dev_t dev;
5784 LIST_HEAD(candidates);
5785 rdev0 = list_entry(pending_raid_disks.next,
5786 struct md_rdev, same_set);
5787
5788 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
5789 INIT_LIST_HEAD(&candidates);
5790 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5791 if (super_90_load(rdev, rdev0, 0) >= 0) {
5792 pr_debug("md: adding %s ...\n",
5793 bdevname(rdev->bdev,b));
5794 list_move(&rdev->same_set, &candidates);
5795 }

		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
5801 if (part) {
5802 dev = MKDEV(mdp_major,
5803 rdev0->preferred_minor << MdpMinorShift);
5804 unit = MINOR(dev) >> MdpMinorShift;
5805 } else {
5806 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5807 unit = MINOR(dev);
5808 }
5809 if (rdev0->preferred_minor != unit) {
5810 pr_warn("md: unit number in %s is bad: %d\n",
5811 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5812 break;
5813 }
5814
5815 md_probe(dev, NULL, NULL);
5816 mddev = mddev_find(dev);
5817 if (!mddev || !mddev->gendisk) {
5818 if (mddev)
5819 mddev_put(mddev);
5820 break;
5821 }
5822 if (mddev_lock(mddev))
5823 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
5824 else if (mddev->raid_disks || mddev->major_version
5825 || !list_empty(&mddev->disks)) {
5826 pr_warn("md: %s already running, cannot run %s\n",
5827 mdname(mddev), bdevname(rdev0->bdev,b));
5828 mddev_unlock(mddev);
5829 } else {
5830 pr_debug("md: created %s\n", mdname(mddev));
5831 mddev->persistent = 1;
5832 rdev_for_each_list(rdev, tmp, &candidates) {
5833 list_del_init(&rdev->same_set);
5834 if (bind_rdev_to_array(rdev, mddev))
5835 export_rdev(rdev);
5836 }
5837 autorun_array(mddev);
5838 mddev_unlock(mddev);
5839 }

		/* on success, candidates will be empty, on error
		 * it won't...
		 */
5843 rdev_for_each_list(rdev, tmp, &candidates) {
5844 list_del_init(&rdev->same_set);
5845 export_rdev(rdev);
5846 }
5847 mddev_put(mddev);
5848 }
5849 pr_info("md: ... autorun DONE.\n");
5850}
5851#endif
5852
5853static int get_version(void __user *arg)
5854{
5855 mdu_version_t ver;
5856
5857 ver.major = MD_MAJOR_VERSION;
5858 ver.minor = MD_MINOR_VERSION;
5859 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5860
5861 if (copy_to_user(arg, &ver, sizeof(ver)))
5862 return -EFAULT;
5863
5864 return 0;
5865}
5866
5867static int get_array_info(struct mddev *mddev, void __user *arg)
5868{
5869 mdu_array_info_t info;
5870 int nr,working,insync,failed,spare;
5871 struct md_rdev *rdev;
5872
5873 nr = working = insync = failed = spare = 0;
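	/* Snapshot the device states under RCU; concurrent hot add/remove
	 * may race, but the counts only feed the GET_ARRAY_INFO ioctl.
	 */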
5874 rcu_read_lock();
5875 rdev_for_each_rcu(rdev, mddev) {
5876 nr++;
5877 if (test_bit(Faulty, &rdev->flags))
5878 failed++;
5879 else {
5880 working++;
5881 if (test_bit(In_sync, &rdev->flags))
5882 insync++;
5883 else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
5885 ;
5886 else
5887 spare++;
5888 }
5889 }
5890 rcu_read_unlock();
5891
5892 info.major_version = mddev->major_version;
5893 info.minor_version = mddev->minor_version;
5894 info.patch_version = MD_PATCHLEVEL_VERSION;
5895 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5896 info.level = mddev->level;
5897 info.size = mddev->dev_sectors / 2;
5898 if (info.size != mddev->dev_sectors / 2)
5899 info.size = -1;
5900 info.nr_disks = nr;
5901 info.raid_disks = mddev->raid_disks;
5902 info.md_minor = mddev->md_minor;
5903 info.not_persistent= !mddev->persistent;
5904
5905 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5906 info.state = 0;
5907 if (mddev->in_sync)
5908 info.state = (1<<MD_SB_CLEAN);
5909 if (mddev->bitmap && mddev->bitmap_info.offset)
5910 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5911 if (mddev_is_clustered(mddev))
5912 info.state |= (1<<MD_SB_CLUSTERED);
5913 info.active_disks = insync;
5914 info.working_disks = working;
5915 info.failed_disks = failed;
5916 info.spare_disks = spare;
5917
5918 info.layout = mddev->layout;
5919 info.chunk_size = mddev->chunk_sectors << 9;
5920
5921 if (copy_to_user(arg, &info, sizeof(info)))
5922 return -EFAULT;
5923
5924 return 0;
5925}
5926
5927static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5928{
5929 mdu_bitmap_file_t *file = NULL;
5930 char *ptr;
5931 int err;
5932
5933 file = kzalloc(sizeof(*file), GFP_NOIO);
5934 if (!file)
5935 return -ENOMEM;
5936
5937 err = 0;
5938 spin_lock(&mddev->lock);
5939
5940 if (mddev->bitmap_info.file) {
5941 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5942 sizeof(file->pathname));
5943 if (IS_ERR(ptr))
5944 err = PTR_ERR(ptr);
5945 else
5946 memmove(file->pathname, ptr,
5947 sizeof(file->pathname)-(ptr-file->pathname));
5948 }
5949 spin_unlock(&mddev->lock);
5950
5951 if (err == 0 &&
5952 copy_to_user(arg, file, sizeof(*file)))
5953 err = -EFAULT;
5954
5955 kfree(file);
5956 return err;
5957}
5958
5959static int get_disk_info(struct mddev *mddev, void __user * arg)
5960{
5961 mdu_disk_info_t info;
5962 struct md_rdev *rdev;
5963
5964 if (copy_from_user(&info, arg, sizeof(info)))
5965 return -EFAULT;
5966
5967 rcu_read_lock();
5968 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5969 if (rdev) {
5970 info.major = MAJOR(rdev->bdev->bd_dev);
5971 info.minor = MINOR(rdev->bdev->bd_dev);
5972 info.raid_disk = rdev->raid_disk;
5973 info.state = 0;
5974 if (test_bit(Faulty, &rdev->flags))
5975 info.state |= (1<<MD_DISK_FAULTY);
5976 else if (test_bit(In_sync, &rdev->flags)) {
5977 info.state |= (1<<MD_DISK_ACTIVE);
5978 info.state |= (1<<MD_DISK_SYNC);
5979 }
5980 if (test_bit(Journal, &rdev->flags))
5981 info.state |= (1<<MD_DISK_JOURNAL);
5982 if (test_bit(WriteMostly, &rdev->flags))
5983 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5984 if (test_bit(FailFast, &rdev->flags))
5985 info.state |= (1<<MD_DISK_FAILFAST);
5986 } else {
5987 info.major = info.minor = 0;
5988 info.raid_disk = -1;
5989 info.state = (1<<MD_DISK_REMOVED);
5990 }
5991 rcu_read_unlock();
5992
5993 if (copy_to_user(arg, &info, sizeof(info)))
5994 return -EFAULT;
5995
5996 return 0;
5997}
5998
5999static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6000{
6001 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6002 struct md_rdev *rdev;
6003 dev_t dev = MKDEV(info->major,info->minor);
6004
6005 if (mddev_is_clustered(mddev) &&
6006 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6007 pr_warn("%s: Cannot add to clustered mddev.\n",
6008 mdname(mddev));
6009 return -EINVAL;
6010 }
6011
6012 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6013 return -EOVERFLOW;
6014
6015 if (!mddev->raid_disks) {
6016 int err;
6017
6018 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6019 if (IS_ERR(rdev)) {
6020 pr_warn("md: md_import_device returned %ld\n",
6021 PTR_ERR(rdev));
6022 return PTR_ERR(rdev);
6023 }
6024 if (!list_empty(&mddev->disks)) {
6025 struct md_rdev *rdev0
6026 = list_entry(mddev->disks.next,
6027 struct md_rdev, same_set);
6028 err = super_types[mddev->major_version]
6029 .load_super(rdev, rdev0, mddev->minor_version);
6030 if (err < 0) {
6031 pr_warn("md: %s has different UUID to %s\n",
6032 bdevname(rdev->bdev,b),
6033 bdevname(rdev0->bdev,b2));
6034 export_rdev(rdev);
6035 return -EINVAL;
6036 }
6037 }
6038 err = bind_rdev_to_array(rdev, mddev);
6039 if (err)
6040 export_rdev(rdev);
6041 return err;
6042 }
6043
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
6049 if (mddev->pers) {
6050 int err;
6051 if (!mddev->pers->hot_add_disk) {
6052 pr_warn("%s: personality does not support diskops!\n",
6053 mdname(mddev));
6054 return -EINVAL;
6055 }
6056 if (mddev->persistent)
6057 rdev = md_import_device(dev, mddev->major_version,
6058 mddev->minor_version);
6059 else
6060 rdev = md_import_device(dev, -1, -1);
6061 if (IS_ERR(rdev)) {
6062 pr_warn("md: md_import_device returned %ld\n",
6063 PTR_ERR(rdev));
6064 return PTR_ERR(rdev);
6065 }
6066
6067 if (!mddev->persistent) {
6068 if (info->state & (1<<MD_DISK_SYNC) &&
6069 info->raid_disk < mddev->raid_disks) {
6070 rdev->raid_disk = info->raid_disk;
6071 set_bit(In_sync, &rdev->flags);
6072 clear_bit(Bitmap_sync, &rdev->flags);
6073 } else
6074 rdev->raid_disk = -1;
6075 rdev->saved_raid_disk = rdev->raid_disk;
6076 } else
6077 super_types[mddev->major_version].
6078 validate_super(mddev, rdev);
6079 if ((info->state & (1<<MD_DISK_SYNC)) &&
6080 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
6084 export_rdev(rdev);
6085 return -EINVAL;
6086 }
6087
		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6089 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6090 set_bit(WriteMostly, &rdev->flags);
6091 else
6092 clear_bit(WriteMostly, &rdev->flags);
6093 if (info->state & (1<<MD_DISK_FAILFAST))
6094 set_bit(FailFast, &rdev->flags);
6095 else
6096 clear_bit(FailFast, &rdev->flags);
6097
6098 if (info->state & (1<<MD_DISK_JOURNAL)) {
6099 struct md_rdev *rdev2;
6100 bool has_journal = false;

			/* make sure no existing journal disk */
6103 rdev_for_each(rdev2, mddev) {
6104 if (test_bit(Journal, &rdev2->flags)) {
6105 has_journal = true;
6106 break;
6107 }
6108 }
6109 if (has_journal) {
6110 export_rdev(rdev);
6111 return -EBUSY;
6112 }
6113 set_bit(Journal, &rdev->flags);
6114 }
6115
		/*
		 * check whether the device shows up in other nodes
		 */
6118 if (mddev_is_clustered(mddev)) {
6119 if (info->state & (1 << MD_DISK_CANDIDATE))
6120 set_bit(Candidate, &rdev->flags);
6121 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6123 err = md_cluster_ops->add_new_disk(mddev, rdev);
6124 if (err) {
6125 export_rdev(rdev);
6126 return err;
6127 }
6128 }
6129 }
6130
6131 rdev->raid_disk = -1;
6132 err = bind_rdev_to_array(rdev, mddev);
6133
6134 if (err)
6135 export_rdev(rdev);
6136
6137 if (mddev_is_clustered(mddev)) {
6138 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6139 if (!err) {
6140 err = md_cluster_ops->new_disk_ack(mddev,
6141 err == 0);
6142 if (err)
6143 md_kick_rdev_from_array(rdev);
6144 }
6145 } else {
6146 if (err)
6147 md_cluster_ops->add_new_disk_cancel(mddev);
6148 else
6149 err = add_bound_rdev(rdev);
6150 }
6151
6152 } else if (!err)
6153 err = add_bound_rdev(rdev);
6154
6155 return err;
6156 }
6157
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6161 if (mddev->major_version != 0) {
6162 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6163 return -EINVAL;
6164 }
6165
6166 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6167 int err;
6168 rdev = md_import_device(dev, -1, 0);
6169 if (IS_ERR(rdev)) {
6170 pr_warn("md: error, md_import_device() returned %ld\n",
6171 PTR_ERR(rdev));
6172 return PTR_ERR(rdev);
6173 }
6174 rdev->desc_nr = info->number;
6175 if (info->raid_disk < mddev->raid_disks)
6176 rdev->raid_disk = info->raid_disk;
6177 else
6178 rdev->raid_disk = -1;
6179
6180 if (rdev->raid_disk < mddev->raid_disks)
6181 if (info->state & (1<<MD_DISK_SYNC))
6182 set_bit(In_sync, &rdev->flags);
6183
6184 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6185 set_bit(WriteMostly, &rdev->flags);
6186 if (info->state & (1<<MD_DISK_FAILFAST))
6187 set_bit(FailFast, &rdev->flags);
6188
6189 if (!mddev->persistent) {
6190 pr_debug("md: nonpersistent superblock ...\n");
6191 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6192 } else
6193 rdev->sb_start = calc_dev_sboffset(rdev);
6194 rdev->sectors = rdev->sb_start;
6195
6196 err = bind_rdev_to_array(rdev, mddev);
6197 if (err) {
6198 export_rdev(rdev);
6199 return err;
6200 }
6201 }
6202
6203 return 0;
6204}
6205
6206static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6207{
6208 char b[BDEVNAME_SIZE];
6209 struct md_rdev *rdev;
6210
6211 rdev = find_rdev(mddev, dev);
6212 if (!rdev)
6213 return -ENXIO;
6214
6215 if (rdev->raid_disk < 0)
6216 goto kick_rdev;
6217
6218 clear_bit(Blocked, &rdev->flags);
6219 remove_and_add_spares(mddev, rdev);
6220
6221 if (rdev->raid_disk >= 0)
6222 goto busy;
6223
6224kick_rdev:
6225 if (mddev_is_clustered(mddev))
6226 md_cluster_ops->remove_disk(mddev, rdev);
6227
6228 md_kick_rdev_from_array(rdev);
6229 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6230 if (mddev->thread)
6231 md_wakeup_thread(mddev->thread);
6232 else
6233 md_update_sb(mddev, 1);
6234 md_new_event(mddev);
6235
6236 return 0;
6237busy:
6238 pr_debug("md: cannot remove active disk %s from %s ...\n",
6239 bdevname(rdev->bdev,b), mdname(mddev));
6240 return -EBUSY;
6241}
6242
6243static int hot_add_disk(struct mddev *mddev, dev_t dev)
6244{
6245 char b[BDEVNAME_SIZE];
6246 int err;
6247 struct md_rdev *rdev;
6248
6249 if (!mddev->pers)
6250 return -ENODEV;
6251
6252 if (mddev->major_version != 0) {
6253 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6254 mdname(mddev));
6255 return -EINVAL;
6256 }
6257 if (!mddev->pers->hot_add_disk) {
6258 pr_warn("%s: personality does not support diskops!\n",
6259 mdname(mddev));
6260 return -EINVAL;
6261 }
6262
6263 rdev = md_import_device(dev, -1, 0);
6264 if (IS_ERR(rdev)) {
6265 pr_warn("md: error, md_import_device() returned %ld\n",
6266 PTR_ERR(rdev));
6267 return -EINVAL;
6268 }
6269
6270 if (mddev->persistent)
6271 rdev->sb_start = calc_dev_sboffset(rdev);
6272 else
6273 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6274
6275 rdev->sectors = rdev->sb_start;
6276
6277 if (test_bit(Faulty, &rdev->flags)) {
6278 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6279 bdevname(rdev->bdev,b), mdname(mddev));
6280 err = -EINVAL;
6281 goto abort_export;
6282 }
6283
6284 clear_bit(In_sync, &rdev->flags);
6285 rdev->desc_nr = -1;
6286 rdev->saved_raid_disk = -1;
6287 err = bind_rdev_to_array(rdev, mddev);
6288 if (err)
6289 goto abort_export;
6290
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6296 rdev->raid_disk = -1;
6297
6298 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6299 if (!mddev->thread)
6300 md_update_sb(mddev, 1);
6301
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
6305 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6306 md_wakeup_thread(mddev->thread);
6307 md_new_event(mddev);
6308 return 0;
6309
6310abort_export:
6311 export_rdev(rdev);
6312 return err;
6313}
6314
6315static int set_bitmap_file(struct mddev *mddev, int fd)
6316{
6317 int err = 0;
6318
6319 if (mddev->pers) {
6320 if (!mddev->pers->quiesce || !mddev->thread)
6321 return -EBUSY;
6322 if (mddev->recovery || mddev->sync_thread)
6323 return -EBUSY;
		/* we should be able to change the bitmap.. */
6325 }
6326
6327 if (fd >= 0) {
6328 struct inode *inode;
6329 struct file *f;
6330
6331 if (mddev->bitmap || mddev->bitmap_info.file)
6332 return -EEXIST;
6333 f = fget(fd);
6334
6335 if (f == NULL) {
6336 pr_warn("%s: error: failed to get bitmap file\n",
6337 mdname(mddev));
6338 return -EBADF;
6339 }
6340
6341 inode = f->f_mapping->host;
6342 if (!S_ISREG(inode->i_mode)) {
6343 pr_warn("%s: error: bitmap file must be a regular file\n",
6344 mdname(mddev));
6345 err = -EBADF;
6346 } else if (!(f->f_mode & FMODE_WRITE)) {
6347 pr_warn("%s: error: bitmap file must open for write\n",
6348 mdname(mddev));
6349 err = -EBADF;
6350 } else if (atomic_read(&inode->i_writecount) != 1) {
6351 pr_warn("%s: error: bitmap file is already in use\n",
6352 mdname(mddev));
6353 err = -EBUSY;
6354 }
6355 if (err) {
6356 fput(f);
6357 return err;
6358 }
6359 mddev->bitmap_info.file = f;
6360 mddev->bitmap_info.offset = 0;
6361 } else if (mddev->bitmap == NULL)
6362 return -ENOENT;
6363 err = 0;
6364 if (mddev->pers) {
6365 mddev->pers->quiesce(mddev, 1);
6366 if (fd >= 0) {
6367 struct bitmap *bitmap;
6368
6369 bitmap = bitmap_create(mddev, -1);
6370 if (!IS_ERR(bitmap)) {
6371 mddev->bitmap = bitmap;
6372 err = bitmap_load(mddev);
6373 } else
6374 err = PTR_ERR(bitmap);
6375 }
6376 if (fd < 0 || err) {
6377 bitmap_destroy(mddev);
6378 fd = -1;
6379 }
6380 mddev->pers->quiesce(mddev, 0);
6381 }
6382 if (fd < 0) {
6383 struct file *f = mddev->bitmap_info.file;
6384 if (f) {
6385 spin_lock(&mddev->lock);
6386 mddev->bitmap_info.file = NULL;
6387 spin_unlock(&mddev->lock);
6388 fput(f);
6389 }
6390 }
6391
6392 return err;
6393}
6394
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the
 *  devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
6408static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6409{
6410
6411 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
6413 if (info->major_version < 0 ||
6414 info->major_version >= ARRAY_SIZE(super_types) ||
6415 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
6417 pr_warn("md: superblock version %d not known\n",
6418 info->major_version);
6419 return -EINVAL;
6420 }
6421 mddev->major_version = info->major_version;
6422 mddev->minor_version = info->minor_version;
6423 mddev->patch_version = info->patch_version;
6424 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
6428 mddev->ctime = ktime_get_real_seconds();
6429 return 0;
6430 }
6431 mddev->major_version = MD_MAJOR_VERSION;
6432 mddev->minor_version = MD_MINOR_VERSION;
6433 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6434 mddev->ctime = ktime_get_real_seconds();
6435
6436 mddev->level = info->level;
6437 mddev->clevel[0] = 0;
6438 mddev->dev_sectors = 2 * (sector_t)info->size;
6439 mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
6443 if (info->state & (1<<MD_SB_CLEAN))
6444 mddev->recovery_cp = MaxSector;
6445 else
6446 mddev->recovery_cp = 0;
6447 mddev->persistent = ! info->not_persistent;
6448 mddev->external = 0;
6449
6450 mddev->layout = info->layout;
6451 mddev->chunk_sectors = info->chunk_size >> 9;
6452
6453 if (mddev->persistent) {
6454 mddev->max_disks = MD_SB_DISKS;
6455 mddev->flags = 0;
6456 mddev->sb_flags = 0;
6457 }
6458 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6459
6460 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6461 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6462 mddev->bitmap_info.offset = 0;
6463
6464 mddev->reshape_position = MaxSector;
6465
	/*
	 * Generate a 128 bit UUID
	 */
6469 get_random_bytes(mddev->uuid, 16);
6470
6471 mddev->new_level = mddev->level;
6472 mddev->new_chunk_sectors = mddev->chunk_sectors;
6473 mddev->new_layout = mddev->layout;
6474 mddev->delta_disks = 0;
6475 mddev->reshape_backwards = 0;
6476
6477 return 0;
6478}
6479
6480void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6481{
6482 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6483
6484 if (mddev->external_size)
6485 return;
6486
6487 mddev->array_sectors = array_sectors;
6488}
6489EXPORT_SYMBOL(md_set_array_sectors);
6490
6491static int update_size(struct mddev *mddev, sector_t num_sectors)
6492{
6493 struct md_rdev *rdev;
6494 int rv;
6495 int fit = (num_sectors == 0);
6496
	/* cluster raid doesn't support update size */
	if (mddev_is_clustered(mddev))
		return -EINVAL;
6500
6501 if (mddev->pers->resize == NULL)
6502 return -EINVAL;
6503
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
6512 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6513 mddev->sync_thread)
6514 return -EBUSY;
6515 if (mddev->ro)
6516 return -EROFS;
6517
6518 rdev_for_each(rdev, mddev) {
6519 sector_t avail = rdev->sectors;
6520
6521 if (fit && (num_sectors == 0 || num_sectors > avail))
6522 num_sectors = avail;
6523 if (avail < num_sectors)
6524 return -ENOSPC;
6525 }
6526 rv = mddev->pers->resize(mddev, num_sectors);
6527 if (!rv) {
6528 if (mddev->queue) {
6529 set_capacity(mddev->gendisk, mddev->array_sectors);
6530 revalidate_disk(mddev->gendisk);
6531 }
6532 }
6533 return rv;
6534}
6535
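/* Prepare a change in the number of raid disks: record delta_disks and the
 * reshape direction, then let the personality's check_reshape() accept or
 * reject the new geometry.
 */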
6536static int update_raid_disks(struct mddev *mddev, int raid_disks)
6537{
6538 int rv;
6539 struct md_rdev *rdev;
6540
6541 if (mddev->pers->check_reshape == NULL)
6542 return -EINVAL;
6543 if (mddev->ro)
6544 return -EROFS;
6545 if (raid_disks <= 0 ||
6546 (mddev->max_disks && raid_disks >= mddev->max_disks))
6547 return -EINVAL;
6548 if (mddev->sync_thread ||
6549 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6550 mddev->reshape_position != MaxSector)
6551 return -EBUSY;
6552
6553 rdev_for_each(rdev, mddev) {
6554 if (mddev->raid_disks < raid_disks &&
6555 rdev->data_offset < rdev->new_data_offset)
6556 return -EINVAL;
6557 if (mddev->raid_disks > raid_disks &&
6558 rdev->data_offset > rdev->new_data_offset)
6559 return -EINVAL;
6560 }
6561
6562 mddev->delta_disks = raid_disks - mddev->raid_disks;
6563 if (mddev->delta_disks < 0)
6564 mddev->reshape_backwards = 1;
6565 else if (mddev->delta_disks > 0)
6566 mddev->reshape_backwards = 0;
6567
6568 rv = mddev->pers->check_reshape(mddev);
6569 if (rv < 0) {
6570 mddev->delta_disks = 0;
6571 mddev->reshape_backwards = 0;
6572 }
6573 return rv;
6574}
6575
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, size, raid_disks, layout, chunk_size
 * fields in the info are checked against the array.
 * Any differences that cannot be handled will cause an error.
 * Normally, only one change can be managed at a time.
 */
6584static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6585{
6586 int rv = 0;
6587 int cnt = 0;
6588 int state = 0;
6589
	/* calculate expected state, ignoring low bits */
6591 if (mddev->bitmap && mddev->bitmap_info.offset)
6592 state |= (1 << MD_SB_BITMAP_PRESENT);
6593
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore these */
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;
	/* Check there is only one change */
6607 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6608 cnt++;
6609 if (mddev->raid_disks != info->raid_disks)
6610 cnt++;
6611 if (mddev->layout != info->layout)
6612 cnt++;
6613 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6614 cnt++;
6615 if (cnt == 0)
6616 return 0;
6617 if (cnt > 1)
6618 return -EINVAL;
6619
6620 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
6625 if (mddev->pers->check_reshape == NULL)
6626 return -EINVAL;
6627 else {
6628 mddev->new_layout = info->layout;
6629 rv = mddev->pers->check_reshape(mddev);
6630 if (rv)
6631 mddev->new_layout = mddev->layout;
6632 return rv;
6633 }
6634 }
6635 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6636 rv = update_size(mddev, (sector_t)info->size * 2);
6637
6638 if (mddev->raid_disks != info->raid_disks)
6639 rv = update_raid_disks(mddev, info->raid_disks);
6640
6641 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6642 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6643 rv = -EINVAL;
6644 goto err;
6645 }
6646 if (mddev->recovery || mddev->sync_thread) {
6647 rv = -EBUSY;
6648 goto err;
6649 }
6650 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6651 struct bitmap *bitmap;
6652
6653 if (mddev->bitmap) {
6654 rv = -EEXIST;
6655 goto err;
6656 }
6657 if (mddev->bitmap_info.default_offset == 0) {
6658 rv = -EINVAL;
6659 goto err;
6660 }
6661 mddev->bitmap_info.offset =
6662 mddev->bitmap_info.default_offset;
6663 mddev->bitmap_info.space =
6664 mddev->bitmap_info.default_space;
6665 mddev->pers->quiesce(mddev, 1);
6666 bitmap = bitmap_create(mddev, -1);
6667 if (!IS_ERR(bitmap)) {
6668 mddev->bitmap = bitmap;
6669 rv = bitmap_load(mddev);
6670 } else
6671 rv = PTR_ERR(bitmap);
6672 if (rv)
6673 bitmap_destroy(mddev);
6674 mddev->pers->quiesce(mddev, 0);
6675 } else {
			/* remove the bitmap */
6677 if (!mddev->bitmap) {
6678 rv = -ENOENT;
6679 goto err;
6680 }
6681 if (mddev->bitmap->storage.file) {
6682 rv = -EINVAL;
6683 goto err;
6684 }
6685 if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
6687 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6688 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
6689 rv = -EPERM;
6690 md_cluster_ops->unlock_all_bitmaps(mddev);
6691 goto err;
6692 }
6693
6694 mddev->bitmap_info.nodes = 0;
6695 md_cluster_ops->leave(mddev);
6696 }
6697 mddev->pers->quiesce(mddev, 1);
6698 bitmap_destroy(mddev);
6699 mddev->pers->quiesce(mddev, 0);
6700 mddev->bitmap_info.offset = 0;
6701 }
6702 }
6703 md_update_sb(mddev, 1);
6704 return rv;
6705err:
6706 return rv;
6707}
6708
6709static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6710{
6711 struct md_rdev *rdev;
6712 int err = 0;
6713
6714 if (mddev->pers == NULL)
6715 return -ENODEV;
6716
6717 rcu_read_lock();
6718 rdev = find_rdev_rcu(mddev, dev);
6719 if (!rdev)
6720 err = -ENODEV;
6721 else {
6722 md_error(mddev, rdev);
6723 if (!test_bit(Faulty, &rdev->flags))
6724 err = -EBUSY;
6725 }
6726 rcu_read_unlock();
6727 return err;
6728}
6729
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dmraid and EVMS to do stupid things (read sector 0 XXX times)
 */
6736static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6737{
6738 struct mddev *mddev = bdev->bd_disk->private_data;
6739
6740 geo->heads = 2;
6741 geo->sectors = 4;
6742 geo->cylinders = mddev->array_sectors / 8;
6743 return 0;
6744}
6745
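/* Whitelist of the ioctl commands implemented below; anything else gets
 * -ENOTTY so unrecognised ioctls fail cleanly.
 */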
6746static inline bool md_ioctl_valid(unsigned int cmd)
6747{
6748 switch (cmd) {
6749 case ADD_NEW_DISK:
6750 case BLKROSET:
6751 case GET_ARRAY_INFO:
6752 case GET_BITMAP_FILE:
6753 case GET_DISK_INFO:
6754 case HOT_ADD_DISK:
6755 case HOT_REMOVE_DISK:
6756 case RAID_AUTORUN:
6757 case RAID_VERSION:
6758 case RESTART_ARRAY_RW:
6759 case RUN_ARRAY:
6760 case SET_ARRAY_INFO:
6761 case SET_BITMAP_FILE:
6762 case SET_DISK_FAULTY:
6763 case STOP_ARRAY:
6764 case STOP_ARRAY_RO:
6765 case CLUSTERED_DISK_NACK:
6766 return true;
6767 default:
6768 return false;
6769 }
6770}
6771
6772static int md_ioctl(struct block_device *bdev, fmode_t mode,
6773 unsigned int cmd, unsigned long arg)
6774{
6775 int err = 0;
6776 void __user *argp = (void __user *)arg;
6777 struct mddev *mddev = NULL;
6778 int ro;
6779
6780 if (!md_ioctl_valid(cmd))
6781 return -ENOTTY;
6782
6783 switch (cmd) {
6784 case RAID_VERSION:
6785 case GET_ARRAY_INFO:
6786 case GET_DISK_INFO:
6787 break;
6788 default:
6789 if (!capable(CAP_SYS_ADMIN))
6790 return -EACCES;
6791 }
6792
/*
 * Commands dealing with the RAID driver but not any
 * particular array:
 */
6797 switch (cmd) {
6798 case RAID_VERSION:
6799 err = get_version(argp);
6800 goto out;
6801
6802#ifndef MODULE
6803 case RAID_AUTORUN:
6804 err = 0;
6805 autostart_arrays(arg);
6806 goto out;
6807#endif
6808 default:;
6809 }
6810
/*
 * Commands creating/starting a new array:
 */
6815 mddev = bdev->bd_disk->private_data;
6816
6817 if (!mddev) {
6818 BUG();
6819 goto out;
6820 }
6821
/* Some actions do not require the mutex */
6823 switch (cmd) {
6824 case GET_ARRAY_INFO:
6825 if (!mddev->raid_disks && !mddev->external)
6826 err = -ENODEV;
6827 else
6828 err = get_array_info(mddev, argp);
6829 goto out;
6830
6831 case GET_DISK_INFO:
6832 if (!mddev->raid_disks && !mddev->external)
6833 err = -ENODEV;
6834 else
6835 err = get_disk_info(mddev, argp);
6836 goto out;
6837
6838 case SET_DISK_FAULTY:
6839 err = set_disk_faulty(mddev, new_decode_dev(arg));
6840 goto out;
6841
6842 case GET_BITMAP_FILE:
6843 err = get_bitmap_file(mddev, argp);
6844 goto out;
6845
6846 }
6847
6848 if (cmd == ADD_NEW_DISK)
/* need to ensure md_delayed_delete() has completed */
6850 flush_workqueue(md_misc_wq);
6851
6852 if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
6854 wait_event_interruptible_timeout(mddev->sb_wait,
6855 !test_bit(MD_RECOVERY_NEEDED,
6856 &mddev->recovery),
6857 msecs_to_jiffies(5000));
6858 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/* Need to flush page cache, and ensure no-one else opens
 * and writes
 */
6862 mutex_lock(&mddev->open_mutex);
6863 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6864 mutex_unlock(&mddev->open_mutex);
6865 err = -EBUSY;
6866 goto out;
6867 }
6868 set_bit(MD_CLOSING, &mddev->flags);
6869 mutex_unlock(&mddev->open_mutex);
6870 sync_blockdev(bdev);
6871 }
6872 err = mddev_lock(mddev);
6873 if (err) {
6874 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
6875 err, cmd);
6876 goto out;
6877 }
6878
6879 if (cmd == SET_ARRAY_INFO) {
6880 mdu_array_info_t info;
6881 if (!arg)
6882 memset(&info, 0, sizeof(info));
6883 else if (copy_from_user(&info, argp, sizeof(info))) {
6884 err = -EFAULT;
6885 goto unlock;
6886 }
6887 if (mddev->pers) {
6888 err = update_array_info(mddev, &info);
6889 if (err) {
6890 pr_warn("md: couldn't update array info. %d\n", err);
6891 goto unlock;
6892 }
6893 goto unlock;
6894 }
6895 if (!list_empty(&mddev->disks)) {
6896 pr_warn("md: array %s already has disks!\n", mdname(mddev));
6897 err = -EBUSY;
6898 goto unlock;
6899 }
6900 if (mddev->raid_disks) {
6901 pr_warn("md: array %s already initialised!\n", mdname(mddev));
6902 err = -EBUSY;
6903 goto unlock;
6904 }
6905 err = set_array_info(mddev, &info);
6906 if (err) {
6907 pr_warn("md: couldn't set array info. %d\n", err);
6908 goto unlock;
6909 }
6910 goto unlock;
6911 }
6912
/*
 * Commands querying/configuring an existing array:
 */
/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
6918 if ((!mddev->raid_disks && !mddev->external)
6919 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6920 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6921 && cmd != GET_BITMAP_FILE) {
6922 err = -ENODEV;
6923 goto unlock;
6924 }
6925
/*
 * Commands even a read-only array can execute:
 */
6929 switch (cmd) {
6930 case RESTART_ARRAY_RW:
6931 err = restart_array(mddev);
6932 goto unlock;
6933
6934 case STOP_ARRAY:
6935 err = do_md_stop(mddev, 0, bdev);
6936 goto unlock;
6937
6938 case STOP_ARRAY_RO:
6939 err = md_set_readonly(mddev, bdev);
6940 goto unlock;
6941
6942 case HOT_REMOVE_DISK:
6943 err = hot_remove_disk(mddev, new_decode_dev(arg));
6944 goto unlock;
6945
6946 case ADD_NEW_DISK:
/* We can support ADD_NEW_DISK on read-only arrays
 * only if we are re-adding a preexisting device.
 * So require mddev->pers and MD_DISK_SYNC.
 */
6951 if (mddev->pers) {
6952 mdu_disk_info_t info;
6953 if (copy_from_user(&info, argp, sizeof(info)))
6954 err = -EFAULT;
6955 else if (!(info.state & (1<<MD_DISK_SYNC)))
/* Not a re-add of an in-sync device: fall through to the read-write path below */
6957 break;
6958 else
6959 err = add_new_disk(mddev, &info);
6960 goto unlock;
6961 }
6962 break;
6963
6964 case BLKROSET:
6965 if (get_user(ro, (int __user *)(arg))) {
6966 err = -EFAULT;
6967 goto unlock;
6968 }
6969 err = -EINVAL;

/* if the bdev is going readonly the value of mddev->ro
 * does not matter, no writes are coming
 */
6974 if (ro)
6975 goto unlock;
6976
/* are we already prepared for writes? */
6978 if (mddev->ro != 1)
6979 goto unlock;
6980
/* transitioning to readauto need only happen for
 * arrays that call md_write_start
 */
6984 if (mddev->pers) {
6985 err = restart_array(mddev);
6986 if (err == 0) {
6987 mddev->ro = 2;
6988 set_disk_ro(mddev->gendisk, 0);
6989 }
6990 }
6991 goto unlock;
6992 }
6993
/*
 * The remaining ioctls are changing the state of the
 * array, so we do not allow them on read-only arrays.
 */
6998 if (mddev->ro && mddev->pers) {
6999 if (mddev->ro == 2) {
7000 mddev->ro = 0;
7001 sysfs_notify_dirent_safe(mddev->sysfs_state);
7002 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
/* If a device failed while we were read-only, we
 * need to make sure the metadata is updated now.
 */
7007 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7008 mddev_unlock(mddev);
7009 wait_event(mddev->sb_wait,
7010 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7011 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7012 mddev_lock_nointr(mddev);
7013 }
7014 } else {
7015 err = -EROFS;
7016 goto unlock;
7017 }
7018 }
7019
7020 switch (cmd) {
7021 case ADD_NEW_DISK:
7022 {
7023 mdu_disk_info_t info;
7024 if (copy_from_user(&info, argp, sizeof(info)))
7025 err = -EFAULT;
7026 else
7027 err = add_new_disk(mddev, &info);
7028 goto unlock;
7029 }
7030
7031 case CLUSTERED_DISK_NACK:
7032 if (mddev_is_clustered(mddev))
7033 md_cluster_ops->new_disk_ack(mddev, false);
7034 else
7035 err = -EINVAL;
7036 goto unlock;
7037
7038 case HOT_ADD_DISK:
7039 err = hot_add_disk(mddev, new_decode_dev(arg));
7040 goto unlock;
7041
7042 case RUN_ARRAY:
7043 err = do_md_run(mddev);
7044 goto unlock;
7045
7046 case SET_BITMAP_FILE:
7047 err = set_bitmap_file(mddev, (int)arg);
7048 goto unlock;
7049
7050 default:
7051 err = -EINVAL;
7052 goto unlock;
7053 }
7054
7055unlock:
7056 if (mddev->hold_active == UNTIL_IOCTL &&
7057 err != -EINVAL)
7058 mddev->hold_active = 0;
7059 mddev_unlock(mddev);
7060out:
7061 return err;
7062}
7063#ifdef CONFIG_COMPAT
7064static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7065 unsigned int cmd, unsigned long arg)
7066{
7067 switch (cmd) {
7068 case HOT_REMOVE_DISK:
7069 case HOT_ADD_DISK:
7070 case SET_DISK_FAULTY:
7071 case SET_BITMAP_FILE:
/* These take an integer arg and do not convert it */
7073 break;
7074 default:
7075 arg = (unsigned long)compat_ptr(arg);
7076 break;
7077 }
7078
7079 return md_ioctl(bdev, mode, cmd, arg);
7080}
7081#endif
7082
7083static int md_open(struct block_device *bdev, fmode_t mode)
7084{
/*
 * Succeed if we can lock the mddev, which confirms that
 * it isn't being stopped right now.
 */
7089 struct mddev *mddev = mddev_find(bdev->bd_dev);
7090 int err;
7091
7092 if (!mddev)
7093 return -ENODEV;
7094
7095 if (mddev->gendisk != bdev->bd_disk) {
/* we are racing with mddev_put which is discarding this
 * bd_disk.
 */
7099 mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
7101 flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
7103 return -ERESTARTSYS;
7104 }
7105 BUG_ON(mddev != bdev->bd_disk->private_data);
7106
7107 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7108 goto out;
7109
7110 if (test_bit(MD_CLOSING, &mddev->flags)) {
7111 mutex_unlock(&mddev->open_mutex);
7112 err = -ENODEV;
7113 goto out;
7114 }
7115
7116 err = 0;
7117 atomic_inc(&mddev->openers);
7118 mutex_unlock(&mddev->open_mutex);
7119
7120 check_disk_change(bdev);
7121 out:
7122 if (err)
7123 mddev_put(mddev);
7124 return err;
7125}
7126
7127static void md_release(struct gendisk *disk, fmode_t mode)
7128{
7129 struct mddev *mddev = disk->private_data;
7130
7131 BUG_ON(!mddev);
7132 atomic_dec(&mddev->openers);
7133 mddev_put(mddev);
7134}
7135
7136static int md_media_changed(struct gendisk *disk)
7137{
7138 struct mddev *mddev = disk->private_data;
7139
7140 return mddev->changed;
7141}
7142
7143static int md_revalidate(struct gendisk *disk)
7144{
7145 struct mddev *mddev = disk->private_data;
7146
7147 mddev->changed = 0;
7148 return 0;
7149}
7150static const struct block_device_operations md_fops =
7151{
7152 .owner = THIS_MODULE,
7153 .open = md_open,
7154 .release = md_release,
7155 .ioctl = md_ioctl,
7156#ifdef CONFIG_COMPAT
7157 .compat_ioctl = md_compat_ioctl,
7158#endif
7159 .getgeo = md_getgeo,
7160 .media_changed = md_media_changed,
7161 .revalidate_disk= md_revalidate,
7162};
7163
7164static int md_thread(void *arg)
7165{
7166 struct md_thread *thread = arg;
7167
/*
 * This is the common body of every md service thread.  It sleeps
 * on thread->wqueue until THREAD_WAKEUP is set (or the optional
 * timeout expires) and then calls the routine that the personality
 * registered through md_register_thread().  The loop only ends when
 * md_unregister_thread() asks the kthread to stop.
 */
7180 allow_signal(SIGKILL);
7181 while (!kthread_should_stop()) {
/* We need to wait INTERRUPTIBLE so that
 * we don't add to the load-average.
 * That means we need to be sure no signals are
 * pending
 */
7188 if (signal_pending(current))
7189 flush_signals(current);
7190
7191 wait_event_interruptible_timeout
7192 (thread->wqueue,
7193 test_bit(THREAD_WAKEUP, &thread->flags)
7194 || kthread_should_stop() || kthread_should_park(),
7195 thread->timeout);
7196
7197 clear_bit(THREAD_WAKEUP, &thread->flags);
7198 if (kthread_should_park())
7199 kthread_parkme();
7200 if (!kthread_should_stop())
7201 thread->run(thread);
7202 }
7203
7204 return 0;
7205}
7206
7207void md_wakeup_thread(struct md_thread *thread)
7208{
7209 if (thread) {
7210 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7211 set_bit(THREAD_WAKEUP, &thread->flags);
7212 wake_up(&thread->wqueue);
7213 }
7214}
7215EXPORT_SYMBOL(md_wakeup_thread);
7216
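/*
 * Create and wake a generic md service thread; 'run' is the routine
 * that md_thread() will call each time md_wakeup_thread() is invoked.
 * Returns NULL on failure.  A sketch of the typical calling pattern in
 * a personality (the names here are illustrative, not prescriptive):
 *
 *	mddev->thread = md_register_thread(raid1d, mddev, "raid1");
 *	if (!mddev->thread)
 *		goto abort;
 */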
7217struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7218 struct mddev *mddev, const char *name)
7219{
7220 struct md_thread *thread;
7221
7222 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7223 if (!thread)
7224 return NULL;
7225
7226 init_waitqueue_head(&thread->wqueue);
7227
7228 thread->run = run;
7229 thread->mddev = mddev;
7230 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7231 thread->tsk = kthread_run(md_thread, thread,
7232 "%s_%s",
7233 mdname(thread->mddev),
7234 name);
7235 if (IS_ERR(thread->tsk)) {
7236 kfree(thread);
7237 return NULL;
7238 }
7239 return thread;
7240}
7241EXPORT_SYMBOL(md_register_thread);
7242
7243void md_unregister_thread(struct md_thread **threadp)
7244{
7245 struct md_thread *thread = *threadp;
7246 if (!thread)
7247 return;
7248 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
/* Locking ensures that mddev_unlock does not wake_up a
 * non-existent thread
 */
7252 spin_lock(&pers_lock);
7253 *threadp = NULL;
7254 spin_unlock(&pers_lock);
7255
7256 kthread_stop(thread->tsk);
7257 kfree(thread);
7258}
7259EXPORT_SYMBOL(md_unregister_thread);
7260
7261void md_error(struct mddev *mddev, struct md_rdev *rdev)
7262{
7263 if (!rdev || test_bit(Faulty, &rdev->flags))
7264 return;
7265
7266 if (!mddev->pers || !mddev->pers->error_handler)
7267 return;
mddev->pers->error_handler(mddev, rdev);
7269 if (mddev->degraded)
7270 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7271 sysfs_notify_dirent_safe(rdev->sysfs_state);
7272 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7273 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7274 md_wakeup_thread(mddev->thread);
7275 if (mddev->event_work.func)
7276 queue_work(md_misc_wq, &mddev->event_work);
7277 md_new_event(mddev);
7278}
7279EXPORT_SYMBOL(md_error);
7280
/* seq_file implementation /proc/mdstat */

7283static void status_unused(struct seq_file *seq)
7284{
7285 int i = 0;
7286 struct md_rdev *rdev;
7287
7288 seq_printf(seq, "unused devices: ");
7289
7290 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7291 char b[BDEVNAME_SIZE];
7292 i++;
7293 seq_printf(seq, "%s ",
7294 bdevname(rdev->bdev,b));
7295 }
7296 if (!i)
7297 seq_printf(seq, "<none>");
7298
7299 seq_printf(seq, "\n");
7300}
7301
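/*
 * Print the progress line for an active resync/recovery, e.g.
 * "[====>................]  resync = 12.4% (...)".  Returns 1 if a
 * line was printed.  A worked example of the fixed-point math below,
 * using scale == 10: for max_sectors == 2097152 and resync == 262144,
 * res = (262144 >> 10) * 1000 / ((2097152 >> 10) + 1) = 124, so
 * per_milli == 124 prints as "12.4%" and draws per_milli/50 == 2 '='
 * characters out of 20.
 */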
7302static int status_resync(struct seq_file *seq, struct mddev *mddev)
7303{
7304 sector_t max_sectors, resync, res;
7305 unsigned long dt, db;
7306 sector_t rt;
7307 int scale;
7308 unsigned int per_milli;
7309
7310 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7311 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7312 max_sectors = mddev->resync_max_sectors;
7313 else
7314 max_sectors = mddev->dev_sectors;
7315
7316 resync = mddev->curr_resync;
7317 if (resync <= 3) {
7318 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
/* Still cleaning up */
7320 resync = max_sectors;
7321 } else
7322 resync -= atomic_read(&mddev->recovery_active);
7323
7324 if (resync == 0) {
7325 if (mddev->recovery_cp < MaxSector) {
7326 seq_printf(seq, "\tresync=PENDING");
7327 return 1;
7328 }
7329 return 0;
7330 }
7331 if (resync < 3) {
7332 seq_printf(seq, "\tresync=DELAYED");
7333 return 1;
7334 }
7335
7336 WARN_ON(max_sectors == 0);
7337
/* Pick 'scale' such that (resync>>scale)*1000 will fit
 * in a sector_t, and (max_sectors>>scale) will fit in a
 * u32, meaning max_sectors/2 fits in u32
 */
7342 scale = 10;
7343 if (sizeof(sector_t) > sizeof(unsigned long)) {
7344 while ( max_sectors/2 > (1ULL<<(scale+32)))
7345 scale++;
7346 }
7347 res = (resync>>scale)*1000;
7348 sector_div(res, (u32)((max_sectors>>scale)+1));
7349
7350 per_milli = res;
7351 {
7352 int i, x = per_milli/50, y = 20-x;
7353 seq_printf(seq, "[");
7354 for (i = 0; i < x; i++)
7355 seq_printf(seq, "=");
7356 seq_printf(seq, ">");
7357 for (i = 0; i < y; i++)
7358 seq_printf(seq, ".");
7359 seq_printf(seq, "] ");
7360 }
7361 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7362 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7363 "reshape" :
7364 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7365 "check" :
7366 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7367 "resync" : "recovery"))),
7368 per_milli/10, per_milli % 10,
7369 (unsigned long long) resync/2,
7370 (unsigned long long) max_sectors/2);
7371
/*
 * dt: seconds since the last mark
 * db: sectors completed since the last mark
 * rt: estimated remaining time, i.e. (remaining sectors) * dt / db
 *
 * rt is a sector_t, so it could be 32bit or 64bit.
 * We divide before we multiply in case it is 32bit and close
 * to the limit, and we scale the divisor (db) by 32 first so
 * the division does not collapse to zero and lose all
 * precision; the final '>> 5' undoes that scaling.
 */
7386 dt = ((jiffies - mddev->resync_mark) / HZ);
if (!dt)
dt++;
7388 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7389 - mddev->resync_mark_cnt;
7390
7391 rt = max_sectors - resync;
7392 sector_div(rt, db/32+1);
7393 rt *= dt;
7394 rt >>= 5;
7395
7396 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7397 ((unsigned long)rt % 60)/6);
7398
7399 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7400 return 1;
7401}
7402
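/*
 * The /proc/mdstat iterator hands out two magic cookies in addition to
 * real mddev pointers: (void*)1 makes md_seq_show() emit the
 * "Personalities : ..." header and (void*)2 makes it emit the trailing
 * "unused devices:" line; everything in between is a live array.
 */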
7403static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7404{
7405 struct list_head *tmp;
7406 loff_t l = *pos;
7407 struct mddev *mddev;
7408
7409 if (l >= 0x10000)
7410 return NULL;
7411 if (!l--)
/* header */
7413 return (void*)1;
7414
7415 spin_lock(&all_mddevs_lock);
list_for_each(tmp, &all_mddevs)
7417 if (!l--) {
7418 mddev = list_entry(tmp, struct mddev, all_mddevs);
7419 mddev_get(mddev);
7420 spin_unlock(&all_mddevs_lock);
7421 return mddev;
7422 }
7423 spin_unlock(&all_mddevs_lock);
7424 if (!l--)
7425 return (void*)2;
7426 return NULL;
7427}
7428
7429static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7430{
7431 struct list_head *tmp;
7432 struct mddev *next_mddev, *mddev = v;
7433
7434 ++*pos;
7435 if (v == (void*)2)
7436 return NULL;
7437
7438 spin_lock(&all_mddevs_lock);
7439 if (v == (void*)1)
7440 tmp = all_mddevs.next;
7441 else
7442 tmp = mddev->all_mddevs.next;
7443 if (tmp != &all_mddevs)
7444 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7445 else {
7446 next_mddev = (void*)2;
7447 *pos = 0x10000;
7448 }
7449 spin_unlock(&all_mddevs_lock);
7450
7451 if (v != (void*)1)
7452 mddev_put(mddev);
7453 return next_mddev;
7454
7455}
7456
7457static void md_seq_stop(struct seq_file *seq, void *v)
7458{
7459 struct mddev *mddev = v;
7460
7461 if (mddev && v != (void*)1 && v != (void*)2)
7462 mddev_put(mddev);
7463}
7464
7465static int md_seq_show(struct seq_file *seq, void *v)
7466{
7467 struct mddev *mddev = v;
7468 sector_t sectors;
7469 struct md_rdev *rdev;
7470
7471 if (v == (void*)1) {
7472 struct md_personality *pers;
7473 seq_printf(seq, "Personalities : ");
7474 spin_lock(&pers_lock);
7475 list_for_each_entry(pers, &pers_list, list)
7476 seq_printf(seq, "[%s] ", pers->name);
7477
7478 spin_unlock(&pers_lock);
7479 seq_printf(seq, "\n");
7480 seq->poll_event = atomic_read(&md_event_count);
7481 return 0;
7482 }
7483 if (v == (void*)2) {
7484 status_unused(seq);
7485 return 0;
7486 }
7487
7488 spin_lock(&mddev->lock);
7489 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7490 seq_printf(seq, "%s : %sactive", mdname(mddev),
7491 mddev->pers ? "" : "in");
7492 if (mddev->pers) {
7493 if (mddev->ro==1)
7494 seq_printf(seq, " (read-only)");
7495 if (mddev->ro==2)
7496 seq_printf(seq, " (auto-read-only)");
7497 seq_printf(seq, " %s", mddev->pers->name);
7498 }
7499
7500 sectors = 0;
7501 rcu_read_lock();
7502 rdev_for_each_rcu(rdev, mddev) {
7503 char b[BDEVNAME_SIZE];
7504 seq_printf(seq, " %s[%d]",
7505 bdevname(rdev->bdev,b), rdev->desc_nr);
7506 if (test_bit(WriteMostly, &rdev->flags))
7507 seq_printf(seq, "(W)");
7508 if (test_bit(Journal, &rdev->flags))
7509 seq_printf(seq, "(J)");
7510 if (test_bit(Faulty, &rdev->flags)) {
7511 seq_printf(seq, "(F)");
7512 continue;
7513 }
7514 if (rdev->raid_disk < 0)
7515 seq_printf(seq, "(S)");
7516 if (test_bit(Replacement, &rdev->flags))
7517 seq_printf(seq, "(R)");
7518 sectors += rdev->sectors;
7519 }
7520 rcu_read_unlock();
7521
7522 if (!list_empty(&mddev->disks)) {
7523 if (mddev->pers)
7524 seq_printf(seq, "\n %llu blocks",
7525 (unsigned long long)
7526 mddev->array_sectors / 2);
7527 else
7528 seq_printf(seq, "\n %llu blocks",
7529 (unsigned long long)sectors / 2);
7530 }
7531 if (mddev->persistent) {
7532 if (mddev->major_version != 0 ||
7533 mddev->minor_version != 90) {
7534 seq_printf(seq," super %d.%d",
7535 mddev->major_version,
7536 mddev->minor_version);
7537 }
7538 } else if (mddev->external)
7539 seq_printf(seq, " super external:%s",
7540 mddev->metadata_type);
7541 else
7542 seq_printf(seq, " super non-persistent");
7543
7544 if (mddev->pers) {
7545 mddev->pers->status(seq, mddev);
7546 seq_printf(seq, "\n ");
7547 if (mddev->pers->sync_request) {
7548 if (status_resync(seq, mddev))
7549 seq_printf(seq, "\n ");
7550 }
7551 } else
7552 seq_printf(seq, "\n ");
7553
7554 bitmap_status(seq, mddev->bitmap);
7555
7556 seq_printf(seq, "\n");
7557 }
7558 spin_unlock(&mddev->lock);
7559
7560 return 0;
7561}
7562
7563static const struct seq_operations md_seq_ops = {
7564 .start = md_seq_start,
7565 .next = md_seq_next,
7566 .stop = md_seq_stop,
7567 .show = md_seq_show,
7568};
7569
7570static int md_seq_open(struct inode *inode, struct file *file)
7571{
7572 struct seq_file *seq;
7573 int error;
7574
7575 error = seq_open(file, &md_seq_ops);
7576 if (error)
7577 return error;
7578
7579 seq = file->private_data;
7580 seq->poll_event = atomic_read(&md_event_count);
7581 return error;
7582}
7583
7584static int md_unloading;
7585static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7586{
7587 struct seq_file *seq = filp->private_data;
7588 int mask;
7589
7590 if (md_unloading)
7591 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7592 poll_wait(filp, &md_event_waiters, wait);

/* always allow read */
7595 mask = POLLIN | POLLRDNORM;
7596
7597 if (seq->poll_event != atomic_read(&md_event_count))
7598 mask |= POLLERR | POLLPRI;
7599 return mask;
7600}
7601
7602static const struct file_operations md_seq_fops = {
7603 .owner = THIS_MODULE,
7604 .open = md_seq_open,
7605 .read = seq_read,
7606 .llseek = seq_lseek,
7607 .release = seq_release_private,
7608 .poll = mdstat_poll,
7609};
7610
7611int register_md_personality(struct md_personality *p)
7612{
7613 pr_debug("md: %s personality registered for level %d\n",
7614 p->name, p->level);
7615 spin_lock(&pers_lock);
7616 list_add_tail(&p->list, &pers_list);
7617 spin_unlock(&pers_lock);
7618 return 0;
7619}
7620EXPORT_SYMBOL(register_md_personality);
7621
7622int unregister_md_personality(struct md_personality *p)
7623{
7624 pr_debug("md: %s personality unregistered\n", p->name);
7625 spin_lock(&pers_lock);
7626 list_del_init(&p->list);
7627 spin_unlock(&pers_lock);
7628 return 0;
7629}
7630EXPORT_SYMBOL(unregister_md_personality);
7631
7632int register_md_cluster_operations(struct md_cluster_operations *ops,
7633 struct module *module)
7634{
7635 int ret = 0;
7636 spin_lock(&pers_lock);
7637 if (md_cluster_ops != NULL)
7638 ret = -EALREADY;
7639 else {
7640 md_cluster_ops = ops;
7641 md_cluster_mod = module;
7642 }
7643 spin_unlock(&pers_lock);
7644 return ret;
7645}
7646EXPORT_SYMBOL(register_md_cluster_operations);
7647
7648int unregister_md_cluster_operations(void)
7649{
7650 spin_lock(&pers_lock);
7651 md_cluster_ops = NULL;
7652 spin_unlock(&pers_lock);
7653 return 0;
7654}
7655EXPORT_SYMBOL(unregister_md_cluster_operations);
7656
7657int md_setup_cluster(struct mddev *mddev, int nodes)
7658{
7659 if (!md_cluster_ops)
7660 request_module("md-cluster");
7661 spin_lock(&pers_lock);
/* ensure module won't be unloaded */
7663 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
pr_warn("can't find md-cluster module or get its reference.\n");
7665 spin_unlock(&pers_lock);
7666 return -ENOENT;
7667 }
7668 spin_unlock(&pers_lock);
7669
7670 return md_cluster_ops->join(mddev, nodes);
7671}
7672
7673void md_cluster_stop(struct mddev *mddev)
7674{
7675 if (!md_cluster_ops)
7676 return;
7677 md_cluster_ops->leave(mddev);
7678 module_put(md_cluster_mod);
7679}
7680
7681static int is_mddev_idle(struct mddev *mddev, int init)
7682{
7683 struct md_rdev *rdev;
7684 int idle;
7685 int curr_events;
7686
7687 idle = 1;
7688 rcu_read_lock();
7689 rdev_for_each_rcu(rdev, mddev) {
7690 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7691 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7692 (int)part_stat_read(&disk->part0, sectors[1]) -
7693 atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
 * as sync_io is counted when a request starts, and
 * disk_stats is counted when it completes.
 * So resync activity will cause curr_events to be smaller than
 * when there was no such activity.
 * non-sync IO will cause disk_stat to increase without
 * increasing sync_io so curr_events will (eventually)
 * be larger than it was before.  Once it becomes
 * substantially larger, the test below will cause
 * the array to appear non-idle, and resync will slow
 * down.
 * If there is a lot of outstanding resync activity when
 * we set last_event to curr_events, then all that activity
 * completing might cause the array to appear non-idle
 * and resync will be slowed down even though there might
 * not be any non-resync activity.  This will only
 * happen once though.  'last_events' will soon reflect
 * the state where there is little or no outstanding
 * resync requests, and further resync activity will
 * always make curr_events less than last_events.
 */
7716 if (init || curr_events - rdev->last_events > 64) {
7717 rdev->last_events = curr_events;
7718 idle = 0;
7719 }
7720 }
7721 rcu_read_unlock();
7722 return idle;
7723}
7724
7725void md_done_sync(struct mddev *mddev, int blocks, int ok)
7726{
/* another "blocks" (512 byte) blocks have been synced */
7728 atomic_sub(blocks, &mddev->recovery_active);
7729 wake_up(&mddev->recovery_wait);
7730 if (!ok) {
7731 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7732 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7733 md_wakeup_thread(mddev->thread);
/* stop recovery, signal do_sync */
7735 }
7736}
7737EXPORT_SYMBOL(md_done_sync);
7738
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 */
7744void md_write_start(struct mddev *mddev, struct bio *bi)
7745{
7746 int did_change = 0;
7747 if (bio_data_dir(bi) != WRITE)
7748 return;
7749
7750 BUG_ON(mddev->ro == 1);
7751 if (mddev->ro == 2) {
/* need to switch to read/write */
7753 mddev->ro = 0;
7754 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7755 md_wakeup_thread(mddev->thread);
7756 md_wakeup_thread(mddev->sync_thread);
7757 did_change = 1;
7758 }
7759 atomic_inc(&mddev->writes_pending);
7760 if (mddev->safemode == 1)
7761 mddev->safemode = 0;
7762 if (mddev->in_sync) {
7763 spin_lock(&mddev->lock);
7764 if (mddev->in_sync) {
7765 mddev->in_sync = 0;
7766 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7767 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7768 md_wakeup_thread(mddev->thread);
7769 did_change = 1;
7770 }
7771 spin_unlock(&mddev->lock);
7772 }
7773 if (did_change)
7774 sysfs_notify_dirent_safe(mddev->sysfs_state);
7775 wait_event(mddev->sb_wait,
7776 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7777}
7778EXPORT_SYMBOL(md_write_start);
7779
7780void md_write_end(struct mddev *mddev)
7781{
7782 if (atomic_dec_and_test(&mddev->writes_pending)) {
7783 if (mddev->safemode == 2)
7784 md_wakeup_thread(mddev->thread);
7785 else if (mddev->safemode_delay)
7786 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7787 }
7788}
7789EXPORT_SYMBOL(md_write_end);
7790
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 *
 * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until
 * mddev->lock is dropped, so return -EAGAIN after notifying userspace.
 */
7800int md_allow_write(struct mddev *mddev)
7801{
7802 if (!mddev->pers)
7803 return 0;
7804 if (mddev->ro)
7805 return 0;
7806 if (!mddev->pers->sync_request)
7807 return 0;
7808
7809 spin_lock(&mddev->lock);
7810 if (mddev->in_sync) {
7811 mddev->in_sync = 0;
7812 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7813 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7814 if (mddev->safemode_delay &&
7815 mddev->safemode == 0)
7816 mddev->safemode = 1;
7817 spin_unlock(&mddev->lock);
7818 md_update_sb(mddev, 0);
7819 sysfs_notify_dirent_safe(mddev->sysfs_state);
7820 } else
7821 spin_unlock(&mddev->lock);
7822
7823 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
7824 return -EAGAIN;
7825 else
7826 return 0;
7827}
7828EXPORT_SYMBOL_GPL(md_allow_write);
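
/*
 * A sketch of the intended calling pattern (the surrounding context is
 * hypothetical): a personality that must allocate memory while holding
 * the reconfig mutex calls md_allow_write() first and treats -EAGAIN as
 * "superblock write still pending", e.g.
 *
 *	err = md_allow_write(mddev);
 *	if (err == -EAGAIN)
 *		buf = kmalloc(size, GFP_NOIO);
 *	else
 *		buf = kmalloc(size, GFP_KERNEL);
 */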
7829
7830#define SYNC_MARKS 10
7831#define SYNC_MARK_STEP (3*HZ)
7832#define UPDATE_FREQUENCY (5*60*HZ)
7833void md_do_sync(struct md_thread *thread)
7834{
7835 struct mddev *mddev = thread->mddev;
7836 struct mddev *mddev2;
7837 unsigned int currspeed = 0,
7838 window;
7839 sector_t max_sectors,j, io_sectors, recovery_done;
7840 unsigned long mark[SYNC_MARKS];
7841 unsigned long update_time;
7842 sector_t mark_cnt[SYNC_MARKS];
7843 int last_mark,m;
7844 struct list_head *tmp;
7845 sector_t last_check;
7846 int skipped = 0;
7847 struct md_rdev *rdev;
7848 char *desc, *action = NULL;
7849 struct blk_plug plug;
7850 int ret;
7851
/* just in case thread restarts... */
7853 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7854 return;
7855 if (mddev->ro) {
7856 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7857 return;
7858 }
7859
7860 if (mddev_is_clustered(mddev)) {
7861 ret = md_cluster_ops->resync_start(mddev);
7862 if (ret)
7863 goto skip;
7864
7865 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
7866 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7867 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
7868 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
7869 && ((unsigned long long)mddev->curr_resync_completed
7870 < (unsigned long long)mddev->resync_max_sectors))
7871 goto skip;
7872 }
7873
7874 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7875 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7876 desc = "data-check";
7877 action = "check";
7878 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7879 desc = "requested-resync";
7880 action = "repair";
7881 } else
7882 desc = "resync";
7883 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7884 desc = "reshape";
7885 else
7886 desc = "recovery";
7887
7888 mddev->last_sync_action = action ?: desc;
7889
/* we overload curr_resync somewhat here.
 * 0 == not engaged in resync at all
 * 2 == checking that there is no conflict with another sync
 * 1 == like 2, but have yielded to allow conflicting resync to
 *		commence
 * other == active in resync - this many blocks
 *
 * Before starting a resync we must have set curr_resync to
 * 2, and then checked that every "conflicting" array has curr_resync
 * strictly less than ours.  When we find one that is the same
 * or higher we wait on resync_wait.  To avoid deadlock if two
 * directly competing syncs are both going at the same speed,
 * we only yield to the one with the lower mddev.
 */
7906 do {
7907 int mddev2_minor = -1;
7908 mddev->curr_resync = 2;
7909
7910 try_again:
7911 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7912 goto skip;
7913 for_each_mddev(mddev2, tmp) {
7914 if (mddev2 == mddev)
7915 continue;
7916 if (!mddev->parallel_resync
7917 && mddev2->curr_resync
7918 && match_mddev_units(mddev, mddev2)) {
7919 DEFINE_WAIT(wq);
7920 if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
7922 mddev->curr_resync = 1;
7923 wake_up(&resync_wait);
7924 }
7925 if (mddev > mddev2 && mddev->curr_resync == 1)
/* no need to wait here, we can wait the next
 * time 'round when curr_resync == 2
 */
7929 continue;
7930
/* We need to wait 'interruptible' so as not to
 * contribute to the load average, and not to
 * be caught by 'softlockup'
 */
7934 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7935 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7936 mddev2->curr_resync >= mddev->curr_resync) {
7937 if (mddev2_minor != mddev2->md_minor) {
7938 mddev2_minor = mddev2->md_minor;
7939 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
7940 desc, mdname(mddev),
7941 mdname(mddev2));
7942 }
7943 mddev_put(mddev2);
7944 if (signal_pending(current))
7945 flush_signals(current);
7946 schedule();
7947 finish_wait(&resync_wait, &wq);
7948 goto try_again;
7949 }
7950 finish_wait(&resync_wait, &wq);
7951 }
7952 }
7953 } while (mddev->curr_resync < 2);
7954
7955 j = 0;
7956 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* resync follows the size requested by the personality,
 * which defaults to physical size, but can be virtual size
 */
7960 max_sectors = mddev->resync_max_sectors;
7961 atomic64_set(&mddev->resync_mismatches, 0);
7962
7963 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7964 j = mddev->resync_min;
7965 else if (!mddev->bitmap)
7966 j = mddev->recovery_cp;
7967
7968 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7969 max_sectors = mddev->resync_max_sectors;
7970 else {
/* recovery follows the physical size of devices */
7972 max_sectors = mddev->dev_sectors;
7973 j = MaxSector;
7974 rcu_read_lock();
7975 rdev_for_each_rcu(rdev, mddev)
7976 if (rdev->raid_disk >= 0 &&
7977 !test_bit(Journal, &rdev->flags) &&
7978 !test_bit(Faulty, &rdev->flags) &&
7979 !test_bit(In_sync, &rdev->flags) &&
7980 rdev->recovery_offset < j)
7981 j = rdev->recovery_offset;
7982 rcu_read_unlock();
7983
/* If there is a bitmap, we need to make sure all
 * writes that started before we added a spare
 * complete before we start doing a recovery.
 * Otherwise the write might complete and (via
 * bitmap_endwrite) set a bit in the bitmap after the
 * recovery has checked that bit and skipped that
 * region.
 */
7992 if (mddev->bitmap) {
7993 mddev->pers->quiesce(mddev, 1);
7994 mddev->pers->quiesce(mddev, 0);
7995 }
7996 }
7997
7998 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
7999 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8000 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8001 speed_max(mddev), desc);
8002
8003 is_mddev_idle(mddev, 1);
8004
8005 io_sectors = 0;
8006 for (m = 0; m < SYNC_MARKS; m++) {
8007 mark[m] = jiffies;
8008 mark_cnt[m] = io_sectors;
8009 }
8010 last_mark = 0;
8011 mddev->resync_mark = mark[last_mark];
8012 mddev->resync_mark_cnt = mark_cnt[last_mark];

/*
 * Tune reconstruction:
 */
8017 window = 32*(PAGE_SIZE/512);
8018 pr_debug("md: using %dk window, over a total of %lluk.\n",
8019 window/2, (unsigned long long)max_sectors/2);
8020
8021 atomic_set(&mddev->recovery_active, 0);
8022 last_check = 0;
8023
8024 if (j>2) {
8025 pr_debug("md: resuming %s of %s from checkpoint.\n",
8026 desc, mdname(mddev));
8027 mddev->curr_resync = j;
8028 } else
8029 mddev->curr_resync = 3;
8030 mddev->curr_resync_completed = j;
8031 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8032 md_new_event(mddev);
8033 update_time = jiffies;
8034
8035 blk_start_plug(&plug);
8036 while (j < max_sectors) {
8037 sector_t sectors;
8038
8039 skipped = 0;
8040
8041 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8042 ((mddev->curr_resync > mddev->curr_resync_completed &&
8043 (mddev->curr_resync - mddev->curr_resync_completed)
8044 > (max_sectors >> 4)) ||
8045 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8046 (j - mddev->curr_resync_completed)*2
8047 >= mddev->resync_max - mddev->curr_resync_completed ||
8048 mddev->curr_resync_completed > mddev->resync_max
8049 )) {
/* time to update curr_resync_completed */
8051 wait_event(mddev->recovery_wait,
8052 atomic_read(&mddev->recovery_active) == 0);
8053 mddev->curr_resync_completed = j;
8054 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8055 j > mddev->recovery_cp)
8056 mddev->recovery_cp = j;
8057 update_time = jiffies;
8058 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8059 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8060 }
8061
8062 while (j >= mddev->resync_max &&
8063 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* As this condition is controlled by userspace we can
 * block indefinitely, so use '_interruptible' to avoid
 * hung-task warnings; flush signals first so a pending
 * signal does not wake us immediately.
 */
8068 flush_signals(current);
8069 wait_event_interruptible(mddev->recovery_wait,
8070 mddev->resync_max > j
8071 || test_bit(MD_RECOVERY_INTR,
8072 &mddev->recovery));
8073 }
8074
8075 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8076 break;
8077
8078 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8079 if (sectors == 0) {
8080 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8081 break;
8082 }
8083
8084 if (!skipped) {
8085 io_sectors += sectors;
8086 atomic_add(sectors, &mddev->recovery_active);
8087 }
8088
8089 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8090 break;
8091
8092 j += sectors;
8093 if (j > max_sectors)
/* when skipping, extra large numbers can be returned. */
8095 j = max_sectors;
8096 if (j > 2)
8097 mddev->curr_resync = j;
8098 mddev->curr_mark_cnt = io_sectors;
8099 if (last_check == 0)
/* this is the earliest that rebuild will be
 * visible in /proc/mdstat
 */
8103 md_new_event(mddev);
8104
8105 if (last_check + window > io_sectors || j == max_sectors)
8106 continue;
8107
8108 last_check = io_sectors;
8109 repeat:
8110 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
/* step marks */
8112 int next = (last_mark+1) % SYNC_MARKS;
8113
8114 mddev->resync_mark = mark[next];
8115 mddev->resync_mark_cnt = mark_cnt[next];
8116 mark[next] = jiffies;
8117 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8118 last_mark = next;
8119 }
8120
8121 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8122 break;
8123
/*
 * Throttle the resync: recompute the current rate below and,
 * while we are above speed_min, either sleep (when we exceed
 * speed_max) or wait for the in-flight resync IO to drain
 * (when other IO is keeping the array busy).
 */
8132 cond_resched();
8133
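/*
 * currspeed below is in KiB/sec: sectors completed since the last
 * mark, halved (512-byte sectors -> KiB), divided by the seconds
 * elapsed since that mark; the '+1' in the divisor guards against
 * division by zero and the trailing '+1' keeps the result non-zero.
 */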
8134 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8135 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8136 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8137
8138 if (currspeed > speed_min(mddev)) {
8139 if (currspeed > speed_max(mddev)) {
8140 msleep(500);
8141 goto repeat;
8142 }
8143 if (!is_mddev_idle(mddev, 0)) {
/*
 * Give other IO more of a chance.
 * The faster the devices, the less we wait.
 */
8148 wait_event(mddev->recovery_wait,
8149 !atomic_read(&mddev->recovery_active));
8150 }
8151 }
8152 }
pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
8154 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8155 ? "interrupted" : "done");
8156
/*
 * this also signals 'finished resyncing' to md_stop
 */
8159 blk_finish_plug(&plug);
8160 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8161
8162 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8163 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8164 mddev->curr_resync > 3) {
8165 mddev->curr_resync_completed = mddev->curr_resync;
8166 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8167 }
/* tell personality that we are finished */
mddev->pers->sync_request(mddev, max_sectors, &skipped);
8169
8170 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8171 mddev->curr_resync > 3) {
8172 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8173 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8174 if (mddev->curr_resync >= mddev->recovery_cp) {
8175 pr_debug("md: checkpointing %s of %s.\n",
8176 desc, mdname(mddev));
8177 if (test_bit(MD_RECOVERY_ERROR,
8178 &mddev->recovery))
8179 mddev->recovery_cp =
8180 mddev->curr_resync_completed;
8181 else
8182 mddev->recovery_cp =
8183 mddev->curr_resync;
8184 }
8185 } else
8186 mddev->recovery_cp = MaxSector;
8187 } else {
8188 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8189 mddev->curr_resync = MaxSector;
8190 rcu_read_lock();
8191 rdev_for_each_rcu(rdev, mddev)
8192 if (rdev->raid_disk >= 0 &&
8193 mddev->delta_disks >= 0 &&
8194 !test_bit(Journal, &rdev->flags) &&
8195 !test_bit(Faulty, &rdev->flags) &&
8196 !test_bit(In_sync, &rdev->flags) &&
8197 rdev->recovery_offset < mddev->curr_resync)
8198 rdev->recovery_offset = mddev->curr_resync;
8199 rcu_read_unlock();
8200 }
8201 }
8202 skip:
/* set CHANGE_PENDING here since maybe another update is needed,
 * so other nodes are informed. It should be harmless for normal
 * raid */
8206 set_mask_bits(&mddev->sb_flags, 0,
8207 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8208
8209 spin_lock(&mddev->lock);
8210 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* We completed so min/max setting can be forgotten if used. */
8212 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8213 mddev->resync_min = 0;
8214 mddev->resync_max = MaxSector;
8215 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8216 mddev->resync_min = mddev->curr_resync_completed;
8217 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8218 mddev->curr_resync = 0;
8219 spin_unlock(&mddev->lock);
8220
8221 wake_up(&resync_wait);
8222 md_wakeup_thread(mddev->thread);
8223 return;
8224}
8225EXPORT_SYMBOL_GPL(md_do_sync);
8226
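/*
 * Detach failed devices that have no pending IO and then try to add any
 * usable spares back into the array.  With @this == NULL the whole array
 * is processed, otherwise only that one device is considered.  Returns
 * the number of spares now awaiting recovery.
 */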
8227static int remove_and_add_spares(struct mddev *mddev,
8228 struct md_rdev *this)
8229{
8230 struct md_rdev *rdev;
8231 int spares = 0;
8232 int removed = 0;
8233 bool remove_some = false;
8234
8235 rdev_for_each(rdev, mddev) {
8236 if ((this == NULL || rdev == this) &&
8237 rdev->raid_disk >= 0 &&
8238 !test_bit(Blocked, &rdev->flags) &&
8239 test_bit(Faulty, &rdev->flags) &&
8240 atomic_read(&rdev->nr_pending)==0) {
/* Faulty, not Blocked, and with no pending IO: it is
 * safe to remove this device, but readers may still
 * hold an RCU reference, so flag it and wait for a
 * grace period (synchronize_rcu() below) first.
 */
8246 remove_some = true;
8247 set_bit(RemoveSynchronized, &rdev->flags);
8248 }
8249 }
8250
8251 if (remove_some)
8252 synchronize_rcu();
8253 rdev_for_each(rdev, mddev) {
8254 if ((this == NULL || rdev == this) &&
8255 rdev->raid_disk >= 0 &&
8256 !test_bit(Blocked, &rdev->flags) &&
8257 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8258 (!test_bit(In_sync, &rdev->flags) &&
8259 !test_bit(Journal, &rdev->flags))) &&
8260 atomic_read(&rdev->nr_pending)==0)) {
8261 if (mddev->pers->hot_remove_disk(
8262 mddev, rdev) == 0) {
8263 sysfs_unlink_rdev(mddev, rdev);
8264 rdev->raid_disk = -1;
8265 removed++;
8266 }
8267 }
8268 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8269 clear_bit(RemoveSynchronized, &rdev->flags);
8270 }
8271
8272 if (removed && mddev->kobj.sd)
8273 sysfs_notify(&mddev->kobj, NULL, "degraded");
8274
8275 if (this && removed)
8276 goto no_add;
8277
8278 rdev_for_each(rdev, mddev) {
8279 if (this && this != rdev)
8280 continue;
8281 if (test_bit(Candidate, &rdev->flags))
8282 continue;
8283 if (rdev->raid_disk >= 0 &&
8284 !test_bit(In_sync, &rdev->flags) &&
8285 !test_bit(Journal, &rdev->flags) &&
8286 !test_bit(Faulty, &rdev->flags))
8287 spares++;
8288 if (rdev->raid_disk >= 0)
8289 continue;
8290 if (test_bit(Faulty, &rdev->flags))
8291 continue;
8292 if (!test_bit(Journal, &rdev->flags)) {
8293 if (mddev->ro &&
8294 ! (rdev->saved_raid_disk >= 0 &&
8295 !test_bit(Bitmap_sync, &rdev->flags)))
8296 continue;
8297
8298 rdev->recovery_offset = 0;
8299 }
if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
8304 if (!test_bit(Journal, &rdev->flags))
8305 spares++;
8306 md_new_event(mddev);
8307 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8308 }
8309 }
8310no_add:
8311 if (removed)
8312 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8313 return spares;
8314}
8315
8316static void md_start_sync(struct work_struct *ws)
8317{
8318 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8319
8320 mddev->sync_thread = md_register_thread(md_do_sync,
8321 mddev,
8322 "resync");
8323 if (!mddev->sync_thread) {
8324 pr_warn("%s: could not start resync thread...\n",
8325 mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
8327 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8328 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8329 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8330 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8331 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8332 wake_up(&resync_wait);
8333 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8334 &mddev->recovery))
8335 if (mddev->sysfs_action)
8336 sysfs_notify_dirent_safe(mddev->sysfs_action);
8337 } else
8338 md_wakeup_thread(mddev->sync_thread);
8339 sysfs_notify_dirent_safe(mddev->sysfs_action);
8340 md_new_event(mddev);
8341}
8342
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread, which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't disturb it.
 *	If it is not running, then start it up.
 *  3/ If a device has just failed, remove it.
 *	If recovery is needed, start it.
 *	Perhaps we need to remove a spare, and we can do that right now,
 *	or we may need to start the resync thread.
 */
8365void md_check_recovery(struct mddev *mddev)
8366{
8367 if (mddev->suspended)
8368 return;
8369
8370 if (mddev->bitmap)
8371 bitmap_daemon_work(mddev);
8372
8373 if (signal_pending(current)) {
8374 if (mddev->pers->sync_request && !mddev->external) {
8375 pr_debug("md: %s in immediate safe mode\n",
8376 mdname(mddev));
8377 mddev->safemode = 2;
8378 }
8379 flush_signals(current);
8380 }
8381
8382 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8383 return;
8384 if ( ! (
8385 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8386 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8387 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8388 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8389 (mddev->external == 0 && mddev->safemode == 1) ||
8390 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8391 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8392 ))
8393 return;
8394
8395 if (mddev_trylock(mddev)) {
8396 int spares = 0;
8397
8398 if (mddev->ro) {
8399 struct md_rdev *rdev;
8400 if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices
 * will be recorded if array switched to read/write.
 * Leaving it set will prevent the device
 * from being removed.
 */
8406 rdev_for_each(rdev, mddev)
8407 clear_bit(Blocked, &rdev->flags);
8408
/* On a read-only array we can:
 * - remove failed devices
 * - add already-in_sync devices if the array itself
 *   is in-sync.
 * As we only add devices that are already in-sync,
 * we can activate the spares immediately.
 */
8415 remove_and_add_spares(mddev, NULL);
8416
/* There is no thread, but we need to call
 * ->spare_active and clear saved_raid_disk
 */
8419 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8420 md_reap_sync_thread(mddev);
8421 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8422 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8423 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8424 goto unlock;
8425 }
8426
8427 if (mddev_is_clustered(mddev)) {
8428 struct md_rdev *rdev;
8429
/* kick the device if another node issued a
 * remove disk.
 */
8432 rdev_for_each(rdev, mddev) {
8433 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8434 rdev->raid_disk < 0)
8435 md_kick_rdev_from_array(rdev);
8436 }
8437
8438 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8439 md_reload_sb(mddev, mddev->good_device_nr);
8440 }
8441
8442 if (!mddev->external) {
8443 int did_change = 0;
8444 spin_lock(&mddev->lock);
8445 if (mddev->safemode &&
8446 !atomic_read(&mddev->writes_pending) &&
8447 !mddev->in_sync &&
8448 mddev->recovery_cp == MaxSector) {
8449 mddev->in_sync = 1;
8450 did_change = 1;
8451 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8452 }
8453 if (mddev->safemode == 1)
8454 mddev->safemode = 0;
8455 spin_unlock(&mddev->lock);
8456 if (did_change)
8457 sysfs_notify_dirent_safe(mddev->sysfs_state);
8458 }
8459
8460 if (mddev->sb_flags)
8461 md_update_sb(mddev, 0);
8462
8463 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8464 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
/* resync/recovery still happening */
8466 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8467 goto unlock;
8468 }
8469 if (mddev->sync_thread) {
8470 md_reap_sync_thread(mddev);
8471 goto unlock;
8472 }
8473
/* Set RUNNING before clearing NEEDED to avoid
 * any transients in the value of "sync_action".
 */
8476 mddev->curr_resync_completed = 0;
8477 spin_lock(&mddev->lock);
8478 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8479 spin_unlock(&mddev->lock);
8480
/* Clear some bits that don't mean anything, but
 * might be left set
 */
8483 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8484 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8485
8486 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8487 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8488 goto not_running;
8489
/* no recovery is running.
 * remove any failed drives, then
 * add spares if possible.
 * Spares are also removed and re-added, to allow
 * the personality to fail the re-add.
 */
8496 if (mddev->reshape_position != MaxSector) {
8497 if (mddev->pers->check_reshape == NULL ||
8498 mddev->pers->check_reshape(mddev) != 0)
/* Cannot proceed */
8500 goto not_running;
8501 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8502 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8503 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8504 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8505 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8506 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8507 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8508 } else if (mddev->recovery_cp < MaxSector) {
8509 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8510 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8511 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* nothing to be done ... */
8513 goto not_running;
8514
8515 if (mddev->pers->sync_request) {
8516 if (spares) {
/* We are adding a device or devices to an array
 * which has the bitmap stored on all devices.
 * So make sure all bitmap pages get written
 */
8521 bitmap_write_all(mddev->bitmap);
8522 }
8523 INIT_WORK(&mddev->del_work, md_start_sync);
8524 queue_work(md_misc_wq, &mddev->del_work);
8525 goto unlock;
8526 }
8527 not_running:
8528 if (!mddev->sync_thread) {
8529 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8530 wake_up(&resync_wait);
8531 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8532 &mddev->recovery))
8533 if (mddev->sysfs_action)
8534 sysfs_notify_dirent_safe(mddev->sysfs_action);
8535 }
8536 unlock:
8537 wake_up(&mddev->sb_wait);
8538 mddev_unlock(mddev);
8539 }
8540}
8541EXPORT_SYMBOL(md_check_recovery);
8542
8543void md_reap_sync_thread(struct mddev *mddev)
8544{
8545 struct md_rdev *rdev;
8546
/* resync has finished, collect result */
8548 md_unregister_thread(&mddev->sync_thread);
8549 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8550 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
/* activate any spares */
8553 if (mddev->pers->spare_active(mddev)) {
8554 sysfs_notify(&mddev->kobj, NULL,
8555 "degraded");
8556 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8557 }
8558 }
8559 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8560 mddev->pers->finish_reshape)
8561 mddev->pers->finish_reshape(mddev);

/* If array is no-longer degraded, then any saved_raid_disk
 * information must be scrapped.
 */
8566 if (!mddev->degraded)
8567 rdev_for_each(rdev, mddev)
8568 rdev->saved_raid_disk = -1;
8569
8570 md_update_sb(mddev, 1);
8571
/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
 * clustered raid */
8574 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
8575 md_cluster_ops->resync_finish(mddev);
8576 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8577 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8578 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8579 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8580 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8581 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8582 wake_up(&resync_wait);
/* flag recovery needed just to double check */
8584 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8585 sysfs_notify_dirent_safe(mddev->sysfs_action);
8586 md_new_event(mddev);
8587 if (mddev->event_work.func)
8588 queue_work(md_misc_wq, &mddev->event_work);
8589}
8590EXPORT_SYMBOL(md_reap_sync_thread);
8591
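/*
 * Wait, for at most five seconds, for the Blocked/BlockedBadBlocks
 * state of a device to be cleared (typically by user-space or the md
 * thread acknowledging bad blocks), then drop the caller's nr_pending
 * reference.
 */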
8592void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8593{
8594 sysfs_notify_dirent_safe(rdev->sysfs_state);
8595 wait_event_timeout(rdev->blocked_wait,
8596 !test_bit(Blocked, &rdev->flags) &&
8597 !test_bit(BlockedBadBlocks, &rdev->flags),
8598 msecs_to_jiffies(5000));
8599 rdev_dec_pending(rdev, mddev);
8600}
8601EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8602
8603void md_finish_reshape(struct mddev *mddev)
8604{
/* called by personality module when reshape completes. */
8606 struct md_rdev *rdev;
8607
8608 rdev_for_each(rdev, mddev) {
8609 if (rdev->data_offset > rdev->new_data_offset)
8610 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8611 else
8612 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8613 rdev->data_offset = rdev->new_data_offset;
8614 }
8615}
8616EXPORT_SYMBOL(md_finish_reshape);
8617
/* Bad block management */

/* Returns 1 on success, 0 on failure */
8621int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8622 int is_new)
8623{
8624 struct mddev *mddev = rdev->mddev;
8625 int rv;
8626 if (is_new)
8627 s += rdev->new_data_offset;
8628 else
8629 s += rdev->data_offset;
8630 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8631 if (rv == 0) {
/* Make sure they get written out promptly */
8633 if (test_bit(ExternalBbl, &rdev->flags))
8634 sysfs_notify(&rdev->kobj, NULL,
8635 "unacknowledged_bad_blocks");
8636 sysfs_notify_dirent_safe(rdev->sysfs_state);
8637 set_mask_bits(&mddev->sb_flags, 0,
8638 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
8639 md_wakeup_thread(rdev->mddev->thread);
8640 return 1;
8641 } else
8642 return 0;
8643}
8644EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8645
8646int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8647 int is_new)
8648{
8649 int rv;
8650 if (is_new)
8651 s += rdev->new_data_offset;
8652 else
8653 s += rdev->data_offset;
8654 rv = badblocks_clear(&rdev->badblocks, s, sectors);
8655 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
8656 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
8657 return rv;
8658}
8659EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8660
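/*
 * On reboot or halt, stop writes on every array we can lock and switch
 * persistent arrays to immediate safemode, then (below) delay briefly
 * so in-flight superblock updates can reach the media.
 */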
8661static int md_notify_reboot(struct notifier_block *this,
8662 unsigned long code, void *x)
8663{
8664 struct list_head *tmp;
8665 struct mddev *mddev;
8666 int need_delay = 0;
8667
8668 for_each_mddev(mddev, tmp) {
8669 if (mddev_trylock(mddev)) {
8670 if (mddev->pers)
8671 __md_stop_writes(mddev);
8672 if (mddev->persistent)
8673 mddev->safemode = 2;
8674 mddev_unlock(mddev);
8675 }
8676 need_delay = 1;
8677 }
8678
/*
 * certain more exotic SCSI devices are known to be
 * volatile wrt too early system reboots. While the
 * right place to handle this issue is the given
 * driver, we do want to have a safe RAID driver ...
 */
8684 if (need_delay)
8685 mdelay(1000*1);
8686
8687 return NOTIFY_DONE;
8688}
8689
8690static struct notifier_block md_notifier = {
8691 .notifier_call = md_notify_reboot,
8692 .next = NULL,
8693 .priority = INT_MAX,
8694};
8695
8696static void md_geninit(void)
8697{
8698 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8699
8700 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8701}
8702
8703static int __init md_init(void)
8704{
8705 int ret = -ENOMEM;
8706
8707 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8708 if (!md_wq)
8709 goto err_wq;
8710
8711 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8712 if (!md_misc_wq)
8713 goto err_misc_wq;
8714
8715 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8716 goto err_md;
8717
8718 if ((ret = register_blkdev(0, "mdp")) < 0)
8719 goto err_mdp;
8720 mdp_major = ret;
8721
8722 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8723 md_probe, NULL, NULL);
8724 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8725 md_probe, NULL, NULL);
8726
8727 register_reboot_notifier(&md_notifier);
8728 raid_table_header = register_sysctl_table(raid_root_table);
8729
8730 md_geninit();
8731 return 0;
8732
8733err_mdp:
8734 unregister_blkdev(MD_MAJOR, "md");
8735err_md:
8736 destroy_workqueue(md_misc_wq);
8737err_misc_wq:
8738 destroy_workqueue(md_wq);
8739err_wq:
8740 return ret;
8741}
8742
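/*
 * Fold superblock changes made by another cluster node into our in-core
 * state: drop failed Candidate devices, activate newly assigned spares,
 * fail devices the other node marked faulty, adjust raid_disks, and
 * finally adopt the new event count.
 */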
8743static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8744{
8745 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8746 struct md_rdev *rdev2;
8747 int role, ret;
8748 char b[BDEVNAME_SIZE];
8749
/* Check for change of roles in the active devices */
8751 rdev_for_each(rdev2, mddev) {
8752 if (test_bit(Faulty, &rdev2->flags))
8753 continue;
8754
/* Check if the roles changed */
8756 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8757
8758 if (test_bit(Candidate, &rdev2->flags)) {
8759 if (role == 0xfffe) {
8760 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8761 md_kick_rdev_from_array(rdev2);
8762 continue;
8763 }
8764 else
8765 clear_bit(Candidate, &rdev2->flags);
8766 }
8767
8768 if (role != rdev2->raid_disk) {
/* got activated */
8770 if (rdev2->raid_disk == -1 && role != 0xffff) {
8771 rdev2->saved_raid_disk = role;
8772 ret = remove_and_add_spares(mddev, rdev2);
8773 pr_info("Activated spare: %s\n",
8774 bdevname(rdev2->bdev,b));
/* wakeup mddev->thread here, so array could
 * perform resync with the new activated disk */
8777 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8778 md_wakeup_thread(mddev->thread);
8779
8780 }
8781
/* device faulty
 * We just want to do the minimum to mark the disk
 * as faulty. The recovery is performed by the
 * one who initiated the error.
 */
8786 if ((role == 0xfffe) || (role == 0xfffd)) {
8787 md_error(mddev, rdev2);
8788 clear_bit(Blocked, &rdev2->flags);
8789 }
8790 }
8791 }
8792
8793 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8794 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8795
/* Finally set the event to be up to date */
8797 mddev->events = le64_to_cpu(sb->events);
8798}
8799
8800static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8801{
8802 int err;
8803 struct page *swapout = rdev->sb_page;
8804 struct mdp_superblock_1 *sb;
8805
/* Store the sb page of the rdev in the swapout temporary
 * variable in case we err in the future
 */
8809 rdev->sb_page = NULL;
8810 err = alloc_disk_sb(rdev);
8811 if (err == 0) {
8812 ClearPageUptodate(rdev->sb_page);
8813 rdev->sb_loaded = 0;
8814 err = super_types[mddev->major_version].
8815 load_super(rdev, NULL, mddev->minor_version);
8816 }
8817 if (err < 0) {
8818 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8819 __func__, __LINE__, rdev->desc_nr, err);
8820 if (rdev->sb_page)
8821 put_page(rdev->sb_page);
8822 rdev->sb_page = swapout;
8823 rdev->sb_loaded = 1;
8824 return err;
8825 }
8826
8827 sb = page_address(rdev->sb_page);
8828
/* Pick up any recovery progress another node may have recorded */
8832 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8833 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8834
/* If the other node finished recovery, call spare_active to set
 * device In_sync and update mddev->degraded
 */
8838 if (rdev->recovery_offset == MaxSector &&
8839 !test_bit(In_sync, &rdev->flags) &&
8840 mddev->pers->spare_active(mddev))
8841 sysfs_notify(&mddev->kobj, NULL, "degraded");
8842
8843 put_page(swapout);
8844 return 0;
8845}
8846
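/*
 * Re-read the superblock of device @nr and fold any changes another
 * cluster node made (role changes, faulty devices, event count) into
 * the in-core state, then refresh every other rdev's recovery_offset.
 */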
8847void md_reload_sb(struct mddev *mddev, int nr)
8848{
8849 struct md_rdev *rdev;
8850 int err;
8851
/* Find the rdev */
8853 rdev_for_each_rcu(rdev, mddev) {
8854 if (rdev->desc_nr == nr)
8855 break;
8856 }
8857
8858 if (!rdev || rdev->desc_nr != nr) {
8859 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8860 return;
8861 }
8862
8863 err = read_rdev(mddev, rdev);
8864 if (err < 0)
8865 return;
8866
8867 check_sb_changes(mddev, rdev);

/* Read all rdev's to update recovery_offset */
8870 rdev_for_each_rcu(rdev, mddev)
8871 read_rdev(mddev, rdev);
8872}
8873EXPORT_SYMBOL(md_reload_sb);
8874
8875#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */
8882static DEFINE_MUTEX(detected_devices_mutex);
8883static LIST_HEAD(all_detected_devices);
8884struct detected_devices_node {
8885 struct list_head list;
8886 dev_t dev;
8887};
8888
8889void md_autodetect_dev(dev_t dev)
8890{
8891 struct detected_devices_node *node_detected_dev;
8892
8893 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8894 if (node_detected_dev) {
8895 node_detected_dev->dev = dev;
8896 mutex_lock(&detected_devices_mutex);
8897 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8898 mutex_unlock(&detected_devices_mutex);
8899 }
8900}
8901
8902static void autostart_arrays(int part)
8903{
8904 struct md_rdev *rdev;
8905 struct detected_devices_node *node_detected_dev;
8906 dev_t dev;
8907 int i_scanned, i_passed;
8908
8909 i_scanned = 0;
8910 i_passed = 0;
8911
8912 pr_info("md: Autodetecting RAID arrays.\n");
8913
8914 mutex_lock(&detected_devices_mutex);
8915 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8916 i_scanned++;
8917 node_detected_dev = list_entry(all_detected_devices.next,
8918 struct detected_devices_node, list);
8919 list_del(&node_detected_dev->list);
8920 dev = node_detected_dev->dev;
8921 kfree(node_detected_dev);
8922 mutex_unlock(&detected_devices_mutex);
rdev = md_import_device(dev, 0, 90);
8924 mutex_lock(&detected_devices_mutex);
8925 if (IS_ERR(rdev))
8926 continue;
8927
8928 if (test_bit(Faulty, &rdev->flags))
8929 continue;
8930
8931 set_bit(AutoDetected, &rdev->flags);
8932 list_add(&rdev->same_set, &pending_raid_disks);
8933 i_passed++;
8934 }
8935 mutex_unlock(&detected_devices_mutex);
8936
8937 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
8938
8939 autorun_devices(part);
8940}
8941
8942#endif
8943
8944static __exit void md_exit(void)
8945{
8946 struct mddev *mddev;
8947 struct list_head *tmp;
8948 int delay = 1;
8949
8950 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8951 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8952
8953 unregister_blkdev(MD_MAJOR,"md");
8954 unregister_blkdev(mdp_major, "mdp");
8955 unregister_reboot_notifier(&md_notifier);
8956 unregister_sysctl_table(raid_table_header);
8957
/* We cannot unload the modules while some process is
 * waiting for us in select() or poll() - wake them up
 */
8961 md_unloading = 1;
8962 while (waitqueue_active(&md_event_waiters)) {
/* not safe to leave yet */
8964 wake_up(&md_event_waiters);
8965 msleep(delay);
8966 delay += delay;
8967 }
8968 remove_proc_entry("mdstat", NULL);
8969
8970 for_each_mddev(mddev, tmp) {
8971 export_array(mddev);
8972 mddev->ctime = 0;
8973 mddev->hold_active = 0;
/*
 * for_each_mddev() will call mddev_put() at the end of each
 * iteration.  As the mddev is now fully clear, this will
 * schedule the mddev for destruction by a workqueue, and the
 * destroy_workqueue() below will wait for that to complete.
 */
8980 }
8981 destroy_workqueue(md_misc_wq);
8982 destroy_workqueue(md_wq);
8983}
8984
8985subsys_initcall(md_init);
8986module_exit(md_exit)
8987
8988static int get_ro(char *buffer, struct kernel_param *kp)
8989{
8990 return sprintf(buffer, "%d", start_readonly);
8991}
8992static int set_ro(const char *val, struct kernel_param *kp)
8993{
8994 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8995}
8996
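/*
 * Example: booting with "md_mod.start_ro=1" (or writing 1 to
 * /sys/module/md_mod/parameters/start_ro at run time) makes arrays
 * start in auto-read-only mode; the first write upgrades them to
 * read-write.
 */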
8997module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8998module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8999module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9000
9001MODULE_LICENSE("GPL");
9002MODULE_DESCRIPTION("MD RAID framework");
9003MODULE_ALIAS("md");
9004MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9005