// SPDX-License-Identifier: GPL-2.0-or-later
/*
   md.c : Multiple Devices driver for Linux
     Copyright (C) 1998, 1999, 2000 Ingo Molnar
 */
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is a list of registered personalities protected
 * by pers_lock.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}
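
/*
 * Illustrative resolution order (added for clarity, not in the original
 * source): a per-array value set through sysfs wins over the global
 * sysctl default, e.g.
 *
 *	echo 50000 > /sys/block/md0/md/sync_speed_min
 *
 * makes speed_min() return 50000 for md0 while other arrays still see
 * sysctl_speed_limit_min.
 */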

static int rdev_init_wb(struct md_rdev *rdev)
{
	if (rdev->bdev->bd_queue->nr_hw_queues == 1)
		return 0;

	spin_lock_init(&rdev->wb_list_lock);
	INIT_LIST_HEAD(&rdev->wb_list);
	init_waitqueue_head(&rdev->wb_io_wait);
	set_bit(WBCollisionCheck, &rdev->flags);

	return 1;
}
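
/*
 * Note (added for clarity, not in the original source): rdev_init_wb()
 * returns 1 only for multi-queue devices, where write-behind bios for
 * overlapping sectors may be dispatched on different hardware queues and
 * so need collision checking; single-queue devices skip the check and
 * keep the pool unallocated.
 */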

/*
 * Create wb_info_pool if rdev is the first multi-queue device flagged
 * with WriteMostly, also write-behind mode is enabled.
 */
void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
			  bool is_suspend)
{
	if (mddev->bitmap_info.max_write_behind == 0)
		return;

	if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev))
		return;

	if (mddev->wb_info_pool == NULL) {
		unsigned int noio_flag;

		if (!is_suspend)
			mddev_suspend(mddev);
		noio_flag = memalloc_noio_save();
		mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS,
							sizeof(struct wb_info));
		memalloc_noio_restore(noio_flag);
		if (!mddev->wb_info_pool)
			pr_err("can't alloc memory pool for writemostly\n");
		if (!is_suspend)
			mddev_resume(mddev);
	}
}
EXPORT_SYMBOL_GPL(mddev_create_wb_pool);

/*
 * destroy wb_info_pool if rdev is the last device flagged with
 * WBCollisionCheck.
 */
static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev)
{
	if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags))
		return;

	if (mddev->wb_info_pool) {
		struct md_rdev *temp;
		int num = 0;

		/*
		 * Check if other rdevs still need wb_info_pool.
		 */
		rdev_for_each(temp, mddev)
			if (temp != rdev &&
			    test_bit(WBCollisionCheck, &temp->flags))
				num++;
		if (!num) {
			mddev_suspend(rdev->mddev);
			mempool_destroy(mddev->wb_info_pool);
			mddev->wb_info_pool = NULL;
			mddev_resume(rdev->mddev);
		}
	}
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};
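
/*
 * Illustrative sysctl usage (added for clarity, not in the original
 * source): the three tables above nest to form /proc/sys/dev/raid/, so
 * the limits can be tuned from userspace with
 *
 *	sysctl -w dev.raid.speed_limit_min=5000
 *	sysctl -w dev.raid.speed_limit_max=100000
 */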

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with
 * device-close.
 *
 * The preferred method is to write to the "new_array" module
 * parameter.  This can only happen if "create_on_open" is false,
 * but it is preserved as a module parameter for backwards
 * compatibility.
 */
static bool create_on_open = true;

struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->bio_set))
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->sync_set))
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables to iterate over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a reference when not.
 * Any code which breaks out of this loop while still holding
 * a reference to the current mddev must mddev_put it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
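
/*
 * Illustrative use of for_each_mddev() (hypothetical caller, added for
 * clarity, not in the original source):
 *
 *	struct mddev *mddev;
 *	struct list_head *tmp;
 *
 *	for_each_mddev(mddev, tmp)
 *		pr_info("md: found %s\n", mdname(mddev));
 *
 * The macro drops all_mddevs_lock between iterations while holding a
 * reference on the current mddev, so the loop body is allowed to sleep.
 */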

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device
 * is being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible under ->quiesce(), so we don't need the refcount any more.
 */
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	const int sgrp = op_stat_group(bio_op(bio));
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;

	blk_queue_split(q, &bio);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	part_stat_lock();
	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);
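
/*
 * Illustrative pairing (sketch, added for clarity, not in the original
 * source): callers hold reconfig_mutex and bracket a configuration
 * change with
 *
 *	mddev_suspend(mddev);
 *	... reconfigure the array ...
 *	mddev_resume(mddev);
 *
 * Nested calls are allowed because ->suspended is a counter.
 */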

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}

/*
 * Generic flush handling for md
 */
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to avoid a
	 * deadlock, because other bios passed md_handle_request suspend check
	 * could wait for this and below md_handle_request could wait for those
	 * bios because of suspend check
	 */
	mddev->last_flush = mddev->start_flush;
	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0) {
		/* an empty barrier - all done */
		bio_endio(bio);
	} else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_after(mddev->last_flush, start),
			    mddev->lock);
	if (!ktime_after(mddev->last_flush, start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		/* flush was performed for some other bio while we waited */
		if (bio->bi_iter.bi_size == 0)
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			mddev->pers->make_request(mddev, bio);
		}
	}
}
EXPORT_SYMBOL(md_flush_request);
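
/*
 * Note (added for clarity, not in the original source): md_flush_request()
 * coalesces flushes.  A bio only becomes mddev->flush_bio and triggers
 * submit_flushes() if no device-wide flush started after the bio was
 * submitted (the ktime comparison); otherwise the completed flush already
 * satisfies it and the bio is passed straight down with REQ_PREFLUSH
 * cleared, so a storm of flush bios costs one flush per device.
 */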

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);

		/*
		 * Call queue_work inside the spinlock so that
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}
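
/*
 * Note (added for clarity, not in the original source): mddev_find() uses
 * the classic optimistic-allocation pattern.  The lookup runs under
 * all_mddevs_lock, but kzalloc(GFP_KERNEL) may sleep, so on a miss the
 * lock is dropped, the candidate is allocated, and control jumps back to
 * "retry" to re-check the list before inserting it.
 */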

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active high while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel)==0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: super_written gets error=%d\n", bio->bi_status);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = md_bio_alloc_sync(mddev);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}
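
/*
 * Illustrative retry pattern (sketch, mirrors the rdev_size_change
 * handlers later in this file): callers that must guarantee the
 * superblock reached stable storage loop on -EAGAIN:
 *
 *	do {
 *		md_super_write(mddev, rdev, rdev->sb_start,
 *			       rdev->sb_size, rdev->sb_page);
 *	} while (md_super_wait(mddev) < 0);
 */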

int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = md_bio_alloc_sync(rdev->mddev);
	int ret;

	if (metadata_op && rdev->meta_bdev)
		bio_set_dev(bio, rdev->meta_bdev);
	else
		bio_set_dev(bio, rdev->bdev);
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_status;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev,b));
	return -EINVAL;
}

static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}
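
/*
 * Worked example (added for clarity, not in the original source):
 * md_csum_fold(0x12345678) -> 0x1234 + 0x5678 = 0x68ac; the second fold
 * is a no-op there, but it absorbs a carry when the first addition
 * overflows 16 bits, e.g. 0xffff0001 -> 0x10000 -> 0x0001.
 */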

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32*)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev will be filled in with info from dev.
 *       This does not write anything; it just makes sure that dev looks
 *       reasonable.
 *    Return:
 *      0 -   dev is acceptable into mddev
 *     -EINVAL - dev cannot be added to mddev.
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev.
 *     This does not write to disc.
 */
struct super_type {
	char			*name;
	struct module		*owner;
	int			(*load_super)(struct md_rdev *rdev,
					      struct md_rdev *refdev,
					      int minor_version);
	int			(*validate_super)(struct mddev *mddev,
						  struct md_rdev *rdev);
	void			(*sync_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	unsigned long long	(*rdev_size_change)(struct md_rdev *rdev,
						    sector_t num_sectors);
	int			(*allow_new_offset)(struct md_rdev *rdev,
						    unsigned long long new_offset);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!md_uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev,b2));
			goto abort;
		}
		if (!md_sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}

/*
 * version 1 superblock
 */
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32*)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16*) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		__le64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (__le64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */
	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks*/ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		__le64 *bbp = (__le64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;

}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata:
	 * the superblock plus a possible bad-block log, the internal
	 * bitmap (if any), and the recorded bad-block range must all
	 * end before new_offset.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};
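
/*
 * Illustrative dispatch (sketch, matches how the rest of this file uses
 * the table): metadata operations are always invoked indirectly, e.g.
 *
 *	err = super_types[mddev->major_version].load_super(rdev, refdev,
 *						mddev->minor_version);
 *
 * so v0.90 and v1.x arrays share a single code path.
 */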

static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);

/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
		pr_err("md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		pr_err("%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);
2224
2225static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2226{
2227 char b[BDEVNAME_SIZE];
2228 struct kobject *ko;
2229 int err;
2230
2231
2232 if (find_rdev(mddev, rdev->bdev->bd_dev))
2233 return -EEXIST;
2234
2235 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2236 mddev->pers)
2237 return -EROFS;
2238
2239
2240 if (!test_bit(Journal, &rdev->flags) &&
2241 rdev->sectors &&
2242 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2243 if (mddev->pers) {
2244
2245
2246
2247
2248 if (mddev->level > 0)
2249 return -ENOSPC;
2250 } else
2251 mddev->dev_sectors = rdev->sectors;
2252 }
2253
2254
2255
2256
2257
2258 rcu_read_lock();
2259 if (rdev->desc_nr < 0) {
2260 int choice = 0;
2261 if (mddev->pers)
2262 choice = mddev->raid_disks;
2263 while (md_find_rdev_nr_rcu(mddev, choice))
2264 choice++;
2265 rdev->desc_nr = choice;
2266 } else {
2267 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2268 rcu_read_unlock();
2269 return -EBUSY;
2270 }
2271 }
2272 rcu_read_unlock();
2273 if (!test_bit(Journal, &rdev->flags) &&
2274 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2275 pr_warn("md: %s: array is limited to %d devices\n",
2276 mdname(mddev), mddev->max_disks);
2277 return -EBUSY;
2278 }
2279 bdevname(rdev->bdev,b);
2280 strreplace(b, '/', '!');
2281
2282 rdev->mddev = mddev;
2283 pr_debug("md: bind<%s>\n", b);
2284
2285 if (mddev->raid_disks)
2286 mddev_create_wb_pool(mddev, rdev, false);
2287
2288 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2289 goto fail;
2290
2291 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
2294 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2295
2296 list_add_rcu(&rdev->same_set, &mddev->disks);
2297 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2298
	/* May as well allow recovery to be retried once. */
2300 mddev->recovery_disabled++;
2301
2302 return 0;
2303
2304 fail:
2305 pr_warn("md: failed to register dev-%s for %s\n",
2306 b, mdname(mddev));
2307 return err;
2308}
2309
2310static void md_delayed_delete(struct work_struct *ws)
2311{
2312 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2313 kobject_del(&rdev->kobj);
2314 kobject_put(&rdev->kobj);
2315}
2316
2317static void unbind_rdev_from_array(struct md_rdev *rdev)
2318{
2319 char b[BDEVNAME_SIZE];
2320
2321 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2322 list_del_rcu(&rdev->same_set);
2323 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2324 mddev_destroy_wb_pool(rdev->mddev, rdev);
2325 rdev->mddev = NULL;
2326 sysfs_remove_link(&rdev->kobj, "block");
2327 sysfs_put(rdev->sysfs_state);
2328 rdev->sysfs_state = NULL;
2329 rdev->badblocks.count = 0;
	/*
	 * List readers may still see this rdev under RCU, so wait a grace
	 * period, then do the final kobject_del()/kobject_put() from a
	 * workqueue rather than from this context.
	 */
2334 synchronize_rcu();
2335 INIT_WORK(&rdev->del_work, md_delayed_delete);
2336 kobject_get(&rdev->kobj);
2337 queue_work(md_misc_wq, &rdev->del_work);
2338}
2339
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2345static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2346{
2347 int err = 0;
2348 struct block_device *bdev;
2349 char b[BDEVNAME_SIZE];
2350
2351 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2352 shared ? (struct md_rdev *)lock_rdev : rdev);
2353 if (IS_ERR(bdev)) {
2354 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2355 return PTR_ERR(bdev);
2356 }
2357 rdev->bdev = bdev;
2358 return err;
2359}
2360
2361static void unlock_rdev(struct md_rdev *rdev)
2362{
2363 struct block_device *bdev = rdev->bdev;
2364 rdev->bdev = NULL;
2365 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2366}
2367
2368void md_autodetect_dev(dev_t dev);
2369
2370static void export_rdev(struct md_rdev *rdev)
2371{
2372 char b[BDEVNAME_SIZE];
2373
2374 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2375 md_rdev_clear(rdev);
2376#ifndef MODULE
2377 if (test_bit(AutoDetected, &rdev->flags))
2378 md_autodetect_dev(rdev->bdev->bd_dev);
2379#endif
2380 unlock_rdev(rdev);
2381 kobject_put(&rdev->kobj);
2382}
2383
2384void md_kick_rdev_from_array(struct md_rdev *rdev)
2385{
2386 unbind_rdev_from_array(rdev);
2387 export_rdev(rdev);
2388}
2389EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2390
2391static void export_array(struct mddev *mddev)
2392{
2393 struct md_rdev *rdev;
2394
2395 while (!list_empty(&mddev->disks)) {
2396 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2397 same_set);
2398 md_kick_rdev_from_array(rdev);
2399 }
2400 mddev->raid_disks = 0;
2401 mddev->major_version = 0;
2402}
2403
2404static bool set_in_sync(struct mddev *mddev)
2405{
2406 lockdep_assert_held(&mddev->lock);
2407 if (!mddev->in_sync) {
2408 mddev->sync_checkers++;
2409 spin_unlock(&mddev->lock);
2410 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2411 spin_lock(&mddev->lock);
2412 if (!mddev->in_sync &&
2413 percpu_ref_is_zero(&mddev->writes_pending)) {
2414 mddev->in_sync = 1;
			/*
			 * Ensure ->in_sync is visible before we clear
			 * ->sync_checkers.
			 */
2419 smp_mb();
2420 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2421 sysfs_notify_dirent_safe(mddev->sysfs_state);
2422 }
2423 if (--mddev->sync_checkers == 0)
2424 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2425 }
2426 if (mddev->safemode == 1)
2427 mddev->safemode = 0;
2428 return mddev->in_sync;
2429}
2430
2431static void sync_sbs(struct mddev *mddev, int nospares)
2432{
	/* Update each superblock (in memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2439 struct md_rdev *rdev;
2440 rdev_for_each(rdev, mddev) {
2441 if (rdev->sb_events == mddev->events ||
2442 (nospares &&
2443 rdev->raid_disk < 0 &&
2444 rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
2446 rdev->sb_loaded = 2;
2447 } else {
2448 sync_super(mddev, rdev);
2449 rdev->sb_loaded = 1;
2450 }
2451 }
2452}
2453
2454static bool does_sb_need_changing(struct mddev *mddev)
2455{
2456 struct md_rdev *rdev;
2457 struct mdp_superblock_1 *sb;
2458 int role;
2459
	/* Find a good rdev */
2461 rdev_for_each(rdev, mddev)
2462 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2463 break;

	/* No good device found. */
2466 if (!rdev)
2467 return false;
2468
2469 sb = page_address(rdev->sb_page);
	/* Check if a device has become faulty or a spare become active */
2471 rdev_for_each(rdev, mddev) {
2472 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
2474 if (role == 0xffff && rdev->raid_disk >=0 &&
2475 !test_bit(Faulty, &rdev->flags))
2476 return true;
		/* Device turned faulty? */
2478 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2479 return true;
2480 }
2481
	/* Check if any mddev parameters have changed */
2483 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2484 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2485 (mddev->layout != le32_to_cpu(sb->layout)) ||
2486 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2487 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2488 return true;
2489
2490 return false;
2491}
2492
2493void md_update_sb(struct mddev *mddev, int force_change)
2494{
2495 struct md_rdev *rdev;
2496 int sync_req;
2497 int nospares = 0;
2498 int any_badblocks_changed = 0;
2499 int ret = -1;
2500
2501 if (mddev->ro) {
2502 if (force_change)
2503 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2504 return;
2505 }
2506
2507repeat:
2508 if (mddev_is_clustered(mddev)) {
2509 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2510 force_change = 1;
2511 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2512 nospares = 1;
2513 ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has someone else already updated the superblock? */
2515 if (!does_sb_need_changing(mddev)) {
2516 if (ret == 0)
2517 md_cluster_ops->metadata_update_cancel(mddev);
2518 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2519 BIT(MD_SB_CHANGE_DEVS) |
2520 BIT(MD_SB_CHANGE_CLEAN));
2521 return;
2522 }
2523 }
2524
	/*
	 * First make sure individual recovery_offsets are correct.
	 * curr_resync_completed can only be used during recovery.
	 * During reshape/resync it might use array-addresses rather
	 * than device addresses.
	 */
2531 rdev_for_each(rdev, mddev) {
2532 if (rdev->raid_disk >= 0 &&
2533 mddev->delta_disks >= 0 &&
2534 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2535 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2536 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2537 !test_bit(Journal, &rdev->flags) &&
2538 !test_bit(In_sync, &rdev->flags) &&
2539 mddev->curr_resync_completed > rdev->recovery_offset)
2540 rdev->recovery_offset = mddev->curr_resync_completed;
2541
2542 }
2543 if (!mddev->persistent) {
2544 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2545 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2546 if (!mddev->external) {
2547 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2548 rdev_for_each(rdev, mddev) {
2549 if (rdev->badblocks.changed) {
2550 rdev->badblocks.changed = 0;
2551 ack_all_badblocks(&rdev->badblocks);
2552 md_error(mddev, rdev);
2553 }
2554 clear_bit(Blocked, &rdev->flags);
2555 clear_bit(BlockedBadBlocks, &rdev->flags);
2556 wake_up(&rdev->blocked_wait);
2557 }
2558 }
2559 wake_up(&mddev->sb_wait);
2560 return;
2561 }
2562
2563 spin_lock(&mddev->lock);
2564
2565 mddev->utime = ktime_get_real_seconds();
2566
2567 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2568 force_change = 1;
2569 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
2574 nospares = 1;
2575 if (force_change)
2576 nospares = 0;
2577 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly silly.
		 * dangerous because a device that was removed from the array
		 * might have an event count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Thus we have to allow the event count to reach the newest
		 * value, so that a consistent set of devices is used as the
		 * array degrades.
		 */
2587 nospares = 0;
2588
2589 sync_req = mddev->in_sync;

	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
2593 if (nospares
2594 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2595 && mddev->can_decrease_events
2596 && mddev->events != 1) {
2597 mddev->events--;
2598 mddev->can_decrease_events = 0;
2599 } else {
		/* otherwise we have to go forward */
2601 mddev->events ++;
2602 mddev->can_decrease_events = nospares;
2603 }
2604
	/*
	 * This 64-bit counter should never wrap.
	 * Either we are in around ~1 trillion A.C., assuming
	 * 1 reboot per second, or we have a bug:
	 */
2610 WARN_ON(mddev->events == 0);
2611
2612 rdev_for_each(rdev, mddev) {
2613 if (rdev->badblocks.changed)
2614 any_badblocks_changed++;
2615 if (test_bit(Faulty, &rdev->flags))
2616 set_bit(FaultRecorded, &rdev->flags);
2617 }
2618
2619 sync_sbs(mddev, nospares);
2620 spin_unlock(&mddev->lock);
2621
2622 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2623 mdname(mddev), mddev->in_sync);
2624
2625 if (mddev->queue)
2626 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2627rewrite:
2628 md_bitmap_update_sb(mddev->bitmap);
2629 rdev_for_each(rdev, mddev) {
2630 char b[BDEVNAME_SIZE];
2631
2632 if (rdev->sb_loaded != 1)
2633 continue;
2634
2635 if (!test_bit(Faulty, &rdev->flags)) {
2636 md_super_write(mddev,rdev,
2637 rdev->sb_start, rdev->sb_size,
2638 rdev->sb_page);
2639 pr_debug("md: (write) %s's sb offset: %llu\n",
2640 bdevname(rdev->bdev, b),
2641 (unsigned long long)rdev->sb_start);
2642 rdev->sb_events = mddev->events;
2643 if (rdev->badblocks.size) {
2644 md_super_write(mddev, rdev,
2645 rdev->badblocks.sector,
2646 rdev->badblocks.size << 9,
2647 rdev->bb_page);
2648 rdev->badblocks.size = 0;
2649 }
2650
2651 } else
2652 pr_debug("md: %s (skipping faulty)\n",
2653 bdevname(rdev->bdev, b));
2654
2655 if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
2657 break;
2658 }
2659 if (md_super_wait(mddev) < 0)
2660 goto rewrite;
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */

2663 if (mddev_is_clustered(mddev) && ret == 0)
2664 md_cluster_ops->metadata_update_finish(mddev);
2665
2666 if (mddev->in_sync != sync_req ||
2667 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2668 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* have to write it out again */
2670 goto repeat;
2671 wake_up(&mddev->sb_wait);
2672 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2673 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2674
2675 rdev_for_each(rdev, mddev) {
2676 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2677 clear_bit(Blocked, &rdev->flags);
2678
2679 if (any_badblocks_changed)
2680 ack_all_badblocks(&rdev->badblocks);
2681 clear_bit(BlockedBadBlocks, &rdev->flags);
2682 wake_up(&rdev->blocked_wait);
2683 }
2684}
2685EXPORT_SYMBOL(md_update_sb);
2686
2687static int add_bound_rdev(struct md_rdev *rdev)
2688{
2689 struct mddev *mddev = rdev->mddev;
2690 int err = 0;
2691 bool add_journal = test_bit(Journal, &rdev->flags);
2692
2693 if (!mddev->pers->hot_remove_disk || add_journal) {
		/* If there is hot_remove_disk but no hot_add_disk
		 * then added disks for geometry changes,
		 * and should be added immediately.
		 */
2698 super_types[mddev->major_version].
2699 validate_super(mddev, rdev);
2700 if (add_journal)
2701 mddev_suspend(mddev);
2702 err = mddev->pers->hot_add_disk(mddev, rdev);
2703 if (add_journal)
2704 mddev_resume(mddev);
2705 if (err) {
2706 md_kick_rdev_from_array(rdev);
2707 return err;
2708 }
2709 }
2710 sysfs_notify_dirent_safe(rdev->sysfs_state);
2711
2712 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2713 if (mddev->degraded)
2714 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2715 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2716 md_new_event(mddev);
2717 md_wakeup_thread(mddev->thread);
2718 return 0;
2719}
2720
/* words written to sysfs files may, or may not, be \n terminated.
 * cmd_match() compares one such word against a fixed string, treating
 * a single trailing newline on the input as optional.
 */
2724static int cmd_match(const char *cmd, const char *str)
2725{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline.
	 */
2730 while (*cmd && *str && *cmd == *str) {
2731 cmd++;
2732 str++;
2733 }
2734 if (*cmd == '\n')
2735 cmd++;
2736 if (*str || *cmd)
2737 return 0;
2738 return 1;
2739}
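
/*
 * Behaviour sketch (illustrative values):
 *
 *	cmd_match("frozen\n", "frozen")  -> 1  (trailing newline ignored)
 *	cmd_match("frozen",   "frozen")  -> 1
 *	cmd_match("frozenX\n", "frozen") -> 0  (extra characters after match)
 */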
2740
2741struct rdev_sysfs_entry {
2742 struct attribute attr;
2743 ssize_t (*show)(struct md_rdev *, char *);
2744 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2745};
2746
2747static ssize_t
2748state_show(struct md_rdev *rdev, char *page)
2749{
2750 char *sep = ",";
2751 size_t len = 0;
2752 unsigned long flags = READ_ONCE(rdev->flags);
2753
2754 if (test_bit(Faulty, &flags) ||
2755 (!test_bit(ExternalBbl, &flags) &&
2756 rdev->badblocks.unacked_exist))
2757 len += sprintf(page+len, "faulty%s", sep);
2758 if (test_bit(In_sync, &flags))
2759 len += sprintf(page+len, "in_sync%s", sep);
2760 if (test_bit(Journal, &flags))
2761 len += sprintf(page+len, "journal%s", sep);
2762 if (test_bit(WriteMostly, &flags))
2763 len += sprintf(page+len, "write_mostly%s", sep);
2764 if (test_bit(Blocked, &flags) ||
2765 (rdev->badblocks.unacked_exist
2766 && !test_bit(Faulty, &flags)))
2767 len += sprintf(page+len, "blocked%s", sep);
2768 if (!test_bit(Faulty, &flags) &&
2769 !test_bit(Journal, &flags) &&
2770 !test_bit(In_sync, &flags))
2771 len += sprintf(page+len, "spare%s", sep);
2772 if (test_bit(WriteErrorSeen, &flags))
2773 len += sprintf(page+len, "write_error%s", sep);
2774 if (test_bit(WantReplacement, &flags))
2775 len += sprintf(page+len, "want_replacement%s", sep);
2776 if (test_bit(Replacement, &flags))
2777 len += sprintf(page+len, "replacement%s", sep);
2778 if (test_bit(ExternalBbl, &flags))
2779 len += sprintf(page+len, "external_bbl%s", sep);
2780 if (test_bit(FailFast, &flags))
2781 len += sprintf(page+len, "failfast%s", sep);
2782
2783 if (len)
2784 len -= strlen(sep);
2785
2786 return len+sprintf(page+len, "\n");
2787}
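
/*
 * Example output (illustrative): for a healthy member with In_sync and
 * WriteMostly set, the loop above emits "in_sync,write_mostly," and the
 * final trim drops the trailing separator, so the file reads
 * "in_sync,write_mostly\n".
 */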
2788
2789static ssize_t
2790state_store(struct md_rdev *rdev, const char *buf, size_t len)
2791{
	/* can write
	 *  faulty  - simulates an error
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flags
	 *  -blocked - clears the Blocked and possibly simulates an error
	 *  insync - sets Insync providing device isn't active
	 *  -insync - clear Insync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 *  {,-}failfast - set/clear FailFast
	 */
2806 int err = -EINVAL;
2807 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2808 md_error(rdev->mddev, rdev);
2809 if (test_bit(Faulty, &rdev->flags))
2810 err = 0;
2811 else
2812 err = -EBUSY;
2813 } else if (cmd_match(buf, "remove")) {
2814 if (rdev->mddev->pers) {
2815 clear_bit(Blocked, &rdev->flags);
2816 remove_and_add_spares(rdev->mddev, rdev);
2817 }
2818 if (rdev->raid_disk >= 0)
2819 err = -EBUSY;
2820 else {
2821 struct mddev *mddev = rdev->mddev;
2822 err = 0;
2823 if (mddev_is_clustered(mddev))
2824 err = md_cluster_ops->remove_disk(mddev, rdev);
2825
2826 if (err == 0) {
2827 md_kick_rdev_from_array(rdev);
2828 if (mddev->pers) {
2829 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2830 md_wakeup_thread(mddev->thread);
2831 }
2832 md_new_event(mddev);
2833 }
2834 }
2835 } else if (cmd_match(buf, "writemostly")) {
2836 set_bit(WriteMostly, &rdev->flags);
2837 mddev_create_wb_pool(rdev->mddev, rdev, false);
2838 err = 0;
2839 } else if (cmd_match(buf, "-writemostly")) {
2840 mddev_destroy_wb_pool(rdev->mddev, rdev);
2841 clear_bit(WriteMostly, &rdev->flags);
2842 err = 0;
2843 } else if (cmd_match(buf, "blocked")) {
2844 set_bit(Blocked, &rdev->flags);
2845 err = 0;
2846 } else if (cmd_match(buf, "-blocked")) {
2847 if (!test_bit(Faulty, &rdev->flags) &&
2848 !test_bit(ExternalBbl, &rdev->flags) &&
2849 rdev->badblocks.unacked_exist) {
			/* metadata handler doesn't understand badblocks,
			 * so we need to fail the device
			 */
2853 md_error(rdev->mddev, rdev);
2854 }
2855 clear_bit(Blocked, &rdev->flags);
2856 clear_bit(BlockedBadBlocks, &rdev->flags);
2857 wake_up(&rdev->blocked_wait);
2858 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2859 md_wakeup_thread(rdev->mddev->thread);
2860
2861 err = 0;
2862 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2863 set_bit(In_sync, &rdev->flags);
2864 err = 0;
2865 } else if (cmd_match(buf, "failfast")) {
2866 set_bit(FailFast, &rdev->flags);
2867 err = 0;
2868 } else if (cmd_match(buf, "-failfast")) {
2869 clear_bit(FailFast, &rdev->flags);
2870 err = 0;
2871 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2872 !test_bit(Journal, &rdev->flags)) {
2873 if (rdev->mddev->pers == NULL) {
2874 clear_bit(In_sync, &rdev->flags);
2875 rdev->saved_raid_disk = rdev->raid_disk;
2876 rdev->raid_disk = -1;
2877 err = 0;
2878 }
2879 } else if (cmd_match(buf, "write_error")) {
2880 set_bit(WriteErrorSeen, &rdev->flags);
2881 err = 0;
2882 } else if (cmd_match(buf, "-write_error")) {
2883 clear_bit(WriteErrorSeen, &rdev->flags);
2884 err = 0;
2885 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
2890 if (rdev->raid_disk >= 0 &&
2891 !test_bit(Journal, &rdev->flags) &&
2892 !test_bit(Replacement, &rdev->flags))
2893 set_bit(WantReplacement, &rdev->flags);
2894 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2895 md_wakeup_thread(rdev->mddev->thread);
2896 err = 0;
2897 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacement starts it is too late though.
		 */
2901 err = 0;
2902 clear_bit(WantReplacement, &rdev->flags);
2903 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, or by assigning 'slot'.
		 */
2908 if (rdev->mddev->pers)
2909 err = -EBUSY;
2910 else {
2911 set_bit(Replacement, &rdev->flags);
2912 err = 0;
2913 }
2914 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
2916 if (rdev->mddev->pers)
2917 err = -EBUSY;
2918 else {
2919 clear_bit(Replacement, &rdev->flags);
2920 err = 0;
2921 }
2922 } else if (cmd_match(buf, "re-add")) {
2923 if (!rdev->mddev->pers)
2924 err = -EINVAL;
2925 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
2926 rdev->saved_raid_disk >= 0) {
			/* In clustered mode, bitmaps from the other nodes
			 * are gathered first, so that writes which happened
			 * while this device was failed get resynced; only
			 * then is Faulty cleared and the device bound back
			 * into the array.
			 */
2933 if (!mddev_is_clustered(rdev->mddev) ||
2934 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2935 clear_bit(Faulty, &rdev->flags);
2936 err = add_bound_rdev(rdev);
2937 }
2938 } else
2939 err = -EBUSY;
2940 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2941 set_bit(ExternalBbl, &rdev->flags);
2942 rdev->badblocks.shift = 0;
2943 err = 0;
2944 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2945 clear_bit(ExternalBbl, &rdev->flags);
2946 err = 0;
2947 }
2948 if (!err)
2949 sysfs_notify_dirent_safe(rdev->sysfs_state);
2950 return err ? err : len;
2951}
2952static struct rdev_sysfs_entry rdev_state =
2953__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
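
/*
 * Typical use from user space (hypothetical device names; the path
 * follows the "dev-%s" kobject naming used in bind_rdev_to_array()):
 *
 *	# echo want_replacement > /sys/block/md0/md/dev-sdb1/state
 *
 * which sets WantReplacement on that member and wakes the recovery
 * thread, as handled above.
 */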
2954
2955static ssize_t
2956errors_show(struct md_rdev *rdev, char *page)
2957{
2958 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2959}
2960
2961static ssize_t
2962errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2963{
2964 unsigned int n;
2965 int rv;
2966
2967 rv = kstrtouint(buf, 10, &n);
2968 if (rv < 0)
2969 return rv;
2970 atomic_set(&rdev->corrected_errors, n);
2971 return len;
2972}
2973static struct rdev_sysfs_entry rdev_errors =
2974__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2975
2976static ssize_t
2977slot_show(struct md_rdev *rdev, char *page)
2978{
2979 if (test_bit(Journal, &rdev->flags))
2980 return sprintf(page, "journal\n");
2981 else if (rdev->raid_disk < 0)
2982 return sprintf(page, "none\n");
2983 else
2984 return sprintf(page, "%d\n", rdev->raid_disk);
2985}
2986
2987static ssize_t
2988slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2989{
2990 int slot;
2991 int err;
2992
2993 if (test_bit(Journal, &rdev->flags))
2994 return -EBUSY;
2995 if (strncmp(buf, "none", 4)==0)
2996 slot = -1;
2997 else {
2998 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2999 if (err < 0)
3000 return err;
3001 }
3002 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' to 'none' - deactivate a possibly
		 * active device.
		 * The personality must support hot removal, and
		 * remove_and_add_spares() must actually free the slot;
		 * if the device is still attached afterwards the
		 * request fails with -EBUSY.
		 */
3010 if (rdev->raid_disk == -1)
3011 return -EEXIST;
3012
3013 if (rdev->mddev->pers->hot_remove_disk == NULL)
3014 return -EINVAL;
3015 clear_bit(Blocked, &rdev->flags);
3016 remove_and_add_spares(rdev->mddev, rdev);
3017 if (rdev->raid_disk >= 0)
3018 return -EBUSY;
3019 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3020 md_wakeup_thread(rdev->mddev->thread);
3021 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
3025 int err;
3026
3027 if (rdev->raid_disk != -1)
3028 return -EBUSY;
3029
3030 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3031 return -EBUSY;
3032
3033 if (rdev->mddev->pers->hot_add_disk == NULL)
3034 return -EINVAL;
3035
3036 if (slot >= rdev->mddev->raid_disks &&
3037 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3038 return -ENOSPC;
3039
3040 rdev->raid_disk = slot;
3041 if (test_bit(In_sync, &rdev->flags))
3042 rdev->saved_raid_disk = slot;
3043 else
3044 rdev->saved_raid_disk = -1;
3045 clear_bit(In_sync, &rdev->flags);
3046 clear_bit(Bitmap_sync, &rdev->flags);
3047 err = rdev->mddev->pers->
3048 hot_add_disk(rdev->mddev, rdev);
3049 if (err) {
3050 rdev->raid_disk = -1;
3051 return err;
3052 } else
3053 sysfs_notify_dirent_safe(rdev->sysfs_state);
3054 if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
3056
3057 } else {
3058 if (slot >= rdev->mddev->raid_disks &&
3059 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3060 return -ENOSPC;
3061 rdev->raid_disk = slot;
		/* assume it is working */
3063 clear_bit(Faulty, &rdev->flags);
3064 clear_bit(WriteMostly, &rdev->flags);
3065 set_bit(In_sync, &rdev->flags);
3066 sysfs_notify_dirent_safe(rdev->sysfs_state);
3067 }
3068 return len;
3069}
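
/*
 * Example usage (hypothetical device names): on a running array,
 * writing a slot number hot-adds a spare at that raid_disk, and
 * writing "none" hot-removes it again:
 *
 *	# echo 2 > /sys/block/md0/md/dev-sdc1/slot
 *	# echo none > /sys/block/md0/md/dev-sdc1/slot
 */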
3070
3071static struct rdev_sysfs_entry rdev_slot =
3072__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3073
3074static ssize_t
3075offset_show(struct md_rdev *rdev, char *page)
3076{
3077 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3078}
3079
3080static ssize_t
3081offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3082{
3083 unsigned long long offset;
3084 if (kstrtoull(buf, 10, &offset) < 0)
3085 return -EINVAL;
3086 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3087 return -EBUSY;
3088 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
3091 return -EBUSY;
3092 rdev->data_offset = offset;
3093 rdev->new_data_offset = offset;
3094 return len;
3095}
3096
3097static struct rdev_sysfs_entry rdev_offset =
3098__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3099
3100static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3101{
3102 return sprintf(page, "%llu\n",
3103 (unsigned long long)rdev->new_data_offset);
3104}
3105
3106static ssize_t new_offset_store(struct md_rdev *rdev,
3107 const char *buf, size_t len)
3108{
3109 unsigned long long new_offset;
3110 struct mddev *mddev = rdev->mddev;
3111
3112 if (kstrtoull(buf, 10, &new_offset) < 0)
3113 return -EINVAL;
3114
3115 if (mddev->sync_thread ||
3116 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3117 return -EBUSY;
3118 if (new_offset == rdev->data_offset)
		/* reset is always permitted */
3120 ;
3121 else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev->sectors */
3123 if (new_offset - rdev->data_offset
3124 + mddev->dev_sectors > rdev->sectors)
3125 return -E2BIG;
3126 }
	/* Metadata worries about other space details. */

	/* decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
3132 if (new_offset < rdev->data_offset &&
3133 mddev->reshape_backwards)
3134 return -EINVAL;
3135
	/* Increasing offset is inconsistent with a forwards
	 * reshape.
	 */
3139 if (new_offset > rdev->data_offset &&
3140 !mddev->reshape_backwards)
3141 return -EINVAL;
3142
3143 if (mddev->pers && mddev->persistent &&
3144 !super_types[mddev->major_version]
3145 .allow_new_offset(rdev, new_offset))
3146 return -E2BIG;
3147 rdev->new_data_offset = new_offset;
3148 if (new_offset > rdev->data_offset)
3149 mddev->reshape_backwards = 1;
3150 else if (new_offset < rdev->data_offset)
3151 mddev->reshape_backwards = 0;
3152
3153 return len;
3154}
3155static struct rdev_sysfs_entry rdev_new_offset =
3156__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3157
3158static ssize_t
3159rdev_size_show(struct md_rdev *rdev, char *page)
3160{
3161 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3162}
3163
3164static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3165{
	/* check if two start/length pairs overlap */
3167 if (s1+l1 <= s2)
3168 return 0;
3169 if (s2+l2 <= s1)
3170 return 0;
3171 return 1;
3172}
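
/*
 * Worked example: overlaps(0, 100, 50, 10) is 1 because [0,100) and
 * [50,60) intersect, while overlaps(0, 100, 100, 10) is 0 because
 * s1+l1 <= s2; ranges that merely touch do not overlap.
 */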
3173
3174static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3175{
3176 unsigned long long blocks;
3177 sector_t new;
3178
3179 if (kstrtoull(buf, 10, &blocks) < 0)
3180 return -EINVAL;
3181
3182 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3183 return -EINVAL;
3184
3185 new = blocks * 2;
3186 if (new != blocks * 2)
3187 return -EINVAL;
3188
3189 *sectors = new;
3190 return 0;
3191}
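
/*
 * Example: "4\n" parses as 4 one-kilobyte blocks and sets *sectors to 8
 * (512-byte sectors).  The top-bit test rejects values where blocks * 2
 * would overflow, and the new != blocks * 2 recheck guards against a
 * narrower sector_t.
 */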
3192
3193static ssize_t
3194rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3195{
3196 struct mddev *my_mddev = rdev->mddev;
3197 sector_t oldsectors = rdev->sectors;
3198 sector_t sectors;
3199
3200 if (test_bit(Journal, &rdev->flags))
3201 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
3203 return -EINVAL;
3204 if (rdev->data_offset != rdev->new_data_offset)
3205 return -EINVAL;
3206 if (my_mddev->pers && rdev->raid_disk >= 0) {
3207 if (my_mddev->persistent) {
3208 sectors = super_types[my_mddev->major_version].
3209 rdev_size_change(rdev, sectors);
3210 if (!sectors)
3211 return -EBUSY;
3212 } else if (!sectors)
3213 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3214 rdev->data_offset;
3215 if (!my_mddev->pers->resize)
			/* Cannot change size for RAID0 or Linear etc */
3217 return -EINVAL;
3218 }
3219 if (sectors < my_mddev->dev_sectors)
3220 return -EINVAL;
3221
3222 rdev->sectors = sectors;
3223 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same
		 * ->bdev do not overlap.  'rcu' is sufficient to walk
		 * the rdev lists safely.
		 * This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
3230 struct mddev *mddev;
3231 int overlap = 0;
3232 struct list_head *tmp;
3233
3234 rcu_read_lock();
3235 for_each_mddev(mddev, tmp) {
3236 struct md_rdev *rdev2;
3237
3238 rdev_for_each(rdev2, mddev)
3239 if (rdev->bdev == rdev2->bdev &&
3240 rdev != rdev2 &&
3241 overlaps(rdev->data_offset, rdev->sectors,
3242 rdev2->data_offset,
3243 rdev2->sectors)) {
3244 overlap = 1;
3245 break;
3246 }
3247 if (overlap) {
3248 mddev_put(mddev);
3249 break;
3250 }
3251 }
3252 rcu_read_unlock();
3253 if (overlap) {
		/* Someone else could have slipped in a size
		 * change here, but doing so is just silly.
		 * We put oldsectors back because we *know* it is
		 * safe, and trust user space not to race with
		 * itself.
		 */
3260 rdev->sectors = oldsectors;
3261 return -EBUSY;
3262 }
3263 }
3264 return len;
3265}
3266
3267static struct rdev_sysfs_entry rdev_size =
3268__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3269
3270static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3271{
3272 unsigned long long recovery_start = rdev->recovery_offset;
3273
3274 if (test_bit(In_sync, &rdev->flags) ||
3275 recovery_start == MaxSector)
3276 return sprintf(page, "none\n");
3277
3278 return sprintf(page, "%llu\n", recovery_start);
3279}
3280
3281static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3282{
3283 unsigned long long recovery_start;
3284
3285 if (cmd_match(buf, "none"))
3286 recovery_start = MaxSector;
3287 else if (kstrtoull(buf, 10, &recovery_start))
3288 return -EINVAL;
3289
3290 if (rdev->mddev->pers &&
3291 rdev->raid_disk >= 0)
3292 return -EBUSY;
3293
3294 rdev->recovery_offset = recovery_start;
3295 if (recovery_start == MaxSector)
3296 set_bit(In_sync, &rdev->flags);
3297 else
3298 clear_bit(In_sync, &rdev->flags);
3299 return len;
3300}
3301
3302static struct rdev_sysfs_entry rdev_recovery_start =
3303__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3304
/* sysfs access to bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad.  The list is truncated to fit within
 *    the one-page limit of sysfs.
 *    Writing "sector length" to this file adds an acknowledged
 *    bad block.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged.  Writing to this file adds bad blocks
 *    without acknowledging them.  This is largely for testing.
 */
3316static ssize_t bb_show(struct md_rdev *rdev, char *page)
3317{
3318 return badblocks_show(&rdev->badblocks, page, 0);
3319}
3320static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3321{
3322 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3323
3324 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3325 wake_up(&rdev->blocked_wait);
3326 return rv;
3327}
3328static struct rdev_sysfs_entry rdev_bad_blocks =
3329__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3330
3331static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3332{
3333 return badblocks_show(&rdev->badblocks, page, 1);
3334}
3335static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3336{
3337 return badblocks_store(&rdev->badblocks, page, len, 1);
3338}
3339static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3340__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3341
3342static ssize_t
3343ppl_sector_show(struct md_rdev *rdev, char *page)
3344{
3345 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3346}
3347
3348static ssize_t
3349ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3350{
3351 unsigned long long sector;
3352
	if (kstrtoull(buf, 10, &sector) < 0)
3354 return -EINVAL;
3355 if (sector != (sector_t)sector)
3356 return -EINVAL;
3357
3358 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3359 rdev->raid_disk >= 0)
3360 return -EBUSY;
3361
3362 if (rdev->mddev->persistent) {
3363 if (rdev->mddev->major_version == 0)
3364 return -EINVAL;
3365 if ((sector > rdev->sb_start &&
3366 sector - rdev->sb_start > S16_MAX) ||
3367 (sector < rdev->sb_start &&
3368 rdev->sb_start - sector > -S16_MIN))
3369 return -EINVAL;
3370 rdev->ppl.offset = sector - rdev->sb_start;
3371 } else if (!rdev->mddev->external) {
3372 return -EBUSY;
3373 }
3374 rdev->ppl.sector = sector;
3375 return len;
3376}
3377
3378static struct rdev_sysfs_entry rdev_ppl_sector =
3379__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3380
3381static ssize_t
3382ppl_size_show(struct md_rdev *rdev, char *page)
3383{
3384 return sprintf(page, "%u\n", rdev->ppl.size);
3385}
3386
3387static ssize_t
3388ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3389{
3390 unsigned int size;
3391
3392 if (kstrtouint(buf, 10, &size) < 0)
3393 return -EINVAL;
3394
3395 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3396 rdev->raid_disk >= 0)
3397 return -EBUSY;
3398
3399 if (rdev->mddev->persistent) {
3400 if (rdev->mddev->major_version == 0)
3401 return -EINVAL;
3402 if (size > U16_MAX)
3403 return -EINVAL;
3404 } else if (!rdev->mddev->external) {
3405 return -EBUSY;
3406 }
3407 rdev->ppl.size = size;
3408 return len;
3409}
3410
3411static struct rdev_sysfs_entry rdev_ppl_size =
3412__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3413
3414static struct attribute *rdev_default_attrs[] = {
3415 &rdev_state.attr,
3416 &rdev_errors.attr,
3417 &rdev_slot.attr,
3418 &rdev_offset.attr,
3419 &rdev_new_offset.attr,
3420 &rdev_size.attr,
3421 &rdev_recovery_start.attr,
3422 &rdev_bad_blocks.attr,
3423 &rdev_unack_bad_blocks.attr,
3424 &rdev_ppl_sector.attr,
3425 &rdev_ppl_size.attr,
3426 NULL,
3427};
3428static ssize_t
3429rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3430{
3431 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3432 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3433
3434 if (!entry->show)
3435 return -EIO;
3436 if (!rdev->mddev)
3437 return -ENODEV;
3438 return entry->show(rdev, page);
3439}
3440
3441static ssize_t
3442rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3443 const char *page, size_t length)
3444{
3445 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3446 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3447 ssize_t rv;
3448 struct mddev *mddev = rdev->mddev;
3449
3450 if (!entry->store)
3451 return -EIO;
3452 if (!capable(CAP_SYS_ADMIN))
3453 return -EACCES;
3454 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3455 if (!rv) {
3456 if (rdev->mddev == NULL)
3457 rv = -ENODEV;
3458 else
3459 rv = entry->store(rdev, page, length);
3460 mddev_unlock(mddev);
3461 }
3462 return rv;
3463}
3464
3465static void rdev_free(struct kobject *ko)
3466{
3467 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3468 kfree(rdev);
3469}
3470static const struct sysfs_ops rdev_sysfs_ops = {
3471 .show = rdev_attr_show,
3472 .store = rdev_attr_store,
3473};
3474static struct kobj_type rdev_ktype = {
3475 .release = rdev_free,
3476 .sysfs_ops = &rdev_sysfs_ops,
3477 .default_attrs = rdev_default_attrs,
3478};
3479
3480int md_rdev_init(struct md_rdev *rdev)
3481{
3482 rdev->desc_nr = -1;
3483 rdev->saved_raid_disk = -1;
3484 rdev->raid_disk = -1;
3485 rdev->flags = 0;
3486 rdev->data_offset = 0;
3487 rdev->new_data_offset = 0;
3488 rdev->sb_events = 0;
3489 rdev->last_read_error = 0;
3490 rdev->sb_loaded = 0;
3491 rdev->bb_page = NULL;
3492 atomic_set(&rdev->nr_pending, 0);
3493 atomic_set(&rdev->read_errors, 0);
3494 atomic_set(&rdev->corrected_errors, 0);
3495
3496 INIT_LIST_HEAD(&rdev->same_set);
3497 init_waitqueue_head(&rdev->blocked_wait);
3498
	/* Add space to store bad block list.
	 * This reserves the space even on arrays where it cannot
	 * be used - I wonder if that matters
	 */
3503 return badblocks_init(&rdev->badblocks, 0);
3504}
3505EXPORT_SYMBOL_GPL(md_rdev_init);
3506
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock
 *
 * mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
3516static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3517{
3518 char b[BDEVNAME_SIZE];
3519 int err;
3520 struct md_rdev *rdev;
3521 sector_t size;
3522
3523 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3524 if (!rdev)
3525 return ERR_PTR(-ENOMEM);
3526
3527 err = md_rdev_init(rdev);
3528 if (err)
3529 goto abort_free;
3530 err = alloc_disk_sb(rdev);
3531 if (err)
3532 goto abort_free;
3533
3534 err = lock_rdev(rdev, newdev, super_format == -2);
3535 if (err)
3536 goto abort_free;
3537
3538 kobject_init(&rdev->kobj, &rdev_ktype);
3539
3540 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3541 if (!size) {
3542 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3543 bdevname(rdev->bdev,b));
3544 err = -EINVAL;
3545 goto abort_free;
3546 }
3547
3548 if (super_format >= 0) {
3549 err = super_types[super_format].
3550 load_super(rdev, NULL, super_minor);
3551 if (err == -EINVAL) {
3552 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3553 bdevname(rdev->bdev,b),
3554 super_format, super_minor);
3555 goto abort_free;
3556 }
3557 if (err < 0) {
3558 pr_warn("md: could not read %s's sb, not importing!\n",
3559 bdevname(rdev->bdev,b));
3560 goto abort_free;
3561 }
3562 }
3563
3564 return rdev;
3565
3566abort_free:
3567 if (rdev->bdev)
3568 unlock_rdev(rdev);
3569 md_rdev_clear(rdev);
3570 kfree(rdev);
3571 return ERR_PTR(err);
3572}
3573
/*
 * Check a full RAID array for plausibility
 */
3578static void analyze_sbs(struct mddev *mddev)
3579{
3580 int i;
3581 struct md_rdev *rdev, *freshest, *tmp;
3582 char b[BDEVNAME_SIZE];
3583
3584 freshest = NULL;
3585 rdev_for_each_safe(rdev, tmp, mddev)
3586 switch (super_types[mddev->major_version].
3587 load_super(rdev, freshest, mddev->minor_version)) {
3588 case 1:
3589 freshest = rdev;
3590 break;
3591 case 0:
3592 break;
3593 default:
3594 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3595 bdevname(rdev->bdev,b));
3596 md_kick_rdev_from_array(rdev);
3597 }
3598
3599 super_types[mddev->major_version].
3600 validate_super(mddev, freshest);
3601
3602 i = 0;
3603 rdev_for_each_safe(rdev, tmp, mddev) {
3604 if (mddev->max_disks &&
3605 (rdev->desc_nr >= mddev->max_disks ||
3606 i > mddev->max_disks)) {
3607 pr_warn("md: %s: %s: only %d devices permitted\n",
3608 mdname(mddev), bdevname(rdev->bdev, b),
3609 mddev->max_disks);
3610 md_kick_rdev_from_array(rdev);
3611 continue;
3612 }
3613 if (rdev != freshest) {
3614 if (super_types[mddev->major_version].
3615 validate_super(mddev, rdev)) {
3616 pr_warn("md: kicking non-fresh %s from array!\n",
3617 bdevname(rdev->bdev,b));
3618 md_kick_rdev_from_array(rdev);
3619 continue;
3620 }
3621 }
3622 if (mddev->level == LEVEL_MULTIPATH) {
3623 rdev->desc_nr = i++;
3624 rdev->raid_disk = rdev->desc_nr;
3625 set_bit(In_sync, &rdev->flags);
3626 } else if (rdev->raid_disk >=
3627 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3628 !test_bit(Journal, &rdev->flags)) {
3629 rdev->raid_disk = -1;
3630 clear_bit(In_sync, &rdev->flags);
3631 }
3632 }
3633}
3634
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * all without any floating-point arithmetic.
 */
3645int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3646{
3647 unsigned long result = 0;
3648 long decimals = -1;
3649 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3650 if (*cp == '.')
3651 decimals = 0;
3652 else if (decimals < scale) {
3653 unsigned int value;
3654 value = *cp - '0';
3655 result = result * 10 + value;
3656 if (decimals >= 0)
3657 decimals++;
3658 }
3659 cp++;
3660 }
3661 if (*cp == '\n')
3662 cp++;
3663 if (*cp)
3664 return -EINVAL;
3665 if (decimals < 0)
3666 decimals = 0;
3667 while (decimals < scale) {
3668 result *= 10;
3669 decimals ++;
3670 }
3671 *res = result;
3672 return 0;
3673}
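
/*
 * Worked example: strict_strtoul_scaled("3.14", &res, 3) accumulates
 * result = 314 with two fractional digits seen, then multiplies by 10
 * once more, so res == 3140 (3.14 scaled by 10^3).  Fractional digits
 * beyond 'scale', as in "12.3456" with scale 3, are silently dropped,
 * yielding 12345.
 */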
3674
3675static ssize_t
3676safe_delay_show(struct mddev *mddev, char *page)
3677{
3678 int msec = (mddev->safemode_delay*1000)/HZ;
3679 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3680}
3681static ssize_t
3682safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3683{
3684 unsigned long msec;
3685
3686 if (mddev_is_clustered(mddev)) {
3687 pr_warn("md: Safemode is disabled for clustered mode\n");
3688 return -EINVAL;
3689 }
3690
3691 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3692 return -EINVAL;
3693 if (msec == 0)
3694 mddev->safemode_delay = 0;
3695 else {
3696 unsigned long old_delay = mddev->safemode_delay;
3697 unsigned long new_delay = (msec*HZ)/1000;
3698
3699 if (new_delay == 0)
3700 new_delay = 1;
3701 mddev->safemode_delay = new_delay;
3702 if (new_delay < old_delay || old_delay == 0)
3703 mod_timer(&mddev->safemode_timer, jiffies+1);
3704 }
3705 return len;
3706}
3707static struct md_sysfs_entry md_safe_delay =
3708__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
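
/*
 * Example: writing "0.200" to safe_mode_delay parses (via
 * strict_strtoul_scaled(..., 3)) to msec == 200, so safemode_delay
 * becomes 200*HZ/1000 jiffies (e.g. 50 jiffies at HZ=250); writing
 * "0" disables the safe-mode timeout entirely.
 */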
3709
3710static ssize_t
3711level_show(struct mddev *mddev, char *page)
3712{
3713 struct md_personality *p;
3714 int ret;
3715 spin_lock(&mddev->lock);
3716 p = mddev->pers;
3717 if (p)
3718 ret = sprintf(page, "%s\n", p->name);
3719 else if (mddev->clevel[0])
3720 ret = sprintf(page, "%s\n", mddev->clevel);
3721 else if (mddev->level != LEVEL_NONE)
3722 ret = sprintf(page, "%d\n", mddev->level);
3723 else
3724 ret = 0;
3725 spin_unlock(&mddev->lock);
3726 return ret;
3727}
3728
3729static ssize_t
3730level_store(struct mddev *mddev, const char *buf, size_t len)
3731{
3732 char clevel[16];
3733 ssize_t rv;
3734 size_t slen = len;
3735 struct md_personality *pers, *oldpers;
3736 long level;
3737 void *priv, *oldpriv;
3738 struct md_rdev *rdev;
3739
3740 if (slen == 0 || slen >= sizeof(clevel))
3741 return -EINVAL;
3742
3743 rv = mddev_lock(mddev);
3744 if (rv)
3745 return rv;
3746
3747 if (mddev->pers == NULL) {
3748 strncpy(mddev->clevel, buf, slen);
3749 if (mddev->clevel[slen-1] == '\n')
3750 slen--;
3751 mddev->clevel[slen] = 0;
3752 mddev->level = LEVEL_NONE;
3753 rv = len;
3754 goto out_unlock;
3755 }
3756 rv = -EROFS;
3757 if (mddev->ro)
3758 goto out_unlock;
3759
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
3766 rv = -EBUSY;
3767 if (mddev->sync_thread ||
3768 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3769 mddev->reshape_position != MaxSector ||
3770 mddev->sysfs_active)
3771 goto out_unlock;
3772
3773 rv = -EINVAL;
3774 if (!mddev->pers->quiesce) {
3775 pr_warn("md: %s: %s does not support online personality change\n",
3776 mdname(mddev), mddev->pers->name);
3777 goto out_unlock;
3778 }
3779
	/* Now find the new personality */
3781 strncpy(clevel, buf, slen);
3782 if (clevel[slen-1] == '\n')
3783 slen--;
3784 clevel[slen] = 0;
3785 if (kstrtol(clevel, 10, &level))
3786 level = LEVEL_NONE;
3787
3788 if (request_module("md-%s", clevel) != 0)
3789 request_module("md-level-%s", clevel);
3790 spin_lock(&pers_lock);
3791 pers = find_pers(level, clevel);
3792 if (!pers || !try_module_get(pers->owner)) {
3793 spin_unlock(&pers_lock);
3794 pr_warn("md: personality %s not loaded\n", clevel);
3795 rv = -EINVAL;
3796 goto out_unlock;
3797 }
3798 spin_unlock(&pers_lock);
3799
3800 if (pers == mddev->pers) {
		/* Nothing to do! */
3802 module_put(pers->owner);
3803 rv = len;
3804 goto out_unlock;
3805 }
3806 if (!pers->takeover) {
3807 module_put(pers->owner);
3808 pr_warn("md: %s: %s does not support personality takeover\n",
3809 mdname(mddev), clevel);
3810 rv = -EINVAL;
3811 goto out_unlock;
3812 }
3813
3814 rdev_for_each(rdev, mddev)
3815 rdev->new_raid_disk = rdev->raid_disk;
3816
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
3820 priv = pers->takeover(mddev);
3821 if (IS_ERR(priv)) {
3822 mddev->new_level = mddev->level;
3823 mddev->new_layout = mddev->layout;
3824 mddev->new_chunk_sectors = mddev->chunk_sectors;
3825 mddev->raid_disks -= mddev->delta_disks;
3826 mddev->delta_disks = 0;
3827 mddev->reshape_backwards = 0;
3828 module_put(pers->owner);
3829 pr_warn("md: %s: %s would not accept array\n",
3830 mdname(mddev), clevel);
3831 rv = PTR_ERR(priv);
3832 goto out_unlock;
3833 }
3834
	/* Looks like we have a winner */
3836 mddev_suspend(mddev);
3837 mddev_detach(mddev);
3838
3839 spin_lock(&mddev->lock);
3840 oldpers = mddev->pers;
3841 oldpriv = mddev->private;
3842 mddev->pers = pers;
3843 mddev->private = priv;
3844 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3845 mddev->level = mddev->new_level;
3846 mddev->layout = mddev->new_layout;
3847 mddev->chunk_sectors = mddev->new_chunk_sectors;
3848 mddev->delta_disks = 0;
3849 mddev->reshape_backwards = 0;
3850 mddev->degraded = 0;
3851 spin_unlock(&mddev->lock);
3852
3853 if (oldpers->sync_request == NULL &&
3854 mddev->external) {
		/* We are converting from a no-redundancy array
		 * to a redundancy array and metadata is managed
		 * externally so we need to be sure that writes
		 * won't block due to a need to update the metadata
		 * until external management is started.
		 */
3862 mddev->in_sync = 0;
3863 mddev->safemode_delay = 0;
3864 mddev->safemode = 0;
3865 }
3866
3867 oldpers->free(mddev, oldpriv);
3868
3869 if (oldpers->sync_request == NULL &&
3870 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
3872 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3873 pr_warn("md: cannot register extra attributes for %s\n",
3874 mdname(mddev));
3875 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3876 }
3877 if (oldpers->sync_request != NULL &&
3878 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
3880 if (mddev->to_remove == NULL)
3881 mddev->to_remove = &md_redundancy_group;
3882 }
3883
3884 module_put(oldpers->owner);
3885
3886 rdev_for_each(rdev, mddev) {
3887 if (rdev->raid_disk < 0)
3888 continue;
3889 if (rdev->new_raid_disk >= mddev->raid_disks)
3890 rdev->new_raid_disk = -1;
3891 if (rdev->new_raid_disk == rdev->raid_disk)
3892 continue;
3893 sysfs_unlink_rdev(mddev, rdev);
3894 }
3895 rdev_for_each(rdev, mddev) {
3896 if (rdev->raid_disk < 0)
3897 continue;
3898 if (rdev->new_raid_disk == rdev->raid_disk)
3899 continue;
3900 rdev->raid_disk = rdev->new_raid_disk;
3901 if (rdev->raid_disk < 0)
3902 clear_bit(In_sync, &rdev->flags);
3903 else {
3904 if (sysfs_link_rdev(mddev, rdev))
3905 pr_warn("md: cannot register rd%d for %s after level change\n",
3906 rdev->raid_disk, mdname(mddev));
3907 }
3908 }
3909
3910 if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
3914 mddev->in_sync = 1;
3915 del_timer_sync(&mddev->safemode_timer);
3916 }
3917 blk_set_stacking_limits(&mddev->queue->limits);
3918 pers->run(mddev);
3919 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3920 mddev_resume(mddev);
3921 if (!mddev->thread)
3922 md_update_sb(mddev, 1);
3923 sysfs_notify(&mddev->kobj, NULL, "level");
3924 md_new_event(mddev);
3925 rv = len;
3926out_unlock:
3927 mddev_unlock(mddev);
3928 return rv;
3929}
3930
3931static struct md_sysfs_entry md_level =
3932__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3933
3934static ssize_t
3935layout_show(struct mddev *mddev, char *page)
3936{
	/* just a number, not meaningful for all levels */
3938 if (mddev->reshape_position != MaxSector &&
3939 mddev->layout != mddev->new_layout)
3940 return sprintf(page, "%d (%d)\n",
3941 mddev->new_layout, mddev->layout);
3942 return sprintf(page, "%d\n", mddev->layout);
3943}
3944
3945static ssize_t
3946layout_store(struct mddev *mddev, const char *buf, size_t len)
3947{
3948 unsigned int n;
3949 int err;
3950
3951 err = kstrtouint(buf, 10, &n);
3952 if (err < 0)
3953 return err;
3954 err = mddev_lock(mddev);
3955 if (err)
3956 return err;
3957
3958 if (mddev->pers) {
3959 if (mddev->pers->check_reshape == NULL)
3960 err = -EBUSY;
3961 else if (mddev->ro)
3962 err = -EROFS;
3963 else {
3964 mddev->new_layout = n;
3965 err = mddev->pers->check_reshape(mddev);
3966 if (err)
3967 mddev->new_layout = mddev->layout;
3968 }
3969 } else {
3970 mddev->new_layout = n;
3971 if (mddev->reshape_position == MaxSector)
3972 mddev->layout = n;
3973 }
3974 mddev_unlock(mddev);
3975 return err ?: len;
3976}
3977static struct md_sysfs_entry md_layout =
3978__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3979
3980static ssize_t
3981raid_disks_show(struct mddev *mddev, char *page)
3982{
3983 if (mddev->raid_disks == 0)
3984 return 0;
3985 if (mddev->reshape_position != MaxSector &&
3986 mddev->delta_disks != 0)
3987 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3988 mddev->raid_disks - mddev->delta_disks);
3989 return sprintf(page, "%d\n", mddev->raid_disks);
3990}
3991
3992static int update_raid_disks(struct mddev *mddev, int raid_disks);
3993
3994static ssize_t
3995raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3996{
3997 unsigned int n;
3998 int err;
3999
4000 err = kstrtouint(buf, 10, &n);
4001 if (err < 0)
4002 return err;
4003
4004 err = mddev_lock(mddev);
4005 if (err)
4006 return err;
4007 if (mddev->pers)
4008 err = update_raid_disks(mddev, n);
4009 else if (mddev->reshape_position != MaxSector) {
4010 struct md_rdev *rdev;
4011 int olddisks = mddev->raid_disks - mddev->delta_disks;
4012
4013 err = -EINVAL;
4014 rdev_for_each(rdev, mddev) {
4015 if (olddisks < n &&
4016 rdev->data_offset < rdev->new_data_offset)
4017 goto out_unlock;
4018 if (olddisks > n &&
4019 rdev->data_offset > rdev->new_data_offset)
4020 goto out_unlock;
4021 }
4022 err = 0;
4023 mddev->delta_disks = n - olddisks;
4024 mddev->raid_disks = n;
4025 mddev->reshape_backwards = (mddev->delta_disks < 0);
4026 } else
4027 mddev->raid_disks = n;
4028out_unlock:
4029 mddev_unlock(mddev);
4030 return err ? err : len;
4031}
4032static struct md_sysfs_entry md_raid_disks =
4033__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4034
4035static ssize_t
4036chunk_size_show(struct mddev *mddev, char *page)
4037{
4038 if (mddev->reshape_position != MaxSector &&
4039 mddev->chunk_sectors != mddev->new_chunk_sectors)
4040 return sprintf(page, "%d (%d)\n",
4041 mddev->new_chunk_sectors << 9,
4042 mddev->chunk_sectors << 9);
4043 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4044}
4045
4046static ssize_t
4047chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4048{
4049 unsigned long n;
4050 int err;
4051
4052 err = kstrtoul(buf, 10, &n);
4053 if (err < 0)
4054 return err;
4055
4056 err = mddev_lock(mddev);
4057 if (err)
4058 return err;
4059 if (mddev->pers) {
4060 if (mddev->pers->check_reshape == NULL)
4061 err = -EBUSY;
4062 else if (mddev->ro)
4063 err = -EROFS;
4064 else {
4065 mddev->new_chunk_sectors = n >> 9;
4066 err = mddev->pers->check_reshape(mddev);
4067 if (err)
4068 mddev->new_chunk_sectors = mddev->chunk_sectors;
4069 }
4070 } else {
4071 mddev->new_chunk_sectors = n >> 9;
4072 if (mddev->reshape_position == MaxSector)
4073 mddev->chunk_sectors = n >> 9;
4074 }
4075 mddev_unlock(mddev);
4076 return err ?: len;
4077}
4078static struct md_sysfs_entry md_chunk_size =
4079__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4080
4081static ssize_t
4082resync_start_show(struct mddev *mddev, char *page)
4083{
4084 if (mddev->recovery_cp == MaxSector)
4085 return sprintf(page, "none\n");
4086 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4087}
4088
4089static ssize_t
4090resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4091{
4092 unsigned long long n;
4093 int err;
4094
4095 if (cmd_match(buf, "none"))
4096 n = MaxSector;
4097 else {
4098 err = kstrtoull(buf, 10, &n);
4099 if (err < 0)
4100 return err;
4101 if (n != (sector_t)n)
4102 return -EINVAL;
4103 }
4104
4105 err = mddev_lock(mddev);
4106 if (err)
4107 return err;
4108 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4109 err = -EBUSY;
4110
4111 if (!err) {
4112 mddev->recovery_cp = n;
4113 if (mddev->pers)
4114 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4115 }
4116 mddev_unlock(mddev);
4117 return err ?: len;
4118}
4119static struct md_sysfs_entry md_resync_start =
4120__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4121 resync_start_show, resync_start_store);
4122
/*
 * The array state can be:
 *
 * clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 * inactive
 *     May have some settings, but array is not active
 *        all IO results in error
 *     When written, doesn't tear down array, but just stops it
 * suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 * readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 * read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 * clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 * active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 * write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 * active-idle
 *     like active, but no writes have been seen for a while (100msec).
 *
 */
4159enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4160 write_pending, active_idle, bad_word};
4161static char *array_states[] = {
4162 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4163 "write-pending", "active-idle", NULL };
4164
4165static int match_word(const char *word, char **list)
4166{
4167 int n;
4168 for (n=0; list[n]; n++)
4169 if (cmd_match(word, list[n]))
4170 break;
4171 return n;
4172}
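
/*
 * Example: match_word("clean\n", array_states) returns 5, the index of
 * "clean"; an unrecognised word runs off the end of the list and
 * returns 9, which array_state_store() sees as bad_word.
 */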
4173
4174static ssize_t
4175array_state_show(struct mddev *mddev, char *page)
4176{
4177 enum array_state st = inactive;
4178
4179 if (mddev->pers)
4180 switch(mddev->ro) {
4181 case 1:
4182 st = readonly;
4183 break;
4184 case 2:
4185 st = read_auto;
4186 break;
4187 case 0:
4188 spin_lock(&mddev->lock);
4189 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4190 st = write_pending;
4191 else if (mddev->in_sync)
4192 st = clean;
4193 else if (mddev->safemode)
4194 st = active_idle;
4195 else
4196 st = active;
4197 spin_unlock(&mddev->lock);
4198 }
4199 else {
4200 if (list_empty(&mddev->disks) &&
4201 mddev->raid_disks == 0 &&
4202 mddev->dev_sectors == 0)
4203 st = clear;
4204 else
4205 st = inactive;
4206 }
4207 return sprintf(page, "%s\n", array_states[st]);
4208}
4209
4210static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4211static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4212static int do_md_run(struct mddev *mddev);
4213static int restart_array(struct mddev *mddev);
4214
4215static ssize_t
4216array_state_store(struct mddev *mddev, const char *buf, size_t len)
4217{
4218 int err = 0;
4219 enum array_state st = match_word(buf, array_states);
4220
4221 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
4225 spin_lock(&mddev->lock);
4226 if (st == active) {
4227 restart_array(mddev);
4228 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4229 md_wakeup_thread(mddev->thread);
4230 wake_up(&mddev->sb_wait);
4231 } else {
4232 restart_array(mddev);
4233 if (!set_in_sync(mddev))
4234 err = -EBUSY;
4235 }
4236 if (!err)
4237 sysfs_notify_dirent_safe(mddev->sysfs_state);
4238 spin_unlock(&mddev->lock);
4239 return err ?: len;
4240 }
4241 err = mddev_lock(mddev);
4242 if (err)
4243 return err;
4244 err = -EINVAL;
4245 switch(st) {
4246 case bad_word:
4247 break;
4248 case clear:
		/* stopping an active array */
4250 err = do_md_stop(mddev, 0, NULL);
4251 break;
4252 case inactive:
		/* stop an active array, return 0 otherwise */
4254 if (mddev->pers)
4255 err = do_md_stop(mddev, 2, NULL);
4256 else
4257 err = 0;
4258 break;
4259 case suspended:
4260 break;
4261 case readonly:
4262 if (mddev->pers)
4263 err = md_set_readonly(mddev, NULL);
4264 else {
4265 mddev->ro = 1;
4266 set_disk_ro(mddev->gendisk, 1);
4267 err = do_md_run(mddev);
4268 }
4269 break;
4270 case read_auto:
4271 if (mddev->pers) {
4272 if (mddev->ro == 0)
4273 err = md_set_readonly(mddev, NULL);
4274 else if (mddev->ro == 1)
4275 err = restart_array(mddev);
4276 if (err == 0) {
4277 mddev->ro = 2;
4278 set_disk_ro(mddev->gendisk, 0);
4279 }
4280 } else {
4281 mddev->ro = 2;
4282 err = do_md_run(mddev);
4283 }
4284 break;
4285 case clean:
4286 if (mddev->pers) {
4287 err = restart_array(mddev);
4288 if (err)
4289 break;
4290 spin_lock(&mddev->lock);
4291 if (!set_in_sync(mddev))
4292 err = -EBUSY;
4293 spin_unlock(&mddev->lock);
4294 } else
4295 err = -EINVAL;
4296 break;
4297 case active:
4298 if (mddev->pers) {
4299 err = restart_array(mddev);
4300 if (err)
4301 break;
4302 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4303 wake_up(&mddev->sb_wait);
4304 err = 0;
4305 } else {
4306 mddev->ro = 0;
4307 set_disk_ro(mddev->gendisk, 0);
4308 err = do_md_run(mddev);
4309 }
4310 break;
4311 case write_pending:
4312 case active_idle:
		/* these cannot be set */
4314 break;
4315 }
4316
4317 if (!err) {
4318 if (mddev->hold_active == UNTIL_IOCTL)
4319 mddev->hold_active = 0;
4320 sysfs_notify_dirent_safe(mddev->sysfs_state);
4321 }
4322 mddev_unlock(mddev);
4323 return err ?: len;
4324}
4325static struct md_sysfs_entry md_array_state =
4326__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4327
4328static ssize_t
4329max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4330 return sprintf(page, "%d\n",
4331 atomic_read(&mddev->max_corr_read_errors));
4332}
4333
4334static ssize_t
4335max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4336{
4337 unsigned int n;
4338 int rv;
4339
4340 rv = kstrtouint(buf, 10, &n);
4341 if (rv < 0)
4342 return rv;
4343 atomic_set(&mddev->max_corr_read_errors, n);
4344 return len;
4345}
4346
4347static struct md_sysfs_entry max_corr_read_errors =
4348__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4349 max_corrected_read_errors_store);
4350
4351static ssize_t
4352null_show(struct mddev *mddev, char *page)
4353{
4354 return -EINVAL;
4355}
4356
4357static ssize_t
4358new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4359{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
4367 char *e;
4368 int major = simple_strtoul(buf, &e, 10);
4369 int minor;
4370 dev_t dev;
4371 struct md_rdev *rdev;
4372 int err;
4373
4374 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4375 return -EINVAL;
4376 minor = simple_strtoul(e+1, &e, 10);
4377 if (*e && *e != '\n')
4378 return -EINVAL;
4379 dev = MKDEV(major, minor);
4380 if (major != MAJOR(dev) ||
4381 minor != MINOR(dev))
4382 return -EOVERFLOW;
4383
4384 flush_workqueue(md_misc_wq);
4385
4386 err = mddev_lock(mddev);
4387 if (err)
4388 return err;
4389 if (mddev->persistent) {
4390 rdev = md_import_device(dev, mddev->major_version,
4391 mddev->minor_version);
4392 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4393 struct md_rdev *rdev0
4394 = list_entry(mddev->disks.next,
4395 struct md_rdev, same_set);
4396 err = super_types[mddev->major_version]
4397 .load_super(rdev, rdev0, mddev->minor_version);
4398 if (err < 0)
4399 goto out;
4400 }
4401 } else if (mddev->external)
4402 rdev = md_import_device(dev, -2, -1);
4403 else
4404 rdev = md_import_device(dev, -1, -1);
4405
4406 if (IS_ERR(rdev)) {
4407 mddev_unlock(mddev);
4408 return PTR_ERR(rdev);
4409 }
4410 err = bind_rdev_to_array(rdev, mddev);
4411 out:
4412 if (err)
4413 export_rdev(rdev);
4414 mddev_unlock(mddev);
4415 if (!err)
4416 md_new_event(mddev);
4417 return err ? err : len;
4418}
4419
4420static struct md_sysfs_entry md_new_device =
4421__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4422
4423static ssize_t
4424bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4425{
4426 char *end;
4427 unsigned long chunk, end_chunk;
4428 int err;
4429
4430 err = mddev_lock(mddev);
4431 if (err)
4432 return err;
4433 if (!mddev->bitmap)
4434 goto out;
4435
4436 while (*buf) {
4437 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4438 if (buf == end) break;
4439 if (*end == '-') {
4440 buf = end + 1;
4441 end_chunk = simple_strtoul(buf, &end, 0);
4442 if (buf == end) break;
4443 }
4444 if (*end && !isspace(*end)) break;
4445 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4446 buf = skip_spaces(end);
4447 }
4448 md_bitmap_unplug(mddev->bitmap);
4449out:
4450 mddev_unlock(mddev);
4451 return len;
4452}
4453
4454static struct md_sysfs_entry md_bitmap =
4455__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4456
4457static ssize_t
4458size_show(struct mddev *mddev, char *page)
4459{
4460 return sprintf(page, "%llu\n",
4461 (unsigned long long)mddev->dev_sectors / 2);
4462}
4463
4464static int update_size(struct mddev *mddev, sector_t num_sectors);
4465
4466static ssize_t
4467size_store(struct mddev *mddev, const char *buf, size_t len)
4468{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
4473 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4475
4476 if (err < 0)
4477 return err;
4478 err = mddev_lock(mddev);
4479 if (err)
4480 return err;
4481 if (mddev->pers) {
4482 err = update_size(mddev, sectors);
4483 if (err == 0)
4484 md_update_sb(mddev, 1);
4485 } else {
4486 if (mddev->dev_sectors == 0 ||
4487 mddev->dev_sectors > sectors)
4488 mddev->dev_sectors = sectors;
4489 else
4490 err = -ENOSPC;
4491 }
4492 mddev_unlock(mddev);
4493 return err ? err : len;
4494}
4495
4496static struct md_sysfs_entry md_size =
4497__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4498
/* Metadata version.
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4505static ssize_t
4506metadata_show(struct mddev *mddev, char *page)
4507{
4508 if (mddev->persistent)
4509 return sprintf(page, "%d.%d\n",
4510 mddev->major_version, mddev->minor_version);
4511 else if (mddev->external)
4512 return sprintf(page, "external:%s\n", mddev->metadata_type);
4513 else
4514 return sprintf(page, "none\n");
4515}
4516
4517static ssize_t
4518metadata_store(struct mddev *mddev, const char *buf, size_t len)
4519{
4520 int major, minor;
4521 char *e;
4522 int err;
4523
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4528 err = mddev_lock(mddev);
4529 if (err)
4530 return err;
4531 err = -EBUSY;
4532 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4533 ;
4534 else if (!list_empty(&mddev->disks))
4535 goto out_unlock;
4536
4537 err = 0;
4538 if (cmd_match(buf, "none")) {
4539 mddev->persistent = 0;
4540 mddev->external = 0;
4541 mddev->major_version = 0;
4542 mddev->minor_version = 90;
4543 goto out_unlock;
4544 }
4545 if (strncmp(buf, "external:", 9) == 0) {
4546 size_t namelen = len-9;
4547 if (namelen >= sizeof(mddev->metadata_type))
4548 namelen = sizeof(mddev->metadata_type)-1;
4549 strncpy(mddev->metadata_type, buf+9, namelen);
4550 mddev->metadata_type[namelen] = 0;
4551 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4552 mddev->metadata_type[--namelen] = 0;
4553 mddev->persistent = 0;
4554 mddev->external = 1;
4555 mddev->major_version = 0;
4556 mddev->minor_version = 90;
4557 goto out_unlock;
4558 }
4559 major = simple_strtoul(buf, &e, 10);
4560 err = -EINVAL;
4561 if (e==buf || *e != '.')
4562 goto out_unlock;
4563 buf = e+1;
4564 minor = simple_strtoul(buf, &e, 10);
4565 if (e==buf || (*e && *e != '\n') )
4566 goto out_unlock;
4567 err = -ENOENT;
4568 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4569 goto out_unlock;
4570 mddev->major_version = major;
4571 mddev->minor_version = minor;
4572 mddev->persistent = 1;
4573 mddev->external = 0;
4574 err = 0;
4575out_unlock:
4576 mddev_unlock(mddev);
4577 return err ?: len;
4578}
4579
4580static struct md_sysfs_entry md_metadata =
4581__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
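
/*
 * Illustrative values accepted while the array is not yet assembled:
 * "none", "external:<name>", or a "major.minor" pair such as
 *
 *   echo 1.2 > /sys/block/md0/md/metadata_version
 */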
4582
4583static ssize_t
4584action_show(struct mddev *mddev, char *page)
4585{
4586 char *type = "idle";
4587 unsigned long recovery = mddev->recovery;
4588 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4589 type = "frozen";
4590 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4591 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4592 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4593 type = "reshape";
4594 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4595 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4596 type = "resync";
4597 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4598 type = "check";
4599 else
4600 type = "repair";
4601 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4602 type = "recover";
4603 else if (mddev->reshape_position != MaxSector)
4604 type = "reshape";
4605 }
4606 return sprintf(page, "%s\n", type);
4607}
4608
4609static ssize_t
4610action_store(struct mddev *mddev, const char *page, size_t len)
4611{
4612 if (!mddev->pers || !mddev->pers->sync_request)
4613 return -EINVAL;
4614
4615
4616 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4617 if (cmd_match(page, "frozen"))
4618 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4619 else
4620 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4621 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4622 mddev_lock(mddev) == 0) {
4623 flush_workqueue(md_misc_wq);
4624 if (mddev->sync_thread) {
4625 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4626 md_reap_sync_thread(mddev);
4627 }
4628 mddev_unlock(mddev);
4629 }
4630 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4631 return -EBUSY;
4632 else if (cmd_match(page, "resync"))
4633 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4634 else if (cmd_match(page, "recover")) {
4635 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4636 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4637 } else if (cmd_match(page, "reshape")) {
4638 int err;
4639 if (mddev->pers->start_reshape == NULL)
4640 return -EINVAL;
4641 err = mddev_lock(mddev);
4642 if (!err) {
4643 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4644 err = -EBUSY;
4645 else {
4646 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4647 err = mddev->pers->start_reshape(mddev);
4648 }
4649 mddev_unlock(mddev);
4650 }
4651 if (err)
4652 return err;
4653 sysfs_notify(&mddev->kobj, NULL, "degraded");
4654 } else {
4655 if (cmd_match(page, "check"))
4656 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4657 else if (!cmd_match(page, "repair"))
4658 return -EINVAL;
4659 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4660 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4661 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4662 }
4663 if (mddev->ro == 2) {
4664		/* A write to sync_action is enough to justify
4665		 * canceling read-auto mode
4666		 */
4667 mddev->ro = 0;
4668 md_wakeup_thread(mddev->sync_thread);
4669 }
4670 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4671 md_wakeup_thread(mddev->thread);
4672 sysfs_notify_dirent_safe(mddev->sysfs_action);
4673 return len;
4674}
4675
4676static struct md_sysfs_entry md_scan_mode =
4677__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
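
/*
 * Illustrative sync_action usage: "check" scrubs without rewriting,
 * "repair" also corrects mismatches, "idle" interrupts the current
 * operation and "frozen" additionally blocks new ones:
 *
 *   echo check > /sys/block/md0/md/sync_action
 *   cat /sys/block/md0/md/mismatch_cnt
 */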
4678
4679static ssize_t
4680last_sync_action_show(struct mddev *mddev, char *page)
4681{
4682 return sprintf(page, "%s\n", mddev->last_sync_action);
4683}
4684
4685static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4686
4687static ssize_t
4688mismatch_cnt_show(struct mddev *mddev, char *page)
4689{
4690 return sprintf(page, "%llu\n",
4691 (unsigned long long)
4692 atomic64_read(&mddev->resync_mismatches));
4693}
4694
4695static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4696
4697static ssize_t
4698sync_min_show(struct mddev *mddev, char *page)
4699{
4700 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4701 mddev->sync_speed_min ? "local": "system");
4702}
4703
4704static ssize_t
4705sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4706{
4707 unsigned int min;
4708 int rv;
4709
4710 if (strncmp(buf, "system", 6)==0) {
4711 min = 0;
4712 } else {
4713 rv = kstrtouint(buf, 10, &min);
4714 if (rv < 0)
4715 return rv;
4716 if (min == 0)
4717 return -EINVAL;
4718 }
4719 mddev->sync_speed_min = min;
4720 return len;
4721}
4722
4723static struct md_sysfs_entry md_sync_min =
4724__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4725
4726static ssize_t
4727sync_max_show(struct mddev *mddev, char *page)
4728{
4729 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4730 mddev->sync_speed_max ? "local": "system");
4731}
4732
4733static ssize_t
4734sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4735{
4736 unsigned int max;
4737 int rv;
4738
4739 if (strncmp(buf, "system", 6)==0) {
4740 max = 0;
4741 } else {
4742 rv = kstrtouint(buf, 10, &max);
4743 if (rv < 0)
4744 return rv;
4745 if (max == 0)
4746 return -EINVAL;
4747 }
4748 mddev->sync_speed_max = max;
4749 return len;
4750}
4751
4752static struct md_sysfs_entry md_sync_max =
4753__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
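
/*
 * Illustrative example: cap this array's resync throughput at roughly
 * 10MB/s (the value is in KB/sec), then fall back to the system-wide
 * limit again:
 *
 *   echo 10000 > /sys/block/md0/md/sync_speed_max
 *   echo system > /sys/block/md0/md/sync_speed_max
 */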
4754
4755static ssize_t
4756degraded_show(struct mddev *mddev, char *page)
4757{
4758 return sprintf(page, "%d\n", mddev->degraded);
4759}
4760static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4761
4762static ssize_t
4763sync_force_parallel_show(struct mddev *mddev, char *page)
4764{
4765 return sprintf(page, "%d\n", mddev->parallel_resync);
4766}
4767
4768static ssize_t
4769sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4770{
4771 long n;
4772
4773 if (kstrtol(buf, 10, &n))
4774 return -EINVAL;
4775
4776 if (n != 0 && n != 1)
4777 return -EINVAL;
4778
4779 mddev->parallel_resync = n;
4780
4781 if (mddev->sync_thread)
4782 wake_up(&resync_wait);
4783
4784 return len;
4785}
4786
4787/* force parallel resync, even with shared block devices */
4788static struct md_sysfs_entry md_sync_force_parallel =
4789__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4790 sync_force_parallel_show, sync_force_parallel_store);
4791
4792static ssize_t
4793sync_speed_show(struct mddev *mddev, char *page)
4794{
4795 unsigned long resync, dt, db;
4796 if (mddev->curr_resync == 0)
4797 return sprintf(page, "none\n");
4798 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4799 dt = (jiffies - mddev->resync_mark) / HZ;
4800 if (!dt) dt++;
4801 db = resync - mddev->resync_mark_cnt;
4802 return sprintf(page, "%lu\n", db/dt/2);
4803}
4804
4805static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4806
4807static ssize_t
4808sync_completed_show(struct mddev *mddev, char *page)
4809{
4810 unsigned long long max_sectors, resync;
4811
4812 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4813 return sprintf(page, "none\n");
4814
4815 if (mddev->curr_resync == 1 ||
4816 mddev->curr_resync == 2)
4817 return sprintf(page, "delayed\n");
4818
4819 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4820 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4821 max_sectors = mddev->resync_max_sectors;
4822 else
4823 max_sectors = mddev->dev_sectors;
4824
4825 resync = mddev->curr_resync_completed;
4826 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4827}
4828
4829static struct md_sysfs_entry md_sync_completed =
4830 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4831
4832static ssize_t
4833min_sync_show(struct mddev *mddev, char *page)
4834{
4835 return sprintf(page, "%llu\n",
4836 (unsigned long long)mddev->resync_min);
4837}
4838static ssize_t
4839min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4840{
4841 unsigned long long min;
4842 int err;
4843
4844 if (kstrtoull(buf, 10, &min))
4845 return -EINVAL;
4846
4847 spin_lock(&mddev->lock);
4848 err = -EINVAL;
4849 if (min > mddev->resync_max)
4850 goto out_unlock;
4851
4852 err = -EBUSY;
4853 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4854 goto out_unlock;
4855
4856	/* Round down to multiple of 4K for safety */
4857 mddev->resync_min = round_down(min, 8);
4858 err = 0;
4859
4860out_unlock:
4861 spin_unlock(&mddev->lock);
4862 return err ?: len;
4863}
4864
4865static struct md_sysfs_entry md_min_sync =
4866__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4867
4868static ssize_t
4869max_sync_show(struct mddev *mddev, char *page)
4870{
4871 if (mddev->resync_max == MaxSector)
4872 return sprintf(page, "max\n");
4873 else
4874 return sprintf(page, "%llu\n",
4875 (unsigned long long)mddev->resync_max);
4876}
4877static ssize_t
4878max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4879{
4880 int err;
4881 spin_lock(&mddev->lock);
4882 if (strncmp(buf, "max", 3) == 0)
4883 mddev->resync_max = MaxSector;
4884 else {
4885 unsigned long long max;
4886 int chunk;
4887
4888 err = -EINVAL;
4889 if (kstrtoull(buf, 10, &max))
4890 goto out_unlock;
4891 if (max < mddev->resync_min)
4892 goto out_unlock;
4893
4894 err = -EBUSY;
4895 if (max < mddev->resync_max &&
4896 mddev->ro == 0 &&
4897 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4898 goto out_unlock;
4899
4900		/* Must be a multiple of chunk_size */
4901 chunk = mddev->chunk_sectors;
4902 if (chunk) {
4903 sector_t temp = max;
4904
4905 err = -EINVAL;
4906 if (sector_div(temp, chunk))
4907 goto out_unlock;
4908 }
4909 mddev->resync_max = max;
4910 }
4911 wake_up(&mddev->recovery_wait);
4912 err = 0;
4913out_unlock:
4914 spin_unlock(&mddev->lock);
4915 return err ?: len;
4916}
4917
4918static struct md_sysfs_entry md_max_sync =
4919__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
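
/*
 * sync_min/sync_max bound, in sectors, the region a requested action
 * will cover.  Illustrative example, checking only the first 1GiB
 * (sync_max must be a multiple of the chunk size when one is set):
 *
 *   echo 0 > /sys/block/md0/md/sync_min
 *   echo 2097152 > /sys/block/md0/md/sync_max
 *   echo check > /sys/block/md0/md/sync_action
 */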
4920
4921static ssize_t
4922suspend_lo_show(struct mddev *mddev, char *page)
4923{
4924 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4925}
4926
4927static ssize_t
4928suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4929{
4930 unsigned long long new;
4931 int err;
4932
4933 err = kstrtoull(buf, 10, &new);
4934 if (err < 0)
4935 return err;
4936 if (new != (sector_t)new)
4937 return -EINVAL;
4938
4939 err = mddev_lock(mddev);
4940 if (err)
4941 return err;
4942 err = -EINVAL;
4943 if (mddev->pers == NULL ||
4944 mddev->pers->quiesce == NULL)
4945 goto unlock;
4946 mddev_suspend(mddev);
4947 mddev->suspend_lo = new;
4948 mddev_resume(mddev);
4949
4950 err = 0;
4951unlock:
4952 mddev_unlock(mddev);
4953 return err ?: len;
4954}
4955static struct md_sysfs_entry md_suspend_lo =
4956__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4957
4958static ssize_t
4959suspend_hi_show(struct mddev *mddev, char *page)
4960{
4961 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4962}
4963
4964static ssize_t
4965suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4966{
4967 unsigned long long new;
4968 int err;
4969
4970 err = kstrtoull(buf, 10, &new);
4971 if (err < 0)
4972 return err;
4973 if (new != (sector_t)new)
4974 return -EINVAL;
4975
4976 err = mddev_lock(mddev);
4977 if (err)
4978 return err;
4979 err = -EINVAL;
4980 if (mddev->pers == NULL)
4981 goto unlock;
4982
4983 mddev_suspend(mddev);
4984 mddev->suspend_hi = new;
4985 mddev_resume(mddev);
4986
4987 err = 0;
4988unlock:
4989 mddev_unlock(mddev);
4990 return err ?: len;
4991}
4992static struct md_sysfs_entry md_suspend_hi =
4993__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
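
/*
 * suspend_lo/suspend_hi (sectors) let userspace quiesce I/O to a region
 * of the array, typically while external metadata handlers manipulate
 * it.  Both stores above bracket the update with mddev_suspend() and
 * mddev_resume() so in-flight requests drain before the bound moves.
 */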
4994
4995static ssize_t
4996reshape_position_show(struct mddev *mddev, char *page)
4997{
4998 if (mddev->reshape_position != MaxSector)
4999 return sprintf(page, "%llu\n",
5000 (unsigned long long)mddev->reshape_position);
5001 strcpy(page, "none\n");
5002 return 5;
5003}
5004
5005static ssize_t
5006reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5007{
5008 struct md_rdev *rdev;
5009 unsigned long long new;
5010 int err;
5011
5012 err = kstrtoull(buf, 10, &new);
5013 if (err < 0)
5014 return err;
5015 if (new != (sector_t)new)
5016 return -EINVAL;
5017 err = mddev_lock(mddev);
5018 if (err)
5019 return err;
5020 err = -EBUSY;
5021 if (mddev->pers)
5022 goto unlock;
5023 mddev->reshape_position = new;
5024 mddev->delta_disks = 0;
5025 mddev->reshape_backwards = 0;
5026 mddev->new_level = mddev->level;
5027 mddev->new_layout = mddev->layout;
5028 mddev->new_chunk_sectors = mddev->chunk_sectors;
5029 rdev_for_each(rdev, mddev)
5030 rdev->new_data_offset = rdev->data_offset;
5031 err = 0;
5032unlock:
5033 mddev_unlock(mddev);
5034 return err ?: len;
5035}
5036
5037static struct md_sysfs_entry md_reshape_position =
5038__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5039 reshape_position_store);
5040
5041static ssize_t
5042reshape_direction_show(struct mddev *mddev, char *page)
5043{
5044 return sprintf(page, "%s\n",
5045 mddev->reshape_backwards ? "backwards" : "forwards");
5046}
5047
5048static ssize_t
5049reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5050{
5051 int backwards = 0;
5052 int err;
5053
5054 if (cmd_match(buf, "forwards"))
5055 backwards = 0;
5056 else if (cmd_match(buf, "backwards"))
5057 backwards = 1;
5058 else
5059 return -EINVAL;
5060 if (mddev->reshape_backwards == backwards)
5061 return len;
5062
5063 err = mddev_lock(mddev);
5064 if (err)
5065 return err;
5066
5067 if (mddev->delta_disks)
5068 err = -EBUSY;
5069 else if (mddev->persistent &&
5070 mddev->major_version == 0)
5071 err = -EINVAL;
5072 else
5073 mddev->reshape_backwards = backwards;
5074 mddev_unlock(mddev);
5075 return err ?: len;
5076}
5077
5078static struct md_sysfs_entry md_reshape_direction =
5079__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5080 reshape_direction_store);
5081
5082static ssize_t
5083array_size_show(struct mddev *mddev, char *page)
5084{
5085 if (mddev->external_size)
5086 return sprintf(page, "%llu\n",
5087 (unsigned long long)mddev->array_sectors/2);
5088 else
5089 return sprintf(page, "default\n");
5090}
5091
5092static ssize_t
5093array_size_store(struct mddev *mddev, const char *buf, size_t len)
5094{
5095 sector_t sectors;
5096 int err;
5097
5098 err = mddev_lock(mddev);
5099 if (err)
5100 return err;
5101
5102	/* cluster raid doesn't support change array_sectors */
5103 if (mddev_is_clustered(mddev)) {
5104 mddev_unlock(mddev);
5105 return -EINVAL;
5106 }
5107
5108 if (strncmp(buf, "default", 7) == 0) {
5109 if (mddev->pers)
5110 sectors = mddev->pers->size(mddev, 0, 0);
5111 else
5112 sectors = mddev->array_sectors;
5113
5114 mddev->external_size = 0;
5115 } else {
5116		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5117 err = -EINVAL;
5118 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5119 err = -E2BIG;
5120 else
5121 mddev->external_size = 1;
5122 }
5123
5124 if (!err) {
5125 mddev->array_sectors = sectors;
5126 if (mddev->pers) {
5127 set_capacity(mddev->gendisk, mddev->array_sectors);
5128 revalidate_disk(mddev->gendisk);
5129 }
5130 }
5131 mddev_unlock(mddev);
5132 return err ?: len;
5133}
5134
5135static struct md_sysfs_entry md_array_size =
5136__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5137 array_size_store);
5138
5139static ssize_t
5140consistency_policy_show(struct mddev *mddev, char *page)
5141{
5142 int ret;
5143
5144 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5145 ret = sprintf(page, "journal\n");
5146 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5147 ret = sprintf(page, "ppl\n");
5148 } else if (mddev->bitmap) {
5149 ret = sprintf(page, "bitmap\n");
5150 } else if (mddev->pers) {
5151 if (mddev->pers->sync_request)
5152 ret = sprintf(page, "resync\n");
5153 else
5154 ret = sprintf(page, "none\n");
5155 } else {
5156 ret = sprintf(page, "unknown\n");
5157 }
5158
5159 return ret;
5160}
5161
5162static ssize_t
5163consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5164{
5165 int err = 0;
5166
5167 if (mddev->pers) {
5168 if (mddev->pers->change_consistency_policy)
5169 err = mddev->pers->change_consistency_policy(mddev, buf);
5170 else
5171 err = -EBUSY;
5172 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5173 set_bit(MD_HAS_PPL, &mddev->flags);
5174 } else {
5175 err = -EINVAL;
5176 }
5177
5178 return err ? err : len;
5179}
5180
5181static struct md_sysfs_entry md_consistency_policy =
5182__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5183 consistency_policy_store);
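
/*
 * Illustrative example: the only transition permitted here without a
 * personality's change_consistency_policy hook is enabling PPL on an
 * externally-managed array before it is started:
 *
 *   echo ppl > /sys/block/md0/md/consistency_policy
 */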
5184
5185static struct attribute *md_default_attrs[] = {
5186 &md_level.attr,
5187 &md_layout.attr,
5188 &md_raid_disks.attr,
5189 &md_chunk_size.attr,
5190 &md_size.attr,
5191 &md_resync_start.attr,
5192 &md_metadata.attr,
5193 &md_new_device.attr,
5194 &md_safe_delay.attr,
5195 &md_array_state.attr,
5196 &md_reshape_position.attr,
5197 &md_reshape_direction.attr,
5198 &md_array_size.attr,
5199 &max_corr_read_errors.attr,
5200 &md_consistency_policy.attr,
5201 NULL,
5202};
5203
5204static struct attribute *md_redundancy_attrs[] = {
5205 &md_scan_mode.attr,
5206 &md_last_scan_mode.attr,
5207 &md_mismatches.attr,
5208 &md_sync_min.attr,
5209 &md_sync_max.attr,
5210 &md_sync_speed.attr,
5211 &md_sync_force_parallel.attr,
5212 &md_sync_completed.attr,
5213 &md_min_sync.attr,
5214 &md_max_sync.attr,
5215 &md_suspend_lo.attr,
5216 &md_suspend_hi.attr,
5217 &md_bitmap.attr,
5218 &md_degraded.attr,
5219 NULL,
5220};
5221static struct attribute_group md_redundancy_group = {
5222 .name = NULL,
5223 .attrs = md_redundancy_attrs,
5224};
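
/*
 * The attributes above only make sense for personalities that provide
 * sync_request: md_run() registers this group conditionally, and
 * __md_stop() later schedules its removal via mddev->to_remove.
 */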
5225
5226static ssize_t
5227md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5228{
5229 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5230 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5231 ssize_t rv;
5232
5233 if (!entry->show)
5234 return -EIO;
5235 spin_lock(&all_mddevs_lock);
5236 if (list_empty(&mddev->all_mddevs)) {
5237 spin_unlock(&all_mddevs_lock);
5238 return -EBUSY;
5239 }
5240 mddev_get(mddev);
5241 spin_unlock(&all_mddevs_lock);
5242
5243 rv = entry->show(mddev, page);
5244 mddev_put(mddev);
5245 return rv;
5246}
5247
5248static ssize_t
5249md_attr_store(struct kobject *kobj, struct attribute *attr,
5250 const char *page, size_t length)
5251{
5252 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5253 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5254 ssize_t rv;
5255
5256 if (!entry->store)
5257 return -EIO;
5258 if (!capable(CAP_SYS_ADMIN))
5259 return -EACCES;
5260 spin_lock(&all_mddevs_lock);
5261 if (list_empty(&mddev->all_mddevs)) {
5262 spin_unlock(&all_mddevs_lock);
5263 return -EBUSY;
5264 }
5265 mddev_get(mddev);
5266 spin_unlock(&all_mddevs_lock);
5267 rv = entry->store(mddev, page, length);
5268 mddev_put(mddev);
5269 return rv;
5270}
5271
5272static void md_free(struct kobject *ko)
5273{
5274 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5275
5276 if (mddev->sysfs_state)
5277 sysfs_put(mddev->sysfs_state);
5278
5279 if (mddev->gendisk)
5280 del_gendisk(mddev->gendisk);
5281 if (mddev->queue)
5282 blk_cleanup_queue(mddev->queue);
5283 if (mddev->gendisk)
5284 put_disk(mddev->gendisk);
5285 percpu_ref_exit(&mddev->writes_pending);
5286
5287 bioset_exit(&mddev->bio_set);
5288 bioset_exit(&mddev->sync_set);
5289 kfree(mddev);
5290}
5291
5292static const struct sysfs_ops md_sysfs_ops = {
5293 .show = md_attr_show,
5294 .store = md_attr_store,
5295};
5296static struct kobj_type md_ktype = {
5297 .release = md_free,
5298 .sysfs_ops = &md_sysfs_ops,
5299 .default_attrs = md_default_attrs,
5300};
5301
5302int mdp_major = 0;
5303
5304static void mddev_delayed_delete(struct work_struct *ws)
5305{
5306 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5307
5308 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5309 kobject_del(&mddev->kobj);
5310 kobject_put(&mddev->kobj);
5311}
5312
5313static void no_op(struct percpu_ref *r) {}
5314
5315int mddev_init_writes_pending(struct mddev *mddev)
5316{
5317 if (mddev->writes_pending.percpu_count_ptr)
5318 return 0;
5319 if (percpu_ref_init(&mddev->writes_pending, no_op,
5320 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5321 return -ENOMEM;
5322
5323 percpu_ref_put(&mddev->writes_pending);
5324 return 0;
5325}
5326EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
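
/*
 * A note on the calls above: percpu_ref_init() hands back one initial
 * reference, which is dropped straight away so writes_pending can fall
 * to zero whenever no write is in flight; PERCPU_REF_ALLOW_REINIT is
 * required because md repeatedly switches this ref between atomic and
 * percpu mode instead of killing it once.
 */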
5327
5328static int md_alloc(dev_t dev, char *name)
5329{
5330	/*
5331	 * If dev is zero, name is the name of a device to allocate with
5332	 * an arbitrary minor number.  It will be "md_???".
5333	 * If dev is non-zero it must be a device number with a MAJOR of
5334	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
5335	 * the device is being created by opening a node in /dev.
5336	 * If "name" is not NULL, the device is being created by
5337	 * writing to /sys/module/md_mod/parameters/new_array.
5338	 */
5339 static DEFINE_MUTEX(disks_mutex);
5340 struct mddev *mddev = mddev_find(dev);
5341 struct gendisk *disk;
5342 int partitioned;
5343 int shift;
5344 int unit;
5345 int error;
5346
5347 if (!mddev)
5348 return -ENODEV;
5349
5350 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5351 shift = partitioned ? MdpMinorShift : 0;
5352 unit = MINOR(mddev->unit) >> shift;
5353
5354	/* wait for any previous instance of this device to be
5355	 * completely removed (mddev_delayed_delete).
5356	 */
5357 flush_workqueue(md_misc_wq);
5358
5359 mutex_lock(&disks_mutex);
5360 error = -EEXIST;
5361 if (mddev->gendisk)
5362 goto abort;
5363
5364 if (name && !dev) {
5365		/* Need to ensure that 'name' is not a duplicate.
5366		 */
5367 struct mddev *mddev2;
5368 spin_lock(&all_mddevs_lock);
5369
5370 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5371 if (mddev2->gendisk &&
5372 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5373 spin_unlock(&all_mddevs_lock);
5374 goto abort;
5375 }
5376 spin_unlock(&all_mddevs_lock);
5377 }
5378 if (name && dev)
5379		/*
5380		 * Creating /dev/md/... instead of /dev/mdX
5381		 */
5382 mddev->hold_active = UNTIL_STOP;
5383
5384 error = -ENOMEM;
5385 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5386 if (!mddev->queue)
5387 goto abort;
5388 mddev->queue->queuedata = mddev;
5389
5390 blk_queue_make_request(mddev->queue, md_make_request);
5391 blk_set_stacking_limits(&mddev->queue->limits);
5392
5393 disk = alloc_disk(1 << shift);
5394 if (!disk) {
5395 blk_cleanup_queue(mddev->queue);
5396 mddev->queue = NULL;
5397 goto abort;
5398 }
5399 disk->major = MAJOR(mddev->unit);
5400 disk->first_minor = unit << shift;
5401 if (name)
5402 strcpy(disk->disk_name, name);
5403 else if (partitioned)
5404 sprintf(disk->disk_name, "md_d%d", unit);
5405 else
5406 sprintf(disk->disk_name, "md%d", unit);
5407 disk->fops = &md_fops;
5408 disk->private_data = mddev;
5409 disk->queue = mddev->queue;
5410 blk_queue_write_cache(mddev->queue, true, true);
5411	/* Allow extended partitions.  This makes the
5412	 * 'mdp' device redundant, but we can't really
5413	 * remove it now.
5414	 */
5415 disk->flags |= GENHD_FL_EXT_DEVT;
5416 mddev->gendisk = disk;
5417	/* As soon as we call add_disk(), another thread could get
5418	 * through to md_open, so make sure it doesn't get too far.
5419	 */
5420 mutex_lock(&mddev->open_mutex);
5421 add_disk(disk);
5422
5423 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5424 if (error) {
5425		/* This isn't possible, but as kobject_init_and_add is marked
5426		 * __must_check, we must do something with the result.
5427		 */
5428 pr_debug("md: cannot register %s/md - name in use\n",
5429 disk->disk_name);
5430 error = 0;
5431 }
5432 if (mddev->kobj.sd &&
5433 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5434 pr_debug("pointless warning\n");
5435 mutex_unlock(&mddev->open_mutex);
5436 abort:
5437 mutex_unlock(&disks_mutex);
5438 if (!error && mddev->kobj.sd) {
5439 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5440 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5441 }
5442 mddev_put(mddev);
5443 return error;
5444}
5445
5446static struct kobject *md_probe(dev_t dev, int *part, void *data)
5447{
5448 if (create_on_open)
5449 md_alloc(dev, NULL);
5450 return NULL;
5451}
5452
5453static int add_named_array(const char *val, const struct kernel_param *kp)
5454{
5455	/*
5456	 * val must be "md_*" or "mdNNN".
5457	 * For "md_*" we allocate an array with a large free minor number, and
5458	 * set the name to val.  val must not already be an active name.
5459	 * For "mdNNN" we allocate an array with the minor number NNN
5460	 * which must not already be in use.
5461	 */
5462 int len = strlen(val);
5463 char buf[DISK_NAME_LEN];
5464 unsigned long devnum;
5465
5466 while (len && val[len-1] == '\n')
5467 len--;
5468 if (len >= DISK_NAME_LEN)
5469 return -E2BIG;
5470 strlcpy(buf, val, len+1);
5471 if (strncmp(buf, "md_", 3) == 0)
5472 return md_alloc(0, buf);
5473 if (strncmp(buf, "md", 2) == 0 &&
5474 isdigit(buf[2]) &&
5475 kstrtoul(buf+2, 10, &devnum) == 0 &&
5476 devnum <= MINORMASK)
5477 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5478
5479 return -EINVAL;
5480}
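
/*
 * Illustrative use of the module parameter wired to add_named_array()
 * (the array name is an example):
 *
 *   echo md_home > /sys/module/md_mod/parameters/new_array
 *
 * allocates an array whose gendisk is literally named "md_home".
 */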
5481
5482static void md_safemode_timeout(struct timer_list *t)
5483{
5484 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5485
5486 mddev->safemode = 1;
5487 if (mddev->external)
5488 sysfs_notify_dirent_safe(mddev->sysfs_state);
5489
5490 md_wakeup_thread(mddev->thread);
5491}
5492
5493static int start_dirty_degraded;
5494
5495int md_run(struct mddev *mddev)
5496{
5497 int err;
5498 struct md_rdev *rdev;
5499 struct md_personality *pers;
5500
5501 if (list_empty(&mddev->disks))
5502		/* cannot run an array with no devices.. */
5503 return -EINVAL;
5504
5505 if (mddev->pers)
5506 return -EBUSY;
5507
5508 if (mddev->sysfs_active)
5509 return -EBUSY;
5510
5511	/*
5512	 * Analyze all RAID superblock(s)
5513	 */
5514 if (!mddev->raid_disks) {
5515 if (!mddev->persistent)
5516 return -EINVAL;
5517 analyze_sbs(mddev);
5518 }
5519
5520 if (mddev->level != LEVEL_NONE)
5521 request_module("md-level-%d", mddev->level);
5522 else if (mddev->clevel[0])
5523 request_module("md-%s", mddev->clevel);
5524
5525	/*
5526	 * Drop all container device buffers, from now on
5527	 * the only valid external interface is through the md
5528	 * device.
5529	 */
5530 mddev->has_superblocks = false;
5531 rdev_for_each(rdev, mddev) {
5532 if (test_bit(Faulty, &rdev->flags))
5533 continue;
5534 sync_blockdev(rdev->bdev);
5535 invalidate_bdev(rdev->bdev);
5536 if (mddev->ro != 1 &&
5537 (bdev_read_only(rdev->bdev) ||
5538 bdev_read_only(rdev->meta_bdev))) {
5539 mddev->ro = 1;
5540 if (mddev->gendisk)
5541 set_disk_ro(mddev->gendisk, 1);
5542 }
5543
5544 if (rdev->sb_page)
5545 mddev->has_superblocks = true;
5546
5547		/* perform some consistency tests on the device.
5548		 * We don't want the data to overlap the metadata,
5549		 * Internal Bitmap issues have been handled elsewhere.
5550		 */
5551 if (rdev->meta_bdev) {
5552 ;
5553 } else if (rdev->data_offset < rdev->sb_start) {
5554 if (mddev->dev_sectors &&
5555 rdev->data_offset + mddev->dev_sectors
5556 > rdev->sb_start) {
5557 pr_warn("md: %s: data overlaps metadata\n",
5558 mdname(mddev));
5559 return -EINVAL;
5560 }
5561 } else {
5562 if (rdev->sb_start + rdev->sb_size/512
5563 > rdev->data_offset) {
5564 pr_warn("md: %s: metadata overlaps data\n",
5565 mdname(mddev));
5566 return -EINVAL;
5567 }
5568 }
5569 sysfs_notify_dirent_safe(rdev->sysfs_state);
5570 }
5571
5572 if (!bioset_initialized(&mddev->bio_set)) {
5573 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5574 if (err)
5575 return err;
5576 }
5577 if (!bioset_initialized(&mddev->sync_set)) {
5578 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5579 if (err)
5580 return err;
5581 }
5582
5583 spin_lock(&pers_lock);
5584 pers = find_pers(mddev->level, mddev->clevel);
5585 if (!pers || !try_module_get(pers->owner)) {
5586 spin_unlock(&pers_lock);
5587 if (mddev->level != LEVEL_NONE)
5588 pr_warn("md: personality for level %d is not loaded!\n",
5589 mddev->level);
5590 else
5591 pr_warn("md: personality for level %s is not loaded!\n",
5592 mddev->clevel);
5593 err = -EINVAL;
5594 goto abort;
5595 }
5596 spin_unlock(&pers_lock);
5597 if (mddev->level != pers->level) {
5598 mddev->level = pers->level;
5599 mddev->new_level = pers->level;
5600 }
5601 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5602
5603 if (mddev->reshape_position != MaxSector &&
5604 pers->start_reshape == NULL) {
5605		/* This personality cannot handle reshaping... */
5606 module_put(pers->owner);
5607 err = -EINVAL;
5608 goto abort;
5609 }
5610
5611 if (pers->sync_request) {
5612		/* Warn if any two member devices appear to share the same
5613		 * underlying physical disk, as redundancy is then illusory.
5614		 */
5615 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5616 struct md_rdev *rdev2;
5617 int warned = 0;
5618
5619 rdev_for_each(rdev, mddev)
5620 rdev_for_each(rdev2, mddev) {
5621 if (rdev < rdev2 &&
5622 rdev->bdev->bd_contains ==
5623 rdev2->bdev->bd_contains) {
5624 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5625 mdname(mddev),
5626 bdevname(rdev->bdev,b),
5627 bdevname(rdev2->bdev,b2));
5628 warned = 1;
5629 }
5630 }
5631
5632 if (warned)
5633 pr_warn("True protection against single-disk failure might be compromised.\n");
5634 }
5635
5636 mddev->recovery = 0;
5637	/* may be over-ridden by personality */
5638 mddev->resync_max_sectors = mddev->dev_sectors;
5639
5640 mddev->ok_start_degraded = start_dirty_degraded;
5641
5642 if (start_readonly && mddev->ro == 0)
5643 mddev->ro = 2;
5644
5645 err = pers->run(mddev);
5646 if (err)
5647 pr_warn("md: pers->run() failed ...\n");
5648 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5649 WARN_ONCE(!mddev->external_size,
5650 "%s: default size too small, but 'external_size' not in effect?\n",
5651 __func__);
5652 pr_warn("md: invalid array_size %llu > default size %llu\n",
5653 (unsigned long long)mddev->array_sectors / 2,
5654 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5655 err = -EINVAL;
5656 }
5657 if (err == 0 && pers->sync_request &&
5658 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5659 struct bitmap *bitmap;
5660
5661 bitmap = md_bitmap_create(mddev, -1);
5662 if (IS_ERR(bitmap)) {
5663 err = PTR_ERR(bitmap);
5664 pr_warn("%s: failed to create bitmap (%d)\n",
5665 mdname(mddev), err);
5666 } else
5667 mddev->bitmap = bitmap;
5668
5669 }
5670 if (err)
5671 goto bitmap_abort;
5672
5673 if (mddev->bitmap_info.max_write_behind > 0) {
5674 bool creat_pool = false;
5675
5676 rdev_for_each(rdev, mddev) {
5677 if (test_bit(WriteMostly, &rdev->flags) &&
5678 rdev_init_wb(rdev))
5679 creat_pool = true;
5680 }
5681 if (creat_pool && mddev->wb_info_pool == NULL) {
5682 mddev->wb_info_pool =
5683 mempool_create_kmalloc_pool(NR_WB_INFOS,
5684 sizeof(struct wb_info));
5685 if (!mddev->wb_info_pool) {
5686 err = -ENOMEM;
5687 goto bitmap_abort;
5688 }
5689 }
5690 }
5691
5692 if (mddev->queue) {
5693 bool nonrot = true;
5694
5695 rdev_for_each(rdev, mddev) {
5696 if (rdev->raid_disk >= 0 &&
5697 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5698 nonrot = false;
5699 break;
5700 }
5701 }
5702 if (mddev->degraded)
5703 nonrot = false;
5704 if (nonrot)
5705 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5706 else
5707 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5708 mddev->queue->backing_dev_info->congested_data = mddev;
5709 mddev->queue->backing_dev_info->congested_fn = md_congested;
5710 }
5711 if (pers->sync_request) {
5712 if (mddev->kobj.sd &&
5713 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5714 pr_warn("md: cannot register extra attributes for %s\n",
5715 mdname(mddev));
5716 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5717 } else if (mddev->ro == 2)
5718 mddev->ro = 0;
5719
5720 atomic_set(&mddev->max_corr_read_errors,
5721 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5722 mddev->safemode = 0;
5723 if (mddev_is_clustered(mddev))
5724 mddev->safemode_delay = 0;
5725 else
5726 mddev->safemode_delay = (200 * HZ)/1000 +1;
5727 mddev->in_sync = 1;
5728 smp_wmb();
5729 spin_lock(&mddev->lock);
5730 mddev->pers = pers;
5731 spin_unlock(&mddev->lock);
5732 rdev_for_each(rdev, mddev)
5733 if (rdev->raid_disk >= 0)
5734 sysfs_link_rdev(mddev, rdev);
5735
5736 if (mddev->degraded && !mddev->ro)
5737		/* This ensures that recovering status is reported immediately
5738		 * via sysfs - until a lack of spares is confirmed.
5739		 */
5740 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5741 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5742
5743 if (mddev->sb_flags)
5744 md_update_sb(mddev, 0);
5745
5746 md_new_event(mddev);
5747 sysfs_notify_dirent_safe(mddev->sysfs_state);
5748 sysfs_notify_dirent_safe(mddev->sysfs_action);
5749 sysfs_notify(&mddev->kobj, NULL, "degraded");
5750 return 0;
5751
5752bitmap_abort:
5753 mddev_detach(mddev);
5754 if (mddev->private)
5755 pers->free(mddev, mddev->private);
5756 mddev->private = NULL;
5757 module_put(pers->owner);
5758 md_bitmap_destroy(mddev);
5759abort:
5760 bioset_exit(&mddev->bio_set);
5761 bioset_exit(&mddev->sync_set);
5762 return err;
5763}
5764EXPORT_SYMBOL_GPL(md_run);
5765
5766static int do_md_run(struct mddev *mddev)
5767{
5768 int err;
5769
5770 err = md_run(mddev);
5771 if (err)
5772 goto out;
5773 err = md_bitmap_load(mddev);
5774 if (err) {
5775 md_bitmap_destroy(mddev);
5776 goto out;
5777 }
5778
5779 if (mddev_is_clustered(mddev))
5780 md_allow_write(mddev);
5781
5782	/* run start up tasks that require md_thread */
5783 md_start(mddev);
5784
5785 md_wakeup_thread(mddev->thread);
5786 md_wakeup_thread(mddev->sync_thread);
5787
5788 set_capacity(mddev->gendisk, mddev->array_sectors);
5789 revalidate_disk(mddev->gendisk);
5790 mddev->changed = 1;
5791 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5792out:
5793 return err;
5794}
5795
5796int md_start(struct mddev *mddev)
5797{
5798 int ret = 0;
5799
5800 if (mddev->pers->start) {
5801 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5802 md_wakeup_thread(mddev->thread);
5803 ret = mddev->pers->start(mddev);
5804 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5805 md_wakeup_thread(mddev->sync_thread);
5806 }
5807 return ret;
5808}
5809EXPORT_SYMBOL_GPL(md_start);
5810
5811static int restart_array(struct mddev *mddev)
5812{
5813 struct gendisk *disk = mddev->gendisk;
5814 struct md_rdev *rdev;
5815 bool has_journal = false;
5816 bool has_readonly = false;
5817
5818	/* Complain if it has no devices */
5819 if (list_empty(&mddev->disks))
5820 return -ENXIO;
5821 if (!mddev->pers)
5822 return -EINVAL;
5823 if (!mddev->ro)
5824 return -EBUSY;
5825
5826 rcu_read_lock();
5827 rdev_for_each_rcu(rdev, mddev) {
5828 if (test_bit(Journal, &rdev->flags) &&
5829 !test_bit(Faulty, &rdev->flags))
5830 has_journal = true;
5831 if (bdev_read_only(rdev->bdev))
5832 has_readonly = true;
5833 }
5834 rcu_read_unlock();
5835 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
5836		/* Don't restart rw with journal missing/faulty */
5837 return -EINVAL;
5838 if (has_readonly)
5839 return -EROFS;
5840
5841 mddev->safemode = 0;
5842 mddev->ro = 0;
5843 set_disk_ro(disk, 0);
5844 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5845
5846 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5847 md_wakeup_thread(mddev->thread);
5848 md_wakeup_thread(mddev->sync_thread);
5849 sysfs_notify_dirent_safe(mddev->sysfs_state);
5850 return 0;
5851}
5852
5853static void md_clean(struct mddev *mddev)
5854{
5855 mddev->array_sectors = 0;
5856 mddev->external_size = 0;
5857 mddev->dev_sectors = 0;
5858 mddev->raid_disks = 0;
5859 mddev->recovery_cp = 0;
5860 mddev->resync_min = 0;
5861 mddev->resync_max = MaxSector;
5862 mddev->reshape_position = MaxSector;
5863 mddev->external = 0;
5864 mddev->persistent = 0;
5865 mddev->level = LEVEL_NONE;
5866 mddev->clevel[0] = 0;
5867 mddev->flags = 0;
5868 mddev->sb_flags = 0;
5869 mddev->ro = 0;
5870 mddev->metadata_type[0] = 0;
5871 mddev->chunk_sectors = 0;
5872 mddev->ctime = mddev->utime = 0;
5873 mddev->layout = 0;
5874 mddev->max_disks = 0;
5875 mddev->events = 0;
5876 mddev->can_decrease_events = 0;
5877 mddev->delta_disks = 0;
5878 mddev->reshape_backwards = 0;
5879 mddev->new_level = LEVEL_NONE;
5880 mddev->new_layout = 0;
5881 mddev->new_chunk_sectors = 0;
5882 mddev->curr_resync = 0;
5883 atomic64_set(&mddev->resync_mismatches, 0);
5884 mddev->suspend_lo = mddev->suspend_hi = 0;
5885 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5886 mddev->recovery = 0;
5887 mddev->in_sync = 0;
5888 mddev->changed = 0;
5889 mddev->degraded = 0;
5890 mddev->safemode = 0;
5891 mddev->private = NULL;
5892 mddev->cluster_info = NULL;
5893 mddev->bitmap_info.offset = 0;
5894 mddev->bitmap_info.default_offset = 0;
5895 mddev->bitmap_info.default_space = 0;
5896 mddev->bitmap_info.chunksize = 0;
5897 mddev->bitmap_info.daemon_sleep = 0;
5898 mddev->bitmap_info.max_write_behind = 0;
5899 mddev->bitmap_info.nodes = 0;
5900}
5901
5902static void __md_stop_writes(struct mddev *mddev)
5903{
5904 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5905 flush_workqueue(md_misc_wq);
5906 if (mddev->sync_thread) {
5907 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5908 md_reap_sync_thread(mddev);
5909 }
5910
5911 del_timer_sync(&mddev->safemode_timer);
5912
5913 if (mddev->pers && mddev->pers->quiesce) {
5914 mddev->pers->quiesce(mddev, 1);
5915 mddev->pers->quiesce(mddev, 0);
5916 }
5917 md_bitmap_flush(mddev);
5918
5919 if (mddev->ro == 0 &&
5920 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5921 mddev->sb_flags)) {
5922		/* mark array as shutdown cleanly */
5923 if (!mddev_is_clustered(mddev))
5924 mddev->in_sync = 1;
5925 md_update_sb(mddev, 1);
5926 }
5927 mempool_destroy(mddev->wb_info_pool);
5928 mddev->wb_info_pool = NULL;
5929}
5930
5931void md_stop_writes(struct mddev *mddev)
5932{
5933 mddev_lock_nointr(mddev);
5934 __md_stop_writes(mddev);
5935 mddev_unlock(mddev);
5936}
5937EXPORT_SYMBOL_GPL(md_stop_writes);
5938
5939static void mddev_detach(struct mddev *mddev)
5940{
5941 md_bitmap_wait_behind_writes(mddev);
5942 if (mddev->pers && mddev->pers->quiesce) {
5943 mddev->pers->quiesce(mddev, 1);
5944 mddev->pers->quiesce(mddev, 0);
5945 }
5946 md_unregister_thread(&mddev->thread);
5947 if (mddev->queue)
5948 blk_sync_queue(mddev->queue);
5949}
5950
5951static void __md_stop(struct mddev *mddev)
5952{
5953 struct md_personality *pers = mddev->pers;
5954 md_bitmap_destroy(mddev);
5955 mddev_detach(mddev);
5956
5957 flush_workqueue(md_misc_wq);
5958 spin_lock(&mddev->lock);
5959 mddev->pers = NULL;
5960 spin_unlock(&mddev->lock);
5961 pers->free(mddev, mddev->private);
5962 mddev->private = NULL;
5963 if (pers->sync_request && mddev->to_remove == NULL)
5964 mddev->to_remove = &md_redundancy_group;
5965 module_put(pers->owner);
5966 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5967}
5968
5969void md_stop(struct mddev *mddev)
5970{
5971	/* stop the array and free any attached data structures.
5972	 * This is called from dm-raid.
5973	 */
5974 __md_stop(mddev);
5975 bioset_exit(&mddev->bio_set);
5976 bioset_exit(&mddev->sync_set);
5977}
5978
5979EXPORT_SYMBOL_GPL(md_stop);
5980
5981static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5982{
5983 int err = 0;
5984 int did_freeze = 0;
5985
5986 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5987 did_freeze = 1;
5988 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5989 md_wakeup_thread(mddev->thread);
5990 }
5991 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5992 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5993 if (mddev->sync_thread)
5994		/* Thread might be blocked waiting for metadata update
5995		 * which will now never happen */
5996 wake_up_process(mddev->sync_thread->tsk);
5997
5998 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5999 return -EBUSY;
6000 mddev_unlock(mddev);
6001 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6002 &mddev->recovery));
6003 wait_event(mddev->sb_wait,
6004 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6005 mddev_lock_nointr(mddev);
6006
6007 mutex_lock(&mddev->open_mutex);
6008 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6009 mddev->sync_thread ||
6010 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6011 pr_warn("md: %s still in use.\n",mdname(mddev));
6012 if (did_freeze) {
6013 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6014 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6015 md_wakeup_thread(mddev->thread);
6016 }
6017 err = -EBUSY;
6018 goto out;
6019 }
6020 if (mddev->pers) {
6021 __md_stop_writes(mddev);
6022
6023 err = -ENXIO;
6024 if (mddev->ro==1)
6025 goto out;
6026 mddev->ro = 1;
6027 set_disk_ro(mddev->gendisk, 1);
6028 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6029 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6030 md_wakeup_thread(mddev->thread);
6031 sysfs_notify_dirent_safe(mddev->sysfs_state);
6032 err = 0;
6033 }
6034out:
6035 mutex_unlock(&mddev->open_mutex);
6036 return err;
6037}
6038
6039/* mode:
6040 *   0 - completely stop and dis-assemble array
6041 *   2 - stop but do not disassemble array
6042 */
6043static int do_md_stop(struct mddev *mddev, int mode,
6044 struct block_device *bdev)
6045{
6046 struct gendisk *disk = mddev->gendisk;
6047 struct md_rdev *rdev;
6048 int did_freeze = 0;
6049
6050 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6051 did_freeze = 1;
6052 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6053 md_wakeup_thread(mddev->thread);
6054 }
6055 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6056 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6057 if (mddev->sync_thread)
6058		/* Thread might be blocked waiting for metadata update
6059		 * which will now never happen */
6060 wake_up_process(mddev->sync_thread->tsk);
6061
6062 mddev_unlock(mddev);
6063 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6064 !test_bit(MD_RECOVERY_RUNNING,
6065 &mddev->recovery)));
6066 mddev_lock_nointr(mddev);
6067
6068 mutex_lock(&mddev->open_mutex);
6069 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6070 mddev->sysfs_active ||
6071 mddev->sync_thread ||
6072 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6073 pr_warn("md: %s still in use.\n",mdname(mddev));
6074 mutex_unlock(&mddev->open_mutex);
6075 if (did_freeze) {
6076 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6077 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6078 md_wakeup_thread(mddev->thread);
6079 }
6080 return -EBUSY;
6081 }
6082 if (mddev->pers) {
6083 if (mddev->ro)
6084 set_disk_ro(disk, 0);
6085
6086 __md_stop_writes(mddev);
6087 __md_stop(mddev);
6088 mddev->queue->backing_dev_info->congested_fn = NULL;
6089
6090		/* tell userspace to handle 'inactive' */
6091 sysfs_notify_dirent_safe(mddev->sysfs_state);
6092
6093 rdev_for_each(rdev, mddev)
6094 if (rdev->raid_disk >= 0)
6095 sysfs_unlink_rdev(mddev, rdev);
6096
6097 set_capacity(disk, 0);
6098 mutex_unlock(&mddev->open_mutex);
6099 mddev->changed = 1;
6100 revalidate_disk(disk);
6101
6102 if (mddev->ro)
6103 mddev->ro = 0;
6104 } else
6105 mutex_unlock(&mddev->open_mutex);
6106
6107
6108	/* Free resources if final stop */
6109 if (mode == 0) {
6110 pr_info("md: %s stopped.\n", mdname(mddev));
6111
6112 if (mddev->bitmap_info.file) {
6113 struct file *f = mddev->bitmap_info.file;
6114 spin_lock(&mddev->lock);
6115 mddev->bitmap_info.file = NULL;
6116 spin_unlock(&mddev->lock);
6117 fput(f);
6118 }
6119 mddev->bitmap_info.offset = 0;
6120
6121 export_array(mddev);
6122
6123 md_clean(mddev);
6124 if (mddev->hold_active == UNTIL_STOP)
6125 mddev->hold_active = 0;
6126 }
6127 md_new_event(mddev);
6128 sysfs_notify_dirent_safe(mddev->sysfs_state);
6129 return 0;
6130}
6131
6132#ifndef MODULE
6133static void autorun_array(struct mddev *mddev)
6134{
6135 struct md_rdev *rdev;
6136 int err;
6137
6138 if (list_empty(&mddev->disks))
6139 return;
6140
6141 pr_info("md: running: ");
6142
6143 rdev_for_each(rdev, mddev) {
6144 char b[BDEVNAME_SIZE];
6145 pr_cont("<%s>", bdevname(rdev->bdev,b));
6146 }
6147 pr_cont("\n");
6148
6149 err = do_md_run(mddev);
6150 if (err) {
6151 pr_warn("md: do_md_run() returned %d\n", err);
6152 do_md_stop(mddev, 0, NULL);
6153 }
6154}
6155
6156/*
6157 * let's try to run arrays based on all disks that have arrived
6158 * until now. (those are in pending_raid_disks list)
6159 *
6160 * the method: pick the first pending disk, collect all disks with
6161 * the same UUID, remove all from the pending list and put them into
6162 * the 'same_array' list. Then order this list based on superblock
6163 * update time (freshest comes first), kick out 'old' disks and
6164 * compare superblocks. If everything's fine then run it.
6165 *
6166 * If "unit" is allocated, then bump its reference count
6167 */
6168static void autorun_devices(int part)
6169{
6170 struct md_rdev *rdev0, *rdev, *tmp;
6171 struct mddev *mddev;
6172 char b[BDEVNAME_SIZE];
6173
6174 pr_info("md: autorun ...\n");
6175 while (!list_empty(&pending_raid_disks)) {
6176 int unit;
6177 dev_t dev;
6178 LIST_HEAD(candidates);
6179 rdev0 = list_entry(pending_raid_disks.next,
6180 struct md_rdev, same_set);
6181
6182 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6183 INIT_LIST_HEAD(&candidates);
6184 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6185 if (super_90_load(rdev, rdev0, 0) >= 0) {
6186 pr_debug("md: adding %s ...\n",
6187 bdevname(rdev->bdev,b));
6188 list_move(&rdev->same_set, &candidates);
6189 }
6190		/*
6191		 * now we have a set of devices, with all of them having
6192		 * mostly sane superblocks. It's time to allocate the
6193		 * mddev.
6194		 */
6195 if (part) {
6196 dev = MKDEV(mdp_major,
6197 rdev0->preferred_minor << MdpMinorShift);
6198 unit = MINOR(dev) >> MdpMinorShift;
6199 } else {
6200 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6201 unit = MINOR(dev);
6202 }
6203 if (rdev0->preferred_minor != unit) {
6204 pr_warn("md: unit number in %s is bad: %d\n",
6205 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6206 break;
6207 }
6208
6209 md_probe(dev, NULL, NULL);
6210 mddev = mddev_find(dev);
6211 if (!mddev || !mddev->gendisk) {
6212 if (mddev)
6213 mddev_put(mddev);
6214 break;
6215 }
6216 if (mddev_lock(mddev))
6217 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6218 else if (mddev->raid_disks || mddev->major_version
6219 || !list_empty(&mddev->disks)) {
6220 pr_warn("md: %s already running, cannot run %s\n",
6221 mdname(mddev), bdevname(rdev0->bdev,b));
6222 mddev_unlock(mddev);
6223 } else {
6224 pr_debug("md: created %s\n", mdname(mddev));
6225 mddev->persistent = 1;
6226 rdev_for_each_list(rdev, tmp, &candidates) {
6227 list_del_init(&rdev->same_set);
6228 if (bind_rdev_to_array(rdev, mddev))
6229 export_rdev(rdev);
6230 }
6231 autorun_array(mddev);
6232 mddev_unlock(mddev);
6233 }
6234		/* on success, candidates will be empty, on error
6235		 * it won't...
6236		 */
6237 rdev_for_each_list(rdev, tmp, &candidates) {
6238 list_del_init(&rdev->same_set);
6239 export_rdev(rdev);
6240 }
6241 mddev_put(mddev);
6242 }
6243 pr_info("md: ... autorun DONE.\n");
6244}
6245#endif
6246
6247static int get_version(void __user *arg)
6248{
6249 mdu_version_t ver;
6250
6251 ver.major = MD_MAJOR_VERSION;
6252 ver.minor = MD_MINOR_VERSION;
6253 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6254
6255 if (copy_to_user(arg, &ver, sizeof(ver)))
6256 return -EFAULT;
6257
6258 return 0;
6259}
6260
6261static int get_array_info(struct mddev *mddev, void __user *arg)
6262{
6263 mdu_array_info_t info;
6264 int nr,working,insync,failed,spare;
6265 struct md_rdev *rdev;
6266
6267 nr = working = insync = failed = spare = 0;
6268 rcu_read_lock();
6269 rdev_for_each_rcu(rdev, mddev) {
6270 nr++;
6271 if (test_bit(Faulty, &rdev->flags))
6272 failed++;
6273 else {
6274 working++;
6275 if (test_bit(In_sync, &rdev->flags))
6276 insync++;
6277 else if (test_bit(Journal, &rdev->flags))
6278				/* TODO: add journal count to md_u.h */
6279 ;
6280 else
6281 spare++;
6282 }
6283 }
6284 rcu_read_unlock();
6285
6286 info.major_version = mddev->major_version;
6287 info.minor_version = mddev->minor_version;
6288 info.patch_version = MD_PATCHLEVEL_VERSION;
6289 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6290 info.level = mddev->level;
6291 info.size = mddev->dev_sectors / 2;
6292 if (info.size != mddev->dev_sectors / 2)
6293 info.size = -1;
6294 info.nr_disks = nr;
6295 info.raid_disks = mddev->raid_disks;
6296 info.md_minor = mddev->md_minor;
6297 info.not_persistent= !mddev->persistent;
6298
6299 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6300 info.state = 0;
6301 if (mddev->in_sync)
6302 info.state = (1<<MD_SB_CLEAN);
6303 if (mddev->bitmap && mddev->bitmap_info.offset)
6304 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6305 if (mddev_is_clustered(mddev))
6306 info.state |= (1<<MD_SB_CLUSTERED);
6307 info.active_disks = insync;
6308 info.working_disks = working;
6309 info.failed_disks = failed;
6310 info.spare_disks = spare;
6311
6312 info.layout = mddev->layout;
6313 info.chunk_size = mddev->chunk_sectors << 9;
6314
6315 if (copy_to_user(arg, &info, sizeof(info)))
6316 return -EFAULT;
6317
6318 return 0;
6319}
6320
6321static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6322{
6323 mdu_bitmap_file_t *file = NULL;
6324 char *ptr;
6325 int err;
6326
6327 file = kzalloc(sizeof(*file), GFP_NOIO);
6328 if (!file)
6329 return -ENOMEM;
6330
6331 err = 0;
6332 spin_lock(&mddev->lock);
6333	/* bitmap enabled */
6334 if (mddev->bitmap_info.file) {
6335 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6336 sizeof(file->pathname));
6337 if (IS_ERR(ptr))
6338 err = PTR_ERR(ptr);
6339 else
6340 memmove(file->pathname, ptr,
6341 sizeof(file->pathname)-(ptr-file->pathname));
6342 }
6343 spin_unlock(&mddev->lock);
6344
6345 if (err == 0 &&
6346 copy_to_user(arg, file, sizeof(*file)))
6347 err = -EFAULT;
6348
6349 kfree(file);
6350 return err;
6351}
6352
6353static int get_disk_info(struct mddev *mddev, void __user * arg)
6354{
6355 mdu_disk_info_t info;
6356 struct md_rdev *rdev;
6357
6358 if (copy_from_user(&info, arg, sizeof(info)))
6359 return -EFAULT;
6360
6361 rcu_read_lock();
6362 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6363 if (rdev) {
6364 info.major = MAJOR(rdev->bdev->bd_dev);
6365 info.minor = MINOR(rdev->bdev->bd_dev);
6366 info.raid_disk = rdev->raid_disk;
6367 info.state = 0;
6368 if (test_bit(Faulty, &rdev->flags))
6369 info.state |= (1<<MD_DISK_FAULTY);
6370 else if (test_bit(In_sync, &rdev->flags)) {
6371 info.state |= (1<<MD_DISK_ACTIVE);
6372 info.state |= (1<<MD_DISK_SYNC);
6373 }
6374 if (test_bit(Journal, &rdev->flags))
6375 info.state |= (1<<MD_DISK_JOURNAL);
6376 if (test_bit(WriteMostly, &rdev->flags))
6377 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6378 if (test_bit(FailFast, &rdev->flags))
6379 info.state |= (1<<MD_DISK_FAILFAST);
6380 } else {
6381 info.major = info.minor = 0;
6382 info.raid_disk = -1;
6383 info.state = (1<<MD_DISK_REMOVED);
6384 }
6385 rcu_read_unlock();
6386
6387 if (copy_to_user(arg, &info, sizeof(info)))
6388 return -EFAULT;
6389
6390 return 0;
6391}
6392
6393static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6394{
6395 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6396 struct md_rdev *rdev;
6397 dev_t dev = MKDEV(info->major,info->minor);
6398
6399 if (mddev_is_clustered(mddev) &&
6400 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6401 pr_warn("%s: Cannot add to clustered mddev.\n",
6402 mdname(mddev));
6403 return -EINVAL;
6404 }
6405
6406 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6407 return -EOVERFLOW;
6408
6409 if (!mddev->raid_disks) {
6410 int err;
6411		/* expecting a device which has a superblock */
6412 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6413 if (IS_ERR(rdev)) {
6414 pr_warn("md: md_import_device returned %ld\n",
6415 PTR_ERR(rdev));
6416 return PTR_ERR(rdev);
6417 }
6418 if (!list_empty(&mddev->disks)) {
6419 struct md_rdev *rdev0
6420 = list_entry(mddev->disks.next,
6421 struct md_rdev, same_set);
6422 err = super_types[mddev->major_version]
6423 .load_super(rdev, rdev0, mddev->minor_version);
6424 if (err < 0) {
6425 pr_warn("md: %s has different UUID to %s\n",
6426 bdevname(rdev->bdev,b),
6427 bdevname(rdev0->bdev,b2));
6428 export_rdev(rdev);
6429 return -EINVAL;
6430 }
6431 }
6432 err = bind_rdev_to_array(rdev, mddev);
6433 if (err)
6434 export_rdev(rdev);
6435 return err;
6436 }
6437
6438	/*
6439	 * add_new_disk can be used once the array is assembled
6440	 * to add "hot spares".  They must already have a superblock
6441	 * written.
6442	 */
6443 if (mddev->pers) {
6444 int err;
6445 if (!mddev->pers->hot_add_disk) {
6446 pr_warn("%s: personality does not support diskops!\n",
6447 mdname(mddev));
6448 return -EINVAL;
6449 }
6450 if (mddev->persistent)
6451 rdev = md_import_device(dev, mddev->major_version,
6452 mddev->minor_version);
6453 else
6454 rdev = md_import_device(dev, -1, -1);
6455 if (IS_ERR(rdev)) {
6456 pr_warn("md: md_import_device returned %ld\n",
6457 PTR_ERR(rdev));
6458 return PTR_ERR(rdev);
6459 }
6460		/* set saved_raid_disk if appropriate */
6461 if (!mddev->persistent) {
6462 if (info->state & (1<<MD_DISK_SYNC) &&
6463 info->raid_disk < mddev->raid_disks) {
6464 rdev->raid_disk = info->raid_disk;
6465 set_bit(In_sync, &rdev->flags);
6466 clear_bit(Bitmap_sync, &rdev->flags);
6467 } else
6468 rdev->raid_disk = -1;
6469 rdev->saved_raid_disk = rdev->raid_disk;
6470 } else
6471 super_types[mddev->major_version].
6472 validate_super(mddev, rdev);
6473 if ((info->state & (1<<MD_DISK_SYNC)) &&
6474 rdev->raid_disk != info->raid_disk) {
6475			/* This was a hot-add request, but the event counts
6476			 * don't match, so reject it.
6477			 */
6478 export_rdev(rdev);
6479 return -EINVAL;
6480 }
6481
6482 clear_bit(In_sync, &rdev->flags);
6483 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6484 set_bit(WriteMostly, &rdev->flags);
6485 else
6486 clear_bit(WriteMostly, &rdev->flags);
6487 if (info->state & (1<<MD_DISK_FAILFAST))
6488 set_bit(FailFast, &rdev->flags);
6489 else
6490 clear_bit(FailFast, &rdev->flags);
6491
6492 if (info->state & (1<<MD_DISK_JOURNAL)) {
6493 struct md_rdev *rdev2;
6494 bool has_journal = false;
6495
6496			/* make sure there is no existing journal disk */
6497 rdev_for_each(rdev2, mddev) {
6498 if (test_bit(Journal, &rdev2->flags)) {
6499 has_journal = true;
6500 break;
6501 }
6502 }
6503 if (has_journal || mddev->bitmap) {
6504 export_rdev(rdev);
6505 return -EBUSY;
6506 }
6507 set_bit(Journal, &rdev->flags);
6508 }
6509
6510
6511		/* check whether the device shows up in other nodes */
6512 if (mddev_is_clustered(mddev)) {
6513 if (info->state & (1 << MD_DISK_CANDIDATE))
6514 set_bit(Candidate, &rdev->flags);
6515 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6516				/* --add initiated by this node */
6517 err = md_cluster_ops->add_new_disk(mddev, rdev);
6518 if (err) {
6519 export_rdev(rdev);
6520 return err;
6521 }
6522 }
6523 }
6524
6525 rdev->raid_disk = -1;
6526 err = bind_rdev_to_array(rdev, mddev);
6527
6528 if (err)
6529 export_rdev(rdev);
6530
6531 if (mddev_is_clustered(mddev)) {
6532 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6533 if (!err) {
6534 err = md_cluster_ops->new_disk_ack(mddev,
6535 err == 0);
6536 if (err)
6537 md_kick_rdev_from_array(rdev);
6538 }
6539 } else {
6540 if (err)
6541 md_cluster_ops->add_new_disk_cancel(mddev);
6542 else
6543 err = add_bound_rdev(rdev);
6544 }
6545
6546 } else if (!err)
6547 err = add_bound_rdev(rdev);
6548
6549 return err;
6550 }
6551
6552	/* otherwise, add_new_disk is only allowed
6553	 * for major_version==0 superblocks
6554	 */
6555 if (mddev->major_version != 0) {
6556 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6557 return -EINVAL;
6558 }
6559
6560 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6561 int err;
6562 rdev = md_import_device(dev, -1, 0);
6563 if (IS_ERR(rdev)) {
6564 pr_warn("md: error, md_import_device() returned %ld\n",
6565 PTR_ERR(rdev));
6566 return PTR_ERR(rdev);
6567 }
6568 rdev->desc_nr = info->number;
6569 if (info->raid_disk < mddev->raid_disks)
6570 rdev->raid_disk = info->raid_disk;
6571 else
6572 rdev->raid_disk = -1;
6573
6574 if (rdev->raid_disk < mddev->raid_disks)
6575 if (info->state & (1<<MD_DISK_SYNC))
6576 set_bit(In_sync, &rdev->flags);
6577
6578 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6579 set_bit(WriteMostly, &rdev->flags);
6580 if (info->state & (1<<MD_DISK_FAILFAST))
6581 set_bit(FailFast, &rdev->flags);
6582
6583 if (!mddev->persistent) {
6584 pr_debug("md: nonpersistent superblock ...\n");
6585 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6586 } else
6587 rdev->sb_start = calc_dev_sboffset(rdev);
6588 rdev->sectors = rdev->sb_start;
6589
6590 err = bind_rdev_to_array(rdev, mddev);
6591 if (err) {
6592 export_rdev(rdev);
6593 return err;
6594 }
6595 }
6596
6597 return 0;
6598}
6599
6600static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6601{
6602 char b[BDEVNAME_SIZE];
6603 struct md_rdev *rdev;
6604
6605 if (!mddev->pers)
6606 return -ENODEV;
6607
6608 rdev = find_rdev(mddev, dev);
6609 if (!rdev)
6610 return -ENXIO;
6611
6612 if (rdev->raid_disk < 0)
6613 goto kick_rdev;
6614
6615 clear_bit(Blocked, &rdev->flags);
6616 remove_and_add_spares(mddev, rdev);
6617
6618 if (rdev->raid_disk >= 0)
6619 goto busy;
6620
6621kick_rdev:
6622 if (mddev_is_clustered(mddev))
6623 md_cluster_ops->remove_disk(mddev, rdev);
6624
6625 md_kick_rdev_from_array(rdev);
6626 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6627 if (mddev->thread)
6628 md_wakeup_thread(mddev->thread);
6629 else
6630 md_update_sb(mddev, 1);
6631 md_new_event(mddev);
6632
6633 return 0;
6634busy:
6635 pr_debug("md: cannot remove active disk %s from %s ...\n",
6636 bdevname(rdev->bdev,b), mdname(mddev));
6637 return -EBUSY;
6638}
6639
6640static int hot_add_disk(struct mddev *mddev, dev_t dev)
6641{
6642 char b[BDEVNAME_SIZE];
6643 int err;
6644 struct md_rdev *rdev;
6645
6646 if (!mddev->pers)
6647 return -ENODEV;
6648
6649 if (mddev->major_version != 0) {
6650 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6651 mdname(mddev));
6652 return -EINVAL;
6653 }
6654 if (!mddev->pers->hot_add_disk) {
6655 pr_warn("%s: personality does not support diskops!\n",
6656 mdname(mddev));
6657 return -EINVAL;
6658 }
6659
6660 rdev = md_import_device(dev, -1, 0);
6661 if (IS_ERR(rdev)) {
6662 pr_warn("md: error, md_import_device() returned %ld\n",
6663 PTR_ERR(rdev));
6664 return -EINVAL;
6665 }
6666
6667 if (mddev->persistent)
6668 rdev->sb_start = calc_dev_sboffset(rdev);
6669 else
6670 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6671
6672 rdev->sectors = rdev->sb_start;
6673
6674 if (test_bit(Faulty, &rdev->flags)) {
6675 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6676 bdevname(rdev->bdev,b), mdname(mddev));
6677 err = -EINVAL;
6678 goto abort_export;
6679 }
6680
6681 clear_bit(In_sync, &rdev->flags);
6682 rdev->desc_nr = -1;
6683 rdev->saved_raid_disk = -1;
6684 err = bind_rdev_to_array(rdev, mddev);
6685 if (err)
6686 goto abort_export;
6687
6688
6689	/*
6690	 * The rest should better be atomic, we can have disk failures
6691	 * noticed in interrupt contexts ...
6692	 */
6693 rdev->raid_disk = -1;
6694
6695 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6696 if (!mddev->thread)
6697 md_update_sb(mddev, 1);
6698
6699	/* Kick recovery, maybe this spare has to be added to the
6700	 * array immediately.
6701	 */
6702 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6703 md_wakeup_thread(mddev->thread);
6704 md_new_event(mddev);
6705 return 0;
6706
6707abort_export:
6708 export_rdev(rdev);
6709 return err;
6710}
6711
6712static int set_bitmap_file(struct mddev *mddev, int fd)
6713{
6714 int err = 0;
6715
6716 if (mddev->pers) {
6717 if (!mddev->pers->quiesce || !mddev->thread)
6718 return -EBUSY;
6719 if (mddev->recovery || mddev->sync_thread)
6720 return -EBUSY;
		/* we should be able to change the bitmap.. */
6722 }
6723
6724 if (fd >= 0) {
6725 struct inode *inode;
6726 struct file *f;
6727
6728 if (mddev->bitmap || mddev->bitmap_info.file)
			return -EEXIST; /* cannot add while a bitmap is present */
6730 f = fget(fd);
6731
6732 if (f == NULL) {
6733 pr_warn("%s: error: failed to get bitmap file\n",
6734 mdname(mddev));
6735 return -EBADF;
6736 }
6737
6738 inode = f->f_mapping->host;
6739 if (!S_ISREG(inode->i_mode)) {
6740 pr_warn("%s: error: bitmap file must be a regular file\n",
6741 mdname(mddev));
6742 err = -EBADF;
6743 } else if (!(f->f_mode & FMODE_WRITE)) {
			pr_warn("%s: error: bitmap file must be opened for write\n",
6745 mdname(mddev));
6746 err = -EBADF;
6747 } else if (atomic_read(&inode->i_writecount) != 1) {
6748 pr_warn("%s: error: bitmap file is already in use\n",
6749 mdname(mddev));
6750 err = -EBUSY;
6751 }
6752 if (err) {
6753 fput(f);
6754 return err;
6755 }
6756 mddev->bitmap_info.file = f;
6757 mddev->bitmap_info.offset = 0;
6758 } else if (mddev->bitmap == NULL)
6759 return -ENOENT;
6760 err = 0;
6761 if (mddev->pers) {
6762 if (fd >= 0) {
6763 struct bitmap *bitmap;
6764
6765 bitmap = md_bitmap_create(mddev, -1);
6766 mddev_suspend(mddev);
6767 if (!IS_ERR(bitmap)) {
6768 mddev->bitmap = bitmap;
6769 err = md_bitmap_load(mddev);
6770 } else
6771 err = PTR_ERR(bitmap);
6772 if (err) {
6773 md_bitmap_destroy(mddev);
6774 fd = -1;
6775 }
6776 mddev_resume(mddev);
6777 } else if (fd < 0) {
6778 mddev_suspend(mddev);
6779 md_bitmap_destroy(mddev);
6780 mddev_resume(mddev);
6781 }
6782 }
6783 if (fd < 0) {
6784 struct file *f = mddev->bitmap_info.file;
6785 if (f) {
6786 spin_lock(&mddev->lock);
6787 mddev->bitmap_info.file = NULL;
6788 spin_unlock(&mddev->lock);
6789 fput(f);
6790 }
6791 }
6792
6793 return err;
6794}
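
/*
 * Userspace sketch (illustrative): SET_BITMAP_FILE takes an open file
 * descriptor for a regular, writable bitmap file, or -1 to drop the
 * bitmap again.  The path below is hypothetical.
 *
 *	int bfd = open("/var/lib/md0.bitmap", O_RDWR);
 *	ioctl(md_fd, SET_BITMAP_FILE, bfd);	// attach
 *	ioctl(md_fd, SET_BITMAP_FILE, -1);	// detach
 */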
6795
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
6809static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6810{
6811
6812 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
6814 if (info->major_version < 0 ||
6815 info->major_version >= ARRAY_SIZE(super_types) ||
6816 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
6818 pr_warn("md: superblock version %d not known\n",
6819 info->major_version);
6820 return -EINVAL;
6821 }
6822 mddev->major_version = info->major_version;
6823 mddev->minor_version = info->minor_version;
6824 mddev->patch_version = info->patch_version;
6825 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
6829 mddev->ctime = ktime_get_real_seconds();
6830 return 0;
6831 }
6832 mddev->major_version = MD_MAJOR_VERSION;
6833 mddev->minor_version = MD_MINOR_VERSION;
6834 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6835 mddev->ctime = ktime_get_real_seconds();
6836
6837 mddev->level = info->level;
6838 mddev->clevel[0] = 0;
6839 mddev->dev_sectors = 2 * (sector_t)info->size;
6840 mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
6844 if (info->state & (1<<MD_SB_CLEAN))
6845 mddev->recovery_cp = MaxSector;
6846 else
6847 mddev->recovery_cp = 0;
6848 mddev->persistent = ! info->not_persistent;
6849 mddev->external = 0;
6850
6851 mddev->layout = info->layout;
6852 mddev->chunk_sectors = info->chunk_size >> 9;
6853
6854 if (mddev->persistent) {
6855 mddev->max_disks = MD_SB_DISKS;
6856 mddev->flags = 0;
6857 mddev->sb_flags = 0;
6858 }
6859 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6860
6861 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6862 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6863 mddev->bitmap_info.offset = 0;
6864
6865 mddev->reshape_position = MaxSector;

	/*
	 * Generate a 128 bit UUID
	 */
6870 get_random_bytes(mddev->uuid, 16);
6871
6872 mddev->new_level = mddev->level;
6873 mddev->new_chunk_sectors = mddev->chunk_sectors;
6874 mddev->new_layout = mddev->layout;
6875 mddev->delta_disks = 0;
6876 mddev->reshape_backwards = 0;
6877
6878 return 0;
6879}
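
/*
 * Userspace sketch (illustrative): assembling from existing superblocks
 * takes the raid_disks == 0 path above, so only a superblock version is
 * needed; everything else is read back from the member devices.
 *
 *	mdu_array_info_t info = { 0 };
 *	info.major_version = 1;			// version-1 superblocks
 *	ioctl(md_fd, SET_ARRAY_INFO, &info);
 */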
6880
6881void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6882{
6883 lockdep_assert_held(&mddev->reconfig_mutex);
6884
6885 if (mddev->external_size)
6886 return;
6887
6888 mddev->array_sectors = array_sectors;
6889}
6890EXPORT_SYMBOL(md_set_array_sectors);
6891
6892static int update_size(struct mddev *mddev, sector_t num_sectors)
6893{
6894 struct md_rdev *rdev;
6895 int rv;
6896 int fit = (num_sectors == 0);
6897 sector_t old_dev_sectors = mddev->dev_sectors;
6898
6899 if (mddev->pers->resize == NULL)
6900 return -EINVAL;
6901
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is < data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
6910 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6911 mddev->sync_thread)
6912 return -EBUSY;
6913 if (mddev->ro)
6914 return -EROFS;
6915
6916 rdev_for_each(rdev, mddev) {
6917 sector_t avail = rdev->sectors;
6918
6919 if (fit && (num_sectors == 0 || num_sectors > avail))
6920 num_sectors = avail;
6921 if (avail < num_sectors)
6922 return -ENOSPC;
6923 }
6924 rv = mddev->pers->resize(mddev, num_sectors);
6925 if (!rv) {
6926 if (mddev_is_clustered(mddev))
6927 md_cluster_ops->update_size(mddev, old_dev_sectors);
6928 else if (mddev->queue) {
6929 set_capacity(mddev->gendisk, mddev->array_sectors);
6930 revalidate_disk(mddev->gendisk);
6931 }
6932 }
6933 return rv;
6934}
6935
6936static int update_raid_disks(struct mddev *mddev, int raid_disks)
6937{
6938 int rv;
6939 struct md_rdev *rdev;
	/* change the number of raid disks */
6941 if (mddev->pers->check_reshape == NULL)
6942 return -EINVAL;
6943 if (mddev->ro)
6944 return -EROFS;
6945 if (raid_disks <= 0 ||
6946 (mddev->max_disks && raid_disks >= mddev->max_disks))
6947 return -EINVAL;
6948 if (mddev->sync_thread ||
6949 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6950 mddev->reshape_position != MaxSector)
6951 return -EBUSY;
6952
6953 rdev_for_each(rdev, mddev) {
6954 if (mddev->raid_disks < raid_disks &&
6955 rdev->data_offset < rdev->new_data_offset)
6956 return -EINVAL;
6957 if (mddev->raid_disks > raid_disks &&
6958 rdev->data_offset > rdev->new_data_offset)
6959 return -EINVAL;
6960 }
6961
6962 mddev->delta_disks = raid_disks - mddev->raid_disks;
6963 if (mddev->delta_disks < 0)
6964 mddev->reshape_backwards = 1;
6965 else if (mddev->delta_disks > 0)
6966 mddev->reshape_backwards = 0;
6967
6968 rv = mddev->pers->check_reshape(mddev);
6969 if (rv < 0) {
6970 mddev->delta_disks = 0;
6971 mddev->reshape_backwards = 0;
6972 }
6973 return rv;
6974}
6975
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, chunk size and persistence cannot be
 * changed.  Of size, raid_disks, layout and bitmap-presence, at most
 * one may change per call; anything else is rejected with -EINVAL.
 */
6984static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6985{
6986 int rv = 0;
6987 int cnt = 0;
6988 int state = 0;
6989
	/* calculate expected state, checking for various errors */
6991 if (mddev->bitmap && mddev->bitmap_info.offset)
6992 state |= (1 << MD_SB_BITMAP_PRESENT);
6993
6994 if (mddev->major_version != info->major_version ||
6995 mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
6997 mddev->ctime != info->ctime ||
6998 mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
7000 mddev->persistent != !info->not_persistent ||
7001 mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
7003 ((state^info->state) & 0xfffffe00)
7004 )
7005 return -EINVAL;
	/* Check there is only one change */
7007 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7008 cnt++;
7009 if (mddev->raid_disks != info->raid_disks)
7010 cnt++;
7011 if (mddev->layout != info->layout)
7012 cnt++;
7013 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7014 cnt++;
7015 if (cnt == 0)
7016 return 0;
7017 if (cnt > 1)
7018 return -EINVAL;
7019
7020 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
7025 if (mddev->pers->check_reshape == NULL)
7026 return -EINVAL;
7027 else {
7028 mddev->new_layout = info->layout;
7029 rv = mddev->pers->check_reshape(mddev);
7030 if (rv)
7031 mddev->new_layout = mddev->layout;
7032 return rv;
7033 }
7034 }
7035 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7036 rv = update_size(mddev, (sector_t)info->size * 2);
7037
7038 if (mddev->raid_disks != info->raid_disks)
7039 rv = update_raid_disks(mddev, info->raid_disks);
7040
7041 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7042 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7043 rv = -EINVAL;
7044 goto err;
7045 }
7046 if (mddev->recovery || mddev->sync_thread) {
7047 rv = -EBUSY;
7048 goto err;
7049 }
7050 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7051 struct bitmap *bitmap;
7052
7053 if (mddev->bitmap) {
7054 rv = -EEXIST;
7055 goto err;
7056 }
7057 if (mddev->bitmap_info.default_offset == 0) {
7058 rv = -EINVAL;
7059 goto err;
7060 }
7061 mddev->bitmap_info.offset =
7062 mddev->bitmap_info.default_offset;
7063 mddev->bitmap_info.space =
7064 mddev->bitmap_info.default_space;
7065 bitmap = md_bitmap_create(mddev, -1);
7066 mddev_suspend(mddev);
7067 if (!IS_ERR(bitmap)) {
7068 mddev->bitmap = bitmap;
7069 rv = md_bitmap_load(mddev);
7070 } else
7071 rv = PTR_ERR(bitmap);
7072 if (rv)
7073 md_bitmap_destroy(mddev);
7074 mddev_resume(mddev);
7075 } else {
			/* remove the bitmap */
7077 if (!mddev->bitmap) {
7078 rv = -ENOENT;
7079 goto err;
7080 }
7081 if (mddev->bitmap->storage.file) {
7082 rv = -EINVAL;
7083 goto err;
7084 }
7085 if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
7087 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7088 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7089 rv = -EPERM;
7090 md_cluster_ops->unlock_all_bitmaps(mddev);
7091 goto err;
7092 }
7093
7094 mddev->bitmap_info.nodes = 0;
7095 md_cluster_ops->leave(mddev);
7096 }
7097 mddev_suspend(mddev);
7098 md_bitmap_destroy(mddev);
7099 mddev_resume(mddev);
7100 mddev->bitmap_info.offset = 0;
7101 }
7102 }
7103 md_update_sb(mddev, 1);
7104 return rv;
7105err:
7106 return rv;
7107}
7108
7109static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7110{
7111 struct md_rdev *rdev;
7112 int err = 0;
7113
7114 if (mddev->pers == NULL)
7115 return -ENODEV;
7116
7117 rcu_read_lock();
7118 rdev = md_find_rdev_rcu(mddev, dev);
7119 if (!rdev)
7120 err = -ENODEV;
7121 else {
7122 md_error(mddev, rdev);
7123 if (!test_bit(Faulty, &rdev->flags))
7124 err = -EBUSY;
7125 }
7126 rcu_read_unlock();
7127 return err;
7128}
7129
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dosfs just mad... ;-)
 */
7136static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7137{
7138 struct mddev *mddev = bdev->bd_disk->private_data;
7139
7140 geo->heads = 2;
7141 geo->sectors = 4;
7142 geo->cylinders = mddev->array_sectors / 8;
7143 return 0;
7144}
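
/*
 * Worked example for the fake geometry above (illustrative): a 1 TiB
 * array has 2147483648 512-byte sectors; with 2 heads and 4 sectors per
 * track (8 sectors per cylinder) md_getgeo() reports 268435456 cylinders.
 */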
7145
7146static inline bool md_ioctl_valid(unsigned int cmd)
7147{
7148 switch (cmd) {
7149 case ADD_NEW_DISK:
7150 case BLKROSET:
7151 case GET_ARRAY_INFO:
7152 case GET_BITMAP_FILE:
7153 case GET_DISK_INFO:
7154 case HOT_ADD_DISK:
7155 case HOT_REMOVE_DISK:
7156 case RAID_AUTORUN:
7157 case RAID_VERSION:
7158 case RESTART_ARRAY_RW:
7159 case RUN_ARRAY:
7160 case SET_ARRAY_INFO:
7161 case SET_BITMAP_FILE:
7162 case SET_DISK_FAULTY:
7163 case STOP_ARRAY:
7164 case STOP_ARRAY_RO:
7165 case CLUSTERED_DISK_NACK:
7166 return true;
7167 default:
7168 return false;
7169 }
7170}
7171
7172static int md_ioctl(struct block_device *bdev, fmode_t mode,
7173 unsigned int cmd, unsigned long arg)
7174{
7175 int err = 0;
7176 void __user *argp = (void __user *)arg;
7177 struct mddev *mddev = NULL;
7178 int ro;
7179 bool did_set_md_closing = false;
7180
7181 if (!md_ioctl_valid(cmd))
7182 return -ENOTTY;
7183
7184 switch (cmd) {
7185 case RAID_VERSION:
7186 case GET_ARRAY_INFO:
7187 case GET_DISK_INFO:
7188 break;
7189 default:
7190 if (!capable(CAP_SYS_ADMIN))
7191 return -EACCES;
7192 }
7193
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7198 switch (cmd) {
7199 case RAID_VERSION:
7200 err = get_version(argp);
7201 goto out;
7202
7203#ifndef MODULE
7204 case RAID_AUTORUN:
7205 err = 0;
7206 autostart_arrays(arg);
7207 goto out;
7208#endif
7209 default:;
7210 }
7211
	/*
	 * Commands creating/starting a new array:
	 */

7216 mddev = bdev->bd_disk->private_data;
7217
7218 if (!mddev) {
7219 BUG();
7220 goto out;
7221 }
7222
	/* Some actions do not require the mutex */
7224 switch (cmd) {
7225 case GET_ARRAY_INFO:
7226 if (!mddev->raid_disks && !mddev->external)
7227 err = -ENODEV;
7228 else
7229 err = get_array_info(mddev, argp);
7230 goto out;
7231
7232 case GET_DISK_INFO:
7233 if (!mddev->raid_disks && !mddev->external)
7234 err = -ENODEV;
7235 else
7236 err = get_disk_info(mddev, argp);
7237 goto out;
7238
7239 case SET_DISK_FAULTY:
7240 err = set_disk_faulty(mddev, new_decode_dev(arg));
7241 goto out;
7242
7243 case GET_BITMAP_FILE:
7244 err = get_bitmap_file(mddev, argp);
7245 goto out;
7246
7247 }
7248
7249 if (cmd == ADD_NEW_DISK)
		/* need to ensure md_delayed_delete() has completed */
7251 flush_workqueue(md_misc_wq);
7252
7253 if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
7255 wait_event_interruptible_timeout(mddev->sb_wait,
7256 !test_bit(MD_RECOVERY_NEEDED,
7257 &mddev->recovery),
7258 msecs_to_jiffies(5000));
7259 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
7263 mutex_lock(&mddev->open_mutex);
7264 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7265 mutex_unlock(&mddev->open_mutex);
7266 err = -EBUSY;
7267 goto out;
7268 }
7269 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7270 set_bit(MD_CLOSING, &mddev->flags);
7271 did_set_md_closing = true;
7272 mutex_unlock(&mddev->open_mutex);
7273 sync_blockdev(bdev);
7274 }
7275 err = mddev_lock(mddev);
7276 if (err) {
7277 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7278 err, cmd);
7279 goto out;
7280 }
7281
7282 if (cmd == SET_ARRAY_INFO) {
7283 mdu_array_info_t info;
7284 if (!arg)
7285 memset(&info, 0, sizeof(info));
7286 else if (copy_from_user(&info, argp, sizeof(info))) {
7287 err = -EFAULT;
7288 goto unlock;
7289 }
7290 if (mddev->pers) {
7291 err = update_array_info(mddev, &info);
7292 if (err) {
7293 pr_warn("md: couldn't update array info. %d\n", err);
7294 goto unlock;
7295 }
7296 goto unlock;
7297 }
7298 if (!list_empty(&mddev->disks)) {
7299 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7300 err = -EBUSY;
7301 goto unlock;
7302 }
7303 if (mddev->raid_disks) {
7304 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7305 err = -EBUSY;
7306 goto unlock;
7307 }
7308 err = set_array_info(mddev, &info);
7309 if (err) {
7310 pr_warn("md: couldn't set array info. %d\n", err);
7311 goto unlock;
7312 }
7313 goto unlock;
7314 }
7315
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
7321 if ((!mddev->raid_disks && !mddev->external)
7322 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7323 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7324 && cmd != GET_BITMAP_FILE) {
7325 err = -ENODEV;
7326 goto unlock;
7327 }
7328
	/*
	 * Commands even a read-only array can execute:
	 */
7332 switch (cmd) {
7333 case RESTART_ARRAY_RW:
7334 err = restart_array(mddev);
7335 goto unlock;
7336
7337 case STOP_ARRAY:
7338 err = do_md_stop(mddev, 0, bdev);
7339 goto unlock;
7340
7341 case STOP_ARRAY_RO:
7342 err = md_set_readonly(mddev, bdev);
7343 goto unlock;
7344
7345 case HOT_REMOVE_DISK:
7346 err = hot_remove_disk(mddev, new_decode_dev(arg));
7347 goto unlock;
7348
7349 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
7354 if (mddev->pers) {
7355 mdu_disk_info_t info;
7356 if (copy_from_user(&info, argp, sizeof(info)))
7357 err = -EFAULT;
7358 else if (!(info.state & (1<<MD_DISK_SYNC)))
				/* not a re-add; fall through to the r/w path below */
7360 break;
7361 else
7362 err = add_new_disk(mddev, &info);
7363 goto unlock;
7364 }
7365 break;
7366
7367 case BLKROSET:
7368 if (get_user(ro, (int __user *)(arg))) {
7369 err = -EFAULT;
7370 goto unlock;
7371 }
7372 err = -EINVAL;

		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
7377 if (ro)
7378 goto unlock;
7379
		/* are we already prepared for writes? */
7381 if (mddev->ro != 1)
7382 goto unlock;
7383
		/* transitioning to readauto need only happen for
		 * arrays that call md_write_start
		 */
7387 if (mddev->pers) {
7388 err = restart_array(mddev);
7389 if (err == 0) {
7390 mddev->ro = 2;
7391 set_disk_ro(mddev->gendisk, 0);
7392 }
7393 }
7394 goto unlock;
7395 }
7396
	/*
	 * The remaining ioctls are changing the state of the
	 * superblock, so we do not allow them on read-only arrays.
	 */
7401 if (mddev->ro && mddev->pers) {
7402 if (mddev->ro == 2) {
7403 mddev->ro = 0;
7404 sysfs_notify_dirent_safe(mddev->sysfs_state);
7405 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* mddev_unlock will wake thread */
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
7410 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7411 mddev_unlock(mddev);
7412 wait_event(mddev->sb_wait,
7413 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7414 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7415 mddev_lock_nointr(mddev);
7416 }
7417 } else {
7418 err = -EROFS;
7419 goto unlock;
7420 }
7421 }
7422
7423 switch (cmd) {
7424 case ADD_NEW_DISK:
7425 {
7426 mdu_disk_info_t info;
7427 if (copy_from_user(&info, argp, sizeof(info)))
7428 err = -EFAULT;
7429 else
7430 err = add_new_disk(mddev, &info);
7431 goto unlock;
7432 }
7433
7434 case CLUSTERED_DISK_NACK:
7435 if (mddev_is_clustered(mddev))
7436 md_cluster_ops->new_disk_ack(mddev, false);
7437 else
7438 err = -EINVAL;
7439 goto unlock;
7440
7441 case HOT_ADD_DISK:
7442 err = hot_add_disk(mddev, new_decode_dev(arg));
7443 goto unlock;
7444
7445 case RUN_ARRAY:
7446 err = do_md_run(mddev);
7447 goto unlock;
7448
7449 case SET_BITMAP_FILE:
7450 err = set_bitmap_file(mddev, (int)arg);
7451 goto unlock;
7452
7453 default:
7454 err = -EINVAL;
7455 goto unlock;
7456 }
7457
7458unlock:
7459 if (mddev->hold_active == UNTIL_IOCTL &&
7460 err != -EINVAL)
7461 mddev->hold_active = 0;
7462 mddev_unlock(mddev);
7463out:
	if (did_set_md_closing)
7465 clear_bit(MD_CLOSING, &mddev->flags);
7466 return err;
7467}
7468#ifdef CONFIG_COMPAT
7469static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7470 unsigned int cmd, unsigned long arg)
7471{
7472 switch (cmd) {
7473 case HOT_REMOVE_DISK:
7474 case HOT_ADD_DISK:
7475 case SET_DISK_FAULTY:
7476 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
7478 break;
7479 default:
7480 arg = (unsigned long)compat_ptr(arg);
7481 break;
7482 }
7483
7484 return md_ioctl(bdev, mode, cmd, arg);
7485}
7486#endif
7487
7488static int md_open(struct block_device *bdev, fmode_t mode)
7489{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7494 struct mddev *mddev = mddev_find(bdev->bd_dev);
7495 int err;
7496
7497 if (!mddev)
7498 return -ENODEV;
7499
7500 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
7504 mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
7506 flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
7508 return -ERESTARTSYS;
7509 }
7510 BUG_ON(mddev != bdev->bd_disk->private_data);
7511
7512 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7513 goto out;
7514
7515 if (test_bit(MD_CLOSING, &mddev->flags)) {
7516 mutex_unlock(&mddev->open_mutex);
7517 err = -ENODEV;
7518 goto out;
7519 }
7520
7521 err = 0;
7522 atomic_inc(&mddev->openers);
7523 mutex_unlock(&mddev->open_mutex);
7524
7525 check_disk_change(bdev);
7526 out:
7527 if (err)
7528 mddev_put(mddev);
7529 return err;
7530}
7531
7532static void md_release(struct gendisk *disk, fmode_t mode)
7533{
7534 struct mddev *mddev = disk->private_data;
7535
7536 BUG_ON(!mddev);
7537 atomic_dec(&mddev->openers);
7538 mddev_put(mddev);
7539}
7540
7541static int md_media_changed(struct gendisk *disk)
7542{
7543 struct mddev *mddev = disk->private_data;
7544
7545 return mddev->changed;
7546}
7547
7548static int md_revalidate(struct gendisk *disk)
7549{
7550 struct mddev *mddev = disk->private_data;
7551
7552 mddev->changed = 0;
7553 return 0;
7554}
7555static const struct block_device_operations md_fops =
7556{
7557 .owner = THIS_MODULE,
7558 .open = md_open,
7559 .release = md_release,
7560 .ioctl = md_ioctl,
7561#ifdef CONFIG_COMPAT
7562 .compat_ioctl = md_compat_ioctl,
7563#endif
7564 .getgeo = md_getgeo,
7565 .media_changed = md_media_changed,
7566 .revalidate_disk= md_revalidate,
7567};
7568
7569static int md_thread(void *arg)
7570{
7571 struct md_thread *thread = arg;
7572
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high. We avoid resource deadlocks individually in each
	 * thread and also cause other threads to come
	 * here too :)
	 *
	 * The loop below sleeps until THREAD_WAKEUP is set (or a stop or
	 * park request arrives) and calls thread->run() once per wakeup;
	 * thread->timeout allows periodic runs even without an explicit
	 * wakeup.
	 */
7585 allow_signal(SIGKILL);
7586 while (!kthread_should_stop()) {

		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7593 if (signal_pending(current))
7594 flush_signals(current);
7595
7596 wait_event_interruptible_timeout
7597 (thread->wqueue,
7598 test_bit(THREAD_WAKEUP, &thread->flags)
7599 || kthread_should_stop() || kthread_should_park(),
7600 thread->timeout);
7601
7602 clear_bit(THREAD_WAKEUP, &thread->flags);
7603 if (kthread_should_park())
7604 kthread_parkme();
7605 if (!kthread_should_stop())
7606 thread->run(thread);
7607 }
7608
7609 return 0;
7610}
7611
7612void md_wakeup_thread(struct md_thread *thread)
7613{
7614 if (thread) {
7615 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7616 set_bit(THREAD_WAKEUP, &thread->flags);
7617 wake_up(&thread->wqueue);
7618 }
7619}
7620EXPORT_SYMBOL(md_wakeup_thread);
7621
7622struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7623 struct mddev *mddev, const char *name)
7624{
7625 struct md_thread *thread;
7626
7627 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7628 if (!thread)
7629 return NULL;
7630
7631 init_waitqueue_head(&thread->wqueue);
7632
7633 thread->run = run;
7634 thread->mddev = mddev;
7635 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7636 thread->tsk = kthread_run(md_thread, thread,
7637 "%s_%s",
7638 mdname(thread->mddev),
7639 name);
7640 if (IS_ERR(thread->tsk)) {
7641 kfree(thread);
7642 return NULL;
7643 }
7644 return thread;
7645}
7646EXPORT_SYMBOL(md_register_thread);
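
/*
 * Usage sketch (hypothetical personality code, not taken from any real
 * personality): register a daemon thread when the array starts and tear
 * it down again on stop.
 *
 *	static void raidX_daemon(struct md_thread *thread) { ... }
 *	...
 *	conf->thread = md_register_thread(raidX_daemon, mddev, "raidX");
 *	if (!conf->thread)
 *		goto abort;
 *	...
 *	md_unregister_thread(&conf->thread);
 */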
7647
7648void md_unregister_thread(struct md_thread **threadp)
7649{
7650 struct md_thread *thread = *threadp;
7651 if (!thread)
7652 return;
7653 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
7657 spin_lock(&pers_lock);
7658 *threadp = NULL;
7659 spin_unlock(&pers_lock);
7660
7661 kthread_stop(thread->tsk);
7662 kfree(thread);
7663}
7664EXPORT_SYMBOL(md_unregister_thread);
7665
7666void md_error(struct mddev *mddev, struct md_rdev *rdev)
7667{
7668 if (!rdev || test_bit(Faulty, &rdev->flags))
7669 return;
7670
7671 if (!mddev->pers || !mddev->pers->error_handler)
7672 return;
7673 mddev->pers->error_handler(mddev,rdev);
7674 if (mddev->degraded)
7675 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7676 sysfs_notify_dirent_safe(rdev->sysfs_state);
7677 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7678 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7679 md_wakeup_thread(mddev->thread);
7680 if (mddev->event_work.func)
7681 queue_work(md_misc_wq, &mddev->event_work);
7682 md_new_event(mddev);
7683}
7684EXPORT_SYMBOL(md_error);
7685
7686
/* seq_file implementation /proc/mdstat */
7688static void status_unused(struct seq_file *seq)
7689{
7690 int i = 0;
7691 struct md_rdev *rdev;
7692
7693 seq_printf(seq, "unused devices: ");
7694
7695 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7696 char b[BDEVNAME_SIZE];
7697 i++;
7698 seq_printf(seq, "%s ",
7699 bdevname(rdev->bdev,b));
7700 }
7701 if (!i)
7702 seq_printf(seq, "<none>");
7703
7704 seq_printf(seq, "\n");
7705}
7706
7707static int status_resync(struct seq_file *seq, struct mddev *mddev)
7708{
7709 sector_t max_sectors, resync, res;
7710 unsigned long dt, db = 0;
7711 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7712 int scale, recovery_active;
7713 unsigned int per_milli;
7714
7715 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7716 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7717 max_sectors = mddev->resync_max_sectors;
7718 else
7719 max_sectors = mddev->dev_sectors;
7720
7721 resync = mddev->curr_resync;
7722 if (resync <= 3) {
7723 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
7725 resync = max_sectors;
7726 } else if (resync > max_sectors)
7727 resync = max_sectors;
7728 else
7729 resync -= atomic_read(&mddev->recovery_active);
7730
7731 if (resync == 0) {
7732 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7733 struct md_rdev *rdev;
7734
7735 rdev_for_each(rdev, mddev)
7736 if (rdev->raid_disk >= 0 &&
7737 !test_bit(Faulty, &rdev->flags) &&
7738 rdev->recovery_offset != MaxSector &&
7739 rdev->recovery_offset) {
7740 seq_printf(seq, "\trecover=REMOTE");
7741 return 1;
7742 }
7743 if (mddev->reshape_position != MaxSector)
7744 seq_printf(seq, "\treshape=REMOTE");
7745 else
7746 seq_printf(seq, "\tresync=REMOTE");
7747 return 1;
7748 }
7749 if (mddev->recovery_cp < MaxSector) {
7750 seq_printf(seq, "\tresync=PENDING");
7751 return 1;
7752 }
7753 return 0;
7754 }
7755 if (resync < 3) {
7756 seq_printf(seq, "\tresync=DELAYED");
7757 return 1;
7758 }
7759
7760 WARN_ON(max_sectors == 0);
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 */
7766 scale = 10;
7767 if (sizeof(sector_t) > sizeof(unsigned long)) {
7768 while ( max_sectors/2 > (1ULL<<(scale+32)))
7769 scale++;
7770 }
7771 res = (resync>>scale)*1000;
7772 sector_div(res, (u32)((max_sectors>>scale)+1));
7773
7774 per_milli = res;
7775 {
7776 int i, x = per_milli/50, y = 20-x;
7777 seq_printf(seq, "[");
7778 for (i = 0; i < x; i++)
7779 seq_printf(seq, "=");
7780 seq_printf(seq, ">");
7781 for (i = 0; i < y; i++)
7782 seq_printf(seq, ".");
7783 seq_printf(seq, "] ");
7784 }
7785 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7786 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7787 "reshape" :
7788 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7789 "check" :
7790 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7791 "resync" : "recovery"))),
7792 per_milli/10, per_milli % 10,
7793 (unsigned long long) resync/2,
7794 (unsigned long long) max_sectors/2);
7795
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, which is always 64bit now. We are keeping
	 * the original algorithm, but it is not really necessary.
	 *
	 * Original algorithm:
	 *   So we divide before multiply in case it is 32bit and close
	 *   to the limit.
	 *   We scale the divisor (db) by 32 to avoid losing precision
	 *   near the end of resync when the number of remaining sectors
	 *   is close to the 'window'.
	 *   We then divide rt by 32 after multiplying by db to ensure we
	 *   avoid overflow.
	 */
7813 dt = ((jiffies - mddev->resync_mark) / HZ);
7814 if (!dt) dt++;
7815
7816 curr_mark_cnt = mddev->curr_mark_cnt;
7817 recovery_active = atomic_read(&mddev->recovery_active);
7818 resync_mark_cnt = mddev->resync_mark_cnt;
7819
7820 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
7821 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
7822
7823 rt = max_sectors - resync;
7824 rt = div64_u64(rt, db/32+1);
7825 rt *= dt;
7826 rt >>= 5;
7827
7828 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7829 ((unsigned long)rt % 60)/6);
7830
7831 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7832 return 1;
7833}
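
/*
 * Worked example of the estimate above (illustrative numbers): with
 * dt = 30s since the mark, db = 3072000 sectors written in that window
 * and 1951488000 sectors remaining, rt = (1951488000 / 96001) * 30 >> 5
 * ~= 19056s, printed as "finish=317.6min"; speed = 3072000/2/30 =
 * 51200K/sec.
 */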
7834
7835static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7836{
7837 struct list_head *tmp;
7838 loff_t l = *pos;
7839 struct mddev *mddev;
7840
7841 if (l >= 0x10000)
7842 return NULL;
7843 if (!l--)
		/* header */
7845 return (void*)1;
7846
7847 spin_lock(&all_mddevs_lock);
7848 list_for_each(tmp,&all_mddevs)
7849 if (!l--) {
7850 mddev = list_entry(tmp, struct mddev, all_mddevs);
7851 mddev_get(mddev);
7852 spin_unlock(&all_mddevs_lock);
7853 return mddev;
7854 }
7855 spin_unlock(&all_mddevs_lock);
7856 if (!l--)
		return (void*)2; /* tail */
7858 return NULL;
7859}
7860
7861static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7862{
7863 struct list_head *tmp;
7864 struct mddev *next_mddev, *mddev = v;
7865
7866 ++*pos;
7867 if (v == (void*)2)
7868 return NULL;
7869
7870 spin_lock(&all_mddevs_lock);
7871 if (v == (void*)1)
7872 tmp = all_mddevs.next;
7873 else
7874 tmp = mddev->all_mddevs.next;
7875 if (tmp != &all_mddevs)
7876 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7877 else {
7878 next_mddev = (void*)2;
7879 *pos = 0x10000;
7880 }
7881 spin_unlock(&all_mddevs_lock);
7882
7883 if (v != (void*)1)
7884 mddev_put(mddev);
7885 return next_mddev;
7886
7887}
7888
7889static void md_seq_stop(struct seq_file *seq, void *v)
7890{
7891 struct mddev *mddev = v;
7892
7893 if (mddev && v != (void*)1 && v != (void*)2)
7894 mddev_put(mddev);
7895}
7896
7897static int md_seq_show(struct seq_file *seq, void *v)
7898{
7899 struct mddev *mddev = v;
7900 sector_t sectors;
7901 struct md_rdev *rdev;
7902
7903 if (v == (void*)1) {
7904 struct md_personality *pers;
7905 seq_printf(seq, "Personalities : ");
7906 spin_lock(&pers_lock);
7907 list_for_each_entry(pers, &pers_list, list)
7908 seq_printf(seq, "[%s] ", pers->name);
7909
7910 spin_unlock(&pers_lock);
7911 seq_printf(seq, "\n");
7912 seq->poll_event = atomic_read(&md_event_count);
7913 return 0;
7914 }
7915 if (v == (void*)2) {
7916 status_unused(seq);
7917 return 0;
7918 }
7919
7920 spin_lock(&mddev->lock);
7921 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7922 seq_printf(seq, "%s : %sactive", mdname(mddev),
7923 mddev->pers ? "" : "in");
7924 if (mddev->pers) {
7925 if (mddev->ro==1)
7926 seq_printf(seq, " (read-only)");
7927 if (mddev->ro==2)
7928 seq_printf(seq, " (auto-read-only)");
7929 seq_printf(seq, " %s", mddev->pers->name);
7930 }
7931
7932 sectors = 0;
7933 rcu_read_lock();
7934 rdev_for_each_rcu(rdev, mddev) {
7935 char b[BDEVNAME_SIZE];
7936 seq_printf(seq, " %s[%d]",
7937 bdevname(rdev->bdev,b), rdev->desc_nr);
7938 if (test_bit(WriteMostly, &rdev->flags))
7939 seq_printf(seq, "(W)");
7940 if (test_bit(Journal, &rdev->flags))
7941 seq_printf(seq, "(J)");
7942 if (test_bit(Faulty, &rdev->flags)) {
7943 seq_printf(seq, "(F)");
7944 continue;
7945 }
7946 if (rdev->raid_disk < 0)
7947 seq_printf(seq, "(S)");
7948 if (test_bit(Replacement, &rdev->flags))
7949 seq_printf(seq, "(R)");
7950 sectors += rdev->sectors;
7951 }
7952 rcu_read_unlock();
7953
7954 if (!list_empty(&mddev->disks)) {
7955 if (mddev->pers)
7956 seq_printf(seq, "\n %llu blocks",
7957 (unsigned long long)
7958 mddev->array_sectors / 2);
7959 else
7960 seq_printf(seq, "\n %llu blocks",
7961 (unsigned long long)sectors / 2);
7962 }
7963 if (mddev->persistent) {
7964 if (mddev->major_version != 0 ||
7965 mddev->minor_version != 90) {
7966 seq_printf(seq," super %d.%d",
7967 mddev->major_version,
7968 mddev->minor_version);
7969 }
7970 } else if (mddev->external)
7971 seq_printf(seq, " super external:%s",
7972 mddev->metadata_type);
7973 else
7974 seq_printf(seq, " super non-persistent");
7975
7976 if (mddev->pers) {
7977 mddev->pers->status(seq, mddev);
7978 seq_printf(seq, "\n ");
7979 if (mddev->pers->sync_request) {
7980 if (status_resync(seq, mddev))
7981 seq_printf(seq, "\n ");
7982 }
7983 } else
7984 seq_printf(seq, "\n ");
7985
7986 md_bitmap_status(seq, mddev->bitmap);
7987
7988 seq_printf(seq, "\n");
7989 }
7990 spin_unlock(&mddev->lock);
7991
7992 return 0;
7993}
7994
7995static const struct seq_operations md_seq_ops = {
7996 .start = md_seq_start,
7997 .next = md_seq_next,
7998 .stop = md_seq_stop,
7999 .show = md_seq_show,
8000};
8001
8002static int md_seq_open(struct inode *inode, struct file *file)
8003{
8004 struct seq_file *seq;
8005 int error;
8006
8007 error = seq_open(file, &md_seq_ops);
8008 if (error)
8009 return error;
8010
8011 seq = file->private_data;
8012 seq->poll_event = atomic_read(&md_event_count);
8013 return error;
8014}
8015
8016static int md_unloading;
8017static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8018{
8019 struct seq_file *seq = filp->private_data;
8020 __poll_t mask;
8021
8022 if (md_unloading)
8023 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8024 poll_wait(filp, &md_event_waiters, wait);
8025
	/* always allow read */
8027 mask = EPOLLIN | EPOLLRDNORM;
8028
8029 if (seq->poll_event != atomic_read(&md_event_count))
8030 mask |= EPOLLERR | EPOLLPRI;
8031 return mask;
8032}
8033
8034static const struct file_operations md_seq_fops = {
8035 .owner = THIS_MODULE,
8036 .open = md_seq_open,
8037 .read = seq_read,
8038 .llseek = seq_lseek,
8039 .release = seq_release,
8040 .poll = mdstat_poll,
8041};
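
/*
 * Userspace sketch (illustrative): the poll support above lets a monitor
 * sleep until an array event instead of re-reading /proc/mdstat on a
 * timer; an event is signalled as EPOLLERR|EPOLLPRI.
 *
 *	struct pollfd pfd;
 *	pfd.fd = open("/proc/mdstat", O_RDONLY);
 *	read(pfd.fd, buf, sizeof(buf));		// latch current event count
 *	pfd.events = POLLERR | POLLPRI;
 *	poll(&pfd, 1, -1);			// wakes on md_new_event()
 */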
8042
8043int register_md_personality(struct md_personality *p)
8044{
8045 pr_debug("md: %s personality registered for level %d\n",
8046 p->name, p->level);
8047 spin_lock(&pers_lock);
8048 list_add_tail(&p->list, &pers_list);
8049 spin_unlock(&pers_lock);
8050 return 0;
8051}
8052EXPORT_SYMBOL(register_md_personality);
8053
8054int unregister_md_personality(struct md_personality *p)
8055{
8056 pr_debug("md: %s personality unregistered\n", p->name);
8057 spin_lock(&pers_lock);
8058 list_del_init(&p->list);
8059 spin_unlock(&pers_lock);
8060 return 0;
8061}
8062EXPORT_SYMBOL(unregister_md_personality);
8063
8064int register_md_cluster_operations(struct md_cluster_operations *ops,
8065 struct module *module)
8066{
8067 int ret = 0;
8068 spin_lock(&pers_lock);
8069 if (md_cluster_ops != NULL)
8070 ret = -EALREADY;
8071 else {
8072 md_cluster_ops = ops;
8073 md_cluster_mod = module;
8074 }
8075 spin_unlock(&pers_lock);
8076 return ret;
8077}
8078EXPORT_SYMBOL(register_md_cluster_operations);
8079
8080int unregister_md_cluster_operations(void)
8081{
8082 spin_lock(&pers_lock);
8083 md_cluster_ops = NULL;
8084 spin_unlock(&pers_lock);
8085 return 0;
8086}
8087EXPORT_SYMBOL(unregister_md_cluster_operations);
8088
8089int md_setup_cluster(struct mddev *mddev, int nodes)
8090{
8091 if (!md_cluster_ops)
8092 request_module("md-cluster");
8093 spin_lock(&pers_lock);
	/* ensure module won't be unloaded */
8095 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8097 spin_unlock(&pers_lock);
8098 return -ENOENT;
8099 }
8100 spin_unlock(&pers_lock);
8101
8102 return md_cluster_ops->join(mddev, nodes);
8103}
8104
8105void md_cluster_stop(struct mddev *mddev)
8106{
8107 if (!md_cluster_ops)
8108 return;
8109 md_cluster_ops->leave(mddev);
8110 module_put(md_cluster_mod);
8111}
8112
8113static int is_mddev_idle(struct mddev *mddev, int init)
8114{
8115 struct md_rdev *rdev;
8116 int idle;
8117 int curr_events;
8118
8119 idle = 1;
8120 rcu_read_lock();
8121 rdev_for_each_rcu(rdev, mddev) {
8122 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8123 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8124 atomic_read(&disk->sync_io);
8125
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_events to curr_events, then all that activity
		 * completing might make the array appear non-idle and
		 * resync will be slowed down even though there might not
		 * be any non-sync activity.  This will only happen once
		 * though: 'last_events' will soon reflect the state of the
		 * array.  The comparison below also allows a fuzz of 64
		 * sectors so that bookkeeping noise does not mark the
		 * array busy.
		 */
8147 if (init || curr_events - rdev->last_events > 64) {
8148 rdev->last_events = curr_events;
8149 idle = 0;
8150 }
8151 }
8152 rcu_read_unlock();
8153 return idle;
8154}
8155
8156void md_done_sync(struct mddev *mddev, int blocks, int ok)
8157{
	/* another "blocks" (512byte) blocks have been synced */
8159 atomic_sub(blocks, &mddev->recovery_active);
8160 wake_up(&mddev->recovery_wait);
8161 if (!ok) {
8162 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8163 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8164 md_wakeup_thread(mddev->thread);
		// stop recovery, signal do_sync ....
8166 }
8167}
8168EXPORT_SYMBOL(md_done_sync);
8169
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 * A return value of 'false' means that the write wasn't recorded
 * and cannot proceed as the array is being suspended.
 */
8177bool md_write_start(struct mddev *mddev, struct bio *bi)
8178{
8179 int did_change = 0;
8180
8181 if (bio_data_dir(bi) != WRITE)
8182 return true;
8183
8184 BUG_ON(mddev->ro == 1);
8185 if (mddev->ro == 2) {
		/* need to switch to read/write */
8187 mddev->ro = 0;
8188 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8189 md_wakeup_thread(mddev->thread);
8190 md_wakeup_thread(mddev->sync_thread);
8191 did_change = 1;
8192 }
8193 rcu_read_lock();
8194 percpu_ref_get(&mddev->writes_pending);
	smp_mb(); /* Match smp_mb in set_in_sync() */
8196 if (mddev->safemode == 1)
8197 mddev->safemode = 0;
	/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8199 if (mddev->in_sync || mddev->sync_checkers) {
8200 spin_lock(&mddev->lock);
8201 if (mddev->in_sync) {
8202 mddev->in_sync = 0;
8203 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8204 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8205 md_wakeup_thread(mddev->thread);
8206 did_change = 1;
8207 }
8208 spin_unlock(&mddev->lock);
8209 }
8210 rcu_read_unlock();
8211 if (did_change)
8212 sysfs_notify_dirent_safe(mddev->sysfs_state);
8213 if (!mddev->has_superblocks)
8214 return true;
8215 wait_event(mddev->sb_wait,
8216 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8217 mddev->suspended);
8218 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8219 percpu_ref_put(&mddev->writes_pending);
8220 return false;
8221 }
8222 return true;
8223}
8224EXPORT_SYMBOL(md_write_start);
8225
/* md_write_inc can only be called when md_write_start() has
 * already been called at least once for the current request.
 * It increments the counter and is useful when a single request
 * is split into several parts.  Each part causes an increment and
 * so needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call md_write_inc() inside
 * a spinlocked region.
 */
8234void md_write_inc(struct mddev *mddev, struct bio *bi)
8235{
8236 if (bio_data_dir(bi) != WRITE)
8237 return;
8238 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8239 percpu_ref_get(&mddev->writes_pending);
8240}
8241EXPORT_SYMBOL(md_write_inc);
8242
8243void md_write_end(struct mddev *mddev)
8244{
8245 percpu_ref_put(&mddev->writes_pending);
8246
8247 if (mddev->safemode == 2)
8248 md_wakeup_thread(mddev->thread);
8249 else if (mddev->safemode_delay)
		/* The roundup() ensures this only performs locking once
		 * every ->safemode_delay jiffies
		 */
8253 mod_timer(&mddev->safemode_timer,
8254 roundup(jiffies, mddev->safemode_delay) +
8255 mddev->safemode_delay);
8256}
8257
8258EXPORT_SYMBOL(md_write_end);
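
/*
 * Pairing sketch (hypothetical personality code): md_write_start() is
 * called from the make_request path and may block on a superblock
 * update; md_write_end() balances it from the endio path.
 *
 *	if (!md_write_start(mddev, bio))
 *		return false;		// array is being suspended
 *	...submit the write...
 *	md_write_end(mddev);		// when the write completes
 */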
8259
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 */
8266void md_allow_write(struct mddev *mddev)
8267{
8268 if (!mddev->pers)
8269 return;
8270 if (mddev->ro)
8271 return;
8272 if (!mddev->pers->sync_request)
8273 return;
8274
8275 spin_lock(&mddev->lock);
8276 if (mddev->in_sync) {
8277 mddev->in_sync = 0;
8278 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8279 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8280 if (mddev->safemode_delay &&
8281 mddev->safemode == 0)
8282 mddev->safemode = 1;
8283 spin_unlock(&mddev->lock);
8284 md_update_sb(mddev, 0);
8285 sysfs_notify_dirent_safe(mddev->sysfs_state);
		/* wait for the dirty state to be recorded in the metadata */
8287 wait_event(mddev->sb_wait,
8288 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8289 } else
8290 spin_unlock(&mddev->lock);
8291}
8292EXPORT_SYMBOL_GPL(md_allow_write);
8293
8294#define SYNC_MARKS 10
8295#define SYNC_MARK_STEP (3*HZ)
8296#define UPDATE_FREQUENCY (5*60*HZ)
8297void md_do_sync(struct md_thread *thread)
8298{
8299 struct mddev *mddev = thread->mddev;
8300 struct mddev *mddev2;
8301 unsigned int currspeed = 0, window;
8302 sector_t max_sectors,j, io_sectors, recovery_done;
8303 unsigned long mark[SYNC_MARKS];
8304 unsigned long update_time;
8305 sector_t mark_cnt[SYNC_MARKS];
8306 int last_mark,m;
8307 struct list_head *tmp;
8308 sector_t last_check;
8309 int skipped = 0;
8310 struct md_rdev *rdev;
8311 char *desc, *action = NULL;
8312 struct blk_plug plug;
8313 int ret;
8314
	/* just in case thread restarts... */
8316 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8317 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8318 return;
	if (mddev->ro) {/* never try to sync a read-only array */
8320 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8321 return;
8322 }
8323
8324 if (mddev_is_clustered(mddev)) {
8325 ret = md_cluster_ops->resync_start(mddev);
8326 if (ret)
8327 goto skip;
8328
8329 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8330 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8331 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8332 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8333 && ((unsigned long long)mddev->curr_resync_completed
8334 < (unsigned long long)mddev->resync_max_sectors))
8335 goto skip;
8336 }
8337
8338 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8339 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8340 desc = "data-check";
8341 action = "check";
8342 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8343 desc = "requested-resync";
8344 action = "repair";
8345 } else
8346 desc = "resync";
8347 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8348 desc = "reshape";
8349 else
8350 desc = "recovery";
8351
8352 mddev->last_sync_action = action ?: desc;
8353
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * >= ours.  If two arrays performed this check at the same time
	 * they could deadlock, so the lower-addressed mddev yields by
	 * dropping curr_resync to 1 and retrying until the conflict is
	 * resolved.
	 */
8370 do {
8371 int mddev2_minor = -1;
8372 mddev->curr_resync = 2;
8373
8374 try_again:
8375 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8376 goto skip;
8377 for_each_mddev(mddev2, tmp) {
8378 if (mddev2 == mddev)
8379 continue;
8380 if (!mddev->parallel_resync
8381 && mddev2->curr_resync
8382 && match_mddev_units(mddev, mddev2)) {
8383 DEFINE_WAIT(wq);
8384 if (mddev < mddev2 && mddev->curr_resync == 2) {
				/* arbitrarily yield */
8386 mddev->curr_resync = 1;
8387 wake_up(&resync_wait);
8388 }
8389 if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here, we can wait the next
				 * time 'round when curr_resync == 2
				 */
8393 continue;
8394
			/* We need to wait 'interruptible' so as not to
			 * contribute to the load average, and not to
			 * be caught by 'softlockup'
			 */
8398 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8399 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8400 mddev2->curr_resync >= mddev->curr_resync) {
8401 if (mddev2_minor != mddev2->md_minor) {
8402 mddev2_minor = mddev2->md_minor;
8403 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8404 desc, mdname(mddev),
8405 mdname(mddev2));
8406 }
8407 mddev_put(mddev2);
8408 if (signal_pending(current))
8409 flush_signals(current);
8410 schedule();
8411 finish_wait(&resync_wait, &wq);
8412 goto try_again;
8413 }
8414 finish_wait(&resync_wait, &wq);
8415 }
8416 }
8417 } while (mddev->curr_resync < 2);
8418
8419 j = 0;
8420 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
8424 max_sectors = mddev->resync_max_sectors;
8425 atomic64_set(&mddev->resync_mismatches, 0);
		/* we don't use the checkpoint if there's a bitmap */
8427 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8428 j = mddev->resync_min;
8429 else if (!mddev->bitmap)
8430 j = mddev->recovery_cp;
8431
8432 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8433 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restarting the reshape
		 * from the very beginning
		 */
8439 if (mddev_is_clustered(mddev) &&
8440 mddev->reshape_position != MaxSector)
8441 j = mddev->reshape_position;
8442 } else {
		/* recovery follows the physical size of devices */
8444 max_sectors = mddev->dev_sectors;
8445 j = MaxSector;
8446 rcu_read_lock();
8447 rdev_for_each_rcu(rdev, mddev)
8448 if (rdev->raid_disk >= 0 &&
8449 !test_bit(Journal, &rdev->flags) &&
8450 !test_bit(Faulty, &rdev->flags) &&
8451 !test_bit(In_sync, &rdev->flags) &&
8452 rdev->recovery_offset < j)
8453 j = rdev->recovery_offset;
8454 rcu_read_unlock();
8455
		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
8464 if (mddev->bitmap) {
8465 mddev->pers->quiesce(mddev, 1);
8466 mddev->pers->quiesce(mddev, 0);
8467 }
8468 }
8469
8470 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8471 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8472 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8473 speed_max(mddev), desc);
8474
	is_mddev_idle(mddev, 1); /* this initializes IO event counters */
8476
8477 io_sectors = 0;
8478 for (m = 0; m < SYNC_MARKS; m++) {
8479 mark[m] = jiffies;
8480 mark_cnt[m] = io_sectors;
8481 }
8482 last_mark = 0;
8483 mddev->resync_mark = mark[last_mark];
8484 mddev->resync_mark_cnt = mark_cnt[last_mark];
8485
	/*
	 * Tune reconstruction:
	 */
8489 window = 32 * (PAGE_SIZE / 512);
8490 pr_debug("md: using %dk window, over a total of %lluk.\n",
8491 window/2, (unsigned long long)max_sectors/2);
8492
8493 atomic_set(&mddev->recovery_active, 0);
8494 last_check = 0;
8495
8496 if (j>2) {
8497 pr_debug("md: resuming %s of %s from checkpoint.\n",
8498 desc, mdname(mddev));
8499 mddev->curr_resync = j;
8500 } else
8501 mddev->curr_resync = 3;
8502 mddev->curr_resync_completed = j;
8503 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8504 md_new_event(mddev);
8505 update_time = jiffies;
8506
8507 blk_start_plug(&plug);
8508 while (j < max_sectors) {
8509 sector_t sectors;
8510
8511 skipped = 0;
8512
8513 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8514 ((mddev->curr_resync > mddev->curr_resync_completed &&
8515 (mddev->curr_resync - mddev->curr_resync_completed)
8516 > (max_sectors >> 4)) ||
8517 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8518 (j - mddev->curr_resync_completed)*2
8519 >= mddev->resync_max - mddev->curr_resync_completed ||
8520 mddev->curr_resync_completed > mddev->resync_max
8521 )) {
			/* time to update curr_resync_completed */
8523 wait_event(mddev->recovery_wait,
8524 atomic_read(&mddev->recovery_active) == 0);
8525 mddev->curr_resync_completed = j;
8526 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8527 j > mddev->recovery_cp)
8528 mddev->recovery_cp = j;
8529 update_time = jiffies;
8530 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8531 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8532 }
8533
8534 while (j >= mddev->resync_max &&
8535 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
			 */
			flush_signals(current); /* often free someone from a wait */
8541 wait_event_interruptible(mddev->recovery_wait,
8542 mddev->resync_max > j
8543 || test_bit(MD_RECOVERY_INTR,
8544 &mddev->recovery));
8545 }
8546
8547 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8548 break;
8549
8550 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8551 if (sectors == 0) {
8552 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8553 break;
8554 }
8555
8556 if (!skipped) {
8557 io_sectors += sectors;
8558 atomic_add(sectors, &mddev->recovery_active);
8559 }
8560
8561 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8562 break;
8563
8564 j += sectors;
8565 if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
8567 j = max_sectors;
8568 if (j > 2)
8569 mddev->curr_resync = j;
8570 mddev->curr_mark_cnt = io_sectors;
8571 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
8575 md_new_event(mddev);
8576
8577 if (last_check + window > io_sectors || j == max_sectors)
8578 continue;
8579
8580 last_check = io_sectors;
8581 repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP)) {
			/* step marks */
8584 int next = (last_mark+1) % SYNC_MARKS;
8585
8586 mddev->resync_mark = mark[next];
8587 mddev->resync_mark_cnt = mark_cnt[next];
8588 mark[next] = jiffies;
8589 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8590 last_mark = next;
8591 }
8592
8593 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8594 break;
8595
		/*
		 * this loop exits only if either when we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8604 cond_resched();
8605
8606 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8607 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8608 /((jiffies-mddev->resync_mark)/HZ +1) +1;
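
		/*
		 * Example (illustrative): 12200000 sectors completed in the
		 * ~60s since the mark gives 12200000/2 / (60+1) + 1 ~=
		 * 100001K/sec, which is then held between speed_min() and
		 * speed_max() below.
		 */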
8609
8610 if (currspeed > speed_min(mddev)) {
8611 if (currspeed > speed_max(mddev)) {
8612 msleep(500);
8613 goto repeat;
8614 }
8615 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8620 wait_event(mddev->recovery_wait,
8621 !atomic_read(&mddev->recovery_active));
8622 }
8623 }
8624 }
	pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
8626 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8627 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8631 blk_finish_plug(&plug);
8632 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8633
8634 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8635 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8636 mddev->curr_resync > 3) {
8637 mddev->curr_resync_completed = mddev->curr_resync;
8638 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8639 }
8640 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8641
8642 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8643 mddev->curr_resync > 3) {
8644 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8645 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8646 if (mddev->curr_resync >= mddev->recovery_cp) {
8647 pr_debug("md: checkpointing %s of %s.\n",
8648 desc, mdname(mddev));
8649 if (test_bit(MD_RECOVERY_ERROR,
8650 &mddev->recovery))
8651 mddev->recovery_cp =
8652 mddev->curr_resync_completed;
8653 else
8654 mddev->recovery_cp =
8655 mddev->curr_resync;
8656 }
8657 } else
8658 mddev->recovery_cp = MaxSector;
8659 } else {
8660 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8661 mddev->curr_resync = MaxSector;
8662 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8663 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8664 rcu_read_lock();
8665 rdev_for_each_rcu(rdev, mddev)
8666 if (rdev->raid_disk >= 0 &&
8667 mddev->delta_disks >= 0 &&
8668 !test_bit(Journal, &rdev->flags) &&
8669 !test_bit(Faulty, &rdev->flags) &&
8670 !test_bit(In_sync, &rdev->flags) &&
8671 rdev->recovery_offset < mddev->curr_resync)
8672 rdev->recovery_offset = mddev->curr_resync;
8673 rcu_read_unlock();
8674 }
8675 }
8676 }
8677 skip:
	/* set CHANGE_PENDING here since maybe another update is needed,
	 * so other nodes are informed. It should be harmless for normal
	 * raid */
8681 set_mask_bits(&mddev->sb_flags, 0,
8682 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8683
8684 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8685 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8686 mddev->delta_disks > 0 &&
8687 mddev->pers->finish_reshape &&
8688 mddev->pers->size &&
8689 mddev->queue) {
8690 mddev_lock_nointr(mddev);
8691 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8692 mddev_unlock(mddev);
8693 if (!mddev_is_clustered(mddev)) {
8694 set_capacity(mddev->gendisk, mddev->array_sectors);
8695 revalidate_disk(mddev->gendisk);
8696 }
8697 }
8698
8699 spin_lock(&mddev->lock);
8700 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
8702 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8703 mddev->resync_min = 0;
8704 mddev->resync_max = MaxSector;
8705 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8706 mddev->resync_min = mddev->curr_resync_completed;
8707 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8708 mddev->curr_resync = 0;
8709 spin_unlock(&mddev->lock);
8710
8711 wake_up(&resync_wait);
8712 md_wakeup_thread(mddev->thread);
8713 return;
8714}
8715EXPORT_SYMBOL_GPL(md_do_sync);
8716
8717static int remove_and_add_spares(struct mddev *mddev,
8718 struct md_rdev *this)
8719{
8720 struct md_rdev *rdev;
8721 int spares = 0;
8722 int removed = 0;
8723 bool remove_some = false;
8724
8725 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		/* Mustn't remove devices when resync thread is running */
8727 return 0;
8728
8729 rdev_for_each(rdev, mddev) {
8730 if ((this == NULL || rdev == this) &&
8731 rdev->raid_disk >= 0 &&
8732 !test_bit(Blocked, &rdev->flags) &&
8733 test_bit(Faulty, &rdev->flags) &&
8734 atomic_read(&rdev->nr_pending)==0) {
			/* Candidate for removal: flag it now; after an RCU
			 * grace period (synchronize_rcu() below) no reader
			 * can still see it as active, and it can safely be
			 * handed to ->hot_remove_disk().
			 */
8740 remove_some = true;
8741 set_bit(RemoveSynchronized, &rdev->flags);
8742 }
8743 }
8744
8745 if (remove_some)
8746 synchronize_rcu();
8747 rdev_for_each(rdev, mddev) {
8748 if ((this == NULL || rdev == this) &&
8749 rdev->raid_disk >= 0 &&
8750 !test_bit(Blocked, &rdev->flags) &&
8751 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8752 (!test_bit(In_sync, &rdev->flags) &&
8753 !test_bit(Journal, &rdev->flags))) &&
8754 atomic_read(&rdev->nr_pending)==0)) {
8755 if (mddev->pers->hot_remove_disk(
8756 mddev, rdev) == 0) {
8757 sysfs_unlink_rdev(mddev, rdev);
8758 rdev->saved_raid_disk = rdev->raid_disk;
8759 rdev->raid_disk = -1;
8760 removed++;
8761 }
8762 }
8763 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8764 clear_bit(RemoveSynchronized, &rdev->flags);
8765 }
8766
8767 if (removed && mddev->kobj.sd)
8768 sysfs_notify(&mddev->kobj, NULL, "degraded");
8769
8770 if (this && removed)
8771 goto no_add;
8772
8773 rdev_for_each(rdev, mddev) {
8774 if (this && this != rdev)
8775 continue;
8776 if (test_bit(Candidate, &rdev->flags))
8777 continue;
8778 if (rdev->raid_disk >= 0 &&
8779 !test_bit(In_sync, &rdev->flags) &&
8780 !test_bit(Journal, &rdev->flags) &&
8781 !test_bit(Faulty, &rdev->flags))
8782 spares++;
8783 if (rdev->raid_disk >= 0)
8784 continue;
8785 if (test_bit(Faulty, &rdev->flags))
8786 continue;
8787 if (!test_bit(Journal, &rdev->flags)) {
8788 if (mddev->ro &&
8789 ! (rdev->saved_raid_disk >= 0 &&
8790 !test_bit(Bitmap_sync, &rdev->flags)))
8791 continue;
8792
8793 rdev->recovery_offset = 0;
8794 }
8795 if (mddev->pers->
8796 hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
8799 if (!test_bit(Journal, &rdev->flags))
8800 spares++;
8801 md_new_event(mddev);
8802 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8803 }
8804 }
8805no_add:
8806 if (removed)
8807 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8808 return spares;
8809}
8810
8811static void md_start_sync(struct work_struct *ws)
8812{
8813 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8814
8815 mddev->sync_thread = md_register_thread(md_do_sync,
8816 mddev,
8817 "resync");
8818 if (!mddev->sync_thread) {
8819 pr_warn("%s: could not start resync thread...\n",
8820 mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
8822 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8823 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8824 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8825 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8826 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8827 wake_up(&resync_wait);
8828 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8829 &mddev->recovery))
8830 if (mddev->sysfs_action)
8831 sysfs_notify_dirent_safe(mddev->sysfs_action);
8832 } else
8833 md_wakeup_thread(mddev->sync_thread);
8834 sysfs_notify_dirent_safe(mddev->sysfs_action);
8835 md_new_event(mddev);
8836}
8837
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there are any faulty devices, remove them.
 *  5/ If array is degraded, try to add spares.
 *  6/ If array has spares or is not in-sync, start a resync thread.
 */
8860void md_check_recovery(struct mddev *mddev)
8861{
8862 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
8866 set_bit(MD_UPDATING_SB, &mddev->flags);
8867 smp_mb__after_atomic();
8868 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8869 md_update_sb(mddev, 0);
8870 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8871 wake_up(&mddev->sb_wait);
8872 }
8873
8874 if (mddev->suspended)
8875 return;
8876
8877 if (mddev->bitmap)
8878 md_bitmap_daemon_work(mddev);
8879
8880 if (signal_pending(current)) {
8881 if (mddev->pers->sync_request && !mddev->external) {
8882 pr_debug("md: %s in immediate safe mode\n",
8883 mdname(mddev));
8884 mddev->safemode = 2;
8885 }
8886 flush_signals(current);
8887 }
8888
8889 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8890 return;
8891 if ( ! (
8892 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8893 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8894 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8895 (mddev->external == 0 && mddev->safemode == 1) ||
8896 (mddev->safemode == 2
8897 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8898 ))
8899 return;
8900
8901 if (mddev_trylock(mddev)) {
8902 int spares = 0;
8903
8904 if (!mddev->external && mddev->safemode == 1)
8905 mddev->safemode = 0;
8906
8907 if (mddev->ro) {
8908 struct md_rdev *rdev;
8909 if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
8915 rdev_for_each(rdev, mddev)
8916 clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
8924 remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
8928 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8929 md_reap_sync_thread(mddev);
8930 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8931 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8932 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8933 goto unlock;
8934 }
8935
8936 if (mddev_is_clustered(mddev)) {
8937 struct md_rdev *rdev;
			/* kick the device if another node issued a
			 * remove disk.
			 */
8941 rdev_for_each(rdev, mddev) {
8942 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8943 rdev->raid_disk < 0)
8944 md_kick_rdev_from_array(rdev);
8945 }
8946 }
8947
8948 if (!mddev->external && !mddev->in_sync) {
8949 spin_lock(&mddev->lock);
8950 set_in_sync(mddev);
8951 spin_unlock(&mddev->lock);
8952 }
8953
8954 if (mddev->sb_flags)
8955 md_update_sb(mddev, 0);
8956
8957 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8958 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
8960 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8961 goto unlock;
8962 }
8963 if (mddev->sync_thread) {
8964 md_reap_sync_thread(mddev);
8965 goto unlock;
8966 }
8967
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
8970 mddev->curr_resync_completed = 0;
8971 spin_lock(&mddev->lock);
8972 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8973 spin_unlock(&mddev->lock);
8974
		/* Clear some bits that don't mean anything, but
		 * might be left set
		 */
8977 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8978 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8979
8980 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8981 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8982 goto not_running;
8983
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

8990 if (mddev->reshape_position != MaxSector) {
8991 if (mddev->pers->check_reshape == NULL ||
8992 mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
8994 goto not_running;
8995 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8996 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8997 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8998 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8999 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9000 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9001 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9002 } else if (mddev->recovery_cp < MaxSector) {
9003 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9004 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9005 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
9007 goto not_running;
9008
9009 if (mddev->pers->sync_request) {
9010 if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
9015 md_bitmap_write_all(mddev->bitmap);
9016 }
9017 INIT_WORK(&mddev->del_work, md_start_sync);
9018 queue_work(md_misc_wq, &mddev->del_work);
9019 goto unlock;
9020 }
9021 not_running:
9022 if (!mddev->sync_thread) {
9023 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9024 wake_up(&resync_wait);
9025 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9026 &mddev->recovery))
9027 if (mddev->sysfs_action)
9028 sysfs_notify_dirent_safe(mddev->sysfs_action);
9029 }
9030 unlock:
9031 wake_up(&mddev->sb_wait);
9032 mddev_unlock(mddev);
9033 }
9034}
9035EXPORT_SYMBOL(md_check_recovery);
9036
9037void md_reap_sync_thread(struct mddev *mddev)
9038{
9039 struct md_rdev *rdev;
9040 sector_t old_dev_sectors = mddev->dev_sectors;
9041 bool is_reshaped = false;
9042
	/* resync has finished, collect result */
9044 md_unregister_thread(&mddev->sync_thread);
9045 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9046 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success...*/
		/* activate any spares */
9049 if (mddev->pers->spare_active(mddev)) {
9050 sysfs_notify(&mddev->kobj, NULL,
9051 "degraded");
9052 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9053 }
9054 }
9055 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9056 mddev->pers->finish_reshape) {
9057 mddev->pers->finish_reshape(mddev);
9058 if (mddev_is_clustered(mddev))
9059 is_reshaped = true;
9060 }
9061
	/* If array is no-longer degraded, then any saved_raid_disk
	 * information must be scrapped.
	 */
9065 if (!mddev->degraded)
9066 rdev_for_each(rdev, mddev)
9067 rdev->saved_raid_disk = -1;
9068
9069 md_update_sb(mddev, 1);
9070
	/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
9073 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9074 md_cluster_ops->resync_finish(mddev);
9075 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9076 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9077 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9078 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9079 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9080 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9081
9082
9083
9084
9085
9086 if (mddev_is_clustered(mddev) && is_reshaped
9087 && !test_bit(MD_CLOSING, &mddev->flags))
9088 md_cluster_ops->update_size(mddev, old_dev_sectors);
9089 wake_up(&resync_wait);
9090
9091 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9092 sysfs_notify_dirent_safe(mddev->sysfs_action);
9093 md_new_event(mddev);
9094 if (mddev->event_work.func)
9095 queue_work(md_misc_wq, &mddev->event_work);
9096}
9097EXPORT_SYMBOL(md_reap_sync_thread);
9098
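/*
 * md_wait_for_blocked_rdev() - wait (up to five seconds) for a device to
 * leave the Blocked/BlockedBadBlocks state, then drop the pending reference
 * taken by the caller.
 */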
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags) &&
			   !test_bit(BlockedBadBlocks, &rdev->flags),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);

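/*
 * md_finish_reshape() - called by a personality when a reshape completes.
 * Folds any change of data_offset into rdev->sectors and makes the new
 * offset permanent.
 */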
void md_finish_reshape(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		if (rdev->data_offset > rdev->new_data_offset)
			rdev->sectors += rdev->data_offset - rdev->new_data_offset;
		else
			rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
		rdev->data_offset = rdev->new_data_offset;
	}
}
EXPORT_SYMBOL(md_finish_reshape);

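/* Bad block management */

/* Returns 1 on success, 0 on failure */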
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		       int is_new)
{
	struct mddev *mddev = rdev->mddev;
	int rv;

	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
	if (rv == 0) {
		/* Make sure they get written out promptly */
		if (test_bit(ExternalBbl, &rdev->flags))
			sysfs_notify(&rdev->kobj, NULL,
				     "unacknowledged_bad_blocks");
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(rdev->mddev->thread);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);

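/*
 * rdev_clear_badblocks() - remove a range from the bad block list.
 * Returns the result of badblocks_clear() (0 on success).
 */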
int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			 int is_new)
{
	int rv;

	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_clear(&rdev->badblocks, s, sectors);
	if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
		sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
	return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);

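/*
 * md_notify_reboot() - reboot notifier.  Stops writes on every array we can
 * lock so that the superblocks are consistent before the machine goes down.
 */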
static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	struct mddev *mddev;
	int need_delay = 0;

	for_each_mddev(mddev, tmp) {
		if (mddev_trylock(mddev)) {
			if (mddev->pers)
				__md_stop_writes(mddev);
			if (mddev->persistent)
				mddev->safemode = 2;
			mddev_unlock(mddev);
		}
		need_delay = 1;
	}

	/*
	 * Certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots.  While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
	if (need_delay)
		mdelay(1000);

	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}

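/*
 * md_init() - module initialisation: create the workqueues, register the
 * "md" and "mdp" block majors and their regions, and hook up the reboot
 * notifier, sysctl table and /proc/mdstat.
 */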
static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	ret = register_blkdev(MD_MAJOR, "md");
	if (ret < 0)
		goto err_md;

	ret = register_blkdev(0, "mdp");
	if (ret < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

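/*
 * check_sb_changes() - apply superblock changes made by another cluster
 * node: resize, role changes (spare activation / device failure) and
 * reshape progress.
 */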
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If the size was changed on another node, we need to
	 * do the resize here as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each(rdev2, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the roles changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
				pr_info("md: Removing Candidate device %s because add failed\n",
					bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			}
			clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/* Activate the spare, unless a reshape is underway. */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* Wake up mddev->thread here, so the array
				 * can resync with the newly activated disk.
				 */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* Device faulty.
			 * We just want to do the minimum to mark the disk
			 * as faulty.  The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
		update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/*
	 * Since mddev->delta_disks has already been updated in
	 * update_raid_disks, it is time to check the reshape state.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * A reshape is happening on the remote node, so we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = le64_to_cpu(sb->reshape_position);
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* The remote node is stopping its reshape */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event count to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

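/*
 * read_rdev() - re-read the superblock of one device, restoring the old
 * page if the read fails.
 */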
static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Keep the old sb page in the swapout temporary so that it
	 * can be restored if reloading fails below.
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
			__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);
	/* Pick up any recovery_offset recorded by the other node */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * the device In_sync and update mddev->degraded.
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify(&mddev->kobj, NULL, "degraded");

	put_page(swapout);
	return 0;
}

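/*
 * md_reload_sb() - reload the superblock of the device with descriptor
 * number @nr (typically on behalf of md-cluster, after another node has
 * updated it) and apply any resulting changes to the array.
 */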
void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
			break;
	}

	if (!rdev || rdev->desc_nr != nr) {
		pr_warn("%s: %d Could not find rdev with nr %d\n",
			__func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdevs to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

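/*
 * md_autodetect_dev() - queue a partition detected at boot for later
 * assembly by autostart_arrays().
 */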
void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

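/*
 * autostart_arrays() - import every queued device and let autorun_devices()
 * assemble them into arrays.
 */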
static void autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					       struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		/* Drop the mutex while importing the device */
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}

#endif /* !MODULE */

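/*
 * md_exit() - module teardown: unregister everything md_init() registered,
 * wake any waiters on /proc/mdstat, and export all arrays.
 */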
static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	blk_unregister_region(MKDEV(MD_MAJOR, 0), 512);
	blk_unregister_region(MKDEV(mdp_major, 0), 1U << MINORBITS);

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up.
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit);

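/* Handlers for the "start_ro" module parameter */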
static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}

static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);