// SPDX-License-Identifier: GPL-2.0-or-later
/*
   md.c : Multiple Devices driver for Linux
     Copyright (C) 1998, 1999, 2000 Ingo Molnar
 */

#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/*
 * pers_list holds every registered md personality.  pers_lock protects
 * the list, and also guards accesses to mddev->thread in places where
 * the reconfig mutex cannot be held.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
                                 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.  The count is halved for
 * every hour that elapses between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed.  Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle.  There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
116static inline int speed_min(struct mddev *mddev)
117{
118 return mddev->sync_speed_min ?
119 mddev->sync_speed_min : sysctl_speed_limit_min;
120}
121
122static inline int speed_max(struct mddev *mddev)
123{
124 return mddev->sync_speed_max ?
125 mddev->sync_speed_max : sysctl_speed_limit_max;
126}
127
128static void rdev_uninit_serial(struct md_rdev *rdev)
129{
130 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
131 return;
132
133 kvfree(rdev->serial);
134 rdev->serial = NULL;
135}
136
137static void rdevs_uninit_serial(struct mddev *mddev)
138{
139 struct md_rdev *rdev;
140
141 rdev_for_each(rdev, mddev)
142 rdev_uninit_serial(rdev);
143}
144
145static int rdev_init_serial(struct md_rdev *rdev)
{
        /* serial_nums equals BARRIER_BUCKETS_NR */
        int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
149 struct serial_in_rdev *serial = NULL;
150
151 if (test_bit(CollisionCheck, &rdev->flags))
152 return 0;
153
154 serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
155 GFP_KERNEL);
156 if (!serial)
157 return -ENOMEM;
158
159 for (i = 0; i < serial_nums; i++) {
160 struct serial_in_rdev *serial_tmp = &serial[i];
161
162 spin_lock_init(&serial_tmp->serial_lock);
163 serial_tmp->serial_rb = RB_ROOT_CACHED;
164 init_waitqueue_head(&serial_tmp->serial_io_wait);
165 }
166
167 rdev->serial = serial;
168 set_bit(CollisionCheck, &rdev->flags);
169
170 return 0;
171}
172
173static int rdevs_init_serial(struct mddev *mddev)
174{
175 struct md_rdev *rdev;
176 int ret = 0;
177
178 rdev_for_each(rdev, mddev) {
179 ret = rdev_init_serial(rdev);
180 if (ret)
181 break;
182 }
183
        /* Free all resources if the pool does not exist yet */
        if (ret && !mddev->serial_info_pool)
                rdevs_uninit_serial(mddev);
187
188 return ret;
189}
190

/*
 * rdev needs serialized I/O if it is a multi-queue device flagged
 * WriteMostly and write-behind is enabled for the array.
 */
static int rdev_need_serial(struct md_rdev *rdev)
197{
198 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
199 rdev->bdev->bd_queue->nr_hw_queues != 1 &&
200 test_bit(WriteMostly, &rdev->flags));
201}
202

/*
 * Init resources for rdev(s), then create serial_info_pool if:
 * 1. rdev is the first device that returns true from rdev_need_serial.
 * 2. rdev is NULL, meaning we want to enable serialization for all rdevs.
 */
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
                              bool is_suspend)
210{
211 int ret = 0;
212
213 if (rdev && !rdev_need_serial(rdev) &&
214 !test_bit(CollisionCheck, &rdev->flags))
215 return;
216
217 if (!is_suspend)
218 mddev_suspend(mddev);
219
220 if (!rdev)
221 ret = rdevs_init_serial(mddev);
222 else
223 ret = rdev_init_serial(rdev);
224 if (ret)
225 goto abort;
226
227 if (mddev->serial_info_pool == NULL) {
228 unsigned int noio_flag;
229
230 noio_flag = memalloc_noio_save();
231 mddev->serial_info_pool =
232 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
233 sizeof(struct serial_info));
234 memalloc_noio_restore(noio_flag);
235 if (!mddev->serial_info_pool) {
236 rdevs_uninit_serial(mddev);
237 pr_err("can't alloc memory pool for serialization\n");
238 }
239 }
240
241abort:
242 if (!is_suspend)
243 mddev_resume(mddev);
244}

/*
 * Free resources from rdev(s), and destroy serial_info_pool under conditions:
 * 1. rdev is the last device flagged with CollisionCheck.
 * 2. when the bitmap is destroyed while serialize_policy is not enabled.
 * 3. for disabling the policy, the pool is destroyed only when no rdev
 *    needs it any more.
 */
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
                               bool is_suspend)
254{
255 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
256 return;
257
258 if (mddev->serial_info_pool) {
259 struct md_rdev *temp;
260 int num = 0;
261
262 if (!is_suspend)
263 mddev_suspend(mddev);
264 rdev_for_each(temp, mddev) {
265 if (!rdev) {
266 if (!mddev->serialize_policy ||
267 !rdev_need_serial(temp))
268 rdev_uninit_serial(temp);
269 else
270 num++;
271 } else if (temp != rdev &&
272 test_bit(CollisionCheck, &temp->flags))
273 num++;
274 }
275
276 if (rdev)
277 rdev_uninit_serial(rdev);
278
279 if (num)
280 pr_info("The mempool could be used by other devices\n");
281 else {
282 mempool_destroy(mddev->serial_info_pool);
283 mddev->serial_info_pool = NULL;
284 }
285 if (!is_suspend)
286 mddev_resume(mddev);
287 }
288}
289
290static struct ctl_table_header *raid_table_header;
291
292static struct ctl_table raid_table[] = {
293 {
294 .procname = "speed_limit_min",
295 .data = &sysctl_speed_limit_min,
296 .maxlen = sizeof(int),
297 .mode = S_IRUGO|S_IWUSR,
298 .proc_handler = proc_dointvec,
299 },
300 {
301 .procname = "speed_limit_max",
302 .data = &sysctl_speed_limit_max,
303 .maxlen = sizeof(int),
304 .mode = S_IRUGO|S_IWUSR,
305 .proc_handler = proc_dointvec,
306 },
307 { }
308};
309
310static struct ctl_table raid_dir_table[] = {
311 {
312 .procname = "raid",
313 .maxlen = 0,
314 .mode = S_IRUGO|S_IXUGO,
315 .child = raid_table,
316 },
317 { }
318};
319
320static struct ctl_table raid_root_table[] = {
321 {
322 .procname = "dev",
323 .maxlen = 0,
324 .mode = 0555,
325 .child = raid_dir_table,
326 },
327 { }
328};
329
330static const struct block_device_operations md_fops;
331
332static int start_readonly;

/*
 * The original mechanism for creating an md device is to create a device
 * node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter
 * first, which avoids the races.
 * Setting create_on_open to false disables the original mechanism, so
 * those races disappear.
 */
static bool create_on_open = true;
343
344struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
345 struct mddev *mddev)
346{
347 if (!mddev || !bioset_initialized(&mddev->bio_set))
348 return bio_alloc(gfp_mask, nr_iovecs);
349
350 return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
351}
352EXPORT_SYMBOL_GPL(bio_alloc_mddev);
353
354static struct bio *md_bio_alloc_sync(struct mddev *mddev)
355{
356 if (!mddev || !bioset_initialized(&mddev->sync_set))
357 return bio_alloc(GFP_NOIO, 1);
358
359 return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
360}

/*
 * We have a system-wide 'event count' that is incremented on any
 * 'interesting' event, and readers of /proc/mdstat can use 'poll'
 * or 'select' to find out when the event count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
374void md_new_event(struct mddev *mddev)
375{
376 atomic_inc(&md_event_count);
377 wake_up(&md_event_waiters);
378}
379EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables iteration over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a reference when unlocked.
 * Any code which breaks out of this loop while still holding
 * a reference to the current mddev must mddev_put() it.
 */
#define for_each_mddev(_mddev,_tmp)                                     \
396 \
397 for (({ spin_lock(&all_mddevs_lock); \
398 _tmp = all_mddevs.next; \
399 _mddev = NULL;}); \
400 ({ if (_tmp != &all_mddevs) \
401 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
402 spin_unlock(&all_mddevs_lock); \
403 if (_mddev) mddev_put(_mddev); \
404 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
405 _tmp != &all_mddevs;}); \
406 ({ spin_lock(&all_mddevs_lock); \
407 _tmp = _tmp->next;}) \
408 )

/*
 * Rather than calling directly into the personality's make_request function,
 * IO requests come here first so that we can check if the device is being
 * suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
417static bool is_suspended(struct mddev *mddev, struct bio *bio)
418{
419 if (mddev->suspended)
420 return true;
421 if (bio_data_dir(bio) != WRITE)
422 return false;
423 if (mddev->suspend_lo >= mddev->suspend_hi)
424 return false;
425 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
426 return false;
427 if (bio_end_sector(bio) < mddev->suspend_lo)
428 return false;
429 return true;
430}
431
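/*
 * md_handle_request waits while the relevant region of the array is
 * suspended, then hands the bio to the personality's ->make_request,
 * retrying if the personality asks us to back off.
 */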
432void md_handle_request(struct mddev *mddev, struct bio *bio)
433{
434check_suspended:
435 rcu_read_lock();
436 if (is_suspended(mddev, bio)) {
437 DEFINE_WAIT(__wait);
438 for (;;) {
439 prepare_to_wait(&mddev->sb_wait, &__wait,
440 TASK_UNINTERRUPTIBLE);
441 if (!is_suspended(mddev, bio))
442 break;
443 rcu_read_unlock();
444 schedule();
445 rcu_read_lock();
446 }
447 finish_wait(&mddev->sb_wait, &__wait);
448 }
449 atomic_inc(&mddev->active_io);
450 rcu_read_unlock();
451
452 if (!mddev->pers->make_request(mddev, bio)) {
453 atomic_dec(&mddev->active_io);
454 wake_up(&mddev->sb_wait);
455 goto check_suspended;
456 }
457
458 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
459 wake_up(&mddev->sb_wait);
460}
461EXPORT_SYMBOL(md_handle_request);
462
463static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
464{
465 const int rw = bio_data_dir(bio);
466 const int sgrp = op_stat_group(bio_op(bio));
467 struct mddev *mddev = q->queuedata;
468 unsigned int sectors;
469
470 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
471 bio_io_error(bio);
472 return BLK_QC_T_NONE;
473 }
474
475 blk_queue_split(q, &bio);
476
477 if (mddev == NULL || mddev->pers == NULL) {
478 bio_io_error(bio);
479 return BLK_QC_T_NONE;
480 }
481 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
482 if (bio_sectors(bio) != 0)
483 bio->bi_status = BLK_STS_IOERR;
484 bio_endio(bio);
485 return BLK_QC_T_NONE;
486 }
487
        /*
         * Save the sectors now, since our bio can go away inside
         * make_request.
         */
        sectors = bio_sectors(bio);
        /* bio could be mergeable after passing to underlayer */
        bio->bi_opf &= ~REQ_NOMERGE;
495
496 md_handle_request(mddev, bio);
497
498 part_stat_lock();
499 part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
500 part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
501 part_stat_unlock();
502
503 return BLK_QC_T_NONE;
504}

/*
 * mddev_suspend makes sure no new requests are submitted to the device,
 * and that any requests that have been submitted are completely handled.
 */
512void mddev_suspend(struct mddev *mddev)
513{
514 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
515 lockdep_assert_held(&mddev->reconfig_mutex);
516 if (mddev->suspended++)
517 return;
518 synchronize_rcu();
519 wake_up(&mddev->sb_wait);
520 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
521 smp_mb__after_atomic();
522 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
523 mddev->pers->quiesce(mddev, 1);
524 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
525 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
526
527 del_timer_sync(&mddev->safemode_timer);
528}
529EXPORT_SYMBOL_GPL(mddev_suspend);
530
531void mddev_resume(struct mddev *mddev)
532{
533 lockdep_assert_held(&mddev->reconfig_mutex);
534 if (--mddev->suspended)
535 return;
536 wake_up(&mddev->sb_wait);
537 mddev->pers->quiesce(mddev, 0);
538
539 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
540 md_wakeup_thread(mddev->thread);
541 md_wakeup_thread(mddev->sync_thread);
542}
543EXPORT_SYMBOL_GPL(mddev_resume);
544
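/*
 * Congestion reporting: a suspended array is always treated as congested;
 * otherwise the personality's ->congested method, if any, decides.
 */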
545int mddev_congested(struct mddev *mddev, int bits)
546{
547 struct md_personality *pers = mddev->pers;
548 int ret = 0;
549
550 rcu_read_lock();
551 if (mddev->suspended)
552 ret = 1;
553 else if (pers && pers->congested)
554 ret = pers->congested(mddev, bits);
555 rcu_read_unlock();
556 return ret;
557}
558EXPORT_SYMBOL_GPL(mddev_congested);
559static int md_congested(void *data, int bits)
560{
561 struct mddev *mddev = data;
562 return mddev_congested(mddev, bits);
563}

/*
 * Generic flush handling for md.
 */
569static void md_end_flush(struct bio *bio)
570{
571 struct md_rdev *rdev = bio->bi_private;
572 struct mddev *mddev = rdev->mddev;
573
574 rdev_dec_pending(rdev, mddev);
575
        if (atomic_dec_and_test(&mddev->flush_pending)) {
                /* The pre-request flush has finished */
                queue_work(md_wq, &mddev->flush_work);
        }
580 bio_put(bio);
581}
582
583static void md_submit_flush_data(struct work_struct *ws);
584
585static void submit_flushes(struct work_struct *ws)
586{
587 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
588 struct md_rdev *rdev;
589
590 mddev->start_flush = ktime_get_boottime();
591 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
592 atomic_set(&mddev->flush_pending, 1);
593 rcu_read_lock();
594 rdev_for_each_rcu(rdev, mddev)
595 if (rdev->raid_disk >= 0 &&
596 !test_bit(Faulty, &rdev->flags)) {
                        /*
                         * Take two references: one is dropped when the
                         * request finishes, the other after we reclaim
                         * rcu_read_lock.
                         */
                        struct bio *bi;
602 atomic_inc(&rdev->nr_pending);
603 atomic_inc(&rdev->nr_pending);
604 rcu_read_unlock();
605 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
606 bi->bi_end_io = md_end_flush;
607 bi->bi_private = rdev;
608 bio_set_dev(bi, rdev->bdev);
609 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
610 atomic_inc(&mddev->flush_pending);
611 submit_bio(bi);
612 rcu_read_lock();
613 rdev_dec_pending(rdev, mddev);
614 }
615 rcu_read_unlock();
616 if (atomic_dec_and_test(&mddev->flush_pending))
617 queue_work(md_wq, &mddev->flush_work);
618}
619
620static void md_submit_flush_data(struct work_struct *ws)
621{
622 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
623 struct bio *bio = mddev->flush_bio;

        /*
         * flush_bio must be reset before calling into md_handle_request;
         * otherwise other bios that passed the suspend check in
         * md_handle_request could wait on this flush while this flush
         * waits on them, deadlocking.
         */
631 mddev->last_flush = mddev->start_flush;
632 mddev->flush_bio = NULL;
633 wake_up(&mddev->sb_wait);
634
        if (bio->bi_iter.bi_size == 0) {
                /* an empty barrier - all done */
                bio_endio(bio);
638 } else {
639 bio->bi_opf &= ~REQ_PREFLUSH;
640 md_handle_request(mddev, bio);
641 }
642}

/*
 * Manages consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or was
 * submitted here; returns false if the caller should handle the data
 * part of the bio itself (with REQ_PREFLUSH cleared).
 */
650bool md_flush_request(struct mddev *mddev, struct bio *bio)
651{
652 ktime_t start = ktime_get_boottime();
653 spin_lock_irq(&mddev->lock);
654 wait_event_lock_irq(mddev->sb_wait,
655 !mddev->flush_bio ||
656 ktime_after(mddev->last_flush, start),
657 mddev->lock);
658 if (!ktime_after(mddev->last_flush, start)) {
659 WARN_ON(mddev->flush_bio);
660 mddev->flush_bio = bio;
661 bio = NULL;
662 }
663 spin_unlock_irq(&mddev->lock);
664
665 if (!bio) {
666 INIT_WORK(&mddev->flush_work, submit_flushes);
667 queue_work(md_wq, &mddev->flush_work);
        } else {
                /* flush was performed for some other bio while we waited. */
                if (bio->bi_iter.bi_size == 0)
                        /* an empty barrier - all done */
                        bio_endio(bio);
673 else {
674 bio->bi_opf &= ~REQ_PREFLUSH;
675 return false;
676 }
677 }
678 return true;
679}
680EXPORT_SYMBOL(md_flush_request);
681
682static inline struct mddev *mddev_get(struct mddev *mddev)
683{
684 atomic_inc(&mddev->active);
685 return mddev;
686}
687
688static void mddev_delayed_delete(struct work_struct *ws);
689
690static void mddev_put(struct mddev *mddev)
691{
692 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
693 return;
        if (!mddev->raid_disks && list_empty(&mddev->disks) &&
            mddev->ctime == 0 && !mddev->hold_active) {
                /* Array is not configured at all, and not held active,
                 * so destroy it */
                list_del_init(&mddev->all_mddevs);

                /*
                 * Call queue_work inside the spinlock so that
                 * flush_workqueue() after mddev_find will succeed in
                 * waiting for the work to be done.
                 */
                INIT_WORK(&mddev->del_work, mddev_delayed_delete);
                queue_work(md_misc_wq, &mddev->del_work);
707 }
708 spin_unlock(&all_mddevs_lock);
709}
710
711static void md_safemode_timeout(struct timer_list *t);
712
713void mddev_init(struct mddev *mddev)
714{
715 kobject_init(&mddev->kobj, &md_ktype);
716 mutex_init(&mddev->open_mutex);
717 mutex_init(&mddev->reconfig_mutex);
718 mutex_init(&mddev->bitmap_info.mutex);
719 INIT_LIST_HEAD(&mddev->disks);
720 INIT_LIST_HEAD(&mddev->all_mddevs);
721 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
722 atomic_set(&mddev->active, 1);
723 atomic_set(&mddev->openers, 0);
724 atomic_set(&mddev->active_io, 0);
725 spin_lock_init(&mddev->lock);
726 atomic_set(&mddev->flush_pending, 0);
727 init_waitqueue_head(&mddev->sb_wait);
728 init_waitqueue_head(&mddev->recovery_wait);
729 mddev->reshape_position = MaxSector;
730 mddev->reshape_backwards = 0;
731 mddev->last_sync_action = "none";
732 mddev->resync_min = 0;
733 mddev->resync_max = MaxSector;
734 mddev->level = LEVEL_NONE;
735}
736EXPORT_SYMBOL_GPL(mddev_init);
737
738static struct mddev *mddev_find(dev_t unit)
739{
740 struct mddev *mddev, *new = NULL;
741
742 if (unit && MAJOR(unit) != MD_MAJOR)
743 unit &= ~((1<<MdpMinorShift)-1);
744
745 retry:
746 spin_lock(&all_mddevs_lock);
747
748 if (unit) {
749 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
750 if (mddev->unit == unit) {
751 mddev_get(mddev);
752 spin_unlock(&all_mddevs_lock);
753 kfree(new);
754 return mddev;
755 }
756
757 if (new) {
758 list_add(&new->all_mddevs, &all_mddevs);
759 spin_unlock(&all_mddevs_lock);
760 new->hold_active = UNTIL_IOCTL;
761 return new;
762 }
        } else if (new) {
                /* find an unused unit number */
                static int next_minor = 512;
766 int start = next_minor;
767 int is_free = 0;
768 int dev = 0;
769 while (!is_free) {
770 dev = MKDEV(MD_MAJOR, next_minor);
771 next_minor++;
772 if (next_minor > MINORMASK)
773 next_minor = 0;
                        if (next_minor == start) {
                                /* Oh dear, all in use. */
                                spin_unlock(&all_mddevs_lock);
777 kfree(new);
778 return NULL;
779 }
780
781 is_free = 1;
782 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
783 if (mddev->unit == dev) {
784 is_free = 0;
785 break;
786 }
787 }
788 new->unit = dev;
789 new->md_minor = MINOR(dev);
790 new->hold_active = UNTIL_STOP;
791 list_add(&new->all_mddevs, &all_mddevs);
792 spin_unlock(&all_mddevs_lock);
793 return new;
794 }
795 spin_unlock(&all_mddevs_lock);
796
797 new = kzalloc(sizeof(*new), GFP_KERNEL);
798 if (!new)
799 return NULL;
800
801 new->unit = unit;
802 if (MAJOR(unit) == MD_MAJOR)
803 new->md_minor = MINOR(unit);
804 else
805 new->md_minor = MINOR(unit) >> MdpMinorShift;
806
807 mddev_init(new);
808
809 goto retry;
810}
811
812static struct attribute_group md_redundancy_group;
813
814void mddev_unlock(struct mddev *mddev)
815{
        if (mddev->to_remove) {
                /*
                 * These sysfs groups cannot be removed while reconfig_mutex
                 * is held, as removal may block on an attribute method that
                 * itself needs the mutex.  So note what needs removing,
                 * drop the mutex, and then do the removal.  sysfs_active is
                 * set while this is in progress so that concurrent attribute
                 * handlers can back off.
                 */
                struct attribute_group *to_remove = mddev->to_remove;
830 mddev->to_remove = NULL;
831 mddev->sysfs_active = 1;
832 mutex_unlock(&mddev->reconfig_mutex);
833
834 if (mddev->kobj.sd) {
835 if (to_remove != &md_redundancy_group)
836 sysfs_remove_group(&mddev->kobj, to_remove);
837 if (mddev->pers == NULL ||
838 mddev->pers->sync_request == NULL) {
839 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
840 if (mddev->sysfs_action)
841 sysfs_put(mddev->sysfs_action);
842 mddev->sysfs_action = NULL;
843 }
844 }
845 mddev->sysfs_active = 0;
846 } else
847 mutex_unlock(&mddev->reconfig_mutex);
848
        /*
         * As we've dropped the mutex we need a spinlock to
         * make sure the thread doesn't disappear.
         */
        spin_lock(&pers_lock);
853 md_wakeup_thread(mddev->thread);
854 wake_up(&mddev->sb_wait);
855 spin_unlock(&pers_lock);
856}
857EXPORT_SYMBOL_GPL(mddev_unlock);
858
859struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
860{
861 struct md_rdev *rdev;
862
863 rdev_for_each_rcu(rdev, mddev)
864 if (rdev->desc_nr == nr)
865 return rdev;
866
867 return NULL;
868}
869EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
870
871static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
872{
873 struct md_rdev *rdev;
874
875 rdev_for_each(rdev, mddev)
876 if (rdev->bdev->bd_dev == dev)
877 return rdev;
878
879 return NULL;
880}
881
882struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
883{
884 struct md_rdev *rdev;
885
886 rdev_for_each_rcu(rdev, mddev)
887 if (rdev->bdev->bd_dev == dev)
888 return rdev;
889
890 return NULL;
891}
892EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
893
894static struct md_personality *find_pers(int level, char *clevel)
895{
896 struct md_personality *pers;
897 list_for_each_entry(pers, &pers_list, list) {
898 if (level != LEVEL_NONE && pers->level == level)
899 return pers;
900 if (strcmp(pers->name, clevel)==0)
901 return pers;
902 }
903 return NULL;
904}

/* return the offset (in 512-byte sectors) of the 0.90 superblock */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
908{
909 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
910 return MD_NEW_SIZE_SECTORS(num_sectors);
911}
912
913static int alloc_disk_sb(struct md_rdev *rdev)
914{
915 rdev->sb_page = alloc_page(GFP_KERNEL);
916 if (!rdev->sb_page)
917 return -ENOMEM;
918 return 0;
919}
920
921void md_rdev_clear(struct md_rdev *rdev)
922{
923 if (rdev->sb_page) {
924 put_page(rdev->sb_page);
925 rdev->sb_loaded = 0;
926 rdev->sb_page = NULL;
927 rdev->sb_start = 0;
928 rdev->sectors = 0;
929 }
930 if (rdev->bb_page) {
931 put_page(rdev->bb_page);
932 rdev->bb_page = NULL;
933 }
934 badblocks_exit(&rdev->badblocks);
935}
936EXPORT_SYMBOL_GPL(md_rdev_clear);
937
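/*
 * Completion handler for superblock writes submitted by md_super_write():
 * on error, mark the device failed (or schedule a non-failfast rewrite),
 * then drop the pending-write count and the rdev reference.
 */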
938static void super_written(struct bio *bio)
939{
940 struct md_rdev *rdev = bio->bi_private;
941 struct mddev *mddev = rdev->mddev;
942
943 if (bio->bi_status) {
944 pr_err("md: super_written gets error=%d\n", bio->bi_status);
945 md_error(mddev, rdev);
946 if (!test_bit(Faulty, &rdev->flags)
947 && (bio->bi_opf & MD_FAILFAST)) {
948 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
949 set_bit(LastDev, &rdev->flags);
950 }
951 } else
952 clear_bit(LastDev, &rdev->flags);
953
954 if (atomic_dec_and_test(&mddev->pending_writes))
955 wake_up(&mddev->sb_wait);
956 rdev_dec_pending(rdev, mddev);
957 bio_put(bio);
958}
959
960void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
961 sector_t sector, int size, struct page *page)
962{
        /*
         * Write the first "size" bytes of "page" to "sector" of "rdev".
         * Increment mddev->pending_writes before returning and decrement it
         * on completion, waking up sb_wait if it reaches zero.
         * If an error occurs, call md_error().
         */
969 struct bio *bio;
970 int ff = 0;
971
972 if (!page)
973 return;
974
975 if (test_bit(Faulty, &rdev->flags))
976 return;
977
978 bio = md_bio_alloc_sync(mddev);
979
980 atomic_inc(&rdev->nr_pending);
981
982 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
983 bio->bi_iter.bi_sector = sector;
984 bio_add_page(bio, page, size, 0);
985 bio->bi_private = rdev;
986 bio->bi_end_io = super_written;
987
988 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
989 test_bit(FailFast, &rdev->flags) &&
990 !test_bit(LastDev, &rdev->flags))
991 ff = MD_FAILFAST;
992 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
993
994 atomic_inc(&mddev->pending_writes);
995 submit_bio(bio);
996}
997
998int md_super_wait(struct mddev *mddev)
999{
        /* wait for all superblock writes that were scheduled to complete */
        wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1002 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
1003 return -EAGAIN;
1004 return 0;
1005}
1006
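/*
 * Synchronously read or write one page of metadata or data for rdev.
 * Returns 1 on success and 0 on failure.
 */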
1007int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
1008 struct page *page, int op, int op_flags, bool metadata_op)
1009{
1010 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
1011 int ret;
1012
1013 if (metadata_op && rdev->meta_bdev)
1014 bio_set_dev(bio, rdev->meta_bdev);
1015 else
1016 bio_set_dev(bio, rdev->bdev);
1017 bio_set_op_attrs(bio, op, op_flags);
1018 if (metadata_op)
1019 bio->bi_iter.bi_sector = sector + rdev->sb_start;
1020 else if (rdev->mddev->reshape_position != MaxSector &&
1021 (rdev->mddev->reshape_backwards ==
1022 (sector >= rdev->mddev->reshape_position)))
1023 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
1024 else
1025 bio->bi_iter.bi_sector = sector + rdev->data_offset;
1026 bio_add_page(bio, page, size, 0);
1027
1028 submit_bio_wait(bio);
1029
1030 ret = !bio->bi_status;
1031 bio_put(bio);
1032 return ret;
1033}
1034EXPORT_SYMBOL_GPL(sync_page_io);
1035
1036static int read_disk_sb(struct md_rdev *rdev, int size)
1037{
1038 char b[BDEVNAME_SIZE];
1039
1040 if (rdev->sb_loaded)
1041 return 0;
1042
1043 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1044 goto fail;
1045 rdev->sb_loaded = 1;
1046 return 0;
1047
1048fail:
1049 pr_err("md: disabled device %s, could not read superblock.\n",
1050 bdevname(rdev->bdev,b));
1051 return -EINVAL;
1052}
1053
1054static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1055{
1056 return sb1->set_uuid0 == sb2->set_uuid0 &&
1057 sb1->set_uuid1 == sb2->set_uuid1 &&
1058 sb1->set_uuid2 == sb2->set_uuid2 &&
1059 sb1->set_uuid3 == sb2->set_uuid3;
1060}
1061
1062static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1063{
1064 int ret;
1065 mdp_super_t *tmp1, *tmp2;
1066
1067 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1068 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1069
1070 if (!tmp1 || !tmp2) {
1071 ret = 0;
1072 goto abort;
1073 }
1074
1075 *tmp1 = *sb1;
1076 *tmp2 = *sb2;
1077
        /*
         * nr_disks is not constant
         */
        tmp1->nr_disks = 0;
1082 tmp2->nr_disks = 0;
1083
1084 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1085abort:
1086 kfree(tmp1);
1087 kfree(tmp2);
1088 return ret;
1089}
1090
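/* fold a 32-bit checksum down to 16 bits, end-around-carry style */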
1091static u32 md_csum_fold(u32 csum)
1092{
1093 csum = (csum & 0xffff) + (csum >> 16);
1094 return (csum & 0xffff) + (csum >> 16);
1095}
1096
1097static unsigned int calc_sb_csum(mdp_super_t *sb)
1098{
1099 u64 newcsum = 0;
1100 u32 *sb32 = (u32*)sb;
1101 int i;
1102 unsigned int disk_csum, csum;
1103
1104 disk_csum = sb->sb_csum;
1105 sb->sb_csum = 0;
1106
1107 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1108 newcsum += sb32[i];
1109 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1110
1111#ifdef CONFIG_ALPHA
        /*
         * This used to use csum_partial, which was wrong for several
         * reasons, including that different results are returned on
         * different architectures.  It isn't critical that we get exactly
         * the same return value as before (we always csum_fold before
         * testing, and that removes any differences).  However, as we
         * know that csum_partial always returned a 16-bit value on
         * alphas, do a fold to maximise conformity to previous behaviour.
         */
        sb->sb_csum = md_csum_fold(disk_csum);
1121#else
1122 sb->sb_csum = disk_csum;
1123#endif
1124 return csum;
1125}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 */
struct super_type {
1158 char *name;
1159 struct module *owner;
1160 int (*load_super)(struct md_rdev *rdev,
1161 struct md_rdev *refdev,
1162 int minor_version);
1163 int (*validate_super)(struct mddev *mddev,
1164 struct md_rdev *rdev);
1165 void (*sync_super)(struct mddev *mddev,
1166 struct md_rdev *rdev);
1167 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1168 sector_t num_sectors);
1169 int (*allow_new_offset)(struct md_rdev *rdev,
1170 unsigned long long new_offset);
1171};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do
 * not support bitmaps.  It prints an error message and returns non-zero if
 * mddev has a bitmap.  Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
1182{
1183 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1184 return 0;
1185 pr_warn("%s: bitmaps are not supported for %s\n",
1186 mdname(mddev), mddev->pers->name);
1187 return 1;
1188}
1189EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1195{
1196 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1197 mdp_super_t *sb;
1198 int ret;
1199 bool spare_disk = true;
1200
        /*
         * Calculate the position of the superblock (512-byte sectors),
         * it's at the end of the disk.
         *
         * It also happens to be a multiple of 4Kb.
         */
        rdev->sb_start = calc_dev_sboffset(rdev);
1208
1209 ret = read_disk_sb(rdev, MD_SB_BYTES);
1210 if (ret)
1211 return ret;
1212
1213 ret = -EINVAL;
1214
1215 bdevname(rdev->bdev, b);
1216 sb = page_address(rdev->sb_page);
1217
1218 if (sb->md_magic != MD_SB_MAGIC) {
1219 pr_warn("md: invalid raid superblock magic on %s\n", b);
1220 goto abort;
1221 }
1222
1223 if (sb->major_version != 0 ||
1224 sb->minor_version < 90 ||
1225 sb->minor_version > 91) {
1226 pr_warn("Bad version number %d.%d on %s\n",
1227 sb->major_version, sb->minor_version, b);
1228 goto abort;
1229 }
1230
1231 if (sb->raid_disks <= 0)
1232 goto abort;
1233
1234 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1235 pr_warn("md: invalid superblock checksum on %s\n", b);
1236 goto abort;
1237 }
1238
1239 rdev->preferred_minor = sb->md_minor;
1240 rdev->data_offset = 0;
1241 rdev->new_data_offset = 0;
1242 rdev->sb_size = MD_SB_BYTES;
1243 rdev->badblocks.shift = -1;
1244
1245 if (sb->level == LEVEL_MULTIPATH)
1246 rdev->desc_nr = -1;
1247 else
1248 rdev->desc_nr = sb->this_disk.number;
1249
        /* not spare disk, or LEVEL_MULTIPATH */
        if (sb->level == LEVEL_MULTIPATH ||
1252 (rdev->desc_nr >= 0 &&
1253 rdev->desc_nr < MD_SB_DISKS &&
1254 sb->disks[rdev->desc_nr].state &
1255 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1256 spare_disk = false;
1257
1258 if (!refdev) {
1259 if (!spare_disk)
1260 ret = 1;
1261 else
1262 ret = 0;
1263 } else {
1264 __u64 ev1, ev2;
1265 mdp_super_t *refsb = page_address(refdev->sb_page);
1266 if (!md_uuid_equal(refsb, sb)) {
1267 pr_warn("md: %s has different UUID to %s\n",
1268 b, bdevname(refdev->bdev,b2));
1269 goto abort;
1270 }
1271 if (!md_sb_equal(refsb, sb)) {
1272 pr_warn("md: %s has same UUID but different superblock to %s\n",
1273 b, bdevname(refdev->bdev, b2));
1274 goto abort;
1275 }
1276 ev1 = md_event(sb);
1277 ev2 = md_event(refsb);
1278
1279 if (!spare_disk && ev1 > ev2)
1280 ret = 1;
1281 else
1282 ret = 0;
1283 }
1284 rdev->sectors = rdev->sb_start;
        /* Limit to 4TB as metadata cannot record more than that.
         * (not needed for Linear and RAID0 as metadata doesn't
         * record this size)
         */
        if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
                rdev->sectors = (sector_t)(2ULL << 32) - 2;
1291
        if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
                /* "this cannot possibly happen" ... */
                ret = -EINVAL;
1295
1296 abort:
1297 return ret;
1298}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1304{
1305 mdp_disk_t *desc;
1306 mdp_super_t *sb = page_address(rdev->sb_page);
1307 __u64 ev1 = md_event(sb);
1308
1309 rdev->raid_disk = -1;
1310 clear_bit(Faulty, &rdev->flags);
1311 clear_bit(In_sync, &rdev->flags);
1312 clear_bit(Bitmap_sync, &rdev->flags);
1313 clear_bit(WriteMostly, &rdev->flags);
1314
1315 if (mddev->raid_disks == 0) {
1316 mddev->major_version = 0;
1317 mddev->minor_version = sb->minor_version;
1318 mddev->patch_version = sb->patch_version;
1319 mddev->external = 0;
1320 mddev->chunk_sectors = sb->chunk_size >> 9;
1321 mddev->ctime = sb->ctime;
1322 mddev->utime = sb->utime;
1323 mddev->level = sb->level;
1324 mddev->clevel[0] = 0;
1325 mddev->layout = sb->layout;
1326 mddev->raid_disks = sb->raid_disks;
1327 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1328 mddev->events = ev1;
1329 mddev->bitmap_info.offset = 0;
1330 mddev->bitmap_info.space = 0;
1331
1332 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1333 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1334 mddev->reshape_backwards = 0;
1335
1336 if (mddev->minor_version >= 91) {
1337 mddev->reshape_position = sb->reshape_position;
1338 mddev->delta_disks = sb->delta_disks;
1339 mddev->new_level = sb->new_level;
1340 mddev->new_layout = sb->new_layout;
1341 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1342 if (mddev->delta_disks < 0)
1343 mddev->reshape_backwards = 1;
1344 } else {
1345 mddev->reshape_position = MaxSector;
1346 mddev->delta_disks = 0;
1347 mddev->new_level = mddev->level;
1348 mddev->new_layout = mddev->layout;
1349 mddev->new_chunk_sectors = mddev->chunk_sectors;
1350 }
1351 if (mddev->level == 0)
1352 mddev->layout = -1;
1353
1354 if (sb->state & (1<<MD_SB_CLEAN))
1355 mddev->recovery_cp = MaxSector;
1356 else {
1357 if (sb->events_hi == sb->cp_events_hi &&
1358 sb->events_lo == sb->cp_events_lo) {
1359 mddev->recovery_cp = sb->recovery_cp;
1360 } else
1361 mddev->recovery_cp = 0;
1362 }
1363
1364 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1365 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1366 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1367 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1368
1369 mddev->max_disks = MD_SB_DISKS;
1370
1371 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1372 mddev->bitmap_info.file == NULL) {
1373 mddev->bitmap_info.offset =
1374 mddev->bitmap_info.default_offset;
1375 mddev->bitmap_info.space =
1376 mddev->bitmap_info.default_space;
1377 }
1378
        } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling, except
                 * for spares (which don't need an event count) */
                ++ev1;
1383 if (sb->disks[rdev->desc_nr].state & (
1384 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1385 if (ev1 < mddev->events)
1386 return -EINVAL;
        } else if (mddev->bitmap) {
                /* if adding to array with a bitmap, then we can accept an
                 * older device ... but not too old.
                 */
                if (ev1 < mddev->bitmap->events_cleared)
1392 return 0;
1393 if (ev1 < mddev->events)
1394 set_bit(Bitmap_sync, &rdev->flags);
1395 } else {
                if (ev1 < mddev->events)
                        /* just a hot-add of a new device, leave raid_disk at -1 */
                        return 0;
1399 }
1400
1401 if (mddev->level != LEVEL_MULTIPATH) {
1402 desc = sb->disks + rdev->desc_nr;
1403
1404 if (desc->state & (1<<MD_DISK_FAULTY))
1405 set_bit(Faulty, &rdev->flags);
                else if (desc->state & (1<<MD_DISK_SYNC)) {
1408 set_bit(In_sync, &rdev->flags);
1409 rdev->raid_disk = desc->raid_disk;
1410 rdev->saved_raid_disk = desc->raid_disk;
                } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
                        /* active but not in sync implies recovery up to
                         * reshape position.  We don't know exactly where
                         * that is, so set to zero for now */
                        if (mddev->minor_version >= 91) {
1416 rdev->recovery_offset = 0;
1417 rdev->raid_disk = desc->raid_disk;
1418 }
1419 }
1420 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1421 set_bit(WriteMostly, &rdev->flags);
1422 if (desc->state & (1<<MD_DISK_FAILFAST))
1423 set_bit(FailFast, &rdev->flags);
1424 } else
1425 set_bit(In_sync, &rdev->flags);
1426 return 0;
1427}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1433{
1434 mdp_super_t *sb;
1435 struct md_rdev *rdev2;
1436 int next_spare = mddev->raid_disks;

        /* make rdev->sb match mddev data..
         *
         * 1/ zero out disks
         * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
         * 3/ any empty disks < next_spare become removed
         *
         * disks[0] gets initialised to REMOVED because
         * we cannot be sure from other fields if it has
         * been initialised or not.
         */
        int i;
        int active=0, working=0,failed=0,spare=0,nr_disks=0;
1450
1451 rdev->sb_size = MD_SB_BYTES;
1452
1453 sb = page_address(rdev->sb_page);
1454
1455 memset(sb, 0, sizeof(*sb));
1456
1457 sb->md_magic = MD_SB_MAGIC;
1458 sb->major_version = mddev->major_version;
1459 sb->patch_version = mddev->patch_version;
1460 sb->gvalid_words = 0;
1461 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1462 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1463 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1464 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1465
1466 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1467 sb->level = mddev->level;
1468 sb->size = mddev->dev_sectors / 2;
1469 sb->raid_disks = mddev->raid_disks;
1470 sb->md_minor = mddev->md_minor;
1471 sb->not_persistent = 0;
1472 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1473 sb->state = 0;
1474 sb->events_hi = (mddev->events>>32);
1475 sb->events_lo = (u32)mddev->events;
1476
1477 if (mddev->reshape_position == MaxSector)
1478 sb->minor_version = 90;
1479 else {
1480 sb->minor_version = 91;
1481 sb->reshape_position = mddev->reshape_position;
1482 sb->new_level = mddev->new_level;
1483 sb->delta_disks = mddev->delta_disks;
1484 sb->new_layout = mddev->new_layout;
1485 sb->new_chunk = mddev->new_chunk_sectors << 9;
1486 }
1487 mddev->minor_version = sb->minor_version;
1488 if (mddev->in_sync)
1489 {
1490 sb->recovery_cp = mddev->recovery_cp;
1491 sb->cp_events_hi = (mddev->events>>32);
1492 sb->cp_events_lo = (u32)mddev->events;
1493 if (mddev->recovery_cp == MaxSector)
1494 sb->state = (1<< MD_SB_CLEAN);
1495 } else
1496 sb->recovery_cp = 0;
1497
1498 sb->layout = mddev->layout;
1499 sb->chunk_size = mddev->chunk_sectors << 9;
1500
1501 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1502 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1503
1504 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1505 rdev_for_each(rdev2, mddev) {
1506 mdp_disk_t *d;
1507 int desc_nr;
1508 int is_active = test_bit(In_sync, &rdev2->flags);
1509
                if (rdev2->raid_disk >= 0 &&
                    sb->minor_version >= 91)
                        /* we have nowhere to store the recovery_offset,
                         * but if it is not below the reshape_position,
                         * we can piggy-back on that.
                         */
                        is_active = 1;
1517 if (rdev2->raid_disk < 0 ||
1518 test_bit(Faulty, &rdev2->flags))
1519 is_active = 0;
1520 if (is_active)
1521 desc_nr = rdev2->raid_disk;
1522 else
1523 desc_nr = next_spare++;
1524 rdev2->desc_nr = desc_nr;
1525 d = &sb->disks[rdev2->desc_nr];
1526 nr_disks++;
1527 d->number = rdev2->desc_nr;
1528 d->major = MAJOR(rdev2->bdev->bd_dev);
1529 d->minor = MINOR(rdev2->bdev->bd_dev);
1530 if (is_active)
1531 d->raid_disk = rdev2->raid_disk;
1532 else
1533 d->raid_disk = rdev2->desc_nr;
1534 if (test_bit(Faulty, &rdev2->flags))
1535 d->state = (1<<MD_DISK_FAULTY);
1536 else if (is_active) {
1537 d->state = (1<<MD_DISK_ACTIVE);
1538 if (test_bit(In_sync, &rdev2->flags))
1539 d->state |= (1<<MD_DISK_SYNC);
1540 active++;
1541 working++;
1542 } else {
1543 d->state = 0;
1544 spare++;
1545 working++;
1546 }
1547 if (test_bit(WriteMostly, &rdev2->flags))
1548 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1549 if (test_bit(FailFast, &rdev2->flags))
1550 d->state |= (1<<MD_DISK_FAILFAST);
1551 }
        /* now set the "removed" and "faulty" bits on any missing devices */
        for (i=0 ; i < mddev->raid_disks ; i++) {
1554 mdp_disk_t *d = &sb->disks[i];
1555 if (d->state == 0 && d->number == 0) {
1556 d->number = i;
1557 d->raid_disk = i;
1558 d->state = (1<<MD_DISK_REMOVED);
1559 d->state |= (1<<MD_DISK_FAULTY);
1560 failed++;
1561 }
1562 }
1563 sb->nr_disks = nr_disks;
1564 sb->active_disks = active;
1565 sb->working_disks = working;
1566 sb->failed_disks = failed;
1567 sb->spare_disks = spare;
1568
1569 sb->this_disk = sb->disks[rdev->desc_nr];
1570 sb->sb_csum = calc_sb_csum(sb);
1571}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1578{
1579 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1580 return 0;
1581 if (rdev->mddev->bitmap_info.offset)
1582 return 0;
1583 rdev->sb_start = calc_dev_sboffset(rdev);
1584 if (!num_sectors || num_sectors > rdev->sb_start)
1585 num_sectors = rdev->sb_start;
        /* Limit to 4TB as metadata cannot record more than that.
         * 4TB == 2^32 KB, or 2*2^32 sectors.
         */
        if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
                num_sectors = (sector_t)(2ULL << 32) - 2;
1591 do {
1592 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1593 rdev->sb_page);
1594 } while (md_super_wait(rdev->mddev) < 0);
1595 return num_sectors;
1596}
1597
1598static int
1599super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1600{
        /* non-zero offset changes are not possible with v0.90 */
        return new_offset == 0;
1603}

/*
 * version 1 superblock
 */
1609static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1610{
1611 __le32 disk_csum;
1612 u32 csum;
1613 unsigned long long newcsum;
1614 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1615 __le32 *isuper = (__le32*)sb;
1616
1617 disk_csum = sb->sb_csum;
1618 sb->sb_csum = 0;
1619 newcsum = 0;
1620 for (; size >= 4; size -= 4)
1621 newcsum += le32_to_cpu(*isuper++);
1622
1623 if (size == 2)
1624 newcsum += le16_to_cpu(*(__le16*) isuper);
1625
1626 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1627 sb->sb_csum = disk_csum;
1628 return cpu_to_le32(csum);
1629}
1630
1631static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1632{
1633 struct mdp_superblock_1 *sb;
1634 int ret;
1635 sector_t sb_start;
1636 sector_t sectors;
1637 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1638 int bmask;
1639 bool spare_disk = true;
1640
        /*
         * Calculate the position of the superblock in 512-byte sectors.
         * It is always aligned to a 4K boundary and, depending on
         * minor_version, it can be:
         * 0: At least 8K, but less than 12K, from end of device
         * 1: At start of device
         * 2: 4K from start of device.
         */
1649 switch(minor_version) {
1650 case 0:
1651 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1652 sb_start -= 8*2;
1653 sb_start &= ~(sector_t)(4*2-1);
1654 break;
1655 case 1:
1656 sb_start = 0;
1657 break;
1658 case 2:
1659 sb_start = 8;
1660 break;
1661 default:
1662 return -EINVAL;
1663 }
1664 rdev->sb_start = sb_start;
1665
        /* superblock is rarely larger than 1K, but it can be larger,
         * and it is safe to read 4k, so we do that
         */
        ret = read_disk_sb(rdev, 4096);
1670 if (ret) return ret;
1671
1672 sb = page_address(rdev->sb_page);
1673
1674 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1675 sb->major_version != cpu_to_le32(1) ||
1676 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1677 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1678 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1679 return -EINVAL;
1680
1681 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1682 pr_warn("md: invalid superblock checksum on %s\n",
1683 bdevname(rdev->bdev,b));
1684 return -EINVAL;
1685 }
1686 if (le64_to_cpu(sb->data_size) < 10) {
1687 pr_warn("md: data_size too small on %s\n",
1688 bdevname(rdev->bdev,b));
1689 return -EINVAL;
1690 }
1691 if (sb->pad0 ||
1692 sb->pad3[0] ||
1693 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
                /* Some padding is non-zero, might be a new feature */
                return -EINVAL;
1696
1697 rdev->preferred_minor = 0xffff;
1698 rdev->data_offset = le64_to_cpu(sb->data_offset);
1699 rdev->new_data_offset = rdev->data_offset;
1700 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1701 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1702 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1703 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1704
1705 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1706 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1707 if (rdev->sb_size & bmask)
1708 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1709
1710 if (minor_version
1711 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1712 return -EINVAL;
1713 if (minor_version
1714 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1715 return -EINVAL;
1716
1717 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1718 rdev->desc_nr = -1;
1719 else
1720 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1721
1722 if (!rdev->bb_page) {
1723 rdev->bb_page = alloc_page(GFP_KERNEL);
1724 if (!rdev->bb_page)
1725 return -ENOMEM;
1726 }
1727 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1728 rdev->badblocks.count == 0) {
                /* need to load the bad block list.
                 * Currently we limit it to one page.
                 */
                s32 offset;
1733 sector_t bb_sector;
1734 __le64 *bbp;
1735 int i;
1736 int sectors = le16_to_cpu(sb->bblog_size);
1737 if (sectors > (PAGE_SIZE / 512))
1738 return -EINVAL;
1739 offset = le32_to_cpu(sb->bblog_offset);
1740 if (offset == 0)
1741 return -EINVAL;
1742 bb_sector = (long long)offset;
1743 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1744 rdev->bb_page, REQ_OP_READ, 0, true))
1745 return -EIO;
1746 bbp = (__le64 *)page_address(rdev->bb_page);
1747 rdev->badblocks.shift = sb->bblog_shift;
1748 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1749 u64 bb = le64_to_cpu(*bbp);
1750 int count = bb & (0x3ff);
1751 u64 sector = bb >> 10;
1752 sector <<= sb->bblog_shift;
1753 count <<= sb->bblog_shift;
1754 if (bb + 1 == 0)
1755 break;
1756 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1757 return -EINVAL;
1758 }
1759 } else if (sb->bblog_offset != 0)
1760 rdev->badblocks.shift = 0;
1761
1762 if ((le32_to_cpu(sb->feature_map) &
1763 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1764 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1765 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1766 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1767 }
1768
1769 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1770 sb->level != 0)
1771 return -EINVAL;

        /* not spare disk, or LEVEL_MULTIPATH */
        if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1775 (rdev->desc_nr >= 0 &&
1776 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1777 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1778 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1779 spare_disk = false;
1780
1781 if (!refdev) {
1782 if (!spare_disk)
1783 ret = 1;
1784 else
1785 ret = 0;
1786 } else {
1787 __u64 ev1, ev2;
1788 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1789
1790 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1791 sb->level != refsb->level ||
1792 sb->layout != refsb->layout ||
1793 sb->chunksize != refsb->chunksize) {
1794 pr_warn("md: %s has strangely different superblock to %s\n",
1795 bdevname(rdev->bdev,b),
1796 bdevname(refdev->bdev,b2));
1797 return -EINVAL;
1798 }
1799 ev1 = le64_to_cpu(sb->events);
1800 ev2 = le64_to_cpu(refsb->events);
1801
1802 if (!spare_disk && ev1 > ev2)
1803 ret = 1;
1804 else
1805 ret = 0;
1806 }
1807 if (minor_version) {
1808 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1809 sectors -= rdev->data_offset;
1810 } else
1811 sectors = rdev->sb_start;
1812 if (sectors < le64_to_cpu(sb->data_size))
1813 return -EINVAL;
1814 rdev->sectors = le64_to_cpu(sb->data_size);
1815 return ret;
1816}
1817
1818static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1819{
1820 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1821 __u64 ev1 = le64_to_cpu(sb->events);
1822
1823 rdev->raid_disk = -1;
1824 clear_bit(Faulty, &rdev->flags);
1825 clear_bit(In_sync, &rdev->flags);
1826 clear_bit(Bitmap_sync, &rdev->flags);
1827 clear_bit(WriteMostly, &rdev->flags);
1828
1829 if (mddev->raid_disks == 0) {
1830 mddev->major_version = 1;
1831 mddev->patch_version = 0;
1832 mddev->external = 0;
1833 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1834 mddev->ctime = le64_to_cpu(sb->ctime);
1835 mddev->utime = le64_to_cpu(sb->utime);
1836 mddev->level = le32_to_cpu(sb->level);
1837 mddev->clevel[0] = 0;
1838 mddev->layout = le32_to_cpu(sb->layout);
1839 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1840 mddev->dev_sectors = le64_to_cpu(sb->size);
1841 mddev->events = ev1;
1842 mddev->bitmap_info.offset = 0;
1843 mddev->bitmap_info.space = 0;
                /* Default location for bitmap is 1K after superblock
                 * using 3K - total of 4K
                 */
                mddev->bitmap_info.default_offset = 1024 >> 9;
1848 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1849 mddev->reshape_backwards = 0;
1850
1851 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1852 memcpy(mddev->uuid, sb->set_uuid, 16);
1853
1854 mddev->max_disks = (4096-256)/2;
1855
1856 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1857 mddev->bitmap_info.file == NULL) {
1858 mddev->bitmap_info.offset =
1859 (__s32)le32_to_cpu(sb->bitmap_offset);
                        /* Metadata doesn't record how much space is
                         * available.  For 1.0, we assume we can use up to
                         * the superblock if before, else to 4K beyond the
                         * superblock.  For others, assume no change is
                         * possible.
                         */
                        if (mddev->minor_version > 0)
1866 mddev->bitmap_info.space = 0;
1867 else if (mddev->bitmap_info.offset > 0)
1868 mddev->bitmap_info.space =
1869 8 - mddev->bitmap_info.offset;
1870 else
1871 mddev->bitmap_info.space =
1872 -mddev->bitmap_info.offset;
1873 }
1874
1875 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1876 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1877 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1878 mddev->new_level = le32_to_cpu(sb->new_level);
1879 mddev->new_layout = le32_to_cpu(sb->new_layout);
1880 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1881 if (mddev->delta_disks < 0 ||
1882 (mddev->delta_disks == 0 &&
1883 (le32_to_cpu(sb->feature_map)
1884 & MD_FEATURE_RESHAPE_BACKWARDS)))
1885 mddev->reshape_backwards = 1;
1886 } else {
1887 mddev->reshape_position = MaxSector;
1888 mddev->delta_disks = 0;
1889 mddev->new_level = mddev->level;
1890 mddev->new_layout = mddev->layout;
1891 mddev->new_chunk_sectors = mddev->chunk_sectors;
1892 }
1893
1894 if (mddev->level == 0 &&
1895 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1896 mddev->layout = -1;
1897
1898 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1899 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1900
1901 if (le32_to_cpu(sb->feature_map) &
1902 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1903 if (le32_to_cpu(sb->feature_map) &
1904 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1905 return -EINVAL;
1906 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1907 (le32_to_cpu(sb->feature_map) &
1908 MD_FEATURE_MULTIPLE_PPLS))
1909 return -EINVAL;
1910 set_bit(MD_HAS_PPL, &mddev->flags);
1911 }
        } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling, except for
                 * spares (which don't need an event count) */
                ++ev1;
1916 if (rdev->desc_nr >= 0 &&
1917 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1918 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1919 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1920 if (ev1 < mddev->events)
1921 return -EINVAL;
        } else if (mddev->bitmap) {
                /* If adding to array with a bitmap, then we can accept an
                 * older device, but not too old.
                 */
1926 if (ev1 < mddev->bitmap->events_cleared)
1927 return 0;
1928 if (ev1 < mddev->events)
1929 set_bit(Bitmap_sync, &rdev->flags);
1930 } else {
                if (ev1 < mddev->events)
                        /* just a hot-add of a new device, leave raid_disk at -1 */
                        return 0;
1934 }
1935 if (mddev->level != LEVEL_MULTIPATH) {
1936 int role;
1937 if (rdev->desc_nr < 0 ||
1938 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1939 role = MD_DISK_ROLE_SPARE;
1940 rdev->desc_nr = -1;
1941 } else
1942 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1943 switch(role) {
1944 case MD_DISK_ROLE_SPARE:
1945 break;
1946 case MD_DISK_ROLE_FAULTY:
1947 set_bit(Faulty, &rdev->flags);
1948 break;
1949 case MD_DISK_ROLE_JOURNAL:
                        if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
                                /* journal device without journal feature */
                                pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1953 return -EINVAL;
1954 }
1955 set_bit(Journal, &rdev->flags);
1956 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1957 rdev->raid_disk = 0;
1958 break;
1959 default:
1960 rdev->saved_raid_disk = role;
1961 if ((le32_to_cpu(sb->feature_map) &
1962 MD_FEATURE_RECOVERY_OFFSET)) {
1963 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1964 if (!(le32_to_cpu(sb->feature_map) &
1965 MD_FEATURE_RECOVERY_BITMAP))
1966 rdev->saved_raid_disk = -1;
                        } else {
                                /*
                                 * If the array is FROZEN, then the device can't
                                 * be in_sync with the rest of the array.
                                 */
                                if (!test_bit(MD_RECOVERY_FROZEN,
1973 &mddev->recovery))
1974 set_bit(In_sync, &rdev->flags);
1975 }
1976 rdev->raid_disk = role;
1977 break;
1978 }
1979 if (sb->devflags & WriteMostly1)
1980 set_bit(WriteMostly, &rdev->flags);
1981 if (sb->devflags & FailFast1)
1982 set_bit(FailFast, &rdev->flags);
1983 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1984 set_bit(Replacement, &rdev->flags);
1985 } else
1986 set_bit(In_sync, &rdev->flags);
1987
1988 return 0;
1989}
1990
1991static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1992{
1993 struct mdp_superblock_1 *sb;
1994 struct md_rdev *rdev2;
1995 int max_dev, i;
        /* make rdev->sb match mddev and rdev data. */

1998 sb = page_address(rdev->sb_page);
1999
2000 sb->feature_map = 0;
2001 sb->pad0 = 0;
2002 sb->recovery_offset = cpu_to_le64(0);
2003 memset(sb->pad3, 0, sizeof(sb->pad3));
2004
2005 sb->utime = cpu_to_le64((__u64)mddev->utime);
2006 sb->events = cpu_to_le64(mddev->events);
2007 if (mddev->in_sync)
2008 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
2009 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
2010 sb->resync_offset = cpu_to_le64(MaxSector);
2011 else
2012 sb->resync_offset = cpu_to_le64(0);
2013
2014 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
2015
2016 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2017 sb->size = cpu_to_le64(mddev->dev_sectors);
2018 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2019 sb->level = cpu_to_le32(mddev->level);
2020 sb->layout = cpu_to_le32(mddev->layout);
2021 if (test_bit(FailFast, &rdev->flags))
2022 sb->devflags |= FailFast1;
2023 else
2024 sb->devflags &= ~FailFast1;
2025
2026 if (test_bit(WriteMostly, &rdev->flags))
2027 sb->devflags |= WriteMostly1;
2028 else
2029 sb->devflags &= ~WriteMostly1;
2030 sb->data_offset = cpu_to_le64(rdev->data_offset);
2031 sb->data_size = cpu_to_le64(rdev->sectors);
2032
2033 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2034 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2035 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2036 }
2037
2038 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2039 !test_bit(In_sync, &rdev->flags)) {
2040 sb->feature_map |=
2041 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
2042 sb->recovery_offset =
2043 cpu_to_le64(rdev->recovery_offset);
2044 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2045 sb->feature_map |=
2046 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
2047 }
2048
2049 if (test_bit(Journal, &rdev->flags))
2050 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2051 if (test_bit(Replacement, &rdev->flags))
2052 sb->feature_map |=
2053 cpu_to_le32(MD_FEATURE_REPLACEMENT);
2054
2055 if (mddev->reshape_position != MaxSector) {
2056 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2057 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2058 sb->new_layout = cpu_to_le32(mddev->new_layout);
2059 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2060 sb->new_level = cpu_to_le32(mddev->new_level);
2061 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2062 if (mddev->delta_disks == 0 &&
2063 mddev->reshape_backwards)
2064 sb->feature_map
2065 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
2066 if (rdev->new_data_offset != rdev->data_offset) {
2067 sb->feature_map
2068 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
2069 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2070 - rdev->data_offset));
2071 }
2072 }
2073
2074 if (mddev_is_clustered(mddev))
2075 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2076
2077 if (rdev->badblocks.count == 0)
2078 ;
        else if (sb->bblog_offset == 0)
                /* Cannot record bad blocks on this device */
                md_error(mddev, rdev);
2082 else {
2083 struct badblocks *bb = &rdev->badblocks;
2084 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2085 u64 *p = bb->page;
2086 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2087 if (bb->changed) {
2088 unsigned seq;
2089
2090retry:
2091 seq = read_seqbegin(&bb->lock);
2092
2093 memset(bbp, 0xff, PAGE_SIZE);
2094
2095 for (i = 0 ; i < bb->count ; i++) {
2096 u64 internal_bb = p[i];
2097 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2098 | BB_LEN(internal_bb));
2099 bbp[i] = cpu_to_le64(store_bb);
2100 }
2101 bb->changed = 0;
2102 if (read_seqretry(&bb->lock, seq))
2103 goto retry;
2104
2105 bb->sector = (rdev->sb_start +
2106 (int)le32_to_cpu(sb->bblog_offset));
2107 bb->size = le16_to_cpu(sb->bblog_size);
2108 }
2109 }
2110
2111 max_dev = 0;
2112 rdev_for_each(rdev2, mddev)
2113 if (rdev2->desc_nr+1 > max_dev)
2114 max_dev = rdev2->desc_nr+1;
2115
2116 if (max_dev > le32_to_cpu(sb->max_dev)) {
2117 int bmask;
2118 sb->max_dev = cpu_to_le32(max_dev);
2119 rdev->sb_size = max_dev * 2 + 256;
2120 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2121 if (rdev->sb_size & bmask)
2122 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2123 } else
2124 max_dev = le32_to_cpu(sb->max_dev);
2125
2126 for (i=0; i<max_dev;i++)
2127 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2128
2129 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2130 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2131
2132 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2133 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2134 sb->feature_map |=
2135 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2136 else
2137 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2138 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2139 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2140 }
2141
2142 rdev_for_each(rdev2, mddev) {
2143 i = rdev2->desc_nr;
2144 if (test_bit(Faulty, &rdev2->flags))
2145 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2146 else if (test_bit(In_sync, &rdev2->flags))
2147 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2148 else if (test_bit(Journal, &rdev2->flags))
2149 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2150 else if (rdev2->raid_disk >= 0)
2151 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2152 else
2153 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2154 }
2155
2156 sb->sb_csum = calc_sb_1_csum(sb);
2157}
2158
2159static unsigned long long
2160super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2161{
2162 struct mdp_superblock_1 *sb;
2163 sector_t max_sectors;
2164 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2165 return 0;
2166 if (rdev->data_offset != rdev->new_data_offset)
2167 return 0;
        if (rdev->sb_start < rdev->data_offset) {
                /* minor versions 1 and 2; superblock before data */
                max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2171 max_sectors -= rdev->data_offset;
2172 if (!num_sectors || num_sectors > max_sectors)
2173 num_sectors = max_sectors;
        } else if (rdev->mddev->bitmap_info.offset) {
                /* minor version 0 with bitmap we can't move */
                return 0;
        } else {
                /* minor version 0; superblock after data */
2179 sector_t sb_start;
2180 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
2181 sb_start &= ~(sector_t)(4*2 - 1);
2182 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
2183 if (!num_sectors || num_sectors > max_sectors)
2184 num_sectors = max_sectors;
2185 rdev->sb_start = sb_start;
2186 }
2187 sb = page_address(rdev->sb_page);
2188 sb->data_size = cpu_to_le64(num_sectors);
2189 sb->super_offset = cpu_to_le64(rdev->sb_start);
2190 sb->sb_csum = calc_sb_1_csum(sb);
2191 do {
2192 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2193 rdev->sb_page);
2194 } while (md_super_wait(rdev->mddev) < 0);
2195 return num_sectors;
2196
2197}
2198
2199static int
2200super_1_allow_new_offset(struct md_rdev *rdev,
2201 unsigned long long new_offset)
{
        /* All necessary checks on new >= old have been done */
        struct bitmap *bitmap;
2205 if (new_offset >= rdev->data_offset)
2206 return 1;

        /* with 1.0 metadata, there is no metadata to tread on
         * so we can always move back */
        if (rdev->mddev->minor_version == 0)
                return 1;

        /* otherwise we must be sure not to step on
         * any metadata, so stay:
         * 36K beyond start of superblock
         * beyond end of badblocks
         * beyond write-intent bitmap
         */
        if (rdev->sb_start + (32+4)*2 > new_offset)
                return 0;
2221 bitmap = rdev->mddev->bitmap;
2222 if (bitmap && !rdev->mddev->bitmap_info.file &&
2223 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2224 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2225 return 0;
2226 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2227 return 0;
2228
2229 return 1;
2230}
2231
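/* table of supported on-disk metadata formats, indexed by major version */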
2232static struct super_type super_types[] = {
2233 [0] = {
2234 .name = "0.90.0",
2235 .owner = THIS_MODULE,
2236 .load_super = super_90_load,
2237 .validate_super = super_90_validate,
2238 .sync_super = super_90_sync,
2239 .rdev_size_change = super_90_rdev_size_change,
2240 .allow_new_offset = super_90_allow_new_offset,
2241 },
2242 [1] = {
2243 .name = "md-1",
2244 .owner = THIS_MODULE,
2245 .load_super = super_1_load,
2246 .validate_super = super_1_validate,
2247 .sync_super = super_1_sync,
2248 .rdev_size_change = super_1_rdev_size_change,
2249 .allow_new_offset = super_1_allow_new_offset,
2250 },
2251};
2252
2253static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2254{
2255 if (mddev->sync_super) {
2256 mddev->sync_super(mddev, rdev);
2257 return;
2258 }
2259
2260 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2261
2262 super_types[mddev->major_version].sync_super(mddev, rdev);
2263}
2264
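/*
 * Return 1 if any active member of mddev1 lives on the same underlying
 * block device as any active member of mddev2, else 0.
 */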
2265static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2266{
2267 struct md_rdev *rdev, *rdev2;
2268
2269 rcu_read_lock();
2270 rdev_for_each_rcu(rdev, mddev1) {
2271 if (test_bit(Faulty, &rdev->flags) ||
2272 test_bit(Journal, &rdev->flags) ||
2273 rdev->raid_disk == -1)
2274 continue;
2275 rdev_for_each_rcu(rdev2, mddev2) {
2276 if (test_bit(Faulty, &rdev2->flags) ||
2277 test_bit(Journal, &rdev2->flags) ||
2278 rdev2->raid_disk == -1)
2279 continue;
2280 if (rdev->bdev->bd_contains ==
2281 rdev2->bdev->bd_contains) {
2282 rcu_read_unlock();
2283 return 1;
2284 }
2285 }
2286 }
2287 rcu_read_unlock();
2288 return 0;
2289}
2290
2291static LIST_HEAD(pending_raid_disks);
2292
2293/*
2294 * Try to register a data integrity profile for an mddev.
2295 *
2296 * This is called when an array is started and after a disk has been kicked
2297 * from the array. It only succeeds if all working and active component
2298 * devices are integrity capable with matching profiles.
2299 */
2300int md_integrity_register(struct mddev *mddev)
2301{
2302 struct md_rdev *rdev, *reference = NULL;
2303
2304 if (list_empty(&mddev->disks))
2305 return 0;
2306 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2307 return 0;
2308 rdev_for_each(rdev, mddev) {
2309 /* skip spares and non-functional disks */
2310 if (test_bit(Faulty, &rdev->flags))
2311 continue;
2312 if (rdev->raid_disk < 0)
2313 continue;
2314 if (!reference) {
2315 /* use the first rdev as the reference */
2316 reference = rdev;
2317 continue;
2318 }
2319 /* does this rdev's profile match the reference profile? */
2320 if (blk_integrity_compare(reference->bdev->bd_disk,
2321 rdev->bdev->bd_disk) < 0)
2322 return -EINVAL;
2323 }
2324 if (!reference || !bdev_get_integrity(reference->bdev))
2325 return 0;
2326 /*
2327 * All component devices are integrity capable and have matching
2328 * profiles, register the common profile for the md device.
2329 */
2330 blk_integrity_register(mddev->gendisk,
2331 bdev_get_integrity(reference->bdev));
2332
2333 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2334 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2335 pr_err("md: failed to create integrity pool for %s\n",
2336 mdname(mddev));
2337 return -EINVAL;
2338 }
2339 return 0;
2340}
2341EXPORT_SYMBOL(md_integrity_register);
2342
2343/*
2344 * Attempt to add an rdev, but only if it is consistent with the current
2345 * integrity profile.
2346 */
2347int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2348{
2349 struct blk_integrity *bi_mddev;
2350 char name[BDEVNAME_SIZE];
2351
2352 if (!mddev->gendisk)
2353 return 0;
2354
2355 bi_mddev = blk_get_integrity(mddev->gendisk);
2356
2357 if (!bi_mddev)
2358 return 0;
2359
2360 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2361 pr_err("%s: incompatible integrity profile for %s\n",
2362 mdname(mddev), bdevname(rdev->bdev, name));
2363 return -ENXIO;
2364 }
2365
2366 return 0;
2367}
2368EXPORT_SYMBOL(md_integrity_add_rdev);
2369
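/*
 * Descriptive note (added): attach an rdev to an array. Reject duplicates
 * and (for active arrays) read-only devices, pick a free desc_nr if needed,
 * create the sysfs objects and links, and add the device to mddev->disks
 * under RCU. Callers hold the mddev reconfiguration lock.
 */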
2370static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2371{
2372 char b[BDEVNAME_SIZE];
2373 struct kobject *ko;
2374 int err;
2375
2376 /* prevent duplicates */
2377 if (find_rdev(mddev, rdev->bdev->bd_dev))
2378 return -EEXIST;
2379
2380 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2381 mddev->pers)
2382 return -EROFS;
2383
2384 /* make sure rdev->sectors exceeds mddev->dev_sectors */
2385 if (!test_bit(Journal, &rdev->flags) &&
2386 rdev->sectors &&
2387 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2388 if (mddev->pers) {
2389 /* Cannot change size, so fail.
2390 * If mddev->level <= 0, then we don't
2391 * care about aligning sizes (e.g. linear)
2392 */
2393 if (mddev->level > 0)
2394 return -ENOSPC;
2395 } else
2396 mddev->dev_sectors = rdev->sectors;
2397 }
2398
2399 /* Verify rdev->desc_nr is unique.
2400 * If it is -1, assign a free number, else
2401 * check that the number is not in use
2402 */
2403 rcu_read_lock();
2404 if (rdev->desc_nr < 0) {
2405 int choice = 0;
2406 if (mddev->pers)
2407 choice = mddev->raid_disks;
2408 while (md_find_rdev_nr_rcu(mddev, choice))
2409 choice++;
2410 rdev->desc_nr = choice;
2411 } else {
2412 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2413 rcu_read_unlock();
2414 return -EBUSY;
2415 }
2416 }
2417 rcu_read_unlock();
2418 if (!test_bit(Journal, &rdev->flags) &&
2419 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2420 pr_warn("md: %s: array is limited to %d devices\n",
2421 mdname(mddev), mddev->max_disks);
2422 return -EBUSY;
2423 }
2424 bdevname(rdev->bdev,b);
2425 strreplace(b, '/', '!');
2426
2427 rdev->mddev = mddev;
2428 pr_debug("md: bind<%s>\n", b);
2429
2430 if (mddev->raid_disks)
2431 mddev_create_serial_pool(mddev, rdev, false);
2432
2433 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2434 goto fail;
2435
2436 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2437 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2438 /* failure here is OK */;
2439 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2440
2441 list_add_rcu(&rdev->same_set, &mddev->disks);
2442 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2443
2444 /* May as well allow recovery to be retried once */
2445 mddev->recovery_disabled++;
2446
2447 return 0;
2448
2449 fail:
2450 pr_warn("md: failed to register dev-%s for %s\n",
2451 b, mdname(mddev));
2452 return err;
2453}
2454
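/*
 * Descriptive note (added): final teardown of an rdev's kobject, run from
 * md_misc_wq. Doing the kobject_del()/kobject_put() from a workqueue keeps
 * it out of the context that called unbind_rdev_from_array().
 */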
2455static void md_delayed_delete(struct work_struct *ws)
2456{
2457 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2458 kobject_del(&rdev->kobj);
2459 kobject_put(&rdev->kobj);
2460}
2461
2462static void unbind_rdev_from_array(struct md_rdev *rdev)
2463{
2464 char b[BDEVNAME_SIZE];
2465
2466 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2467 list_del_rcu(&rdev->same_set);
2468 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2469 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2470 rdev->mddev = NULL;
2471 sysfs_remove_link(&rdev->kobj, "block");
2472 sysfs_put(rdev->sysfs_state);
2473 rdev->sysfs_state = NULL;
2474 rdev->badblocks.count = 0;
2475 /* Wait for concurrent RCU readers of ->disks to finish, then
2476 * defer the kobject removal to a workqueue: unbinding can be
2477 * triggered from one of the rdev's own sysfs handlers, where
2478 * deleting the kobject synchronously could deadlock. */
2479 synchronize_rcu();
2480 INIT_WORK(&rdev->del_work, md_delayed_delete);
2481 kobject_get(&rdev->kobj);
2482 queue_work(md_misc_wq, &rdev->del_work);
2483}
2484
2485/*
2486 * prevent the device from being mounted, repartitioned or
2487 * otherwise reused by a RAID array (or any other kernel
2488 * subsystem), by bd_claiming the device.
2489 */
2490static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2491{
2492 int err = 0;
2493 struct block_device *bdev;
2494 char b[BDEVNAME_SIZE];
2495
2496 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2497 shared ? (struct md_rdev *)lock_rdev : rdev);
2498 if (IS_ERR(bdev)) {
2499 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2500 return PTR_ERR(bdev);
2501 }
2502 rdev->bdev = bdev;
2503 return err;
2504}
2505
2506static void unlock_rdev(struct md_rdev *rdev)
2507{
2508 struct block_device *bdev = rdev->bdev;
2509 rdev->bdev = NULL;
2510 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2511}
2512
2513void md_autodetect_dev(dev_t dev);
2514
2515static void export_rdev(struct md_rdev *rdev)
2516{
2517 char b[BDEVNAME_SIZE];
2518
2519 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2520 md_rdev_clear(rdev);
2521#ifndef MODULE
2522 if (test_bit(AutoDetected, &rdev->flags))
2523 md_autodetect_dev(rdev->bdev->bd_dev);
2524#endif
2525 unlock_rdev(rdev);
2526 kobject_put(&rdev->kobj);
2527}
2528
2529void md_kick_rdev_from_array(struct md_rdev *rdev)
2530{
2531 unbind_rdev_from_array(rdev);
2532 export_rdev(rdev);
2533}
2534EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2535
2536static void export_array(struct mddev *mddev)
2537{
2538 struct md_rdev *rdev;
2539
2540 while (!list_empty(&mddev->disks)) {
2541 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2542 same_set);
2543 md_kick_rdev_from_array(rdev);
2544 }
2545 mddev->raid_disks = 0;
2546 mddev->major_version = 0;
2547}
2548
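/*
 * Descriptive note (added): try to mark the array clean ("in_sync"). Called
 * with mddev->lock held; the lock is dropped temporarily while
 * writes_pending is switched to atomic mode so that a reliable zero check
 * can be made. Returns the resulting in_sync state.
 */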
2549static bool set_in_sync(struct mddev *mddev)
2550{
2551 lockdep_assert_held(&mddev->lock);
2552 if (!mddev->in_sync) {
2553 mddev->sync_checkers++;
2554 spin_unlock(&mddev->lock);
2555 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2556 spin_lock(&mddev->lock);
2557 if (!mddev->in_sync &&
2558 percpu_ref_is_zero(&mddev->writes_pending)) {
2559 mddev->in_sync = 1;
2560 /*
2561 * Ensure ->in_sync is visible before we clear
2562 * ->sync_checkers.
2563 */
2564 smp_mb();
2565 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2566 sysfs_notify_dirent_safe(mddev->sysfs_state);
2567 }
2568 if (--mddev->sync_checkers == 0)
2569 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2570 }
2571 if (mddev->safemode == 1)
2572 mddev->safemode = 0;
2573 return mddev->in_sync;
2574}
2575
2576static void sync_sbs(struct mddev *mddev, int nospares)
2577{
2578 /* Update each superblock (in-memory image), but
2579 * if we are allowed to, skip spares which already
2580 * have the right event counter, or have one earlier
2581 * (which would mean they aren't being marked as dirty
2582 * with the rest of the array)
2583 */
2584 struct md_rdev *rdev;
2585 rdev_for_each(rdev, mddev) {
2586 if (rdev->sb_events == mddev->events ||
2587 (nospares &&
2588 rdev->raid_disk < 0 &&
2589 rdev->sb_events+1 == mddev->events)) {
2590 /* Don't update this superblock */
2591 rdev->sb_loaded = 2;
2592 } else {
2593 sync_super(mddev, rdev);
2594 rdev->sb_loaded = 1;
2595 }
2596 }
2597}
2598
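/*
 * Descriptive note (added): for clustered arrays, compare the on-disk
 * superblock of a healthy member with the current mddev state and report
 * whether a metadata update is actually required.
 */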
2599static bool does_sb_need_changing(struct mddev *mddev)
2600{
2601 struct md_rdev *rdev;
2602 struct mdp_superblock_1 *sb;
2603 int role;
2604
2605 /* Find a good rdev */
2606 rdev_for_each(rdev, mddev)
2607 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2608 break;
2609
2610 /* No good device found. */
2611 if (!rdev)
2612 return false;
2613
2614 sb = page_address(rdev->sb_page);
2615 /* Check if a device has become faulty or a spare has become active */
2616 rdev_for_each(rdev, mddev) {
2617 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2618 /* Device activated? */
2619 if (role == 0xffff && rdev->raid_disk >=0 &&
2620 !test_bit(Faulty, &rdev->flags))
2621 return true;
2622 /* Device turned faulty? */
2623 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2624 return true;
2625 }
2626
2627 /* Check if any mddev parameters have changed */
2628 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2629 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2630 (mddev->layout != le32_to_cpu(sb->layout)) ||
2631 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2632 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2633 return true;
2634
2635 return false;
2636}
2637
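/*
 * Descriptive note (added): write out the superblocks of all member devices.
 * The event count is bumped (or, for a pure clean<->dirty transition,
 * possibly rolled back) under mddev->lock, the per-rdev images are refreshed
 * by sync_sbs(), and the writes are retried until md_super_wait() reports
 * success. 'force_change' requests an update even if nothing obvious changed.
 */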
2638void md_update_sb(struct mddev *mddev, int force_change)
2639{
2640 struct md_rdev *rdev;
2641 int sync_req;
2642 int nospares = 0;
2643 int any_badblocks_changed = 0;
2644 int ret = -1;
2645
2646 if (mddev->ro) {
2647 if (force_change)
2648 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2649 return;
2650 }
2651
2652repeat:
2653 if (mddev_is_clustered(mddev)) {
2654 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2655 force_change = 1;
2656 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2657 nospares = 1;
2658 ret = md_cluster_ops->metadata_update_start(mddev);
2659 /* Has anybody else already updated the superblock? */
2660 if (!does_sb_need_changing(mddev)) {
2661 if (ret == 0)
2662 md_cluster_ops->metadata_update_cancel(mddev);
2663 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2664 BIT(MD_SB_CHANGE_DEVS) |
2665 BIT(MD_SB_CHANGE_CLEAN));
2666 return;
2667 }
2668 }
2669
2670 /*
2671 * First make sure individual recovery_offsets are correct.
2672 * curr_resync_completed can only be used during recovery.
2673 * During reshape/resync it might use array-addresses rather
2674 * than device addresses.
2675 */
2676 rdev_for_each(rdev, mddev) {
2677 if (rdev->raid_disk >= 0 &&
2678 mddev->delta_disks >= 0 &&
2679 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2680 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2681 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2682 !test_bit(Journal, &rdev->flags) &&
2683 !test_bit(In_sync, &rdev->flags) &&
2684 mddev->curr_resync_completed > rdev->recovery_offset)
2685 rdev->recovery_offset = mddev->curr_resync_completed;
2686
2687 }
2688 if (!mddev->persistent) {
2689 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2690 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2691 if (!mddev->external) {
2692 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2693 rdev_for_each(rdev, mddev) {
2694 if (rdev->badblocks.changed) {
2695 rdev->badblocks.changed = 0;
2696 ack_all_badblocks(&rdev->badblocks);
2697 md_error(mddev, rdev);
2698 }
2699 clear_bit(Blocked, &rdev->flags);
2700 clear_bit(BlockedBadBlocks, &rdev->flags);
2701 wake_up(&rdev->blocked_wait);
2702 }
2703 }
2704 wake_up(&mddev->sb_wait);
2705 return;
2706 }
2707
2708 spin_lock(&mddev->lock);
2709
2710 mddev->utime = ktime_get_real_seconds();
2711
2712 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2713 force_change = 1;
2714 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2715 /* just a clean<->dirty transition, possibly leave spares alone,
2716 * though if events isn't the right even/odd, we will have to do
2717 * spares after all
2718 */
2719 nospares = 1;
2720 if (force_change)
2721 nospares = 0;
2722 if (mddev->degraded)
2723 /* If the array is degraded, then skipping spares is both
2724 * dangerous and fairly pointless.
2725 * Dangerous because a device that was removed from the array
2726 * might have an event count that still looks up-to-date,
2727 * so it can be re-added without a resync.
2728 * Pointless because if there are any spares to skip,
2729 * then a recovery will happen and soon that array won't
2730 * be degraded any more and the spare can go back to sleep then.
2731 */
2732 nospares = 0;
2733
2734 sync_req = mddev->in_sync;
2735
2736 /* If this is just a dirty<->clean transition, and the array is clean
2737 * and 'events' is odd, we can roll back to the previous clean state */
2738 if (nospares
2739 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2740 && mddev->can_decrease_events
2741 && mddev->events != 1) {
2742 mddev->events--;
2743 mddev->can_decrease_events = 0;
2744 } else {
2745 /* otherwise we have to go forward and bump the event count */
2746 mddev->events ++;
2747 mddev->can_decrease_events = nospares;
2748 }
2749
2750 /*
2751 * This 64-bit counter should never wrap.
2752 * Either we are in around ~1 trillion A.C., assuming
2753 * 1 reboot per second, or we have a bug...
2754 */
2755 WARN_ON(mddev->events == 0);
2756
2757 rdev_for_each(rdev, mddev) {
2758 if (rdev->badblocks.changed)
2759 any_badblocks_changed++;
2760 if (test_bit(Faulty, &rdev->flags))
2761 set_bit(FaultRecorded, &rdev->flags);
2762 }
2763
2764 sync_sbs(mddev, nospares);
2765 spin_unlock(&mddev->lock);
2766
2767 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2768 mdname(mddev), mddev->in_sync);
2769
2770 if (mddev->queue)
2771 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2772rewrite:
2773 md_bitmap_update_sb(mddev->bitmap);
2774 rdev_for_each(rdev, mddev) {
2775 char b[BDEVNAME_SIZE];
2776
2777 if (rdev->sb_loaded != 1)
2778 continue;
2779
2780 if (!test_bit(Faulty, &rdev->flags)) {
2781 md_super_write(mddev,rdev,
2782 rdev->sb_start, rdev->sb_size,
2783 rdev->sb_page);
2784 pr_debug("md: (write) %s's sb offset: %llu\n",
2785 bdevname(rdev->bdev, b),
2786 (unsigned long long)rdev->sb_start);
2787 rdev->sb_events = mddev->events;
2788 if (rdev->badblocks.size) {
2789 md_super_write(mddev, rdev,
2790 rdev->badblocks.sector,
2791 rdev->badblocks.size << 9,
2792 rdev->bb_page);
2793 rdev->badblocks.size = 0;
2794 }
2795
2796 } else
2797 pr_debug("md: %s (skipping faulty)\n",
2798 bdevname(rdev->bdev, b));
2799
2800 if (mddev->level == LEVEL_MULTIPATH)
2801 /* only need to write one superblock... */
2802 break;
2803 }
2804 if (md_super_wait(mddev) < 0)
2805 goto rewrite;
2806 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2807
2808 if (mddev_is_clustered(mddev) && ret == 0)
2809 md_cluster_ops->metadata_update_finish(mddev);
2810
2811 if (mddev->in_sync != sync_req ||
2812 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2813 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2814 /* have to write it out again */
2815 goto repeat;
2816 wake_up(&mddev->sb_wait);
2817 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2818 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2819
2820 rdev_for_each(rdev, mddev) {
2821 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2822 clear_bit(Blocked, &rdev->flags);
2823
2824 if (any_badblocks_changed)
2825 ack_all_badblocks(&rdev->badblocks);
2826 clear_bit(BlockedBadBlocks, &rdev->flags);
2827 wake_up(&rdev->blocked_wait);
2828 }
2829}
2830EXPORT_SYMBOL(md_update_sb);
2831
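/*
 * Descriptive note (added): tell the personality about an rdev that has
 * already been bound to the array (hot_add_disk), mark the superblock dirty,
 * and kick the recovery thread so the new device can be put to use.
 */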
2832static int add_bound_rdev(struct md_rdev *rdev)
2833{
2834 struct mddev *mddev = rdev->mddev;
2835 int err = 0;
2836 bool add_journal = test_bit(Journal, &rdev->flags);
2837
2838 if (!mddev->pers->hot_remove_disk || add_journal) {
2839 /* If there is hot_add_disk but no hot_remove_disk
2840 * then added disks are for geometry changes,
2841 * and should be added immediately.
2842 */
2843 super_types[mddev->major_version].
2844 validate_super(mddev, rdev);
2845 if (add_journal)
2846 mddev_suspend(mddev);
2847 err = mddev->pers->hot_add_disk(mddev, rdev);
2848 if (add_journal)
2849 mddev_resume(mddev);
2850 if (err) {
2851 md_kick_rdev_from_array(rdev);
2852 return err;
2853 }
2854 }
2855 sysfs_notify_dirent_safe(rdev->sysfs_state);
2856
2857 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2858 if (mddev->degraded)
2859 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2860 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2861 md_new_event(mddev);
2862 md_wakeup_thread(mddev->thread);
2863 return 0;
2864}
2865
2866/* words written to sysfs files may, or may not, be \n terminated.
2867 * We want to accept either case. For this we use cmd_match.
2868 */
2869static int cmd_match(const char *cmd, const char *str)
2870{
2871 /* See if cmd, written into a sysfs file, matches
2872 * str.  They must either be the same, or cmd can
2873 * have a trailing newline
2874 */
2875 while (*cmd && *str && *cmd == *str) {
2876 cmd++;
2877 str++;
2878 }
2879 if (*cmd == '\n')
2880 cmd++;
2881 if (*str || *cmd)
2882 return 0;
2883 return 1;
2884}
2885
2886struct rdev_sysfs_entry {
2887 struct attribute attr;
2888 ssize_t (*show)(struct md_rdev *, char *);
2889 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2890};
2891
2892static ssize_t
2893state_show(struct md_rdev *rdev, char *page)
2894{
2895 char *sep = ",";
2896 size_t len = 0;
2897 unsigned long flags = READ_ONCE(rdev->flags);
2898
2899 if (test_bit(Faulty, &flags) ||
2900 (!test_bit(ExternalBbl, &flags) &&
2901 rdev->badblocks.unacked_exist))
2902 len += sprintf(page+len, "faulty%s", sep);
2903 if (test_bit(In_sync, &flags))
2904 len += sprintf(page+len, "in_sync%s", sep);
2905 if (test_bit(Journal, &flags))
2906 len += sprintf(page+len, "journal%s", sep);
2907 if (test_bit(WriteMostly, &flags))
2908 len += sprintf(page+len, "write_mostly%s", sep);
2909 if (test_bit(Blocked, &flags) ||
2910 (rdev->badblocks.unacked_exist
2911 && !test_bit(Faulty, &flags)))
2912 len += sprintf(page+len, "blocked%s", sep);
2913 if (!test_bit(Faulty, &flags) &&
2914 !test_bit(Journal, &flags) &&
2915 !test_bit(In_sync, &flags))
2916 len += sprintf(page+len, "spare%s", sep);
2917 if (test_bit(WriteErrorSeen, &flags))
2918 len += sprintf(page+len, "write_error%s", sep);
2919 if (test_bit(WantReplacement, &flags))
2920 len += sprintf(page+len, "want_replacement%s", sep);
2921 if (test_bit(Replacement, &flags))
2922 len += sprintf(page+len, "replacement%s", sep);
2923 if (test_bit(ExternalBbl, &flags))
2924 len += sprintf(page+len, "external_bbl%s", sep);
2925 if (test_bit(FailFast, &flags))
2926 len += sprintf(page+len, "failfast%s", sep);
2927
2928 if (len)
2929 len -= strlen(sep);
2930
2931 return len+sprintf(page+len, "\n");
2932}
2933
2934static ssize_t
2935state_store(struct md_rdev *rdev, const char *buf, size_t len)
2936{
2937 /* can be set:
2938 *  faulty  - simulates an error on the device
2939 *  remove  - disconnects the device
2940 *  writemostly - sets write_mostly
2941 *  -writemostly - clears write_mostly
2942 *  blocked - sets the Blocked flag
2943 *  -blocked - clears the Blocked flag and possibly simulates an error
2944 *  insync - sets In_sync providing the device isn't active
2945 *  -insync - clears In_sync for a device with a slot assigned,
2946 *            so that it gets rebuilt based on the bitmap
2947 *  write_error - sets WriteErrorSeen
2948 *  -write_error - clears WriteErrorSeen
2949 *  {,-}failfast - set/clear FailFast
2950 */
2951 int err = -EINVAL;
2952 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2953 md_error(rdev->mddev, rdev);
2954 if (test_bit(Faulty, &rdev->flags))
2955 err = 0;
2956 else
2957 err = -EBUSY;
2958 } else if (cmd_match(buf, "remove")) {
2959 if (rdev->mddev->pers) {
2960 clear_bit(Blocked, &rdev->flags);
2961 remove_and_add_spares(rdev->mddev, rdev);
2962 }
2963 if (rdev->raid_disk >= 0)
2964 err = -EBUSY;
2965 else {
2966 struct mddev *mddev = rdev->mddev;
2967 err = 0;
2968 if (mddev_is_clustered(mddev))
2969 err = md_cluster_ops->remove_disk(mddev, rdev);
2970
2971 if (err == 0) {
2972 md_kick_rdev_from_array(rdev);
2973 if (mddev->pers) {
2974 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2975 md_wakeup_thread(mddev->thread);
2976 }
2977 md_new_event(mddev);
2978 }
2979 }
2980 } else if (cmd_match(buf, "writemostly")) {
2981 set_bit(WriteMostly, &rdev->flags);
2982 mddev_create_serial_pool(rdev->mddev, rdev, false);
2983 err = 0;
2984 } else if (cmd_match(buf, "-writemostly")) {
2985 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2986 clear_bit(WriteMostly, &rdev->flags);
2987 err = 0;
2988 } else if (cmd_match(buf, "blocked")) {
2989 set_bit(Blocked, &rdev->flags);
2990 err = 0;
2991 } else if (cmd_match(buf, "-blocked")) {
2992 if (!test_bit(Faulty, &rdev->flags) &&
2993 !test_bit(ExternalBbl, &rdev->flags) &&
2994 rdev->badblocks.unacked_exist) {
2995
2996
2997
2998 md_error(rdev->mddev, rdev);
2999 }
3000 clear_bit(Blocked, &rdev->flags);
3001 clear_bit(BlockedBadBlocks, &rdev->flags);
3002 wake_up(&rdev->blocked_wait);
3003 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3004 md_wakeup_thread(rdev->mddev->thread);
3005
3006 err = 0;
3007 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3008 set_bit(In_sync, &rdev->flags);
3009 err = 0;
3010 } else if (cmd_match(buf, "failfast")) {
3011 set_bit(FailFast, &rdev->flags);
3012 err = 0;
3013 } else if (cmd_match(buf, "-failfast")) {
3014 clear_bit(FailFast, &rdev->flags);
3015 err = 0;
3016 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3017 !test_bit(Journal, &rdev->flags)) {
3018 if (rdev->mddev->pers == NULL) {
3019 clear_bit(In_sync, &rdev->flags);
3020 rdev->saved_raid_disk = rdev->raid_disk;
3021 rdev->raid_disk = -1;
3022 err = 0;
3023 }
3024 } else if (cmd_match(buf, "write_error")) {
3025 set_bit(WriteErrorSeen, &rdev->flags);
3026 err = 0;
3027 } else if (cmd_match(buf, "-write_error")) {
3028 clear_bit(WriteErrorSeen, &rdev->flags);
3029 err = 0;
3030 } else if (cmd_match(buf, "want_replacement")) {
3031
3032
3033
3034
3035 if (rdev->raid_disk >= 0 &&
3036 !test_bit(Journal, &rdev->flags) &&
3037 !test_bit(Replacement, &rdev->flags))
3038 set_bit(WantReplacement, &rdev->flags);
3039 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3040 md_wakeup_thread(rdev->mddev->thread);
3041 err = 0;
3042 } else if (cmd_match(buf, "-want_replacement")) {
3043
3044
3045
3046 err = 0;
3047 clear_bit(WantReplacement, &rdev->flags);
3048 } else if (cmd_match(buf, "replacement")) {
3049
3050
3051
3052
3053 if (rdev->mddev->pers)
3054 err = -EBUSY;
3055 else {
3056 set_bit(Replacement, &rdev->flags);
3057 err = 0;
3058 }
3059 } else if (cmd_match(buf, "-replacement")) {
3060
3061 if (rdev->mddev->pers)
3062 err = -EBUSY;
3063 else {
3064 clear_bit(Replacement, &rdev->flags);
3065 err = 0;
3066 }
3067 } else if (cmd_match(buf, "re-add")) {
3068 if (!rdev->mddev->pers)
3069 err = -EINVAL;
3070 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3071 rdev->saved_raid_disk >= 0) {
3072
3073
3074
3075
3076
3077
3078 if (!mddev_is_clustered(rdev->mddev) ||
3079 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3080 clear_bit(Faulty, &rdev->flags);
3081 err = add_bound_rdev(rdev);
3082 }
3083 } else
3084 err = -EBUSY;
3085 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3086 set_bit(ExternalBbl, &rdev->flags);
3087 rdev->badblocks.shift = 0;
3088 err = 0;
3089 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3090 clear_bit(ExternalBbl, &rdev->flags);
3091 err = 0;
3092 }
3093 if (!err)
3094 sysfs_notify_dirent_safe(rdev->sysfs_state);
3095 return err ? err : len;
3096}
3097static struct rdev_sysfs_entry rdev_state =
3098__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3099
3100static ssize_t
3101errors_show(struct md_rdev *rdev, char *page)
3102{
3103 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3104}
3105
3106static ssize_t
3107errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3108{
3109 unsigned int n;
3110 int rv;
3111
3112 rv = kstrtouint(buf, 10, &n);
3113 if (rv < 0)
3114 return rv;
3115 atomic_set(&rdev->corrected_errors, n);
3116 return len;
3117}
3118static struct rdev_sysfs_entry rdev_errors =
3119__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3120
3121static ssize_t
3122slot_show(struct md_rdev *rdev, char *page)
3123{
3124 if (test_bit(Journal, &rdev->flags))
3125 return sprintf(page, "journal\n");
3126 else if (rdev->raid_disk < 0)
3127 return sprintf(page, "none\n");
3128 else
3129 return sprintf(page, "%d\n", rdev->raid_disk);
3130}
3131
3132static ssize_t
3133slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3134{
3135 int slot;
3136 int err;
3137
3138 if (test_bit(Journal, &rdev->flags))
3139 return -EBUSY;
3140 if (strncmp(buf, "none", 4)==0)
3141 slot = -1;
3142 else {
3143 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3144 if (err < 0)
3145 return err;
3146 }
3147 if (rdev->mddev->pers && slot == -1) {
3148 /* Setting 'slot' on an active array requires also
3149 * updating the 'rd%d' link, and communicating
3150 * with the personality with ->hot_*_disk.
3151 * For now we only support removing
3152 * failed/spare devices.  This normally happens automatically,
3153 * but not when the metadata is externally managed.
3154 */
3155 if (rdev->raid_disk == -1)
3156 return -EEXIST;
3157 /* personality does all needed checks */
3158 if (rdev->mddev->pers->hot_remove_disk == NULL)
3159 return -EINVAL;
3160 clear_bit(Blocked, &rdev->flags);
3161 remove_and_add_spares(rdev->mddev, rdev);
3162 if (rdev->raid_disk >= 0)
3163 return -EBUSY;
3164 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3165 md_wakeup_thread(rdev->mddev->thread);
3166 } else if (rdev->mddev->pers) {
3167 /* Activating a spare .. or possibly reactivating
3168 * if we ever get bitmaps working here.
3169 */
3170 int err;
3171
3172 if (rdev->raid_disk != -1)
3173 return -EBUSY;
3174
3175 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3176 return -EBUSY;
3177
3178 if (rdev->mddev->pers->hot_add_disk == NULL)
3179 return -EINVAL;
3180
3181 if (slot >= rdev->mddev->raid_disks &&
3182 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3183 return -ENOSPC;
3184
3185 rdev->raid_disk = slot;
3186 if (test_bit(In_sync, &rdev->flags))
3187 rdev->saved_raid_disk = slot;
3188 else
3189 rdev->saved_raid_disk = -1;
3190 clear_bit(In_sync, &rdev->flags);
3191 clear_bit(Bitmap_sync, &rdev->flags);
3192 err = rdev->mddev->pers->
3193 hot_add_disk(rdev->mddev, rdev);
3194 if (err) {
3195 rdev->raid_disk = -1;
3196 return err;
3197 } else
3198 sysfs_notify_dirent_safe(rdev->sysfs_state);
3199 if (sysfs_link_rdev(rdev->mddev, rdev))
3200 /* failure here is OK */;
3201 /* don't wake anyone up, leave that to userspace. */
3202 } else {
3203 if (slot >= rdev->mddev->raid_disks &&
3204 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3205 return -ENOSPC;
3206 rdev->raid_disk = slot;
3207 /* assume it is working */
3208 clear_bit(Faulty, &rdev->flags);
3209 clear_bit(WriteMostly, &rdev->flags);
3210 set_bit(In_sync, &rdev->flags);
3211 sysfs_notify_dirent_safe(rdev->sysfs_state);
3212 }
3213 return len;
3214}
3215
3216static struct rdev_sysfs_entry rdev_slot =
3217__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3218
3219static ssize_t
3220offset_show(struct md_rdev *rdev, char *page)
3221{
3222 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3223}
3224
3225static ssize_t
3226offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3227{
3228 unsigned long long offset;
3229 if (kstrtoull(buf, 10, &offset) < 0)
3230 return -EINVAL;
3231 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3232 return -EBUSY;
3233 if (rdev->sectors && rdev->mddev->external)
3234 /* Must set offset before size, so overlap checks
3235 * can be sane */
3236 return -EBUSY;
3237 rdev->data_offset = offset;
3238 rdev->new_data_offset = offset;
3239 return len;
3240}
3241
3242static struct rdev_sysfs_entry rdev_offset =
3243__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3244
3245static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3246{
3247 return sprintf(page, "%llu\n",
3248 (unsigned long long)rdev->new_data_offset);
3249}
3250
3251static ssize_t new_offset_store(struct md_rdev *rdev,
3252 const char *buf, size_t len)
3253{
3254 unsigned long long new_offset;
3255 struct mddev *mddev = rdev->mddev;
3256
3257 if (kstrtoull(buf, 10, &new_offset) < 0)
3258 return -EINVAL;
3259
3260 if (mddev->sync_thread ||
3261 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3262 return -EBUSY;
3263 if (new_offset == rdev->data_offset)
3264 /* reset is always permitted */
3265 ;
3266 else if (new_offset > rdev->data_offset) {
3267 /* must not push array size beyond rdev->sectors */
3268 if (new_offset - rdev->data_offset
3269 + mddev->dev_sectors > rdev->sectors)
3270 return -E2BIG;
3271 }
3272 /* Metadata worries about other space details. */
3273
3274 /* decreasing the offset is inconsistent with a backwards
3275 * reshape.
3276 */
3277 if (new_offset < rdev->data_offset &&
3278 mddev->reshape_backwards)
3279 return -EINVAL;
3280
3281 /* Increasing the offset is inconsistent with a forwards
3282 * reshape - reshape_direction should be set to 'backwards' first.
3283 */
3284 if (new_offset > rdev->data_offset &&
3285 !mddev->reshape_backwards)
3286 return -EINVAL;
3287
3288 if (mddev->pers && mddev->persistent &&
3289 !super_types[mddev->major_version]
3290 .allow_new_offset(rdev, new_offset))
3291 return -E2BIG;
3292 rdev->new_data_offset = new_offset;
3293 if (new_offset > rdev->data_offset)
3294 mddev->reshape_backwards = 1;
3295 else if (new_offset < rdev->data_offset)
3296 mddev->reshape_backwards = 0;
3297
3298 return len;
3299}
3300static struct rdev_sysfs_entry rdev_new_offset =
3301__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3302
3303static ssize_t
3304rdev_size_show(struct md_rdev *rdev, char *page)
3305{
3306 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3307}
3308
3309static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3310{
3311 /* check if two start/length pairs overlap */
3312 if (s1+l1 <= s2)
3313 return 0;
3314 if (s2+l2 <= s1)
3315 return 0;
3316 return 1;
3317}
3318
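/*
 * Descriptive note (added): parse a size given in 1K blocks from sysfs and
 * convert it to 512-byte sectors, rejecting values that would overflow
 * sector_t.
 */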
3319static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3320{
3321 unsigned long long blocks;
3322 sector_t new;
3323
3324 if (kstrtoull(buf, 10, &blocks) < 0)
3325 return -EINVAL;
3326
3327 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3328 return -EINVAL;
3329
3330 new = blocks * 2;
3331 if (new != blocks * 2)
3332 return -EINVAL;
3333
3334 *sectors = new;
3335 return 0;
3336}
3337
3338static ssize_t
3339rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3340{
3341 struct mddev *my_mddev = rdev->mddev;
3342 sector_t oldsectors = rdev->sectors;
3343 sector_t sectors;
3344
3345 if (test_bit(Journal, &rdev->flags))
3346 return -EBUSY;
3347 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3348 return -EINVAL;
3349 if (rdev->data_offset != rdev->new_data_offset)
3350 return -EINVAL;
3351 if (my_mddev->pers && rdev->raid_disk >= 0) {
3352 if (my_mddev->persistent) {
3353 sectors = super_types[my_mddev->major_version].
3354 rdev_size_change(rdev, sectors);
3355 if (!sectors)
3356 return -EBUSY;
3357 } else if (!sectors)
3358 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3359 rdev->data_offset;
3360 if (!my_mddev->pers->resize)
3361 /* Cannot change size for RAID0 or Linear etc */
3362 return -EINVAL;
3363 }
3364 if (sectors < my_mddev->dev_sectors)
3365 return -EINVAL;
3366
3367 rdev->sectors = sectors;
3368 if (sectors > oldsectors && my_mddev->external) {
3369 /* Need to check that all other rdevs with the same
3370 * ->bdev do not overlap.  'rcu' is sufficient to walk
3371 * the rdev lists safely.
3372 * This check does not provide a hard guarantee, it
3373 * just helps avoid dangerous mistakes.
3374 */
3375 struct mddev *mddev;
3376 int overlap = 0;
3377 struct list_head *tmp;
3378
3379 rcu_read_lock();
3380 for_each_mddev(mddev, tmp) {
3381 struct md_rdev *rdev2;
3382
3383 rdev_for_each(rdev2, mddev)
3384 if (rdev->bdev == rdev2->bdev &&
3385 rdev != rdev2 &&
3386 overlaps(rdev->data_offset, rdev->sectors,
3387 rdev2->data_offset,
3388 rdev2->sectors)) {
3389 overlap = 1;
3390 break;
3391 }
3392 if (overlap) {
3393 mddev_put(mddev);
3394 break;
3395 }
3396 }
3397 rcu_read_unlock();
3398 if (overlap) {
3399 /* Someone else could have slipped in a size
3400 * change here, but doing so is just silly.
3401 * We put oldsectors back because we *know* it is
3402 * safe, and trust userspace not to race with
3403 * itself
3404 */
3405 rdev->sectors = oldsectors;
3406 return -EBUSY;
3407 }
3408 }
3409 return len;
3410}
3411
3412static struct rdev_sysfs_entry rdev_size =
3413__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3414
3415static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3416{
3417 unsigned long long recovery_start = rdev->recovery_offset;
3418
3419 if (test_bit(In_sync, &rdev->flags) ||
3420 recovery_start == MaxSector)
3421 return sprintf(page, "none\n");
3422
3423 return sprintf(page, "%llu\n", recovery_start);
3424}
3425
3426static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3427{
3428 unsigned long long recovery_start;
3429
3430 if (cmd_match(buf, "none"))
3431 recovery_start = MaxSector;
3432 else if (kstrtoull(buf, 10, &recovery_start))
3433 return -EINVAL;
3434
3435 if (rdev->mddev->pers &&
3436 rdev->raid_disk >= 0)
3437 return -EBUSY;
3438
3439 rdev->recovery_offset = recovery_start;
3440 if (recovery_start == MaxSector)
3441 set_bit(In_sync, &rdev->flags);
3442 else
3443 clear_bit(In_sync, &rdev->flags);
3444 return len;
3445}
3446
3447static struct rdev_sysfs_entry rdev_recovery_start =
3448__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3449
3450/*
3451 * sysfs access to the bad-blocks list.
3452 * We present two files.
3453 * 'bad_blocks' lists sector numbers and lengths of ranges that
3454 *    are recorded as bad.  The list is truncated to fit within
3455 *    the one-page limit of sysfs.  Writing "sector length" to
3456 *    this file adds an acknowledged bad block.
3457 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
3458 *    been acknowledged.  Writing to this file adds bad blocks
3459 *    without acknowledging them.  This is largely for testing.
3460 */
3461static ssize_t bb_show(struct md_rdev *rdev, char *page)
3462{
3463 return badblocks_show(&rdev->badblocks, page, 0);
3464}
3465static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3466{
3467 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3468
3469 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3470 wake_up(&rdev->blocked_wait);
3471 return rv;
3472}
3473static struct rdev_sysfs_entry rdev_bad_blocks =
3474__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3475
3476static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3477{
3478 return badblocks_show(&rdev->badblocks, page, 1);
3479}
3480static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3481{
3482 return badblocks_store(&rdev->badblocks, page, len, 1);
3483}
3484static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3485__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3486
3487static ssize_t
3488ppl_sector_show(struct md_rdev *rdev, char *page)
3489{
3490 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3491}
3492
3493static ssize_t
3494ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3495{
3496 unsigned long long sector;
3497
3498 if (kstrtoull(buf, 10, &sector) < 0)
3499 return -EINVAL;
3500 if (sector != (sector_t)sector)
3501 return -EINVAL;
3502
3503 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3504 rdev->raid_disk >= 0)
3505 return -EBUSY;
3506
3507 if (rdev->mddev->persistent) {
3508 if (rdev->mddev->major_version == 0)
3509 return -EINVAL;
3510 if ((sector > rdev->sb_start &&
3511 sector - rdev->sb_start > S16_MAX) ||
3512 (sector < rdev->sb_start &&
3513 rdev->sb_start - sector > -S16_MIN))
3514 return -EINVAL;
3515 rdev->ppl.offset = sector - rdev->sb_start;
3516 } else if (!rdev->mddev->external) {
3517 return -EBUSY;
3518 }
3519 rdev->ppl.sector = sector;
3520 return len;
3521}
3522
3523static struct rdev_sysfs_entry rdev_ppl_sector =
3524__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3525
3526static ssize_t
3527ppl_size_show(struct md_rdev *rdev, char *page)
3528{
3529 return sprintf(page, "%u\n", rdev->ppl.size);
3530}
3531
3532static ssize_t
3533ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3534{
3535 unsigned int size;
3536
3537 if (kstrtouint(buf, 10, &size) < 0)
3538 return -EINVAL;
3539
3540 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3541 rdev->raid_disk >= 0)
3542 return -EBUSY;
3543
3544 if (rdev->mddev->persistent) {
3545 if (rdev->mddev->major_version == 0)
3546 return -EINVAL;
3547 if (size > U16_MAX)
3548 return -EINVAL;
3549 } else if (!rdev->mddev->external) {
3550 return -EBUSY;
3551 }
3552 rdev->ppl.size = size;
3553 return len;
3554}
3555
3556static struct rdev_sysfs_entry rdev_ppl_size =
3557__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3558
3559static struct attribute *rdev_default_attrs[] = {
3560 &rdev_state.attr,
3561 &rdev_errors.attr,
3562 &rdev_slot.attr,
3563 &rdev_offset.attr,
3564 &rdev_new_offset.attr,
3565 &rdev_size.attr,
3566 &rdev_recovery_start.attr,
3567 &rdev_bad_blocks.attr,
3568 &rdev_unack_bad_blocks.attr,
3569 &rdev_ppl_sector.attr,
3570 &rdev_ppl_size.attr,
3571 NULL,
3572};
3573static ssize_t
3574rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3575{
3576 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3577 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3578
3579 if (!entry->show)
3580 return -EIO;
3581 if (!rdev->mddev)
3582 return -ENODEV;
3583 return entry->show(rdev, page);
3584}
3585
3586static ssize_t
3587rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3588 const char *page, size_t length)
3589{
3590 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3591 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3592 ssize_t rv;
3593 struct mddev *mddev = rdev->mddev;
3594
3595 if (!entry->store)
3596 return -EIO;
3597 if (!capable(CAP_SYS_ADMIN))
3598 return -EACCES;
3599 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3600 if (!rv) {
3601 if (rdev->mddev == NULL)
3602 rv = -ENODEV;
3603 else
3604 rv = entry->store(rdev, page, length);
3605 mddev_unlock(mddev);
3606 }
3607 return rv;
3608}
3609
3610static void rdev_free(struct kobject *ko)
3611{
3612 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3613 kfree(rdev);
3614}
3615static const struct sysfs_ops rdev_sysfs_ops = {
3616 .show = rdev_attr_show,
3617 .store = rdev_attr_store,
3618};
3619static struct kobj_type rdev_ktype = {
3620 .release = rdev_free,
3621 .sysfs_ops = &rdev_sysfs_ops,
3622 .default_attrs = rdev_default_attrs,
3623};
3624
3625int md_rdev_init(struct md_rdev *rdev)
3626{
3627 rdev->desc_nr = -1;
3628 rdev->saved_raid_disk = -1;
3629 rdev->raid_disk = -1;
3630 rdev->flags = 0;
3631 rdev->data_offset = 0;
3632 rdev->new_data_offset = 0;
3633 rdev->sb_events = 0;
3634 rdev->last_read_error = 0;
3635 rdev->sb_loaded = 0;
3636 rdev->bb_page = NULL;
3637 atomic_set(&rdev->nr_pending, 0);
3638 atomic_set(&rdev->read_errors, 0);
3639 atomic_set(&rdev->corrected_errors, 0);
3640
3641 INIT_LIST_HEAD(&rdev->same_set);
3642 init_waitqueue_head(&rdev->blocked_wait);
3643
3644 /* Add space to store the bad block list.
3645 * This reserves the space even on arrays where it cannot
3646 * be used - I wonder if that matters
3647 */
3648 return badblocks_init(&rdev->badblocks, 0);
3649}
3650EXPORT_SYMBOL_GPL(md_rdev_init);
3651
3652/*
3653 * Import a device. If 'super_format' >= 0, then sanity check the superblock
3654 *
3655 * mark the device faulty if:
3656 *   - the device is nonexistent (zero size)
3657 *   - the device has no valid superblock
3658 *
3659 * a faulty rdev _never_ has rdev->sb set.
3660 */
3661static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3662{
3663 char b[BDEVNAME_SIZE];
3664 int err;
3665 struct md_rdev *rdev;
3666 sector_t size;
3667
3668 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3669 if (!rdev)
3670 return ERR_PTR(-ENOMEM);
3671
3672 err = md_rdev_init(rdev);
3673 if (err)
3674 goto abort_free;
3675 err = alloc_disk_sb(rdev);
3676 if (err)
3677 goto abort_free;
3678
3679 err = lock_rdev(rdev, newdev, super_format == -2);
3680 if (err)
3681 goto abort_free;
3682
3683 kobject_init(&rdev->kobj, &rdev_ktype);
3684
3685 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3686 if (!size) {
3687 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3688 bdevname(rdev->bdev,b));
3689 err = -EINVAL;
3690 goto abort_free;
3691 }
3692
3693 if (super_format >= 0) {
3694 err = super_types[super_format].
3695 load_super(rdev, NULL, super_minor);
3696 if (err == -EINVAL) {
3697 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3698 bdevname(rdev->bdev,b),
3699 super_format, super_minor);
3700 goto abort_free;
3701 }
3702 if (err < 0) {
3703 pr_warn("md: could not read %s's sb, not importing!\n",
3704 bdevname(rdev->bdev,b));
3705 goto abort_free;
3706 }
3707 }
3708
3709 return rdev;
3710
3711abort_free:
3712 if (rdev->bdev)
3713 unlock_rdev(rdev);
3714 md_rdev_clear(rdev);
3715 kfree(rdev);
3716 return ERR_PTR(err);
3717}
3718
3719
3720/*
3721 * Check a full RAID array for plausibility
3722 */
3723static int analyze_sbs(struct mddev *mddev)
3724{
3725 int i;
3726 struct md_rdev *rdev, *freshest, *tmp;
3727 char b[BDEVNAME_SIZE];
3728
3729 freshest = NULL;
3730 rdev_for_each_safe(rdev, tmp, mddev)
3731 switch (super_types[mddev->major_version].
3732 load_super(rdev, freshest, mddev->minor_version)) {
3733 case 1:
3734 freshest = rdev;
3735 break;
3736 case 0:
3737 break;
3738 default:
3739 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3740 bdevname(rdev->bdev,b));
3741 md_kick_rdev_from_array(rdev);
3742 }
3743
3744 /* Cannot find a valid fresh disk */
3745 if (!freshest) {
3746 pr_warn("md: cannot find a valid disk\n");
3747 return -EINVAL;
3748 }
3749
3750 super_types[mddev->major_version].
3751 validate_super(mddev, freshest);
3752
3753 i = 0;
3754 rdev_for_each_safe(rdev, tmp, mddev) {
3755 if (mddev->max_disks &&
3756 (rdev->desc_nr >= mddev->max_disks ||
3757 i > mddev->max_disks)) {
3758 pr_warn("md: %s: %s: only %d devices permitted\n",
3759 mdname(mddev), bdevname(rdev->bdev, b),
3760 mddev->max_disks);
3761 md_kick_rdev_from_array(rdev);
3762 continue;
3763 }
3764 if (rdev != freshest) {
3765 if (super_types[mddev->major_version].
3766 validate_super(mddev, rdev)) {
3767 pr_warn("md: kicking non-fresh %s from array!\n",
3768 bdevname(rdev->bdev,b));
3769 md_kick_rdev_from_array(rdev);
3770 continue;
3771 }
3772 }
3773 if (mddev->level == LEVEL_MULTIPATH) {
3774 rdev->desc_nr = i++;
3775 rdev->raid_disk = rdev->desc_nr;
3776 set_bit(In_sync, &rdev->flags);
3777 } else if (rdev->raid_disk >=
3778 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3779 !test_bit(Journal, &rdev->flags)) {
3780 rdev->raid_disk = -1;
3781 clear_bit(In_sync, &rdev->flags);
3782 }
3783 }
3784
3785 return 0;
3786}
3787
3788/* Read a fixed-point number.
3789 * Numbers in sysfs attributes should be in "standard" units where
3790 * possible, so time should be in seconds.
3791 * However we internally use a much smaller unit such as
3792 * milliseconds or jiffies.
3793 * This function takes a decimal number with a possible fractional
3794 * component, and produces an integer which is the result of
3795 * multiplying that number by 10^'scale',
3796 * all without any floating-point arithmetic.
3797 */
3798int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3799{
3800 unsigned long result = 0;
3801 long decimals = -1;
3802 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3803 if (*cp == '.')
3804 decimals = 0;
3805 else if (decimals < scale) {
3806 unsigned int value;
3807 value = *cp - '0';
3808 result = result * 10 + value;
3809 if (decimals >= 0)
3810 decimals++;
3811 }
3812 cp++;
3813 }
3814 if (*cp == '\n')
3815 cp++;
3816 if (*cp)
3817 return -EINVAL;
3818 if (decimals < 0)
3819 decimals = 0;
3820 *res = result * int_pow(10, scale - decimals);
3821 return 0;
3822}
3823
3824static ssize_t
3825safe_delay_show(struct mddev *mddev, char *page)
3826{
3827 int msec = (mddev->safemode_delay*1000)/HZ;
3828 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3829}
3830static ssize_t
3831safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3832{
3833 unsigned long msec;
3834
3835 if (mddev_is_clustered(mddev)) {
3836 pr_warn("md: Safemode is disabled for clustered mode\n");
3837 return -EINVAL;
3838 }
3839
3840 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3841 return -EINVAL;
3842 if (msec == 0)
3843 mddev->safemode_delay = 0;
3844 else {
3845 unsigned long old_delay = mddev->safemode_delay;
3846 unsigned long new_delay = (msec*HZ)/1000;
3847
3848 if (new_delay == 0)
3849 new_delay = 1;
3850 mddev->safemode_delay = new_delay;
3851 if (new_delay < old_delay || old_delay == 0)
3852 mod_timer(&mddev->safemode_timer, jiffies+1);
3853 }
3854 return len;
3855}
3856static struct md_sysfs_entry md_safe_delay =
3857__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3858
3859static ssize_t
3860level_show(struct mddev *mddev, char *page)
3861{
3862 struct md_personality *p;
3863 int ret;
3864 spin_lock(&mddev->lock);
3865 p = mddev->pers;
3866 if (p)
3867 ret = sprintf(page, "%s\n", p->name);
3868 else if (mddev->clevel[0])
3869 ret = sprintf(page, "%s\n", mddev->clevel);
3870 else if (mddev->level != LEVEL_NONE)
3871 ret = sprintf(page, "%d\n", mddev->level);
3872 else
3873 ret = 0;
3874 spin_unlock(&mddev->lock);
3875 return ret;
3876}
3877
3878static ssize_t
3879level_store(struct mddev *mddev, const char *buf, size_t len)
3880{
3881 char clevel[16];
3882 ssize_t rv;
3883 size_t slen = len;
3884 struct md_personality *pers, *oldpers;
3885 long level;
3886 void *priv, *oldpriv;
3887 struct md_rdev *rdev;
3888
3889 if (slen == 0 || slen >= sizeof(clevel))
3890 return -EINVAL;
3891
3892 rv = mddev_lock(mddev);
3893 if (rv)
3894 return rv;
3895
3896 if (mddev->pers == NULL) {
3897 strncpy(mddev->clevel, buf, slen);
3898 if (mddev->clevel[slen-1] == '\n')
3899 slen--;
3900 mddev->clevel[slen] = 0;
3901 mddev->level = LEVEL_NONE;
3902 rv = len;
3903 goto out_unlock;
3904 }
3905 rv = -EROFS;
3906 if (mddev->ro)
3907 goto out_unlock;
3908
3909 /* request to change the personality.  Need to ensure:
3910 *  - array is not engaged in resync/recovery/reshape
3911 *  - old personality can be suspended
3912 *  - new personality will access other array.
3913 */
3914
3915 rv = -EBUSY;
3916 if (mddev->sync_thread ||
3917 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3918 mddev->reshape_position != MaxSector ||
3919 mddev->sysfs_active)
3920 goto out_unlock;
3921
3922 rv = -EINVAL;
3923 if (!mddev->pers->quiesce) {
3924 pr_warn("md: %s: %s does not support online personality change\n",
3925 mdname(mddev), mddev->pers->name);
3926 goto out_unlock;
3927 }
3928
3929 /* Now find the new personality */
3930 strncpy(clevel, buf, slen);
3931 if (clevel[slen-1] == '\n')
3932 slen--;
3933 clevel[slen] = 0;
3934 if (kstrtol(clevel, 10, &level))
3935 level = LEVEL_NONE;
3936
3937 if (request_module("md-%s", clevel) != 0)
3938 request_module("md-level-%s", clevel);
3939 spin_lock(&pers_lock);
3940 pers = find_pers(level, clevel);
3941 if (!pers || !try_module_get(pers->owner)) {
3942 spin_unlock(&pers_lock);
3943 pr_warn("md: personality %s not loaded\n", clevel);
3944 rv = -EINVAL;
3945 goto out_unlock;
3946 }
3947 spin_unlock(&pers_lock);
3948
3949 if (pers == mddev->pers) {
3950 /* Nothing to do! */
3951 module_put(pers->owner);
3952 rv = len;
3953 goto out_unlock;
3954 }
3955 if (!pers->takeover) {
3956 module_put(pers->owner);
3957 pr_warn("md: %s: %s does not support personality takeover\n",
3958 mdname(mddev), clevel);
3959 rv = -EINVAL;
3960 goto out_unlock;
3961 }
3962
3963 rdev_for_each(rdev, mddev)
3964 rdev->new_raid_disk = rdev->raid_disk;
3965
3966 /* ->takeover must set new_* and/or delta_disks
3967 * if it succeeds, and may set them when it fails.
3968 */
3969 priv = pers->takeover(mddev);
3970 if (IS_ERR(priv)) {
3971 mddev->new_level = mddev->level;
3972 mddev->new_layout = mddev->layout;
3973 mddev->new_chunk_sectors = mddev->chunk_sectors;
3974 mddev->raid_disks -= mddev->delta_disks;
3975 mddev->delta_disks = 0;
3976 mddev->reshape_backwards = 0;
3977 module_put(pers->owner);
3978 pr_warn("md: %s: %s would not accept array\n",
3979 mdname(mddev), clevel);
3980 rv = PTR_ERR(priv);
3981 goto out_unlock;
3982 }
3983
3984
3985 mddev_suspend(mddev);
3986 mddev_detach(mddev);
3987
3988 spin_lock(&mddev->lock);
3989 oldpers = mddev->pers;
3990 oldpriv = mddev->private;
3991 mddev->pers = pers;
3992 mddev->private = priv;
3993 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3994 mddev->level = mddev->new_level;
3995 mddev->layout = mddev->new_layout;
3996 mddev->chunk_sectors = mddev->new_chunk_sectors;
3997 mddev->delta_disks = 0;
3998 mddev->reshape_backwards = 0;
3999 mddev->degraded = 0;
4000 spin_unlock(&mddev->lock);
4001
4002 if (oldpers->sync_request == NULL &&
4003 mddev->external) {
4004 /* We are converting from a no-redundancy array
4005 * to a redundancy array and metadata is managed
4006 * externally so we need to be sure that writes
4007 * won't block due to a need to transition
4008 *      clean->dirty
4009 * until external management is started.
4010 */
4011 mddev->in_sync = 0;
4012 mddev->safemode_delay = 0;
4013 mddev->safemode = 0;
4014 }
4015
4016 oldpers->free(mddev, oldpriv);
4017
4018 if (oldpers->sync_request == NULL &&
4019 pers->sync_request != NULL) {
4020
4021 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4022 pr_warn("md: cannot register extra attributes for %s\n",
4023 mdname(mddev));
4024 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4025 }
4026 if (oldpers->sync_request != NULL &&
4027 pers->sync_request == NULL) {
4028 /* need to remove the md_redundancy_group */
4029 if (mddev->to_remove == NULL)
4030 mddev->to_remove = &md_redundancy_group;
4031 }
4032
4033 module_put(oldpers->owner);
4034
4035 rdev_for_each(rdev, mddev) {
4036 if (rdev->raid_disk < 0)
4037 continue;
4038 if (rdev->new_raid_disk >= mddev->raid_disks)
4039 rdev->new_raid_disk = -1;
4040 if (rdev->new_raid_disk == rdev->raid_disk)
4041 continue;
4042 sysfs_unlink_rdev(mddev, rdev);
4043 }
4044 rdev_for_each(rdev, mddev) {
4045 if (rdev->raid_disk < 0)
4046 continue;
4047 if (rdev->new_raid_disk == rdev->raid_disk)
4048 continue;
4049 rdev->raid_disk = rdev->new_raid_disk;
4050 if (rdev->raid_disk < 0)
4051 clear_bit(In_sync, &rdev->flags);
4052 else {
4053 if (sysfs_link_rdev(mddev, rdev))
4054 pr_warn("md: cannot register rd%d for %s after level change\n",
4055 rdev->raid_disk, mdname(mddev));
4056 }
4057 }
4058
4059 if (pers->sync_request == NULL) {
4060 /* this is now an array without redundancy, so
4061 * it must always be in_sync
4062 */
4063 mddev->in_sync = 1;
4064 del_timer_sync(&mddev->safemode_timer);
4065 }
4066 blk_set_stacking_limits(&mddev->queue->limits);
4067 pers->run(mddev);
4068 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4069 mddev_resume(mddev);
4070 if (!mddev->thread)
4071 md_update_sb(mddev, 1);
4072 sysfs_notify(&mddev->kobj, NULL, "level");
4073 md_new_event(mddev);
4074 rv = len;
4075out_unlock:
4076 mddev_unlock(mddev);
4077 return rv;
4078}
4079
4080static struct md_sysfs_entry md_level =
4081__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4082
4083static ssize_t
4084layout_show(struct mddev *mddev, char *page)
4085{
4086 /* just a number, not meaningful for all levels */
4087 if (mddev->reshape_position != MaxSector &&
4088 mddev->layout != mddev->new_layout)
4089 return sprintf(page, "%d (%d)\n",
4090 mddev->new_layout, mddev->layout);
4091 return sprintf(page, "%d\n", mddev->layout);
4092}
4093
4094static ssize_t
4095layout_store(struct mddev *mddev, const char *buf, size_t len)
4096{
4097 unsigned int n;
4098 int err;
4099
4100 err = kstrtouint(buf, 10, &n);
4101 if (err < 0)
4102 return err;
4103 err = mddev_lock(mddev);
4104 if (err)
4105 return err;
4106
4107 if (mddev->pers) {
4108 if (mddev->pers->check_reshape == NULL)
4109 err = -EBUSY;
4110 else if (mddev->ro)
4111 err = -EROFS;
4112 else {
4113 mddev->new_layout = n;
4114 err = mddev->pers->check_reshape(mddev);
4115 if (err)
4116 mddev->new_layout = mddev->layout;
4117 }
4118 } else {
4119 mddev->new_layout = n;
4120 if (mddev->reshape_position == MaxSector)
4121 mddev->layout = n;
4122 }
4123 mddev_unlock(mddev);
4124 return err ?: len;
4125}
4126static struct md_sysfs_entry md_layout =
4127__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4128
4129static ssize_t
4130raid_disks_show(struct mddev *mddev, char *page)
4131{
4132 if (mddev->raid_disks == 0)
4133 return 0;
4134 if (mddev->reshape_position != MaxSector &&
4135 mddev->delta_disks != 0)
4136 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4137 mddev->raid_disks - mddev->delta_disks);
4138 return sprintf(page, "%d\n", mddev->raid_disks);
4139}
4140
4141static int update_raid_disks(struct mddev *mddev, int raid_disks);
4142
4143static ssize_t
4144raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4145{
4146 unsigned int n;
4147 int err;
4148
4149 err = kstrtouint(buf, 10, &n);
4150 if (err < 0)
4151 return err;
4152
4153 err = mddev_lock(mddev);
4154 if (err)
4155 return err;
4156 if (mddev->pers)
4157 err = update_raid_disks(mddev, n);
4158 else if (mddev->reshape_position != MaxSector) {
4159 struct md_rdev *rdev;
4160 int olddisks = mddev->raid_disks - mddev->delta_disks;
4161
4162 err = -EINVAL;
4163 rdev_for_each(rdev, mddev) {
4164 if (olddisks < n &&
4165 rdev->data_offset < rdev->new_data_offset)
4166 goto out_unlock;
4167 if (olddisks > n &&
4168 rdev->data_offset > rdev->new_data_offset)
4169 goto out_unlock;
4170 }
4171 err = 0;
4172 mddev->delta_disks = n - olddisks;
4173 mddev->raid_disks = n;
4174 mddev->reshape_backwards = (mddev->delta_disks < 0);
4175 } else
4176 mddev->raid_disks = n;
4177out_unlock:
4178 mddev_unlock(mddev);
4179 return err ? err : len;
4180}
4181static struct md_sysfs_entry md_raid_disks =
4182__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4183
4184static ssize_t
4185chunk_size_show(struct mddev *mddev, char *page)
4186{
4187 if (mddev->reshape_position != MaxSector &&
4188 mddev->chunk_sectors != mddev->new_chunk_sectors)
4189 return sprintf(page, "%d (%d)\n",
4190 mddev->new_chunk_sectors << 9,
4191 mddev->chunk_sectors << 9);
4192 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4193}
4194
4195static ssize_t
4196chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4197{
4198 unsigned long n;
4199 int err;
4200
4201 err = kstrtoul(buf, 10, &n);
4202 if (err < 0)
4203 return err;
4204
4205 err = mddev_lock(mddev);
4206 if (err)
4207 return err;
4208 if (mddev->pers) {
4209 if (mddev->pers->check_reshape == NULL)
4210 err = -EBUSY;
4211 else if (mddev->ro)
4212 err = -EROFS;
4213 else {
4214 mddev->new_chunk_sectors = n >> 9;
4215 err = mddev->pers->check_reshape(mddev);
4216 if (err)
4217 mddev->new_chunk_sectors = mddev->chunk_sectors;
4218 }
4219 } else {
4220 mddev->new_chunk_sectors = n >> 9;
4221 if (mddev->reshape_position == MaxSector)
4222 mddev->chunk_sectors = n >> 9;
4223 }
4224 mddev_unlock(mddev);
4225 return err ?: len;
4226}
4227static struct md_sysfs_entry md_chunk_size =
4228__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4229
4230static ssize_t
4231resync_start_show(struct mddev *mddev, char *page)
4232{
4233 if (mddev->recovery_cp == MaxSector)
4234 return sprintf(page, "none\n");
4235 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4236}
4237
4238static ssize_t
4239resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4240{
4241 unsigned long long n;
4242 int err;
4243
4244 if (cmd_match(buf, "none"))
4245 n = MaxSector;
4246 else {
4247 err = kstrtoull(buf, 10, &n);
4248 if (err < 0)
4249 return err;
4250 if (n != (sector_t)n)
4251 return -EINVAL;
4252 }
4253
4254 err = mddev_lock(mddev);
4255 if (err)
4256 return err;
4257 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4258 err = -EBUSY;
4259
4260 if (!err) {
4261 mddev->recovery_cp = n;
4262 if (mddev->pers)
4263 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4264 }
4265 mddev_unlock(mddev);
4266 return err ?: len;
4267}
4268static struct md_sysfs_entry md_resync_start =
4269__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4270 resync_start_show, resync_start_store);
4271
4272/*
4273 * The array state can be:
4274 *
4275 * clear
4276 *     No devices, no size, no level
4277 *     Equivalent to STOP_ARRAY ioctl
4278 * inactive
4279 *     May have some settings and devices, but array is not active
4280 *        all IO results in error
4281 *     When written, doesn't tear down array, but just stops it
4282 * suspended (not supported yet)
4283 *     All IO requests will block. The array can be reconfigured.
4284 *     Writing this, if accepted, will block until array is quiescent
4285 * readonly
4286 *     no resync can happen.  no superblocks get written.
4287 *     write requests fail
4288 * read-auto
4289 *     like readonly, but behaves like 'clean' on a write request.
4290 *
4291 * clean - no pending writes, but otherwise active.
4292 *     When written to inactive array, starts without resync
4293 *     If a write request arrives then
4294 *       if metadata is known, mark 'dirty' and switch to 'active'.
4295 *       if not known, block and switch to write-pending
4296 *     If written to an active array that has pending writes, then fails.
4297 * active
4298 *     fully active: IO and resync can be happening.
4299 *     When written to inactive array, starts with resync
4300 *
4301 * write-pending
4302 *     clean, but writes are blocked waiting for 'active' to be written.
4303 *
4304 * active-idle
4305 *     like active, but no writes have been seen for a while (100msec).
4306 *
4307 * broken
4308 *     RAID0/LINEAR-only: same as clean, but the array is missing a member.
4309 *     It's useful because RAID0/LINEAR mounted arrays aren't stopped
4310 *     when a member is gone, so this state will at least alert the
4311 *     user that something is wrong.
4312 */
4313enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4314 write_pending, active_idle, broken, bad_word};
4315static char *array_states[] = {
4316 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4317 "write-pending", "active-idle", "broken", NULL };
4318
4319static int match_word(const char *word, char **list)
4320{
4321 int n;
4322 for (n=0; list[n]; n++)
4323 if (cmd_match(word, list[n]))
4324 break;
4325 return n;
4326}
4327
4328static ssize_t
4329array_state_show(struct mddev *mddev, char *page)
4330{
4331 enum array_state st = inactive;
4332
4333 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4334 switch(mddev->ro) {
4335 case 1:
4336 st = readonly;
4337 break;
4338 case 2:
4339 st = read_auto;
4340 break;
4341 case 0:
4342 spin_lock(&mddev->lock);
4343 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4344 st = write_pending;
4345 else if (mddev->in_sync)
4346 st = clean;
4347 else if (mddev->safemode)
4348 st = active_idle;
4349 else
4350 st = active;
4351 spin_unlock(&mddev->lock);
4352 }
4353
4354 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4355 st = broken;
4356 } else {
4357 if (list_empty(&mddev->disks) &&
4358 mddev->raid_disks == 0 &&
4359 mddev->dev_sectors == 0)
4360 st = clear;
4361 else
4362 st = inactive;
4363 }
4364 return sprintf(page, "%s\n", array_states[st]);
4365}
4366
4367static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4368static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4369static int do_md_run(struct mddev *mddev);
4370static int restart_array(struct mddev *mddev);
4371
4372static ssize_t
4373array_state_store(struct mddev *mddev, const char *buf, size_t len)
4374{
4375 int err = 0;
4376 enum array_state st = match_word(buf, array_states);
4377
4378 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4379 /* don't take reconfig_mutex when toggling between
4380 * clean and active
4381 */
4382 spin_lock(&mddev->lock);
4383 if (st == active) {
4384 restart_array(mddev);
4385 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4386 md_wakeup_thread(mddev->thread);
4387 wake_up(&mddev->sb_wait);
4388 } else {
4389 restart_array(mddev);
4390 if (!set_in_sync(mddev))
4391 err = -EBUSY;
4392 }
4393 if (!err)
4394 sysfs_notify_dirent_safe(mddev->sysfs_state);
4395 spin_unlock(&mddev->lock);
4396 return err ?: len;
4397 }
4398 err = mddev_lock(mddev);
4399 if (err)
4400 return err;
4401 err = -EINVAL;
4402 switch(st) {
4403 case bad_word:
4404 break;
4405 case clear:
4406
4407 err = do_md_stop(mddev, 0, NULL);
4408 break;
4409 case inactive:
4410
4411 if (mddev->pers)
4412 err = do_md_stop(mddev, 2, NULL);
4413 else
4414 err = 0;
4415 break;
4416 case suspended:
4417 break;
4418 case readonly:
4419 if (mddev->pers)
4420 err = md_set_readonly(mddev, NULL);
4421 else {
4422 mddev->ro = 1;
4423 set_disk_ro(mddev->gendisk, 1);
4424 err = do_md_run(mddev);
4425 }
4426 break;
4427 case read_auto:
4428 if (mddev->pers) {
4429 if (mddev->ro == 0)
4430 err = md_set_readonly(mddev, NULL);
4431 else if (mddev->ro == 1)
4432 err = restart_array(mddev);
4433 if (err == 0) {
4434 mddev->ro = 2;
4435 set_disk_ro(mddev->gendisk, 0);
4436 }
4437 } else {
4438 mddev->ro = 2;
4439 err = do_md_run(mddev);
4440 }
4441 break;
4442 case clean:
4443 if (mddev->pers) {
4444 err = restart_array(mddev);
4445 if (err)
4446 break;
4447 spin_lock(&mddev->lock);
4448 if (!set_in_sync(mddev))
4449 err = -EBUSY;
4450 spin_unlock(&mddev->lock);
4451 } else
4452 err = -EINVAL;
4453 break;
4454 case active:
4455 if (mddev->pers) {
4456 err = restart_array(mddev);
4457 if (err)
4458 break;
4459 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4460 wake_up(&mddev->sb_wait);
4461 err = 0;
4462 } else {
4463 mddev->ro = 0;
4464 set_disk_ro(mddev->gendisk, 0);
4465 err = do_md_run(mddev);
4466 }
4467 break;
4468 case write_pending:
4469 case active_idle:
4470 case broken:
4471 /* these cannot be set directly */
4472 break;
4473 }
4474
4475 if (!err) {
4476 if (mddev->hold_active == UNTIL_IOCTL)
4477 mddev->hold_active = 0;
4478 sysfs_notify_dirent_safe(mddev->sysfs_state);
4479 }
4480 mddev_unlock(mddev);
4481 return err ?: len;
4482}
4483static struct md_sysfs_entry md_array_state =
4484__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4485
4486static ssize_t
4487max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4488 return sprintf(page, "%d\n",
4489 atomic_read(&mddev->max_corr_read_errors));
4490}
4491
4492static ssize_t
4493max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4494{
4495 unsigned int n;
4496 int rv;
4497
4498 rv = kstrtouint(buf, 10, &n);
4499 if (rv < 0)
4500 return rv;
4501 atomic_set(&mddev->max_corr_read_errors, n);
4502 return len;
4503}
4504
4505static struct md_sysfs_entry max_corr_read_errors =
4506__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4507 max_corrected_read_errors_store);
4508
4509static ssize_t
4510null_show(struct mddev *mddev, char *page)
4511{
4512 return -EINVAL;
4513}
4514
4515static ssize_t
4516new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4517{
4518 /* buf must be "%d:%d" giving the major and minor numbers
4519 * of the device to add.
4520 * If the array uses persistent metadata, the new device's
4521 * superblock is loaded against the existing members before
4522 * the device is bound to the array.
4523 */
4524
4525 char *e;
4526 int major = simple_strtoul(buf, &e, 10);
4527 int minor;
4528 dev_t dev;
4529 struct md_rdev *rdev;
4530 int err;
4531
4532 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4533 return -EINVAL;
4534 minor = simple_strtoul(e+1, &e, 10);
4535 if (*e && *e != '\n')
4536 return -EINVAL;
4537 dev = MKDEV(major, minor);
4538 if (major != MAJOR(dev) ||
4539 minor != MINOR(dev))
4540 return -EOVERFLOW;
4541
4542 flush_workqueue(md_misc_wq);
4543
4544 err = mddev_lock(mddev);
4545 if (err)
4546 return err;
4547 if (mddev->persistent) {
4548 rdev = md_import_device(dev, mddev->major_version,
4549 mddev->minor_version);
4550 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4551 struct md_rdev *rdev0
4552 = list_entry(mddev->disks.next,
4553 struct md_rdev, same_set);
4554 err = super_types[mddev->major_version]
4555 .load_super(rdev, rdev0, mddev->minor_version);
4556 if (err < 0)
4557 goto out;
4558 }
4559 } else if (mddev->external)
4560 rdev = md_import_device(dev, -2, -1);
4561 else
4562 rdev = md_import_device(dev, -1, -1);
4563
4564 if (IS_ERR(rdev)) {
4565 mddev_unlock(mddev);
4566 return PTR_ERR(rdev);
4567 }
4568 err = bind_rdev_to_array(rdev, mddev);
4569 out:
4570 if (err)
4571 export_rdev(rdev);
4572 mddev_unlock(mddev);
4573 if (!err)
4574 md_new_event(mddev);
4575 return err ? err : len;
4576}
4577
4578static struct md_sysfs_entry md_new_device =
4579__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
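/*
 * new_dev is write-only and expects "major:minor", e.g.
 * "echo 8:32 > /sys/block/md0/md/new_dev" to attach that block device
 * (example values; any valid major:minor pair works).
 */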
4580
4581static ssize_t
4582bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4583{
4584 char *end;
4585 unsigned long chunk, end_chunk;
4586 int err;
4587
4588 err = mddev_lock(mddev);
4589 if (err)
4590 return err;
4591 if (!mddev->bitmap)
4592 goto out;
4593
4594 while (*buf) {
4595 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4596 if (buf == end) break;
4597 if (*end == '-') {
4598 buf = end + 1;
4599 end_chunk = simple_strtoul(buf, &end, 0);
4600 if (buf == end) break;
4601 }
4602 if (*end && !isspace(*end)) break;
4603 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4604 buf = skip_spaces(end);
4605 }
4606 md_bitmap_unplug(mddev->bitmap);
4607out:
4608 mddev_unlock(mddev);
4609 return len;
4610}
4611
4612static struct md_sysfs_entry md_bitmap =
4613__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
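/*
 * bitmap_set_bits accepts chunk numbers or "start-end" ranges, whitespace
 * separated, and dirties those bitmap chunks, e.g.
 * "echo 0-100 > bitmap_set_bits" (illustrative range).
 */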
4614
4615static ssize_t
4616size_show(struct mddev *mddev, char *page)
4617{
4618 return sprintf(page, "%llu\n",
4619 (unsigned long long)mddev->dev_sectors / 2);
4620}
4621
4622static int update_size(struct mddev *mddev, sector_t num_sectors);
4623
4624static ssize_t
4625size_store(struct mddev *mddev, const char *buf, size_t len)
4626{
	/*
	 * If the array is inactive, we can only reduce the component size
	 * (except when growing from 0); if it is active we attempt an
	 * on-line resize via update_size().
	 */
4631 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4633
4634 if (err < 0)
4635 return err;
4636 err = mddev_lock(mddev);
4637 if (err)
4638 return err;
4639 if (mddev->pers) {
4640 err = update_size(mddev, sectors);
4641 if (err == 0)
4642 md_update_sb(mddev, 1);
4643 } else {
4644 if (mddev->dev_sectors == 0 ||
4645 mddev->dev_sectors > sectors)
4646 mddev->dev_sectors = sectors;
4647 else
4648 err = -ENOSPC;
4649 }
4650 mddev_unlock(mddev);
4651 return err ? err : len;
4652}
4653
4654static struct md_sysfs_entry md_size =
4655__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
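/*
 * component_size is reported and set in units of 1K blocks (the handlers above
 * convert to/from 512-byte sectors), so "echo 1048576 > component_size" would
 * request 1GiB per device - an illustrative value only.
 */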
4656
/* Metadata version:
 * This is one of
 *   'none' for arrays with no metadata,
 *   'external:<name>' for arrays with externally managed metadata,
 * or N.M for internally known formats.
 */
4663static ssize_t
4664metadata_show(struct mddev *mddev, char *page)
4665{
4666 if (mddev->persistent)
4667 return sprintf(page, "%d.%d\n",
4668 mddev->major_version, mddev->minor_version);
4669 else if (mddev->external)
4670 return sprintf(page, "external:%s\n", mddev->metadata_type);
4671 else
4672 return sprintf(page, "none\n");
4673}
4674
4675static ssize_t
4676metadata_store(struct mddev *mddev, const char *buf, size_t len)
4677{
4678 int major, minor;
4679 char *e;
4680 int err;
4681
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4686 err = mddev_lock(mddev);
4687 if (err)
4688 return err;
4689 err = -EBUSY;
4690 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4691 ;
4692 else if (!list_empty(&mddev->disks))
4693 goto out_unlock;
4694
4695 err = 0;
4696 if (cmd_match(buf, "none")) {
4697 mddev->persistent = 0;
4698 mddev->external = 0;
4699 mddev->major_version = 0;
4700 mddev->minor_version = 90;
4701 goto out_unlock;
4702 }
4703 if (strncmp(buf, "external:", 9) == 0) {
4704 size_t namelen = len-9;
4705 if (namelen >= sizeof(mddev->metadata_type))
4706 namelen = sizeof(mddev->metadata_type)-1;
4707 strncpy(mddev->metadata_type, buf+9, namelen);
4708 mddev->metadata_type[namelen] = 0;
4709 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4710 mddev->metadata_type[--namelen] = 0;
4711 mddev->persistent = 0;
4712 mddev->external = 1;
4713 mddev->major_version = 0;
4714 mddev->minor_version = 90;
4715 goto out_unlock;
4716 }
4717 major = simple_strtoul(buf, &e, 10);
4718 err = -EINVAL;
4719 if (e==buf || *e != '.')
4720 goto out_unlock;
4721 buf = e+1;
4722 minor = simple_strtoul(buf, &e, 10);
4723 if (e==buf || (*e && *e != '\n') )
4724 goto out_unlock;
4725 err = -ENOENT;
4726 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4727 goto out_unlock;
4728 mddev->major_version = major;
4729 mddev->minor_version = minor;
4730 mddev->persistent = 1;
4731 mddev->external = 0;
4732 err = 0;
4733out_unlock:
4734 mddev_unlock(mddev);
4735 return err ?: len;
4736}
4737
4738static struct md_sysfs_entry md_metadata =
4739__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
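/*
 * metadata_version accepts "none", "external:<name>", or "<major>.<minor>"
 * (e.g. "1.2"); anything other than tweaking an "external:" name is only
 * allowed while no member devices are attached (see the checks above).
 */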
4740
4741static ssize_t
4742action_show(struct mddev *mddev, char *page)
4743{
4744 char *type = "idle";
4745 unsigned long recovery = mddev->recovery;
4746 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4747 type = "frozen";
4748 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4749 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4750 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4751 type = "reshape";
4752 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4753 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4754 type = "resync";
4755 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4756 type = "check";
4757 else
4758 type = "repair";
4759 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4760 type = "recover";
4761 else if (mddev->reshape_position != MaxSector)
4762 type = "reshape";
4763 }
4764 return sprintf(page, "%s\n", type);
4765}
4766
4767static ssize_t
4768action_store(struct mddev *mddev, const char *page, size_t len)
4769{
4770 if (!mddev->pers || !mddev->pers->sync_request)
4771 return -EINVAL;
4772
4773
4774 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4775 if (cmd_match(page, "frozen"))
4776 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4777 else
4778 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4779 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4780 mddev_lock(mddev) == 0) {
4781 flush_workqueue(md_misc_wq);
4782 if (mddev->sync_thread) {
4783 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4784 md_reap_sync_thread(mddev);
4785 }
4786 mddev_unlock(mddev);
4787 }
4788 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4789 return -EBUSY;
4790 else if (cmd_match(page, "resync"))
4791 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4792 else if (cmd_match(page, "recover")) {
4793 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4794 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4795 } else if (cmd_match(page, "reshape")) {
4796 int err;
4797 if (mddev->pers->start_reshape == NULL)
4798 return -EINVAL;
4799 err = mddev_lock(mddev);
4800 if (!err) {
4801 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4802 err = -EBUSY;
4803 else {
4804 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4805 err = mddev->pers->start_reshape(mddev);
4806 }
4807 mddev_unlock(mddev);
4808 }
4809 if (err)
4810 return err;
4811 sysfs_notify(&mddev->kobj, NULL, "degraded");
4812 } else {
4813 if (cmd_match(page, "check"))
4814 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4815 else if (!cmd_match(page, "repair"))
4816 return -EINVAL;
4817 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4818 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4819 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4820 }
4821 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * clearing read-auto mode.
		 */
4825 mddev->ro = 0;
4826 md_wakeup_thread(mddev->sync_thread);
4827 }
4828 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4829 md_wakeup_thread(mddev->thread);
4830 sysfs_notify_dirent_safe(mddev->sysfs_action);
4831 return len;
4832}
4833
4834static struct md_sysfs_entry md_scan_mode =
4835__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
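/*
 * Typical use: "echo check > /sys/block/md0/md/sync_action" starts a
 * read-only scrub, while "repair" is meant to also rewrite mismatches;
 * the result is reported via mismatch_cnt below.  (Paths shown for
 * illustration.)
 */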
4836
4837static ssize_t
4838last_sync_action_show(struct mddev *mddev, char *page)
4839{
4840 return sprintf(page, "%s\n", mddev->last_sync_action);
4841}
4842
4843static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4844
4845static ssize_t
4846mismatch_cnt_show(struct mddev *mddev, char *page)
4847{
4848 return sprintf(page, "%llu\n",
4849 (unsigned long long)
4850 atomic64_read(&mddev->resync_mismatches));
4851}
4852
4853static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4854
4855static ssize_t
4856sync_min_show(struct mddev *mddev, char *page)
4857{
4858 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4859 mddev->sync_speed_min ? "local": "system");
4860}
4861
4862static ssize_t
4863sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4864{
4865 unsigned int min;
4866 int rv;
4867
4868 if (strncmp(buf, "system", 6)==0) {
4869 min = 0;
4870 } else {
4871 rv = kstrtouint(buf, 10, &min);
4872 if (rv < 0)
4873 return rv;
4874 if (min == 0)
4875 return -EINVAL;
4876 }
4877 mddev->sync_speed_min = min;
4878 return len;
4879}
4880
4881static struct md_sysfs_entry md_sync_min =
4882__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4883
4884static ssize_t
4885sync_max_show(struct mddev *mddev, char *page)
4886{
4887 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4888 mddev->sync_speed_max ? "local": "system");
4889}
4890
4891static ssize_t
4892sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4893{
4894 unsigned int max;
4895 int rv;
4896
4897 if (strncmp(buf, "system", 6)==0) {
4898 max = 0;
4899 } else {
4900 rv = kstrtouint(buf, 10, &max);
4901 if (rv < 0)
4902 return rv;
4903 if (max == 0)
4904 return -EINVAL;
4905 }
4906 mddev->sync_speed_max = max;
4907 return len;
4908}
4909
4910static struct md_sysfs_entry md_sync_max =
4911__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
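/*
 * sync_speed_min/sync_speed_max override the system-wide sysctl limits for
 * this array; writing "system" reverts to the global value.  Per the md
 * documentation the values are in KiB/sec (stated here as a reminder, not
 * enforced in this file).
 */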
4912
4913static ssize_t
4914degraded_show(struct mddev *mddev, char *page)
4915{
4916 return sprintf(page, "%d\n", mddev->degraded);
4917}
4918static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4919
4920static ssize_t
4921sync_force_parallel_show(struct mddev *mddev, char *page)
4922{
4923 return sprintf(page, "%d\n", mddev->parallel_resync);
4924}
4925
4926static ssize_t
4927sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4928{
4929 long n;
4930
4931 if (kstrtol(buf, 10, &n))
4932 return -EINVAL;
4933
4934 if (n != 0 && n != 1)
4935 return -EINVAL;
4936
4937 mddev->parallel_resync = n;
4938
4939 if (mddev->sync_thread)
4940 wake_up(&resync_wait);
4941
4942 return len;
4943}
4944
4945
4946static struct md_sysfs_entry md_sync_force_parallel =
4947__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4948 sync_force_parallel_show, sync_force_parallel_store);
4949
4950static ssize_t
4951sync_speed_show(struct mddev *mddev, char *page)
4952{
4953 unsigned long resync, dt, db;
4954 if (mddev->curr_resync == 0)
4955 return sprintf(page, "none\n");
4956 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4957 dt = (jiffies - mddev->resync_mark) / HZ;
4958 if (!dt) dt++;
4959 db = resync - mddev->resync_mark_cnt;
4960 return sprintf(page, "%lu\n", db/dt/2);
4961}
4962
4963static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4964
4965static ssize_t
4966sync_completed_show(struct mddev *mddev, char *page)
4967{
4968 unsigned long long max_sectors, resync;
4969
4970 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4971 return sprintf(page, "none\n");
4972
4973 if (mddev->curr_resync == 1 ||
4974 mddev->curr_resync == 2)
4975 return sprintf(page, "delayed\n");
4976
4977 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4978 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4979 max_sectors = mddev->resync_max_sectors;
4980 else
4981 max_sectors = mddev->dev_sectors;
4982
4983 resync = mddev->curr_resync_completed;
4984 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4985}
4986
4987static struct md_sysfs_entry md_sync_completed =
4988 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4989
4990static ssize_t
4991min_sync_show(struct mddev *mddev, char *page)
4992{
4993 return sprintf(page, "%llu\n",
4994 (unsigned long long)mddev->resync_min);
4995}
4996static ssize_t
4997min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4998{
4999 unsigned long long min;
5000 int err;
5001
5002 if (kstrtoull(buf, 10, &min))
5003 return -EINVAL;
5004
5005 spin_lock(&mddev->lock);
5006 err = -EINVAL;
5007 if (min > mddev->resync_max)
5008 goto out_unlock;
5009
5010 err = -EBUSY;
5011 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5012 goto out_unlock;
5013
	/* Round down to a multiple of 4K (8 sectors) for safety */
5015 mddev->resync_min = round_down(min, 8);
5016 err = 0;
5017
5018out_unlock:
5019 spin_unlock(&mddev->lock);
5020 return err ?: len;
5021}
5022
5023static struct md_sysfs_entry md_min_sync =
5024__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5025
5026static ssize_t
5027max_sync_show(struct mddev *mddev, char *page)
5028{
5029 if (mddev->resync_max == MaxSector)
5030 return sprintf(page, "max\n");
5031 else
5032 return sprintf(page, "%llu\n",
5033 (unsigned long long)mddev->resync_max);
5034}
5035static ssize_t
5036max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5037{
5038 int err;
5039 spin_lock(&mddev->lock);
5040 if (strncmp(buf, "max", 3) == 0)
5041 mddev->resync_max = MaxSector;
5042 else {
5043 unsigned long long max;
5044 int chunk;
5045
5046 err = -EINVAL;
5047 if (kstrtoull(buf, 10, &max))
5048 goto out_unlock;
5049 if (max < mddev->resync_min)
5050 goto out_unlock;
5051
5052 err = -EBUSY;
5053 if (max < mddev->resync_max &&
5054 mddev->ro == 0 &&
5055 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5056 goto out_unlock;
5057
		/* must be a multiple of chunk_sectors */
5059 chunk = mddev->chunk_sectors;
5060 if (chunk) {
5061 sector_t temp = max;
5062
5063 err = -EINVAL;
5064 if (sector_div(temp, chunk))
5065 goto out_unlock;
5066 }
5067 mddev->resync_max = max;
5068 }
5069 wake_up(&mddev->recovery_wait);
5070 err = 0;
5071out_unlock:
5072 spin_unlock(&mddev->lock);
5073 return err ?: len;
5074}
5075
5076static struct md_sysfs_entry md_max_sync =
5077__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
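/*
 * sync_min/sync_max bound the resync window in sectors; writing "max" removes
 * the upper bound, and a numeric sync_max must be a multiple of the chunk
 * size (checked above).
 */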
5078
5079static ssize_t
5080suspend_lo_show(struct mddev *mddev, char *page)
5081{
5082 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5083}
5084
5085static ssize_t
5086suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5087{
5088 unsigned long long new;
5089 int err;
5090
5091 err = kstrtoull(buf, 10, &new);
5092 if (err < 0)
5093 return err;
5094 if (new != (sector_t)new)
5095 return -EINVAL;
5096
5097 err = mddev_lock(mddev);
5098 if (err)
5099 return err;
5100 err = -EINVAL;
5101 if (mddev->pers == NULL ||
5102 mddev->pers->quiesce == NULL)
5103 goto unlock;
5104 mddev_suspend(mddev);
5105 mddev->suspend_lo = new;
5106 mddev_resume(mddev);
5107
5108 err = 0;
5109unlock:
5110 mddev_unlock(mddev);
5111 return err ?: len;
5112}
5113static struct md_sysfs_entry md_suspend_lo =
5114__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5115
5116static ssize_t
5117suspend_hi_show(struct mddev *mddev, char *page)
5118{
5119 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5120}
5121
5122static ssize_t
5123suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5124{
5125 unsigned long long new;
5126 int err;
5127
5128 err = kstrtoull(buf, 10, &new);
5129 if (err < 0)
5130 return err;
5131 if (new != (sector_t)new)
5132 return -EINVAL;
5133
5134 err = mddev_lock(mddev);
5135 if (err)
5136 return err;
5137 err = -EINVAL;
5138 if (mddev->pers == NULL)
5139 goto unlock;
5140
5141 mddev_suspend(mddev);
5142 mddev->suspend_hi = new;
5143 mddev_resume(mddev);
5144
5145 err = 0;
5146unlock:
5147 mddev_unlock(mddev);
5148 return err ?: len;
5149}
5150static struct md_sysfs_entry md_suspend_hi =
5151__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
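/*
 * Writing suspend_lo or suspend_hi suspends the array around the update
 * (mddev_suspend()/mddev_resume() above) so the new bound takes effect while
 * no IO is in flight.
 */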
5152
5153static ssize_t
5154reshape_position_show(struct mddev *mddev, char *page)
5155{
5156 if (mddev->reshape_position != MaxSector)
5157 return sprintf(page, "%llu\n",
5158 (unsigned long long)mddev->reshape_position);
5159 strcpy(page, "none\n");
5160 return 5;
5161}
5162
5163static ssize_t
5164reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5165{
5166 struct md_rdev *rdev;
5167 unsigned long long new;
5168 int err;
5169
5170 err = kstrtoull(buf, 10, &new);
5171 if (err < 0)
5172 return err;
5173 if (new != (sector_t)new)
5174 return -EINVAL;
5175 err = mddev_lock(mddev);
5176 if (err)
5177 return err;
5178 err = -EBUSY;
5179 if (mddev->pers)
5180 goto unlock;
5181 mddev->reshape_position = new;
5182 mddev->delta_disks = 0;
5183 mddev->reshape_backwards = 0;
5184 mddev->new_level = mddev->level;
5185 mddev->new_layout = mddev->layout;
5186 mddev->new_chunk_sectors = mddev->chunk_sectors;
5187 rdev_for_each(rdev, mddev)
5188 rdev->new_data_offset = rdev->data_offset;
5189 err = 0;
5190unlock:
5191 mddev_unlock(mddev);
5192 return err ?: len;
5193}
5194
5195static struct md_sysfs_entry md_reshape_position =
5196__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5197 reshape_position_store);
5198
5199static ssize_t
5200reshape_direction_show(struct mddev *mddev, char *page)
5201{
5202 return sprintf(page, "%s\n",
5203 mddev->reshape_backwards ? "backwards" : "forwards");
5204}
5205
5206static ssize_t
5207reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5208{
5209 int backwards = 0;
5210 int err;
5211
5212 if (cmd_match(buf, "forwards"))
5213 backwards = 0;
5214 else if (cmd_match(buf, "backwards"))
5215 backwards = 1;
5216 else
5217 return -EINVAL;
5218 if (mddev->reshape_backwards == backwards)
5219 return len;
5220
5221 err = mddev_lock(mddev);
5222 if (err)
5223 return err;
5224
5225 if (mddev->delta_disks)
5226 err = -EBUSY;
5227 else if (mddev->persistent &&
5228 mddev->major_version == 0)
5229 err = -EINVAL;
5230 else
5231 mddev->reshape_backwards = backwards;
5232 mddev_unlock(mddev);
5233 return err ?: len;
5234}
5235
5236static struct md_sysfs_entry md_reshape_direction =
5237__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5238 reshape_direction_store);
5239
5240static ssize_t
5241array_size_show(struct mddev *mddev, char *page)
5242{
5243 if (mddev->external_size)
5244 return sprintf(page, "%llu\n",
5245 (unsigned long long)mddev->array_sectors/2);
5246 else
5247 return sprintf(page, "default\n");
5248}
5249
5250static ssize_t
5251array_size_store(struct mddev *mddev, const char *buf, size_t len)
5252{
5253 sector_t sectors;
5254 int err;
5255
5256 err = mddev_lock(mddev);
5257 if (err)
5258 return err;
5259
	/* Clustered arrays do not support changing array_sectors */
5261 if (mddev_is_clustered(mddev)) {
5262 mddev_unlock(mddev);
5263 return -EINVAL;
5264 }
5265
5266 if (strncmp(buf, "default", 7) == 0) {
5267 if (mddev->pers)
5268 sectors = mddev->pers->size(mddev, 0, 0);
5269 else
5270 sectors = mddev->array_sectors;
5271
5272 mddev->external_size = 0;
5273 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5275 err = -EINVAL;
5276 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5277 err = -E2BIG;
5278 else
5279 mddev->external_size = 1;
5280 }
5281
5282 if (!err) {
5283 mddev->array_sectors = sectors;
5284 if (mddev->pers) {
5285 set_capacity(mddev->gendisk, mddev->array_sectors);
5286 revalidate_disk(mddev->gendisk);
5287 }
5288 }
5289 mddev_unlock(mddev);
5290 return err ?: len;
5291}
5292
5293static struct md_sysfs_entry md_array_size =
5294__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5295 array_size_store);
5296
5297static ssize_t
5298consistency_policy_show(struct mddev *mddev, char *page)
5299{
5300 int ret;
5301
5302 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5303 ret = sprintf(page, "journal\n");
5304 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5305 ret = sprintf(page, "ppl\n");
5306 } else if (mddev->bitmap) {
5307 ret = sprintf(page, "bitmap\n");
5308 } else if (mddev->pers) {
5309 if (mddev->pers->sync_request)
5310 ret = sprintf(page, "resync\n");
5311 else
5312 ret = sprintf(page, "none\n");
5313 } else {
5314 ret = sprintf(page, "unknown\n");
5315 }
5316
5317 return ret;
5318}
5319
5320static ssize_t
5321consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5322{
5323 int err = 0;
5324
5325 if (mddev->pers) {
5326 if (mddev->pers->change_consistency_policy)
5327 err = mddev->pers->change_consistency_policy(mddev, buf);
5328 else
5329 err = -EBUSY;
5330 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5331 set_bit(MD_HAS_PPL, &mddev->flags);
5332 } else {
5333 err = -EINVAL;
5334 }
5335
5336 return err ? err : len;
5337}
5338
5339static struct md_sysfs_entry md_consistency_policy =
5340__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5341 consistency_policy_store);
5342
5343static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5344{
5345 return sprintf(page, "%d\n", mddev->fail_last_dev);
5346}
5347
/*
 * When fail_last_dev is set, the last remaining device in an array may be
 * marked Faulty instead of being protected (consulted by the mirroring
 * personalities).
 */
5352static ssize_t
5353fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5354{
5355 int ret;
5356 bool value;
5357
5358 ret = kstrtobool(buf, &value);
5359 if (ret)
5360 return ret;
5361
5362 if (value != mddev->fail_last_dev)
5363 mddev->fail_last_dev = value;
5364
5365 return len;
5366}
5367static struct md_sysfs_entry md_fail_last_dev =
5368__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5369 fail_last_dev_store);
5370
5371static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5372{
5373 if (mddev->pers == NULL || (mddev->pers->level != 1))
5374 return sprintf(page, "n/a\n");
5375 else
5376 return sprintf(page, "%d\n", mddev->serialize_policy);
5377}
5378
/*
 * When serialize_policy is set, overlapping writes are serialized through the
 * per-rdev serial pools created below; only meaningful for raid1.
 */
5383static ssize_t
5384serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5385{
5386 int err;
5387 bool value;
5388
5389 err = kstrtobool(buf, &value);
5390 if (err)
5391 return err;
5392
5393 if (value == mddev->serialize_policy)
5394 return len;
5395
5396 err = mddev_lock(mddev);
5397 if (err)
5398 return err;
5399 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5400 pr_err("md: serialize_policy is only effective for raid1\n");
5401 err = -EINVAL;
5402 goto unlock;
5403 }
5404
5405 mddev_suspend(mddev);
5406 if (value)
5407 mddev_create_serial_pool(mddev, NULL, true);
5408 else
5409 mddev_destroy_serial_pool(mddev, NULL, true);
5410 mddev->serialize_policy = value;
5411 mddev_resume(mddev);
5412unlock:
5413 mddev_unlock(mddev);
5414 return err ?: len;
5415}
5416
5417static struct md_sysfs_entry md_serialize_policy =
5418__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5419 serialize_policy_store);
5420
5421
5422static struct attribute *md_default_attrs[] = {
5423 &md_level.attr,
5424 &md_layout.attr,
5425 &md_raid_disks.attr,
5426 &md_chunk_size.attr,
5427 &md_size.attr,
5428 &md_resync_start.attr,
5429 &md_metadata.attr,
5430 &md_new_device.attr,
5431 &md_safe_delay.attr,
5432 &md_array_state.attr,
5433 &md_reshape_position.attr,
5434 &md_reshape_direction.attr,
5435 &md_array_size.attr,
5436 &max_corr_read_errors.attr,
5437 &md_consistency_policy.attr,
5438 &md_fail_last_dev.attr,
5439 &md_serialize_policy.attr,
5440 NULL,
5441};
5442
5443static struct attribute *md_redundancy_attrs[] = {
5444 &md_scan_mode.attr,
5445 &md_last_scan_mode.attr,
5446 &md_mismatches.attr,
5447 &md_sync_min.attr,
5448 &md_sync_max.attr,
5449 &md_sync_speed.attr,
5450 &md_sync_force_parallel.attr,
5451 &md_sync_completed.attr,
5452 &md_min_sync.attr,
5453 &md_max_sync.attr,
5454 &md_suspend_lo.attr,
5455 &md_suspend_hi.attr,
5456 &md_bitmap.attr,
5457 &md_degraded.attr,
5458 NULL,
5459};
5460static struct attribute_group md_redundancy_group = {
5461 .name = NULL,
5462 .attrs = md_redundancy_attrs,
5463};
5464
5465static ssize_t
5466md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5467{
5468 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5469 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5470 ssize_t rv;
5471
5472 if (!entry->show)
5473 return -EIO;
5474 spin_lock(&all_mddevs_lock);
5475 if (list_empty(&mddev->all_mddevs)) {
5476 spin_unlock(&all_mddevs_lock);
5477 return -EBUSY;
5478 }
5479 mddev_get(mddev);
5480 spin_unlock(&all_mddevs_lock);
5481
5482 rv = entry->show(mddev, page);
5483 mddev_put(mddev);
5484 return rv;
5485}
5486
5487static ssize_t
5488md_attr_store(struct kobject *kobj, struct attribute *attr,
5489 const char *page, size_t length)
5490{
5491 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5492 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5493 ssize_t rv;
5494
5495 if (!entry->store)
5496 return -EIO;
5497 if (!capable(CAP_SYS_ADMIN))
5498 return -EACCES;
5499 spin_lock(&all_mddevs_lock);
5500 if (list_empty(&mddev->all_mddevs)) {
5501 spin_unlock(&all_mddevs_lock);
5502 return -EBUSY;
5503 }
5504 mddev_get(mddev);
5505 spin_unlock(&all_mddevs_lock);
5506 rv = entry->store(mddev, page, length);
5507 mddev_put(mddev);
5508 return rv;
5509}
5510
5511static void md_free(struct kobject *ko)
5512{
5513 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5514
5515 if (mddev->sysfs_state)
5516 sysfs_put(mddev->sysfs_state);
5517
5518 if (mddev->gendisk)
5519 del_gendisk(mddev->gendisk);
5520 if (mddev->queue)
5521 blk_cleanup_queue(mddev->queue);
5522 if (mddev->gendisk)
5523 put_disk(mddev->gendisk);
5524 percpu_ref_exit(&mddev->writes_pending);
5525
5526 bioset_exit(&mddev->bio_set);
5527 bioset_exit(&mddev->sync_set);
5528 kfree(mddev);
5529}
5530
5531static const struct sysfs_ops md_sysfs_ops = {
5532 .show = md_attr_show,
5533 .store = md_attr_store,
5534};
5535static struct kobj_type md_ktype = {
5536 .release = md_free,
5537 .sysfs_ops = &md_sysfs_ops,
5538 .default_attrs = md_default_attrs,
5539};
5540
5541int mdp_major = 0;
5542
5543static void mddev_delayed_delete(struct work_struct *ws)
5544{
5545 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5546
5547 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5548 kobject_del(&mddev->kobj);
5549 kobject_put(&mddev->kobj);
5550}
5551
5552static void no_op(struct percpu_ref *r) {}
5553
5554int mddev_init_writes_pending(struct mddev *mddev)
5555{
5556 if (mddev->writes_pending.percpu_count_ptr)
5557 return 0;
5558 if (percpu_ref_init(&mddev->writes_pending, no_op,
5559 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5560 return -ENOMEM;
5561
5562 percpu_ref_put(&mddev->writes_pending);
5563 return 0;
5564}
5565EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5566
5567static int md_alloc(dev_t dev, char *name)
5568{
	/*
	 * If dev is zero, "name" gives the name of the array to allocate
	 * (an "md_*" name coming from the new_array module parameter).
	 * If dev is non-zero it must be a device number with a major of
	 * MD_MAJOR or mdp_major; "name", when given, requests a named,
	 * long-lived array (hold_active is adjusted below).
	 */
5578 static DEFINE_MUTEX(disks_mutex);
5579 struct mddev *mddev = mddev_find(dev);
5580 struct gendisk *disk;
5581 int partitioned;
5582 int shift;
5583 int unit;
5584 int error;
5585
5586 if (!mddev)
5587 return -ENODEV;
5588
5589 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5590 shift = partitioned ? MdpMinorShift : 0;
5591 unit = MINOR(mddev->unit) >> shift;
5592
	/* Wait for any previous instance of this device to be completely
	 * removed (the delayed delete work runs on md_misc_wq).
	 */
5596 flush_workqueue(md_misc_wq);
5597
5598 mutex_lock(&disks_mutex);
5599 error = -EEXIST;
5600 if (mddev->gendisk)
5601 goto abort;
5602
5603 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5606 struct mddev *mddev2;
5607 spin_lock(&all_mddevs_lock);
5608
5609 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5610 if (mddev2->gendisk &&
5611 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5612 spin_unlock(&all_mddevs_lock);
5613 goto abort;
5614 }
5615 spin_unlock(&all_mddevs_lock);
5616 }
5617 if (name && dev)
		/*
		 * A named array was requested explicitly, so keep it
		 * around until it is explicitly stopped.
		 */
5621 mddev->hold_active = UNTIL_STOP;
5622
5623 error = -ENOMEM;
5624 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5625 if (!mddev->queue)
5626 goto abort;
5627 mddev->queue->queuedata = mddev;
5628
5629 blk_queue_make_request(mddev->queue, md_make_request);
5630 blk_set_stacking_limits(&mddev->queue->limits);
5631
5632 disk = alloc_disk(1 << shift);
5633 if (!disk) {
5634 blk_cleanup_queue(mddev->queue);
5635 mddev->queue = NULL;
5636 goto abort;
5637 }
5638 disk->major = MAJOR(mddev->unit);
5639 disk->first_minor = unit << shift;
5640 if (name)
5641 strcpy(disk->disk_name, name);
5642 else if (partitioned)
5643 sprintf(disk->disk_name, "md_d%d", unit);
5644 else
5645 sprintf(disk->disk_name, "md%d", unit);
5646 disk->fops = &md_fops;
5647 disk->private_data = mddev;
5648 disk->queue = mddev->queue;
5649 blk_queue_write_cache(mddev->queue, true, true);
5650
	/* Allow extended partitions; this makes the separate 'mdp'
	 * (partitionable) major largely redundant.
	 */
5654 disk->flags |= GENHD_FL_EXT_DEVT;
5655 mddev->gendisk = disk;
5656
	/* As soon as we call add_disk(), another thread could get through
	 * to md_open, so make sure it doesn't get too far. */
5659 mutex_lock(&mddev->open_mutex);
5660 add_disk(disk);
5661
5662 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5663 if (error) {
		/* Not expected to fail; log it and carry on so the
		 * device itself remains usable.
		 */
5667 pr_debug("md: cannot register %s/md - name in use\n",
5668 disk->disk_name);
5669 error = 0;
5670 }
5671 if (mddev->kobj.sd &&
5672 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5673 pr_debug("pointless warning\n");
5674 mutex_unlock(&mddev->open_mutex);
5675 abort:
5676 mutex_unlock(&disks_mutex);
5677 if (!error && mddev->kobj.sd) {
5678 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5679 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5680 }
5681 mddev_put(mddev);
5682 return error;
5683}
5684
5685static struct kobject *md_probe(dev_t dev, int *part, void *data)
5686{
5687 if (create_on_open)
5688 md_alloc(dev, NULL);
5689 return NULL;
5690}
5691
5692static int add_named_array(const char *val, const struct kernel_param *kp)
5693{
	/*
	 * val must be "md_*" or "mdNNN".
	 * For "md_*" we allocate an array with the name set to val.
	 * For "mdNNN" we allocate an array with minor number NNN,
	 * which must not already be in use.
	 */
5701 int len = strlen(val);
5702 char buf[DISK_NAME_LEN];
5703 unsigned long devnum;
5704
5705 while (len && val[len-1] == '\n')
5706 len--;
5707 if (len >= DISK_NAME_LEN)
5708 return -E2BIG;
5709 strlcpy(buf, val, len+1);
5710 if (strncmp(buf, "md_", 3) == 0)
5711 return md_alloc(0, buf);
5712 if (strncmp(buf, "md", 2) == 0 &&
5713 isdigit(buf[2]) &&
5714 kstrtoul(buf+2, 10, &devnum) == 0 &&
5715 devnum <= MINORMASK)
5716 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5717
5718 return -EINVAL;
5719}
5720
5721static void md_safemode_timeout(struct timer_list *t)
5722{
5723 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5724
5725 mddev->safemode = 1;
5726 if (mddev->external)
5727 sysfs_notify_dirent_safe(mddev->sysfs_state);
5728
5729 md_wakeup_thread(mddev->thread);
5730}
5731
5732static int start_dirty_degraded;
5733
5734int md_run(struct mddev *mddev)
5735{
5736 int err;
5737 struct md_rdev *rdev;
5738 struct md_personality *pers;
5739
5740 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices */
5742 return -EINVAL;
5743
5744 if (mddev->pers)
5745 return -EBUSY;
5746
5747 if (mddev->sysfs_active)
5748 return -EBUSY;
5749
	/*
	 * Analyze all RAID superblock(s)
	 */
5753 if (!mddev->raid_disks) {
5754 if (!mddev->persistent)
5755 return -EINVAL;
5756 err = analyze_sbs(mddev);
5757 if (err)
5758 return -EINVAL;
5759 }
5760
5761 if (mddev->level != LEVEL_NONE)
5762 request_module("md-level-%d", mddev->level);
5763 else if (mddev->clevel[0])
5764 request_module("md-%s", mddev->clevel);
5765
	/*
	 * Drop all buffered data on the component devices; from now on the
	 * only valid external interface is through the md device.
	 */
5771 mddev->has_superblocks = false;
5772 rdev_for_each(rdev, mddev) {
5773 if (test_bit(Faulty, &rdev->flags))
5774 continue;
5775 sync_blockdev(rdev->bdev);
5776 invalidate_bdev(rdev->bdev);
5777 if (mddev->ro != 1 &&
5778 (bdev_read_only(rdev->bdev) ||
5779 bdev_read_only(rdev->meta_bdev))) {
5780 mddev->ro = 1;
5781 if (mddev->gendisk)
5782 set_disk_ro(mddev->gendisk, 1);
5783 }
5784
5785 if (rdev->sb_page)
5786 mddev->has_superblocks = true;
5787
		/* Perform some consistency tests on the device:
		 * the data must not overlap the metadata.
		 */
5792 if (rdev->meta_bdev) {
			; /* metadata lives on a separate device, nothing to check */
5794 } else if (rdev->data_offset < rdev->sb_start) {
5795 if (mddev->dev_sectors &&
5796 rdev->data_offset + mddev->dev_sectors
5797 > rdev->sb_start) {
5798 pr_warn("md: %s: data overlaps metadata\n",
5799 mdname(mddev));
5800 return -EINVAL;
5801 }
5802 } else {
5803 if (rdev->sb_start + rdev->sb_size/512
5804 > rdev->data_offset) {
5805 pr_warn("md: %s: metadata overlaps data\n",
5806 mdname(mddev));
5807 return -EINVAL;
5808 }
5809 }
5810 sysfs_notify_dirent_safe(rdev->sysfs_state);
5811 }
5812
5813 if (!bioset_initialized(&mddev->bio_set)) {
5814 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5815 if (err)
5816 return err;
5817 }
5818 if (!bioset_initialized(&mddev->sync_set)) {
5819 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5820 if (err)
5821 return err;
5822 }
5823
5824 spin_lock(&pers_lock);
5825 pers = find_pers(mddev->level, mddev->clevel);
5826 if (!pers || !try_module_get(pers->owner)) {
5827 spin_unlock(&pers_lock);
5828 if (mddev->level != LEVEL_NONE)
5829 pr_warn("md: personality for level %d is not loaded!\n",
5830 mddev->level);
5831 else
5832 pr_warn("md: personality for level %s is not loaded!\n",
5833 mddev->clevel);
5834 err = -EINVAL;
5835 goto abort;
5836 }
5837 spin_unlock(&pers_lock);
5838 if (mddev->level != pers->level) {
5839 mddev->level = pers->level;
5840 mddev->new_level = pers->level;
5841 }
5842 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5843
5844 if (mddev->reshape_position != MaxSector &&
5845 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5847 module_put(pers->owner);
5848 err = -EINVAL;
5849 goto abort;
5850 }
5851
5852 if (pers->sync_request) {
		/* Warn if this looks like a silly configuration:
		 * multiple members on the same physical disk.
		 */
5856 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5857 struct md_rdev *rdev2;
5858 int warned = 0;
5859
5860 rdev_for_each(rdev, mddev)
5861 rdev_for_each(rdev2, mddev) {
5862 if (rdev < rdev2 &&
5863 rdev->bdev->bd_contains ==
5864 rdev2->bdev->bd_contains) {
5865 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5866 mdname(mddev),
5867 bdevname(rdev->bdev,b),
5868 bdevname(rdev2->bdev,b2));
5869 warned = 1;
5870 }
5871 }
5872
5873 if (warned)
5874 pr_warn("True protection against single-disk failure might be compromised.\n");
5875 }
5876
5877 mddev->recovery = 0;
	/* may be over-ridden by the personality */
5879 mddev->resync_max_sectors = mddev->dev_sectors;
5880
5881 mddev->ok_start_degraded = start_dirty_degraded;
5882
5883 if (start_readonly && mddev->ro == 0)
5884 mddev->ro = 2;
5885
5886 err = pers->run(mddev);
5887 if (err)
5888 pr_warn("md: pers->run() failed ...\n");
5889 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5890 WARN_ONCE(!mddev->external_size,
5891 "%s: default size too small, but 'external_size' not in effect?\n",
5892 __func__);
5893 pr_warn("md: invalid array_size %llu > default size %llu\n",
5894 (unsigned long long)mddev->array_sectors / 2,
5895 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5896 err = -EINVAL;
5897 }
5898 if (err == 0 && pers->sync_request &&
5899 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5900 struct bitmap *bitmap;
5901
5902 bitmap = md_bitmap_create(mddev, -1);
5903 if (IS_ERR(bitmap)) {
5904 err = PTR_ERR(bitmap);
5905 pr_warn("%s: failed to create bitmap (%d)\n",
5906 mdname(mddev), err);
5907 } else
5908 mddev->bitmap = bitmap;
5909
5910 }
5911 if (err)
5912 goto bitmap_abort;
5913
5914 if (mddev->bitmap_info.max_write_behind > 0) {
5915 bool create_pool = false;
5916
5917 rdev_for_each(rdev, mddev) {
5918 if (test_bit(WriteMostly, &rdev->flags) &&
5919 rdev_init_serial(rdev))
5920 create_pool = true;
5921 }
5922 if (create_pool && mddev->serial_info_pool == NULL) {
5923 mddev->serial_info_pool =
5924 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
5925 sizeof(struct serial_info));
5926 if (!mddev->serial_info_pool) {
5927 err = -ENOMEM;
5928 goto bitmap_abort;
5929 }
5930 }
5931 }
5932
5933 if (mddev->queue) {
5934 bool nonrot = true;
5935
5936 rdev_for_each(rdev, mddev) {
5937 if (rdev->raid_disk >= 0 &&
5938 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5939 nonrot = false;
5940 break;
5941 }
5942 }
5943 if (mddev->degraded)
5944 nonrot = false;
5945 if (nonrot)
5946 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5947 else
5948 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5949 mddev->queue->backing_dev_info->congested_data = mddev;
5950 mddev->queue->backing_dev_info->congested_fn = md_congested;
5951 }
5952 if (pers->sync_request) {
5953 if (mddev->kobj.sd &&
5954 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5955 pr_warn("md: cannot register extra attributes for %s\n",
5956 mdname(mddev));
5957 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5958 } else if (mddev->ro == 2)
5959 mddev->ro = 0;
5960
5961 atomic_set(&mddev->max_corr_read_errors,
5962 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5963 mddev->safemode = 0;
5964 if (mddev_is_clustered(mddev))
5965 mddev->safemode_delay = 0;
5966 else
5967 mddev->safemode_delay = (200 * HZ)/1000 +1;
5968 mddev->in_sync = 1;
5969 smp_wmb();
5970 spin_lock(&mddev->lock);
5971 mddev->pers = pers;
5972 spin_unlock(&mddev->lock);
5973 rdev_for_each(rdev, mddev)
5974 if (rdev->raid_disk >= 0)
5975 sysfs_link_rdev(mddev, rdev);
5976
5977 if (mddev->degraded && !mddev->ro)
		/* Make 'recovering' status visible immediately via sysfs,
		 * until the recovery thread confirms whether spares exist.
		 */
5981 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5982 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5983
5984 if (mddev->sb_flags)
5985 md_update_sb(mddev, 0);
5986
5987 md_new_event(mddev);
5988 return 0;
5989
5990bitmap_abort:
5991 mddev_detach(mddev);
5992 if (mddev->private)
5993 pers->free(mddev, mddev->private);
5994 mddev->private = NULL;
5995 module_put(pers->owner);
5996 md_bitmap_destroy(mddev);
5997abort:
5998 bioset_exit(&mddev->bio_set);
5999 bioset_exit(&mddev->sync_set);
6000 return err;
6001}
6002EXPORT_SYMBOL_GPL(md_run);
6003
6004static int do_md_run(struct mddev *mddev)
6005{
6006 int err;
6007
6008 set_bit(MD_NOT_READY, &mddev->flags);
6009 err = md_run(mddev);
6010 if (err)
6011 goto out;
6012 err = md_bitmap_load(mddev);
6013 if (err) {
6014 md_bitmap_destroy(mddev);
6015 goto out;
6016 }
6017
6018 if (mddev_is_clustered(mddev))
6019 md_allow_write(mddev);
6020
	/* run start-up tasks that require md_thread */
6022 md_start(mddev);
6023
6024 md_wakeup_thread(mddev->thread);
6025 md_wakeup_thread(mddev->sync_thread);
6026
6027 set_capacity(mddev->gendisk, mddev->array_sectors);
6028 revalidate_disk(mddev->gendisk);
6029 clear_bit(MD_NOT_READY, &mddev->flags);
6030 mddev->changed = 1;
6031 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6032 sysfs_notify_dirent_safe(mddev->sysfs_state);
6033 sysfs_notify_dirent_safe(mddev->sysfs_action);
6034 sysfs_notify(&mddev->kobj, NULL, "degraded");
6035out:
6036 clear_bit(MD_NOT_READY, &mddev->flags);
6037 return err;
6038}
6039
6040int md_start(struct mddev *mddev)
6041{
6042 int ret = 0;
6043
6044 if (mddev->pers->start) {
6045 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6046 md_wakeup_thread(mddev->thread);
6047 ret = mddev->pers->start(mddev);
6048 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6049 md_wakeup_thread(mddev->sync_thread);
6050 }
6051 return ret;
6052}
6053EXPORT_SYMBOL_GPL(md_start);
6054
6055static int restart_array(struct mddev *mddev)
6056{
6057 struct gendisk *disk = mddev->gendisk;
6058 struct md_rdev *rdev;
6059 bool has_journal = false;
6060 bool has_readonly = false;
6061
	/* Complain if it has no devices */
6063 if (list_empty(&mddev->disks))
6064 return -ENXIO;
6065 if (!mddev->pers)
6066 return -EINVAL;
6067 if (!mddev->ro)
6068 return -EBUSY;
6069
6070 rcu_read_lock();
6071 rdev_for_each_rcu(rdev, mddev) {
6072 if (test_bit(Journal, &rdev->flags) &&
6073 !test_bit(Faulty, &rdev->flags))
6074 has_journal = true;
6075 if (bdev_read_only(rdev->bdev))
6076 has_readonly = true;
6077 }
6078 rcu_read_unlock();
6079 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart rw with journal missing/faulty */
6081 return -EINVAL;
6082 if (has_readonly)
6083 return -EROFS;
6084
6085 mddev->safemode = 0;
6086 mddev->ro = 0;
6087 set_disk_ro(disk, 0);
6088 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6089
6090 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6091 md_wakeup_thread(mddev->thread);
6092 md_wakeup_thread(mddev->sync_thread);
6093 sysfs_notify_dirent_safe(mddev->sysfs_state);
6094 return 0;
6095}
6096
6097static void md_clean(struct mddev *mddev)
6098{
6099 mddev->array_sectors = 0;
6100 mddev->external_size = 0;
6101 mddev->dev_sectors = 0;
6102 mddev->raid_disks = 0;
6103 mddev->recovery_cp = 0;
6104 mddev->resync_min = 0;
6105 mddev->resync_max = MaxSector;
6106 mddev->reshape_position = MaxSector;
6107 mddev->external = 0;
6108 mddev->persistent = 0;
6109 mddev->level = LEVEL_NONE;
6110 mddev->clevel[0] = 0;
6111 mddev->flags = 0;
6112 mddev->sb_flags = 0;
6113 mddev->ro = 0;
6114 mddev->metadata_type[0] = 0;
6115 mddev->chunk_sectors = 0;
6116 mddev->ctime = mddev->utime = 0;
6117 mddev->layout = 0;
6118 mddev->max_disks = 0;
6119 mddev->events = 0;
6120 mddev->can_decrease_events = 0;
6121 mddev->delta_disks = 0;
6122 mddev->reshape_backwards = 0;
6123 mddev->new_level = LEVEL_NONE;
6124 mddev->new_layout = 0;
6125 mddev->new_chunk_sectors = 0;
6126 mddev->curr_resync = 0;
6127 atomic64_set(&mddev->resync_mismatches, 0);
6128 mddev->suspend_lo = mddev->suspend_hi = 0;
6129 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6130 mddev->recovery = 0;
6131 mddev->in_sync = 0;
6132 mddev->changed = 0;
6133 mddev->degraded = 0;
6134 mddev->safemode = 0;
6135 mddev->private = NULL;
6136 mddev->cluster_info = NULL;
6137 mddev->bitmap_info.offset = 0;
6138 mddev->bitmap_info.default_offset = 0;
6139 mddev->bitmap_info.default_space = 0;
6140 mddev->bitmap_info.chunksize = 0;
6141 mddev->bitmap_info.daemon_sleep = 0;
6142 mddev->bitmap_info.max_write_behind = 0;
6143 mddev->bitmap_info.nodes = 0;
6144}
6145
6146static void __md_stop_writes(struct mddev *mddev)
6147{
6148 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6149 flush_workqueue(md_misc_wq);
6150 if (mddev->sync_thread) {
6151 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6152 md_reap_sync_thread(mddev);
6153 }
6154
6155 del_timer_sync(&mddev->safemode_timer);
6156
6157 if (mddev->pers && mddev->pers->quiesce) {
6158 mddev->pers->quiesce(mddev, 1);
6159 mddev->pers->quiesce(mddev, 0);
6160 }
6161 md_bitmap_flush(mddev);
6162
6163 if (mddev->ro == 0 &&
6164 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6165 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
6167 if (!mddev_is_clustered(mddev))
6168 mddev->in_sync = 1;
6169 md_update_sb(mddev, 1);
6170 }
6171
6172 mddev->serialize_policy = 0;
6173 mddev_destroy_serial_pool(mddev, NULL, true);
6174}
6175
6176void md_stop_writes(struct mddev *mddev)
6177{
6178 mddev_lock_nointr(mddev);
6179 __md_stop_writes(mddev);
6180 mddev_unlock(mddev);
6181}
6182EXPORT_SYMBOL_GPL(md_stop_writes);
6183
6184static void mddev_detach(struct mddev *mddev)
6185{
6186 md_bitmap_wait_behind_writes(mddev);
6187 if (mddev->pers && mddev->pers->quiesce) {
6188 mddev->pers->quiesce(mddev, 1);
6189 mddev->pers->quiesce(mddev, 0);
6190 }
6191 md_unregister_thread(&mddev->thread);
6192 if (mddev->queue)
6193 blk_sync_queue(mddev->queue);
6194}
6195
6196static void __md_stop(struct mddev *mddev)
6197{
6198 struct md_personality *pers = mddev->pers;
6199 md_bitmap_destroy(mddev);
6200 mddev_detach(mddev);
6201
6202 flush_workqueue(md_misc_wq);
6203 spin_lock(&mddev->lock);
6204 mddev->pers = NULL;
6205 spin_unlock(&mddev->lock);
6206 pers->free(mddev, mddev->private);
6207 mddev->private = NULL;
6208 if (pers->sync_request && mddev->to_remove == NULL)
6209 mddev->to_remove = &md_redundancy_group;
6210 module_put(pers->owner);
6211 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6212}
6213
6214void md_stop(struct mddev *mddev)
6215{
	/* Stop the array and free attached data structures.  This is
	 * used by external callers such as dm-raid.
	 */
6219 __md_stop(mddev);
6220 bioset_exit(&mddev->bio_set);
6221 bioset_exit(&mddev->sync_set);
6222}
6223
6224EXPORT_SYMBOL_GPL(md_stop);
6225
6226static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6227{
6228 int err = 0;
6229 int did_freeze = 0;
6230
6231 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6232 did_freeze = 1;
6233 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6234 md_wakeup_thread(mddev->thread);
6235 }
6236 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6237 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6238 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6241 wake_up_process(mddev->sync_thread->tsk);
6242
6243 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6244 return -EBUSY;
6245 mddev_unlock(mddev);
6246 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6247 &mddev->recovery));
6248 wait_event(mddev->sb_wait,
6249 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6250 mddev_lock_nointr(mddev);
6251
6252 mutex_lock(&mddev->open_mutex);
6253 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6254 mddev->sync_thread ||
6255 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6256 pr_warn("md: %s still in use.\n",mdname(mddev));
6257 if (did_freeze) {
6258 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6259 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6260 md_wakeup_thread(mddev->thread);
6261 }
6262 err = -EBUSY;
6263 goto out;
6264 }
6265 if (mddev->pers) {
6266 __md_stop_writes(mddev);
6267
6268 err = -ENXIO;
6269 if (mddev->ro==1)
6270 goto out;
6271 mddev->ro = 1;
6272 set_disk_ro(mddev->gendisk, 1);
6273 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6274 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6275 md_wakeup_thread(mddev->thread);
6276 sysfs_notify_dirent_safe(mddev->sysfs_state);
6277 err = 0;
6278 }
6279out:
6280 mutex_unlock(&mddev->open_mutex);
6281 return err;
6282}
6283
/* mode:
 *   0 - completely stop and disassemble the array
 *   2 - stop but do not disassemble the array
 */
6288static int do_md_stop(struct mddev *mddev, int mode,
6289 struct block_device *bdev)
6290{
6291 struct gendisk *disk = mddev->gendisk;
6292 struct md_rdev *rdev;
6293 int did_freeze = 0;
6294
6295 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6296 did_freeze = 1;
6297 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6298 md_wakeup_thread(mddev->thread);
6299 }
6300 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6301 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6302 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6305 wake_up_process(mddev->sync_thread->tsk);
6306
6307 mddev_unlock(mddev);
6308 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6309 !test_bit(MD_RECOVERY_RUNNING,
6310 &mddev->recovery)));
6311 mddev_lock_nointr(mddev);
6312
6313 mutex_lock(&mddev->open_mutex);
6314 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6315 mddev->sysfs_active ||
6316 mddev->sync_thread ||
6317 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6318 pr_warn("md: %s still in use.\n",mdname(mddev));
6319 mutex_unlock(&mddev->open_mutex);
6320 if (did_freeze) {
6321 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6322 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6323 md_wakeup_thread(mddev->thread);
6324 }
6325 return -EBUSY;
6326 }
6327 if (mddev->pers) {
6328 if (mddev->ro)
6329 set_disk_ro(disk, 0);
6330
6331 __md_stop_writes(mddev);
6332 __md_stop(mddev);
6333 mddev->queue->backing_dev_info->congested_fn = NULL;
6334
		/* tell userspace to handle 'inactive' */
6336 sysfs_notify_dirent_safe(mddev->sysfs_state);
6337
6338 rdev_for_each(rdev, mddev)
6339 if (rdev->raid_disk >= 0)
6340 sysfs_unlink_rdev(mddev, rdev);
6341
6342 set_capacity(disk, 0);
6343 mutex_unlock(&mddev->open_mutex);
6344 mddev->changed = 1;
6345 revalidate_disk(disk);
6346
6347 if (mddev->ro)
6348 mddev->ro = 0;
6349 } else
6350 mutex_unlock(&mddev->open_mutex);
6351
	/*
	 * Free resources if this is the final stop.
	 */
6354 if (mode == 0) {
6355 pr_info("md: %s stopped.\n", mdname(mddev));
6356
6357 if (mddev->bitmap_info.file) {
6358 struct file *f = mddev->bitmap_info.file;
6359 spin_lock(&mddev->lock);
6360 mddev->bitmap_info.file = NULL;
6361 spin_unlock(&mddev->lock);
6362 fput(f);
6363 }
6364 mddev->bitmap_info.offset = 0;
6365
6366 export_array(mddev);
6367
6368 md_clean(mddev);
6369 if (mddev->hold_active == UNTIL_STOP)
6370 mddev->hold_active = 0;
6371 }
6372 md_new_event(mddev);
6373 sysfs_notify_dirent_safe(mddev->sysfs_state);
6374 return 0;
6375}
6376
6377#ifndef MODULE
6378static void autorun_array(struct mddev *mddev)
6379{
6380 struct md_rdev *rdev;
6381 int err;
6382
6383 if (list_empty(&mddev->disks))
6384 return;
6385
6386 pr_info("md: running: ");
6387
6388 rdev_for_each(rdev, mddev) {
6389 char b[BDEVNAME_SIZE];
6390 pr_cont("<%s>", bdevname(rdev->bdev,b));
6391 }
6392 pr_cont("\n");
6393
6394 err = do_md_run(mddev);
6395 if (err) {
6396 pr_warn("md: do_md_run() returned %d\n", err);
6397 do_md_stop(mddev, 0, NULL);
6398 }
6399}
6400
/*
 * Try to run arrays based on all disks that have arrived so far
 * (collected in pending_raid_disks).
 *
 * Method: pick the first pending disk, gather every pending disk whose
 * superblock matches it into a candidate list, find or create the
 * corresponding mddev, bind the candidates to it and run it.  Any
 * candidate that cannot be used is exported again.
 */
6413static void autorun_devices(int part)
6414{
6415 struct md_rdev *rdev0, *rdev, *tmp;
6416 struct mddev *mddev;
6417 char b[BDEVNAME_SIZE];
6418
6419 pr_info("md: autorun ...\n");
6420 while (!list_empty(&pending_raid_disks)) {
6421 int unit;
6422 dev_t dev;
6423 LIST_HEAD(candidates);
6424 rdev0 = list_entry(pending_raid_disks.next,
6425 struct md_rdev, same_set);
6426
6427 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6428 INIT_LIST_HEAD(&candidates);
6429 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6430 if (super_90_load(rdev, rdev0, 0) >= 0) {
6431 pr_debug("md: adding %s ...\n",
6432 bdevname(rdev->bdev,b));
6433 list_move(&rdev->same_set, &candidates);
6434 }
6435
		/*
		 * Now we have a set of devices with matching superblocks;
		 * work out which md unit they belong to and allocate the
		 * mddev.
		 */
6440 if (part) {
6441 dev = MKDEV(mdp_major,
6442 rdev0->preferred_minor << MdpMinorShift);
6443 unit = MINOR(dev) >> MdpMinorShift;
6444 } else {
6445 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6446 unit = MINOR(dev);
6447 }
6448 if (rdev0->preferred_minor != unit) {
6449 pr_warn("md: unit number in %s is bad: %d\n",
6450 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6451 break;
6452 }
6453
6454 md_probe(dev, NULL, NULL);
6455 mddev = mddev_find(dev);
6456 if (!mddev || !mddev->gendisk) {
6457 if (mddev)
6458 mddev_put(mddev);
6459 break;
6460 }
6461 if (mddev_lock(mddev))
6462 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6463 else if (mddev->raid_disks || mddev->major_version
6464 || !list_empty(&mddev->disks)) {
6465 pr_warn("md: %s already running, cannot run %s\n",
6466 mdname(mddev), bdevname(rdev0->bdev,b));
6467 mddev_unlock(mddev);
6468 } else {
6469 pr_debug("md: created %s\n", mdname(mddev));
6470 mddev->persistent = 1;
6471 rdev_for_each_list(rdev, tmp, &candidates) {
6472 list_del_init(&rdev->same_set);
6473 if (bind_rdev_to_array(rdev, mddev))
6474 export_rdev(rdev);
6475 }
6476 autorun_array(mddev);
6477 mddev_unlock(mddev);
6478 }
6479
		/* On success 'candidates' will be empty; on error it
		 * won't, so export whatever is left over.
		 */
6482 rdev_for_each_list(rdev, tmp, &candidates) {
6483 list_del_init(&rdev->same_set);
6484 export_rdev(rdev);
6485 }
6486 mddev_put(mddev);
6487 }
6488 pr_info("md: ... autorun DONE.\n");
6489}
6490#endif
6491
6492static int get_version(void __user *arg)
6493{
6494 mdu_version_t ver;
6495
6496 ver.major = MD_MAJOR_VERSION;
6497 ver.minor = MD_MINOR_VERSION;
6498 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6499
6500 if (copy_to_user(arg, &ver, sizeof(ver)))
6501 return -EFAULT;
6502
6503 return 0;
6504}
6505
6506static int get_array_info(struct mddev *mddev, void __user *arg)
6507{
6508 mdu_array_info_t info;
6509 int nr,working,insync,failed,spare;
6510 struct md_rdev *rdev;
6511
6512 nr = working = insync = failed = spare = 0;
6513 rcu_read_lock();
6514 rdev_for_each_rcu(rdev, mddev) {
6515 nr++;
6516 if (test_bit(Faulty, &rdev->flags))
6517 failed++;
6518 else {
6519 working++;
6520 if (test_bit(In_sync, &rdev->flags))
6521 insync++;
6522 else if (test_bit(Journal, &rdev->flags))
			/* journal member: counted as working, but neither in-sync nor spare */
6524 ;
6525 else
6526 spare++;
6527 }
6528 }
6529 rcu_read_unlock();
6530
6531 info.major_version = mddev->major_version;
6532 info.minor_version = mddev->minor_version;
6533 info.patch_version = MD_PATCHLEVEL_VERSION;
6534 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6535 info.level = mddev->level;
6536 info.size = mddev->dev_sectors / 2;
6537 if (info.size != mddev->dev_sectors / 2)
6538 info.size = -1;
6539 info.nr_disks = nr;
6540 info.raid_disks = mddev->raid_disks;
6541 info.md_minor = mddev->md_minor;
6542 info.not_persistent= !mddev->persistent;
6543
6544 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6545 info.state = 0;
6546 if (mddev->in_sync)
6547 info.state = (1<<MD_SB_CLEAN);
6548 if (mddev->bitmap && mddev->bitmap_info.offset)
6549 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6550 if (mddev_is_clustered(mddev))
6551 info.state |= (1<<MD_SB_CLUSTERED);
6552 info.active_disks = insync;
6553 info.working_disks = working;
6554 info.failed_disks = failed;
6555 info.spare_disks = spare;
6556
6557 info.layout = mddev->layout;
6558 info.chunk_size = mddev->chunk_sectors << 9;
6559
6560 if (copy_to_user(arg, &info, sizeof(info)))
6561 return -EFAULT;
6562
6563 return 0;
6564}
6565
6566static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6567{
6568 mdu_bitmap_file_t *file = NULL;
6569 char *ptr;
6570 int err;
6571
6572 file = kzalloc(sizeof(*file), GFP_NOIO);
6573 if (!file)
6574 return -ENOMEM;
6575
6576 err = 0;
6577 spin_lock(&mddev->lock);
6578
6579 if (mddev->bitmap_info.file) {
6580 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6581 sizeof(file->pathname));
6582 if (IS_ERR(ptr))
6583 err = PTR_ERR(ptr);
6584 else
6585 memmove(file->pathname, ptr,
6586 sizeof(file->pathname)-(ptr-file->pathname));
6587 }
6588 spin_unlock(&mddev->lock);
6589
6590 if (err == 0 &&
6591 copy_to_user(arg, file, sizeof(*file)))
6592 err = -EFAULT;
6593
6594 kfree(file);
6595 return err;
6596}
6597
6598static int get_disk_info(struct mddev *mddev, void __user * arg)
6599{
6600 mdu_disk_info_t info;
6601 struct md_rdev *rdev;
6602
6603 if (copy_from_user(&info, arg, sizeof(info)))
6604 return -EFAULT;
6605
6606 rcu_read_lock();
6607 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6608 if (rdev) {
6609 info.major = MAJOR(rdev->bdev->bd_dev);
6610 info.minor = MINOR(rdev->bdev->bd_dev);
6611 info.raid_disk = rdev->raid_disk;
6612 info.state = 0;
6613 if (test_bit(Faulty, &rdev->flags))
6614 info.state |= (1<<MD_DISK_FAULTY);
6615 else if (test_bit(In_sync, &rdev->flags)) {
6616 info.state |= (1<<MD_DISK_ACTIVE);
6617 info.state |= (1<<MD_DISK_SYNC);
6618 }
6619 if (test_bit(Journal, &rdev->flags))
6620 info.state |= (1<<MD_DISK_JOURNAL);
6621 if (test_bit(WriteMostly, &rdev->flags))
6622 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6623 if (test_bit(FailFast, &rdev->flags))
6624 info.state |= (1<<MD_DISK_FAILFAST);
6625 } else {
6626 info.major = info.minor = 0;
6627 info.raid_disk = -1;
6628 info.state = (1<<MD_DISK_REMOVED);
6629 }
6630 rcu_read_unlock();
6631
6632 if (copy_to_user(arg, &info, sizeof(info)))
6633 return -EFAULT;
6634
6635 return 0;
6636}
6637
6638static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6639{
6640 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6641 struct md_rdev *rdev;
6642 dev_t dev = MKDEV(info->major,info->minor);
6643
6644 if (mddev_is_clustered(mddev) &&
6645 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6646 pr_warn("%s: Cannot add to clustered mddev.\n",
6647 mdname(mddev));
6648 return -EINVAL;
6649 }
6650
6651 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6652 return -EOVERFLOW;
6653
6654 if (!mddev->raid_disks) {
6655 int err;
6656
6657 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6658 if (IS_ERR(rdev)) {
6659 pr_warn("md: md_import_device returned %ld\n",
6660 PTR_ERR(rdev));
6661 return PTR_ERR(rdev);
6662 }
6663 if (!list_empty(&mddev->disks)) {
6664 struct md_rdev *rdev0
6665 = list_entry(mddev->disks.next,
6666 struct md_rdev, same_set);
6667 err = super_types[mddev->major_version]
6668 .load_super(rdev, rdev0, mddev->minor_version);
6669 if (err < 0) {
6670 pr_warn("md: %s has different UUID to %s\n",
6671 bdevname(rdev->bdev,b),
6672 bdevname(rdev0->bdev,b2));
6673 export_rdev(rdev);
6674 return -EINVAL;
6675 }
6676 }
6677 err = bind_rdev_to_array(rdev, mddev);
6678 if (err)
6679 export_rdev(rdev);
6680 return err;
6681 }
6682
	/*
	 * add_new_disk can be used once the array is assembled to add
	 * "hot spares".  They must already have a superblock written.
	 */
6688 if (mddev->pers) {
6689 int err;
6690 if (!mddev->pers->hot_add_disk) {
6691 pr_warn("%s: personality does not support diskops!\n",
6692 mdname(mddev));
6693 return -EINVAL;
6694 }
6695 if (mddev->persistent)
6696 rdev = md_import_device(dev, mddev->major_version,
6697 mddev->minor_version);
6698 else
6699 rdev = md_import_device(dev, -1, -1);
6700 if (IS_ERR(rdev)) {
6701 pr_warn("md: md_import_device returned %ld\n",
6702 PTR_ERR(rdev));
6703 return PTR_ERR(rdev);
6704 }
		/* pick the target raid_disk: from the ioctl info for
		 * non-persistent arrays, from validate_super() otherwise */
6706 if (!mddev->persistent) {
6707 if (info->state & (1<<MD_DISK_SYNC) &&
6708 info->raid_disk < mddev->raid_disks) {
6709 rdev->raid_disk = info->raid_disk;
6710 set_bit(In_sync, &rdev->flags);
6711 clear_bit(Bitmap_sync, &rdev->flags);
6712 } else
6713 rdev->raid_disk = -1;
6714 rdev->saved_raid_disk = rdev->raid_disk;
6715 } else
6716 super_types[mddev->major_version].
6717 validate_super(mddev, rdev);
6718 if ((info->state & (1<<MD_DISK_SYNC)) &&
6719 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but the validated
			 * superblock disagrees about the slot, so reject it.
			 */
6723 export_rdev(rdev);
6724 return -EINVAL;
6725 }
6726
6727 clear_bit(In_sync, &rdev->flags);
6728 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6729 set_bit(WriteMostly, &rdev->flags);
6730 else
6731 clear_bit(WriteMostly, &rdev->flags);
6732 if (info->state & (1<<MD_DISK_FAILFAST))
6733 set_bit(FailFast, &rdev->flags);
6734 else
6735 clear_bit(FailFast, &rdev->flags);
6736
6737 if (info->state & (1<<MD_DISK_JOURNAL)) {
6738 struct md_rdev *rdev2;
6739 bool has_journal = false;
6740
6741
6742 rdev_for_each(rdev2, mddev) {
6743 if (test_bit(Journal, &rdev2->flags)) {
6744 has_journal = true;
6745 break;
6746 }
6747 }
6748 if (has_journal || mddev->bitmap) {
6749 export_rdev(rdev);
6750 return -EBUSY;
6751 }
6752 set_bit(Journal, &rdev->flags);
6753 }
6754
6755
6756
6757 if (mddev_is_clustered(mddev)) {
6758 if (info->state & (1 << MD_DISK_CANDIDATE))
6759 set_bit(Candidate, &rdev->flags);
6760 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6761
6762 err = md_cluster_ops->add_new_disk(mddev, rdev);
6763 if (err) {
6764 export_rdev(rdev);
6765 return err;
6766 }
6767 }
6768 }
6769
6770 rdev->raid_disk = -1;
6771 err = bind_rdev_to_array(rdev, mddev);
6772
6773 if (err)
6774 export_rdev(rdev);
6775
6776 if (mddev_is_clustered(mddev)) {
6777 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6778 if (!err) {
6779 err = md_cluster_ops->new_disk_ack(mddev,
6780 err == 0);
6781 if (err)
6782 md_kick_rdev_from_array(rdev);
6783 }
6784 } else {
6785 if (err)
6786 md_cluster_ops->add_new_disk_cancel(mddev);
6787 else
6788 err = add_bound_rdev(rdev);
6789 }
6790
6791 } else if (!err)
6792 err = add_bound_rdev(rdev);
6793
6794 return err;
6795 }
6796
	/*
	 * Otherwise (array configured but not started) ADD_NEW_DISK is only
	 * supported for version-0 superblocks.
	 */
6800 if (mddev->major_version != 0) {
6801 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6802 return -EINVAL;
6803 }
6804
6805 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6806 int err;
6807 rdev = md_import_device(dev, -1, 0);
6808 if (IS_ERR(rdev)) {
6809 pr_warn("md: error, md_import_device() returned %ld\n",
6810 PTR_ERR(rdev));
6811 return PTR_ERR(rdev);
6812 }
6813 rdev->desc_nr = info->number;
6814 if (info->raid_disk < mddev->raid_disks)
6815 rdev->raid_disk = info->raid_disk;
6816 else
6817 rdev->raid_disk = -1;
6818
6819 if (rdev->raid_disk < mddev->raid_disks)
6820 if (info->state & (1<<MD_DISK_SYNC))
6821 set_bit(In_sync, &rdev->flags);
6822
6823 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6824 set_bit(WriteMostly, &rdev->flags);
6825 if (info->state & (1<<MD_DISK_FAILFAST))
6826 set_bit(FailFast, &rdev->flags);
6827
6828 if (!mddev->persistent) {
6829 pr_debug("md: nonpersistent superblock ...\n");
6830 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6831 } else
6832 rdev->sb_start = calc_dev_sboffset(rdev);
6833 rdev->sectors = rdev->sb_start;
6834
6835 err = bind_rdev_to_array(rdev, mddev);
6836 if (err) {
6837 export_rdev(rdev);
6838 return err;
6839 }
6840 }
6841
6842 return 0;
6843}
6844
6845static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6846{
6847 char b[BDEVNAME_SIZE];
6848 struct md_rdev *rdev;
6849
6850 if (!mddev->pers)
6851 return -ENODEV;
6852
6853 rdev = find_rdev(mddev, dev);
6854 if (!rdev)
6855 return -ENXIO;
6856
6857 if (rdev->raid_disk < 0)
6858 goto kick_rdev;
6859
6860 clear_bit(Blocked, &rdev->flags);
6861 remove_and_add_spares(mddev, rdev);
6862
6863 if (rdev->raid_disk >= 0)
6864 goto busy;
6865
6866kick_rdev:
6867 if (mddev_is_clustered(mddev))
6868 md_cluster_ops->remove_disk(mddev, rdev);
6869
6870 md_kick_rdev_from_array(rdev);
6871 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6872 if (mddev->thread)
6873 md_wakeup_thread(mddev->thread);
6874 else
6875 md_update_sb(mddev, 1);
6876 md_new_event(mddev);
6877
6878 return 0;
6879busy:
6880 pr_debug("md: cannot remove active disk %s from %s ...\n",
6881 bdevname(rdev->bdev,b), mdname(mddev));
6882 return -EBUSY;
6883}
6884
6885static int hot_add_disk(struct mddev *mddev, dev_t dev)
6886{
6887 char b[BDEVNAME_SIZE];
6888 int err;
6889 struct md_rdev *rdev;
6890
6891 if (!mddev->pers)
6892 return -ENODEV;
6893
6894 if (mddev->major_version != 0) {
6895 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6896 mdname(mddev));
6897 return -EINVAL;
6898 }
6899 if (!mddev->pers->hot_add_disk) {
6900 pr_warn("%s: personality does not support diskops!\n",
6901 mdname(mddev));
6902 return -EINVAL;
6903 }
6904
6905 rdev = md_import_device(dev, -1, 0);
6906 if (IS_ERR(rdev)) {
6907 pr_warn("md: error, md_import_device() returned %ld\n",
6908 PTR_ERR(rdev));
6909 return -EINVAL;
6910 }
6911
6912 if (mddev->persistent)
6913 rdev->sb_start = calc_dev_sboffset(rdev);
6914 else
6915 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6916
6917 rdev->sectors = rdev->sb_start;
6918
6919 if (test_bit(Faulty, &rdev->flags)) {
6920 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6921 bdevname(rdev->bdev,b), mdname(mddev));
6922 err = -EINVAL;
6923 goto abort_export;
6924 }
6925
6926 clear_bit(In_sync, &rdev->flags);
6927 rdev->desc_nr = -1;
6928 rdev->saved_raid_disk = -1;
6929 err = bind_rdev_to_array(rdev, mddev);
6930 if (err)
6931 goto abort_export;
6932
	/*
	 * The device is now bound to the array; leave it without a raid slot
	 * so the recovery code decides where (and whether) to use it.
	 */
6938 rdev->raid_disk = -1;
6939
6940 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6941 if (!mddev->thread)
6942 md_update_sb(mddev, 1);
6943
	/*
	 * Kick the recovery thread: the new spare may need to be pulled into
	 * the array immediately.
	 */
6947 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6948 md_wakeup_thread(mddev->thread);
6949 md_new_event(mddev);
6950 return 0;
6951
6952abort_export:
6953 export_rdev(rdev);
6954 return err;
6955}
6956
6957static int set_bitmap_file(struct mddev *mddev, int fd)
6958{
6959 int err = 0;
6960
6961 if (mddev->pers) {
6962 if (!mddev->pers->quiesce || !mddev->thread)
6963 return -EBUSY;
6964 if (mddev->recovery || mddev->sync_thread)
6965 return -EBUSY;
6966
6967 }
6968
6969 if (fd >= 0) {
6970 struct inode *inode;
6971 struct file *f;
6972
6973 if (mddev->bitmap || mddev->bitmap_info.file)
6974 return -EEXIST;
6975 f = fget(fd);
6976
6977 if (f == NULL) {
6978 pr_warn("%s: error: failed to get bitmap file\n",
6979 mdname(mddev));
6980 return -EBADF;
6981 }
6982
6983 inode = f->f_mapping->host;
6984 if (!S_ISREG(inode->i_mode)) {
6985 pr_warn("%s: error: bitmap file must be a regular file\n",
6986 mdname(mddev));
6987 err = -EBADF;
6988 } else if (!(f->f_mode & FMODE_WRITE)) {
6989 pr_warn("%s: error: bitmap file must open for write\n",
6990 mdname(mddev));
6991 err = -EBADF;
6992 } else if (atomic_read(&inode->i_writecount) != 1) {
6993 pr_warn("%s: error: bitmap file is already in use\n",
6994 mdname(mddev));
6995 err = -EBUSY;
6996 }
6997 if (err) {
6998 fput(f);
6999 return err;
7000 }
7001 mddev->bitmap_info.file = f;
7002 mddev->bitmap_info.offset = 0;
7003 } else if (mddev->bitmap == NULL)
7004 return -ENOENT;
7005 err = 0;
7006 if (mddev->pers) {
7007 if (fd >= 0) {
7008 struct bitmap *bitmap;
7009
7010 bitmap = md_bitmap_create(mddev, -1);
7011 mddev_suspend(mddev);
7012 if (!IS_ERR(bitmap)) {
7013 mddev->bitmap = bitmap;
7014 err = md_bitmap_load(mddev);
7015 } else
7016 err = PTR_ERR(bitmap);
7017 if (err) {
7018 md_bitmap_destroy(mddev);
7019 fd = -1;
7020 }
7021 mddev_resume(mddev);
7022 } else if (fd < 0) {
7023 mddev_suspend(mddev);
7024 md_bitmap_destroy(mddev);
7025 mddev_resume(mddev);
7026 }
7027 }
7028 if (fd < 0) {
7029 struct file *f = mddev->bitmap_info.file;
7030 if (f) {
7031 spin_lock(&mddev->lock);
7032 mddev->bitmap_info.file = NULL;
7033 spin_unlock(&mddev->lock);
7034 fput(f);
7035 }
7036 }
7037
7038 return err;
7039}
7040
/*
 * set_array_info is used in two different ways.
 * When creating a new array, raid_disks is > 0 and, together with level,
 * size, not_persistent, layout and chunk_size, it describes the shape of
 * the array; this always creates an array with a version-0.90 superblock.
 * When assembling an existing array, raid_disks is 0 and only the
 * major/minor/patch version fields are used, to select which superblock
 * handler will read the metadata from the member devices.
 */
7054static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
7055{
7056
7057 if (info->raid_disks == 0) {
7058
7059 if (info->major_version < 0 ||
7060 info->major_version >= ARRAY_SIZE(super_types) ||
7061 super_types[info->major_version].name == NULL) {
7062
7063 pr_warn("md: superblock version %d not known\n",
7064 info->major_version);
7065 return -EINVAL;
7066 }
7067 mddev->major_version = info->major_version;
7068 mddev->minor_version = info->minor_version;
7069 mddev->patch_version = info->patch_version;
7070 mddev->persistent = !info->not_persistent;
7071
7072
7073
7074 mddev->ctime = ktime_get_real_seconds();
7075 return 0;
7076 }
7077 mddev->major_version = MD_MAJOR_VERSION;
7078 mddev->minor_version = MD_MINOR_VERSION;
7079 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7080 mddev->ctime = ktime_get_real_seconds();
7081
7082 mddev->level = info->level;
7083 mddev->clevel[0] = 0;
7084 mddev->dev_sectors = 2 * (sector_t)info->size;
7085 mddev->raid_disks = info->raid_disks;
7086
7087
7088
7089 if (info->state & (1<<MD_SB_CLEAN))
7090 mddev->recovery_cp = MaxSector;
7091 else
7092 mddev->recovery_cp = 0;
7093 mddev->persistent = ! info->not_persistent;
7094 mddev->external = 0;
7095
7096 mddev->layout = info->layout;
7097 if (mddev->level == 0)
7098
7099 mddev->layout = -1;
7100 mddev->chunk_sectors = info->chunk_size >> 9;
7101
7102 if (mddev->persistent) {
7103 mddev->max_disks = MD_SB_DISKS;
7104 mddev->flags = 0;
7105 mddev->sb_flags = 0;
7106 }
7107 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7108
7109 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7110 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7111 mddev->bitmap_info.offset = 0;
7112
7113 mddev->reshape_position = MaxSector;
7114
	/*
	 * Generate a 128 bit UUID for the new array.
	 */
7118 get_random_bytes(mddev->uuid, 16);
7119
7120 mddev->new_level = mddev->level;
7121 mddev->new_chunk_sectors = mddev->chunk_sectors;
7122 mddev->new_layout = mddev->layout;
7123 mddev->delta_disks = 0;
7124 mddev->reshape_backwards = 0;
7125
7126 return 0;
7127}
7128
7129void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7130{
7131 lockdep_assert_held(&mddev->reconfig_mutex);
7132
7133 if (mddev->external_size)
7134 return;
7135
7136 mddev->array_sectors = array_sectors;
7137}
7138EXPORT_SYMBOL(md_set_array_sectors);
7139
7140static int update_size(struct mddev *mddev, sector_t num_sectors)
7141{
7142 struct md_rdev *rdev;
7143 int rv;
7144 int fit = (num_sectors == 0);
7145 sector_t old_dev_sectors = mddev->dev_sectors;
7146
7147 if (mddev->pers->resize == NULL)
7148 return -EINVAL;
7149
	/*
	 * "num_sectors" is the amount of each device that will be used.
	 * Zero means "use as much as is available".  Resizing is refused
	 * while a resync/recovery is running or the array is read-only,
	 * and every member device must offer at least num_sectors.
	 */
7158 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7159 mddev->sync_thread)
7160 return -EBUSY;
7161 if (mddev->ro)
7162 return -EROFS;
7163
7164 rdev_for_each(rdev, mddev) {
7165 sector_t avail = rdev->sectors;
7166
7167 if (fit && (num_sectors == 0 || num_sectors > avail))
7168 num_sectors = avail;
7169 if (avail < num_sectors)
7170 return -ENOSPC;
7171 }
7172 rv = mddev->pers->resize(mddev, num_sectors);
7173 if (!rv) {
7174 if (mddev_is_clustered(mddev))
7175 md_cluster_ops->update_size(mddev, old_dev_sectors);
7176 else if (mddev->queue) {
7177 set_capacity(mddev->gendisk, mddev->array_sectors);
7178 revalidate_disk(mddev->gendisk);
7179 }
7180 }
7181 return rv;
7182}
7183
7184static int update_raid_disks(struct mddev *mddev, int raid_disks)
7185{
7186 int rv;
7187 struct md_rdev *rdev;
7188
7189 if (mddev->pers->check_reshape == NULL)
7190 return -EINVAL;
7191 if (mddev->ro)
7192 return -EROFS;
7193 if (raid_disks <= 0 ||
7194 (mddev->max_disks && raid_disks >= mddev->max_disks))
7195 return -EINVAL;
7196 if (mddev->sync_thread ||
7197 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7198 mddev->reshape_position != MaxSector)
7199 return -EBUSY;
7200
7201 rdev_for_each(rdev, mddev) {
7202 if (mddev->raid_disks < raid_disks &&
7203 rdev->data_offset < rdev->new_data_offset)
7204 return -EINVAL;
7205 if (mddev->raid_disks > raid_disks &&
7206 rdev->data_offset > rdev->new_data_offset)
7207 return -EINVAL;
7208 }
7209
7210 mddev->delta_disks = raid_disks - mddev->raid_disks;
7211 if (mddev->delta_disks < 0)
7212 mddev->reshape_backwards = 1;
7213 else if (mddev->delta_disks > 0)
7214 mddev->reshape_backwards = 0;
7215
7216 rv = mddev->pers->check_reshape(mddev);
7217 if (rv < 0) {
7218 mddev->delta_disks = 0;
7219 mddev->reshape_backwards = 0;
7220 }
7221 return rv;
7222}
7223
/*
 * update_array_info changes the configuration of an on-line array.
 * The version, ctime, level, persistence and chunk size in the passed info
 * must match the current array.  Only size, raid_disks, layout and the
 * presence of a bitmap may differ, and only one of those may change per call.
 */
7232static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7233{
7234 int rv = 0;
7235 int cnt = 0;
7236 int state = 0;
7237
7238
7239 if (mddev->bitmap && mddev->bitmap_info.offset)
7240 state |= (1 << MD_SB_BITMAP_PRESENT);
7241
7242 if (mddev->major_version != info->major_version ||
7243 mddev->minor_version != info->minor_version ||
7244
7245 mddev->ctime != info->ctime ||
7246 mddev->level != info->level ||
7247
7248 mddev->persistent != !info->not_persistent ||
7249 mddev->chunk_sectors != info->chunk_size >> 9 ||
7250
7251 ((state^info->state) & 0xfffffe00)
7252 )
7253 return -EINVAL;
7254
7255 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7256 cnt++;
7257 if (mddev->raid_disks != info->raid_disks)
7258 cnt++;
7259 if (mddev->layout != info->layout)
7260 cnt++;
7261 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7262 cnt++;
7263 if (cnt == 0)
7264 return 0;
7265 if (cnt > 1)
7266 return -EINVAL;
7267
7268 if (mddev->layout != info->layout) {
		/*
		 * Changing layout needs nothing at the md level; the
		 * personality handles it all through check_reshape().
		 */
7273 if (mddev->pers->check_reshape == NULL)
7274 return -EINVAL;
7275 else {
7276 mddev->new_layout = info->layout;
7277 rv = mddev->pers->check_reshape(mddev);
7278 if (rv)
7279 mddev->new_layout = mddev->layout;
7280 return rv;
7281 }
7282 }
7283 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7284 rv = update_size(mddev, (sector_t)info->size * 2);
7285
7286 if (mddev->raid_disks != info->raid_disks)
7287 rv = update_raid_disks(mddev, info->raid_disks);
7288
7289 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7290 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7291 rv = -EINVAL;
7292 goto err;
7293 }
7294 if (mddev->recovery || mddev->sync_thread) {
7295 rv = -EBUSY;
7296 goto err;
7297 }
7298 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7299 struct bitmap *bitmap;
7300
7301 if (mddev->bitmap) {
7302 rv = -EEXIST;
7303 goto err;
7304 }
7305 if (mddev->bitmap_info.default_offset == 0) {
7306 rv = -EINVAL;
7307 goto err;
7308 }
7309 mddev->bitmap_info.offset =
7310 mddev->bitmap_info.default_offset;
7311 mddev->bitmap_info.space =
7312 mddev->bitmap_info.default_space;
7313 bitmap = md_bitmap_create(mddev, -1);
7314 mddev_suspend(mddev);
7315 if (!IS_ERR(bitmap)) {
7316 mddev->bitmap = bitmap;
7317 rv = md_bitmap_load(mddev);
7318 } else
7319 rv = PTR_ERR(bitmap);
7320 if (rv)
7321 md_bitmap_destroy(mddev);
7322 mddev_resume(mddev);
7323 } else {
7324
7325 if (!mddev->bitmap) {
7326 rv = -ENOENT;
7327 goto err;
7328 }
7329 if (mddev->bitmap->storage.file) {
7330 rv = -EINVAL;
7331 goto err;
7332 }
7333 if (mddev->bitmap_info.nodes) {
7334
7335 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7336 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7337 rv = -EPERM;
7338 md_cluster_ops->unlock_all_bitmaps(mddev);
7339 goto err;
7340 }
7341
7342 mddev->bitmap_info.nodes = 0;
7343 md_cluster_ops->leave(mddev);
7344 }
7345 mddev_suspend(mddev);
7346 md_bitmap_destroy(mddev);
7347 mddev_resume(mddev);
7348 mddev->bitmap_info.offset = 0;
7349 }
7350 }
7351 md_update_sb(mddev, 1);
7352 return rv;
7353err:
7354 return rv;
7355}
7356
7357static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7358{
7359 struct md_rdev *rdev;
7360 int err = 0;
7361
7362 if (mddev->pers == NULL)
7363 return -ENODEV;
7364
7365 rcu_read_lock();
7366 rdev = md_find_rdev_rcu(mddev, dev);
7367 if (!rdev)
7368 err = -ENODEV;
7369 else {
7370 md_error(mddev, rdev);
7371 if (!test_bit(Faulty, &rdev->flags))
7372 err = -EBUSY;
7373 }
7374 rcu_read_unlock();
7375 return err;
7376}
7377
/*
 * There is no natural CHS geometry for an md array, so pretend we have
 * 2 heads and 4 sectors per track and derive the cylinder count from the
 * array size.
 */
7384static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7385{
7386 struct mddev *mddev = bdev->bd_disk->private_data;
7387
7388 geo->heads = 2;
7389 geo->sectors = 4;
7390 geo->cylinders = mddev->array_sectors / 8;
7391 return 0;
7392}
7393
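/*
 * Only the ioctl commands listed below are implemented by the md driver;
 * everything else is rejected with -ENOTTY before any array state is
 * touched.
 */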
7394static inline bool md_ioctl_valid(unsigned int cmd)
7395{
7396 switch (cmd) {
7397 case ADD_NEW_DISK:
7398 case BLKROSET:
7399 case GET_ARRAY_INFO:
7400 case GET_BITMAP_FILE:
7401 case GET_DISK_INFO:
7402 case HOT_ADD_DISK:
7403 case HOT_REMOVE_DISK:
7404 case RAID_AUTORUN:
7405 case RAID_VERSION:
7406 case RESTART_ARRAY_RW:
7407 case RUN_ARRAY:
7408 case SET_ARRAY_INFO:
7409 case SET_BITMAP_FILE:
7410 case SET_DISK_FAULTY:
7411 case STOP_ARRAY:
7412 case STOP_ARRAY_RO:
7413 case CLUSTERED_DISK_NACK:
7414 return true;
7415 default:
7416 return false;
7417 }
7418}
7419
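/*
 * Main ioctl entry point for md block devices: validate the command, check
 * permissions, resolve the mddev and dispatch to the helpers above under the
 * appropriate locking.
 */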
7420static int md_ioctl(struct block_device *bdev, fmode_t mode,
7421 unsigned int cmd, unsigned long arg)
7422{
7423 int err = 0;
7424 void __user *argp = (void __user *)arg;
7425 struct mddev *mddev = NULL;
7426 int ro;
7427 bool did_set_md_closing = false;
7428
7429 if (!md_ioctl_valid(cmd))
7430 return -ENOTTY;
7431
7432 switch (cmd) {
7433 case RAID_VERSION:
7434 case GET_ARRAY_INFO:
7435 case GET_DISK_INFO:
7436 break;
7437 default:
7438 if (!capable(CAP_SYS_ADMIN))
7439 return -EACCES;
7440 }
7441
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7446 switch (cmd) {
7447 case RAID_VERSION:
7448 err = get_version(argp);
7449 goto out;
7450
7451#ifndef MODULE
7452 case RAID_AUTORUN:
7453 err = 0;
7454 autostart_arrays(arg);
7455 goto out;
7456#endif
7457 default:;
7458 }
7459
	/*
	 * Every other command operates on a specific array, so resolve the
	 * mddev from the block device.
	 */
7464 mddev = bdev->bd_disk->private_data;
7465
7466 if (!mddev) {
7467 BUG();
7468 goto out;
7469 }
7470
7471
7472 switch (cmd) {
7473 case GET_ARRAY_INFO:
7474 if (!mddev->raid_disks && !mddev->external)
7475 err = -ENODEV;
7476 else
7477 err = get_array_info(mddev, argp);
7478 goto out;
7479
7480 case GET_DISK_INFO:
7481 if (!mddev->raid_disks && !mddev->external)
7482 err = -ENODEV;
7483 else
7484 err = get_disk_info(mddev, argp);
7485 goto out;
7486
7487 case SET_DISK_FAULTY:
7488 err = set_disk_faulty(mddev, new_decode_dev(arg));
7489 goto out;
7490
7491 case GET_BITMAP_FILE:
7492 err = get_bitmap_file(mddev, argp);
7493 goto out;
7494
7495 }
7496
7497 if (cmd == ADD_NEW_DISK)
7498
7499 flush_workqueue(md_misc_wq);
7500
7501 if (cmd == HOT_REMOVE_DISK)
7502
7503 wait_event_interruptible_timeout(mddev->sb_wait,
7504 !test_bit(MD_RECOVERY_NEEDED,
7505 &mddev->recovery),
7506 msecs_to_jiffies(5000));
7507 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/*
		 * Stopping needs exclusive access: refuse if anyone else has
		 * the array open, then mark it closing and flush dirty pages.
		 */
7511 mutex_lock(&mddev->open_mutex);
7512 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7513 mutex_unlock(&mddev->open_mutex);
7514 err = -EBUSY;
7515 goto out;
7516 }
7517 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7518 set_bit(MD_CLOSING, &mddev->flags);
7519 did_set_md_closing = true;
7520 mutex_unlock(&mddev->open_mutex);
7521 sync_blockdev(bdev);
7522 }
7523 err = mddev_lock(mddev);
7524 if (err) {
7525 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7526 err, cmd);
7527 goto out;
7528 }
7529
7530 if (cmd == SET_ARRAY_INFO) {
7531 mdu_array_info_t info;
7532 if (!arg)
7533 memset(&info, 0, sizeof(info));
7534 else if (copy_from_user(&info, argp, sizeof(info))) {
7535 err = -EFAULT;
7536 goto unlock;
7537 }
7538 if (mddev->pers) {
7539 err = update_array_info(mddev, &info);
7540 if (err) {
7541 pr_warn("md: couldn't update array info. %d\n", err);
7542 goto unlock;
7543 }
7544 goto unlock;
7545 }
7546 if (!list_empty(&mddev->disks)) {
7547 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7548 err = -EBUSY;
7549 goto unlock;
7550 }
7551 if (mddev->raid_disks) {
7552 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7553 err = -EBUSY;
7554 goto unlock;
7555 }
7556 err = set_array_info(mddev, &info);
7557 if (err) {
7558 pr_warn("md: couldn't set array info. %d\n", err);
7559 goto unlock;
7560 }
7561 goto unlock;
7562 }
7563
	/*
	 * Commands querying/configuring an existing array:
	 * if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY and GET/SET_BITMAP_FILE are allowed.
	 */
7569 if ((!mddev->raid_disks && !mddev->external)
7570 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7571 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7572 && cmd != GET_BITMAP_FILE) {
7573 err = -ENODEV;
7574 goto unlock;
7575 }
7576
	/*
	 * Commands even a read-only array can execute:
	 */
7580 switch (cmd) {
7581 case RESTART_ARRAY_RW:
7582 err = restart_array(mddev);
7583 goto unlock;
7584
7585 case STOP_ARRAY:
7586 err = do_md_stop(mddev, 0, bdev);
7587 goto unlock;
7588
7589 case STOP_ARRAY_RO:
7590 err = md_set_readonly(mddev, bdev);
7591 goto unlock;
7592
7593 case HOT_REMOVE_DISK:
7594 err = hot_remove_disk(mddev, new_decode_dev(arg));
7595 goto unlock;
7596
7597 case ADD_NEW_DISK:
		/*
		 * ADD_NEW_DISK on a running read-only array is only allowed
		 * when re-adding a device that is recorded as in-sync;
		 * anything else drops through to the read-write path below.
		 */
7602 if (mddev->pers) {
7603 mdu_disk_info_t info;
7604 if (copy_from_user(&info, argp, sizeof(info)))
7605 err = -EFAULT;
7606 else if (!(info.state & (1<<MD_DISK_SYNC)))
7607
7608 break;
7609 else
7610 err = add_new_disk(mddev, &info);
7611 goto unlock;
7612 }
7613 break;
7614
7615 case BLKROSET:
7616 if (get_user(ro, (int __user *)(arg))) {
7617 err = -EFAULT;
7618 goto unlock;
7619 }
7620 err = -EINVAL;
		/*
		 * Switching the bdev to read-only needs no help from md;
		 * only the read-only -> read-write direction is handled here.
		 */
7625 if (ro)
7626 goto unlock;
7627
7628
7629 if (mddev->ro != 1)
7630 goto unlock;
7631
7632
7633
7634
7635 if (mddev->pers) {
7636 err = restart_array(mddev);
7637 if (err == 0) {
7638 mddev->ro = 2;
7639 set_disk_ro(mddev->gendisk, 0);
7640 }
7641 }
7642 goto unlock;
7643 }
7644
	/*
	 * The remaining ioctls modify the array, so an auto-read-only array
	 * (ro == 2) is switched to read-write first, and a genuinely
	 * read-only array is rejected.
	 */
7649 if (mddev->ro && mddev->pers) {
7650 if (mddev->ro == 2) {
7651 mddev->ro = 0;
7652 sysfs_notify_dirent_safe(mddev->sysfs_state);
7653 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7654
7655
7656
7657
7658 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7659 mddev_unlock(mddev);
7660 wait_event(mddev->sb_wait,
7661 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7662 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7663 mddev_lock_nointr(mddev);
7664 }
7665 } else {
7666 err = -EROFS;
7667 goto unlock;
7668 }
7669 }
7670
7671 switch (cmd) {
7672 case ADD_NEW_DISK:
7673 {
7674 mdu_disk_info_t info;
7675 if (copy_from_user(&info, argp, sizeof(info)))
7676 err = -EFAULT;
7677 else
7678 err = add_new_disk(mddev, &info);
7679 goto unlock;
7680 }
7681
7682 case CLUSTERED_DISK_NACK:
7683 if (mddev_is_clustered(mddev))
7684 md_cluster_ops->new_disk_ack(mddev, false);
7685 else
7686 err = -EINVAL;
7687 goto unlock;
7688
7689 case HOT_ADD_DISK:
7690 err = hot_add_disk(mddev, new_decode_dev(arg));
7691 goto unlock;
7692
7693 case RUN_ARRAY:
7694 err = do_md_run(mddev);
7695 goto unlock;
7696
7697 case SET_BITMAP_FILE:
7698 err = set_bitmap_file(mddev, (int)arg);
7699 goto unlock;
7700
7701 default:
7702 err = -EINVAL;
7703 goto unlock;
7704 }
7705
7706unlock:
7707 if (mddev->hold_active == UNTIL_IOCTL &&
7708 err != -EINVAL)
7709 mddev->hold_active = 0;
7710 mddev_unlock(mddev);
7711out:
7712 if (did_set_md_closing)
7713 clear_bit(MD_CLOSING, &mddev->flags);
7714 return err;
7715}
7716#ifdef CONFIG_COMPAT
7717static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7718 unsigned int cmd, unsigned long arg)
7719{
7720 switch (cmd) {
7721 case HOT_REMOVE_DISK:
7722 case HOT_ADD_DISK:
7723 case SET_DISK_FAULTY:
7724 case SET_BITMAP_FILE:
7725
7726 break;
7727 default:
7728 arg = (unsigned long)compat_ptr(arg);
7729 break;
7730 }
7731
7732 return md_ioctl(bdev, mode, cmd, arg);
7733}
7734#endif
7735
7736static int md_open(struct block_device *bdev, fmode_t mode)
7737{
	/*
	 * Look up (and take a reference to) the mddev for this device;
	 * fail if the array is currently being stopped.
	 */
7742 struct mddev *mddev = mddev_find(bdev->bd_dev);
7743 int err;
7744
7745 if (!mddev)
7746 return -ENODEV;
7747
7748 if (mddev->gendisk != bdev->bd_disk) {
7749
7750
7751
7752 mddev_put(mddev);
7753
7754 flush_workqueue(md_misc_wq);
7755
7756 return -ERESTARTSYS;
7757 }
7758 BUG_ON(mddev != bdev->bd_disk->private_data);
7759
7760 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7761 goto out;
7762
7763 if (test_bit(MD_CLOSING, &mddev->flags)) {
7764 mutex_unlock(&mddev->open_mutex);
7765 err = -ENODEV;
7766 goto out;
7767 }
7768
7769 err = 0;
7770 atomic_inc(&mddev->openers);
7771 mutex_unlock(&mddev->open_mutex);
7772
7773 check_disk_change(bdev);
7774 out:
7775 if (err)
7776 mddev_put(mddev);
7777 return err;
7778}
7779
7780static void md_release(struct gendisk *disk, fmode_t mode)
7781{
7782 struct mddev *mddev = disk->private_data;
7783
7784 BUG_ON(!mddev);
7785 atomic_dec(&mddev->openers);
7786 mddev_put(mddev);
7787}
7788
7789static int md_media_changed(struct gendisk *disk)
7790{
7791 struct mddev *mddev = disk->private_data;
7792
7793 return mddev->changed;
7794}
7795
7796static int md_revalidate(struct gendisk *disk)
7797{
7798 struct mddev *mddev = disk->private_data;
7799
7800 mddev->changed = 0;
7801 return 0;
7802}
7803static const struct block_device_operations md_fops =
7804{
7805 .owner = THIS_MODULE,
7806 .open = md_open,
7807 .release = md_release,
7808 .ioctl = md_ioctl,
7809#ifdef CONFIG_COMPAT
7810 .compat_ioctl = md_compat_ioctl,
7811#endif
7812 .getgeo = md_getgeo,
7813 .media_changed = md_media_changed,
7814 .revalidate_disk= md_revalidate,
7815};
7816
7817static int md_thread(void *arg)
7818{
7819 struct md_thread *thread = arg;
7820
	/*
	 * md_thread is the body of every per-array kernel thread (the main
	 * array thread and the resync thread).  It sleeps until THREAD_WAKEUP
	 * is set, the timeout expires, or the thread is parked or stopped,
	 * and then calls the handler that was passed to md_register_thread().
	 */
7833 allow_signal(SIGKILL);
7834 while (!kthread_should_stop()) {
		/*
		 * Wait interruptibly so this thread does not count towards
		 * the load average; any signal that does arrive is simply
		 * flushed.
		 */
7841 if (signal_pending(current))
7842 flush_signals(current);
7843
7844 wait_event_interruptible_timeout
7845 (thread->wqueue,
7846 test_bit(THREAD_WAKEUP, &thread->flags)
7847 || kthread_should_stop() || kthread_should_park(),
7848 thread->timeout);
7849
7850 clear_bit(THREAD_WAKEUP, &thread->flags);
7851 if (kthread_should_park())
7852 kthread_parkme();
7853 if (!kthread_should_stop())
7854 thread->run(thread);
7855 }
7856
7857 return 0;
7858}
7859
7860void md_wakeup_thread(struct md_thread *thread)
7861{
7862 if (thread) {
7863 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7864 set_bit(THREAD_WAKEUP, &thread->flags);
7865 wake_up(&thread->wqueue);
7866 }
7867}
7868EXPORT_SYMBOL(md_wakeup_thread);
7869
7870struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7871 struct mddev *mddev, const char *name)
7872{
7873 struct md_thread *thread;
7874
7875 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7876 if (!thread)
7877 return NULL;
7878
7879 init_waitqueue_head(&thread->wqueue);
7880
7881 thread->run = run;
7882 thread->mddev = mddev;
7883 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7884 thread->tsk = kthread_run(md_thread, thread,
7885 "%s_%s",
7886 mdname(thread->mddev),
7887 name);
7888 if (IS_ERR(thread->tsk)) {
7889 kfree(thread);
7890 return NULL;
7891 }
7892 return thread;
7893}
7894EXPORT_SYMBOL(md_register_thread);
7895
7896void md_unregister_thread(struct md_thread **threadp)
7897{
7898 struct md_thread *thread = *threadp;
7899 if (!thread)
7900 return;
7901 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7902
7903
7904
7905 spin_lock(&pers_lock);
7906 *threadp = NULL;
7907 spin_unlock(&pers_lock);
7908
7909 kthread_stop(thread->tsk);
7910 kfree(thread);
7911}
7912EXPORT_SYMBOL(md_unregister_thread);
7913
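/*
 * md_error() is how a possible device failure is reported to md: the
 * personality's error_handler decides whether to fail the device, sysfs is
 * notified, and the array thread is woken so recovery can be scheduled.
 */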
7914void md_error(struct mddev *mddev, struct md_rdev *rdev)
7915{
7916 if (!rdev || test_bit(Faulty, &rdev->flags))
7917 return;
7918
7919 if (!mddev->pers || !mddev->pers->error_handler)
7920 return;
7921 mddev->pers->error_handler(mddev,rdev);
7922 if (mddev->degraded)
7923 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7924 sysfs_notify_dirent_safe(rdev->sysfs_state);
7925 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7926 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7927 md_wakeup_thread(mddev->thread);
7928 if (mddev->event_work.func)
7929 queue_work(md_misc_wq, &mddev->event_work);
7930 md_new_event(mddev);
7931}
7932EXPORT_SYMBOL(md_error);
7933
7934
7935
7936static void status_unused(struct seq_file *seq)
7937{
7938 int i = 0;
7939 struct md_rdev *rdev;
7940
7941 seq_printf(seq, "unused devices: ");
7942
7943 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7944 char b[BDEVNAME_SIZE];
7945 i++;
7946 seq_printf(seq, "%s ",
7947 bdevname(rdev->bdev,b));
7948 }
7949 if (!i)
7950 seq_printf(seq, "<none>");
7951
7952 seq_printf(seq, "\n");
7953}
7954
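/*
 * Emit the resync/recovery progress line for /proc/mdstat (progress bar,
 * percentage, estimated finish time and current speed).  Returns 1 if a
 * status line was printed.
 */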
7955static int status_resync(struct seq_file *seq, struct mddev *mddev)
7956{
7957 sector_t max_sectors, resync, res;
7958 unsigned long dt, db = 0;
7959 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7960 int scale, recovery_active;
7961 unsigned int per_milli;
7962
7963 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7964 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7965 max_sectors = mddev->resync_max_sectors;
7966 else
7967 max_sectors = mddev->dev_sectors;
7968
7969 resync = mddev->curr_resync;
7970 if (resync <= 3) {
7971 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7972
7973 resync = max_sectors;
7974 } else if (resync > max_sectors)
7975 resync = max_sectors;
7976 else
7977 resync -= atomic_read(&mddev->recovery_active);
7978
7979 if (resync == 0) {
7980 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7981 struct md_rdev *rdev;
7982
7983 rdev_for_each(rdev, mddev)
7984 if (rdev->raid_disk >= 0 &&
7985 !test_bit(Faulty, &rdev->flags) &&
7986 rdev->recovery_offset != MaxSector &&
7987 rdev->recovery_offset) {
7988 seq_printf(seq, "\trecover=REMOTE");
7989 return 1;
7990 }
7991 if (mddev->reshape_position != MaxSector)
7992 seq_printf(seq, "\treshape=REMOTE");
7993 else
7994 seq_printf(seq, "\tresync=REMOTE");
7995 return 1;
7996 }
7997 if (mddev->recovery_cp < MaxSector) {
7998 seq_printf(seq, "\tresync=PENDING");
7999 return 1;
8000 }
8001 return 0;
8002 }
8003 if (resync < 3) {
8004 seq_printf(seq, "\tresync=DELAYED");
8005 return 1;
8006 }
8007
8008 WARN_ON(max_sectors == 0);
8009
	/*
	 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t and
	 * (max_sectors>>scale) fits in a u32, as required by sector_div().
	 */
8014 scale = 10;
8015 if (sizeof(sector_t) > sizeof(unsigned long)) {
8016 while ( max_sectors/2 > (1ULL<<(scale+32)))
8017 scale++;
8018 }
8019 res = (resync>>scale)*1000;
8020 sector_div(res, (u32)((max_sectors>>scale)+1));
8021
8022 per_milli = res;
8023 {
8024 int i, x = per_milli/50, y = 20-x;
8025 seq_printf(seq, "[");
8026 for (i = 0; i < x; i++)
8027 seq_printf(seq, "=");
8028 seq_printf(seq, ">");
8029 for (i = 0; i < y; i++)
8030 seq_printf(seq, ".");
8031 seq_printf(seq, "] ");
8032 }
8033 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8034 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8035 "reshape" :
8036 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8037 "check" :
8038 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8039 "resync" : "recovery"))),
8040 per_milli/10, per_milli % 10,
8041 (unsigned long long) resync/2,
8042 (unsigned long long) max_sectors/2);
8043
	/*
	 * dt: seconds since the last mark
	 * db: sectors completed since that mark
	 * rt: estimated remaining time
	 *
	 * The divisor (db) is scaled down by 32, and rt shifted back by 5
	 * afterwards, to keep precision when db is small; the '+1' avoids a
	 * division by zero.
	 */
8061 dt = ((jiffies - mddev->resync_mark) / HZ);
8062 if (!dt) dt++;
8063
8064 curr_mark_cnt = mddev->curr_mark_cnt;
8065 recovery_active = atomic_read(&mddev->recovery_active);
8066 resync_mark_cnt = mddev->resync_mark_cnt;
8067
8068 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8069 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8070
8071 rt = max_sectors - resync;
8072 rt = div64_u64(rt, db/32+1);
8073 rt *= dt;
8074 rt >>= 5;
8075
8076 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8077 ((unsigned long)rt % 60)/6);
8078
8079 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8080 return 1;
8081}
8082
8083static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8084{
8085 struct list_head *tmp;
8086 loff_t l = *pos;
8087 struct mddev *mddev;
8088
8089 if (l >= 0x10000)
8090 return NULL;
8091 if (!l--)
8092
8093 return (void*)1;
8094
8095 spin_lock(&all_mddevs_lock);
8096 list_for_each(tmp,&all_mddevs)
8097 if (!l--) {
8098 mddev = list_entry(tmp, struct mddev, all_mddevs);
8099 mddev_get(mddev);
8100 spin_unlock(&all_mddevs_lock);
8101 return mddev;
8102 }
8103 spin_unlock(&all_mddevs_lock);
8104 if (!l--)
8105 return (void*)2;
8106 return NULL;
8107}
8108
8109static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8110{
8111 struct list_head *tmp;
8112 struct mddev *next_mddev, *mddev = v;
8113
8114 ++*pos;
8115 if (v == (void*)2)
8116 return NULL;
8117
8118 spin_lock(&all_mddevs_lock);
8119 if (v == (void*)1)
8120 tmp = all_mddevs.next;
8121 else
8122 tmp = mddev->all_mddevs.next;
8123 if (tmp != &all_mddevs)
8124 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8125 else {
8126 next_mddev = (void*)2;
8127 *pos = 0x10000;
8128 }
8129 spin_unlock(&all_mddevs_lock);
8130
8131 if (v != (void*)1)
8132 mddev_put(mddev);
8133 return next_mddev;
8134
8135}
8136
8137static void md_seq_stop(struct seq_file *seq, void *v)
8138{
8139 struct mddev *mddev = v;
8140
8141 if (mddev && v != (void*)1 && v != (void*)2)
8142 mddev_put(mddev);
8143}
8144
8145static int md_seq_show(struct seq_file *seq, void *v)
8146{
8147 struct mddev *mddev = v;
8148 sector_t sectors;
8149 struct md_rdev *rdev;
8150
8151 if (v == (void*)1) {
8152 struct md_personality *pers;
8153 seq_printf(seq, "Personalities : ");
8154 spin_lock(&pers_lock);
8155 list_for_each_entry(pers, &pers_list, list)
8156 seq_printf(seq, "[%s] ", pers->name);
8157
8158 spin_unlock(&pers_lock);
8159 seq_printf(seq, "\n");
8160 seq->poll_event = atomic_read(&md_event_count);
8161 return 0;
8162 }
8163 if (v == (void*)2) {
8164 status_unused(seq);
8165 return 0;
8166 }
8167
8168 spin_lock(&mddev->lock);
8169 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8170 seq_printf(seq, "%s : %sactive", mdname(mddev),
8171 mddev->pers ? "" : "in");
8172 if (mddev->pers) {
8173 if (mddev->ro==1)
8174 seq_printf(seq, " (read-only)");
8175 if (mddev->ro==2)
8176 seq_printf(seq, " (auto-read-only)");
8177 seq_printf(seq, " %s", mddev->pers->name);
8178 }
8179
8180 sectors = 0;
8181 rcu_read_lock();
8182 rdev_for_each_rcu(rdev, mddev) {
8183 char b[BDEVNAME_SIZE];
8184 seq_printf(seq, " %s[%d]",
8185 bdevname(rdev->bdev,b), rdev->desc_nr);
8186 if (test_bit(WriteMostly, &rdev->flags))
8187 seq_printf(seq, "(W)");
8188 if (test_bit(Journal, &rdev->flags))
8189 seq_printf(seq, "(J)");
8190 if (test_bit(Faulty, &rdev->flags)) {
8191 seq_printf(seq, "(F)");
8192 continue;
8193 }
8194 if (rdev->raid_disk < 0)
8195 seq_printf(seq, "(S)");
8196 if (test_bit(Replacement, &rdev->flags))
8197 seq_printf(seq, "(R)");
8198 sectors += rdev->sectors;
8199 }
8200 rcu_read_unlock();
8201
8202 if (!list_empty(&mddev->disks)) {
8203 if (mddev->pers)
8204 seq_printf(seq, "\n %llu blocks",
8205 (unsigned long long)
8206 mddev->array_sectors / 2);
8207 else
8208 seq_printf(seq, "\n %llu blocks",
8209 (unsigned long long)sectors / 2);
8210 }
8211 if (mddev->persistent) {
8212 if (mddev->major_version != 0 ||
8213 mddev->minor_version != 90) {
8214 seq_printf(seq," super %d.%d",
8215 mddev->major_version,
8216 mddev->minor_version);
8217 }
8218 } else if (mddev->external)
8219 seq_printf(seq, " super external:%s",
8220 mddev->metadata_type);
8221 else
8222 seq_printf(seq, " super non-persistent");
8223
8224 if (mddev->pers) {
8225 mddev->pers->status(seq, mddev);
8226 seq_printf(seq, "\n ");
8227 if (mddev->pers->sync_request) {
8228 if (status_resync(seq, mddev))
8229 seq_printf(seq, "\n ");
8230 }
8231 } else
8232 seq_printf(seq, "\n ");
8233
8234 md_bitmap_status(seq, mddev->bitmap);
8235
8236 seq_printf(seq, "\n");
8237 }
8238 spin_unlock(&mddev->lock);
8239
8240 return 0;
8241}
8242
8243static const struct seq_operations md_seq_ops = {
8244 .start = md_seq_start,
8245 .next = md_seq_next,
8246 .stop = md_seq_stop,
8247 .show = md_seq_show,
8248};
8249
8250static int md_seq_open(struct inode *inode, struct file *file)
8251{
8252 struct seq_file *seq;
8253 int error;
8254
8255 error = seq_open(file, &md_seq_ops);
8256 if (error)
8257 return error;
8258
8259 seq = file->private_data;
8260 seq->poll_event = atomic_read(&md_event_count);
8261 return error;
8262}
8263
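/*
 * Poll support for /proc/mdstat: a change in md_event_count is reported to
 * pollers as an exceptional condition; md_unloading is set while the module
 * is being torn down so that any remaining pollers are released.
 */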
8264static int md_unloading;
8265static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8266{
8267 struct seq_file *seq = filp->private_data;
8268 __poll_t mask;
8269
8270 if (md_unloading)
8271 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8272 poll_wait(filp, &md_event_waiters, wait);
8273
8274
8275 mask = EPOLLIN | EPOLLRDNORM;
8276
8277 if (seq->poll_event != atomic_read(&md_event_count))
8278 mask |= EPOLLERR | EPOLLPRI;
8279 return mask;
8280}
8281
8282static const struct proc_ops mdstat_proc_ops = {
8283 .proc_open = md_seq_open,
8284 .proc_read = seq_read,
8285 .proc_lseek = seq_lseek,
8286 .proc_release = seq_release,
8287 .proc_poll = mdstat_poll,
8288};
8289
8290int register_md_personality(struct md_personality *p)
8291{
8292 pr_debug("md: %s personality registered for level %d\n",
8293 p->name, p->level);
8294 spin_lock(&pers_lock);
8295 list_add_tail(&p->list, &pers_list);
8296 spin_unlock(&pers_lock);
8297 return 0;
8298}
8299EXPORT_SYMBOL(register_md_personality);
8300
8301int unregister_md_personality(struct md_personality *p)
8302{
8303 pr_debug("md: %s personality unregistered\n", p->name);
8304 spin_lock(&pers_lock);
8305 list_del_init(&p->list);
8306 spin_unlock(&pers_lock);
8307 return 0;
8308}
8309EXPORT_SYMBOL(unregister_md_personality);
8310
8311int register_md_cluster_operations(struct md_cluster_operations *ops,
8312 struct module *module)
8313{
8314 int ret = 0;
8315 spin_lock(&pers_lock);
8316 if (md_cluster_ops != NULL)
8317 ret = -EALREADY;
8318 else {
8319 md_cluster_ops = ops;
8320 md_cluster_mod = module;
8321 }
8322 spin_unlock(&pers_lock);
8323 return ret;
8324}
8325EXPORT_SYMBOL(register_md_cluster_operations);
8326
8327int unregister_md_cluster_operations(void)
8328{
8329 spin_lock(&pers_lock);
8330 md_cluster_ops = NULL;
8331 spin_unlock(&pers_lock);
8332 return 0;
8333}
8334EXPORT_SYMBOL(unregister_md_cluster_operations);
8335
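/*
 * Load the md-cluster module if necessary, pin it, and join the cluster
 * with the requested number of nodes.
 */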
8336int md_setup_cluster(struct mddev *mddev, int nodes)
8337{
8338 if (!md_cluster_ops)
8339 request_module("md-cluster");
8340 spin_lock(&pers_lock);
8341
8342 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
8343 pr_warn("can't find md-cluster module or get it's reference.\n");
8344 spin_unlock(&pers_lock);
8345 return -ENOENT;
8346 }
8347 spin_unlock(&pers_lock);
8348
8349 return md_cluster_ops->join(mddev, nodes);
8350}
8351
8352void md_cluster_stop(struct mddev *mddev)
8353{
8354 if (!md_cluster_ops)
8355 return;
8356 md_cluster_ops->leave(mddev);
8357 module_put(md_cluster_mod);
8358}
8359
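/*
 * Return non-zero if no member device has seen significant non-resync I/O
 * since the last check; used to decide whether the resync may run at full
 * speed.  With 'init' set it records the current counters as the new
 * baseline.
 */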
8360static int is_mddev_idle(struct mddev *mddev, int init)
8361{
8362 struct md_rdev *rdev;
8363 int idle;
8364 int curr_events;
8365
8366 idle = 1;
8367 rcu_read_lock();
8368 rdev_for_each_rcu(rdev, mddev) {
8369 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8370 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8371 atomic_read(&disk->sync_io);
8372
		/*
		 * sync_io is counted when a resync request is issued, while
		 * the disk_stats sector count only moves on completion, so
		 * resync activity makes curr_events smaller, and ordinary I/O
		 * makes it (eventually) larger.  Only when curr_events has
		 * grown well past the value recorded at the previous check do
		 * we declare the device busy, which slows the resync down.
		 */
8394 if (init || curr_events - rdev->last_events > 64) {
8395 rdev->last_events = curr_events;
8396 idle = 0;
8397 }
8398 }
8399 rcu_read_unlock();
8400 return idle;
8401}
8402
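/*
 * Called by the personality when 'blocks' sectors of resync I/O have
 * completed: drop them from recovery_active, wake any waiters, and on error
 * mark the resync as interrupted.
 */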
8403void md_done_sync(struct mddev *mddev, int blocks, int ok)
8404{
8405
8406 atomic_sub(blocks, &mddev->recovery_active);
8407 wake_up(&mddev->recovery_wait);
8408 if (!ok) {
8409 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8410 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8411 md_wakeup_thread(mddev->thread);
8412
8413 }
8414}
8415EXPORT_SYMBOL(md_done_sync);
8416
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. the 'active' flag in the
 * superblock) before writing, schedule the superblock update and wait for it
 * to complete.  A return value of 'false' means the write was not recorded
 * and cannot proceed because the array is being suspended.
 */
8424bool md_write_start(struct mddev *mddev, struct bio *bi)
8425{
8426 int did_change = 0;
8427
8428 if (bio_data_dir(bi) != WRITE)
8429 return true;
8430
8431 BUG_ON(mddev->ro == 1);
8432 if (mddev->ro == 2) {
8433
8434 mddev->ro = 0;
8435 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8436 md_wakeup_thread(mddev->thread);
8437 md_wakeup_thread(mddev->sync_thread);
8438 did_change = 1;
8439 }
8440 rcu_read_lock();
8441 percpu_ref_get(&mddev->writes_pending);
8442 smp_mb();
8443 if (mddev->safemode == 1)
8444 mddev->safemode = 0;
8445
8446 if (mddev->in_sync || mddev->sync_checkers) {
8447 spin_lock(&mddev->lock);
8448 if (mddev->in_sync) {
8449 mddev->in_sync = 0;
8450 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8451 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8452 md_wakeup_thread(mddev->thread);
8453 did_change = 1;
8454 }
8455 spin_unlock(&mddev->lock);
8456 }
8457 rcu_read_unlock();
8458 if (did_change)
8459 sysfs_notify_dirent_safe(mddev->sysfs_state);
8460 if (!mddev->has_superblocks)
8461 return true;
8462 wait_event(mddev->sb_wait,
8463 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8464 mddev->suspended);
8465 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8466 percpu_ref_put(&mddev->writes_pending);
8467 return false;
8468 }
8469 return true;
8470}
8471EXPORT_SYMBOL(md_write_start);
8472
/* md_write_inc can only be called when md_write_start() has already been
 * called at least once for the current request.  It increments the pending
 * write count and is useful when a single request is split into several
 * parts; each part then needs a matching md_write_end().  Unlike
 * md_write_start(), it is safe to call inside a spinlocked region.
 */
8481void md_write_inc(struct mddev *mddev, struct bio *bi)
8482{
8483 if (bio_data_dir(bi) != WRITE)
8484 return;
8485 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8486 percpu_ref_get(&mddev->writes_pending);
8487}
8488EXPORT_SYMBOL(md_write_inc);
8489
8490void md_write_end(struct mddev *mddev)
8491{
8492 percpu_ref_put(&mddev->writes_pending);
8493
8494 if (mddev->safemode == 2)
8495 md_wakeup_thread(mddev->thread);
8496 else if (mddev->safemode_delay)
8497
8498
8499
8500 mod_timer(&mddev->safemode_timer,
8501 roundup(jiffies, mddev->safemode_delay) +
8502 mddev->safemode_delay);
8503}
8504
8505EXPORT_SYMBOL(md_write_end);
8506
/* md_allow_write(mddev)
 * Calling this ensures the array is marked 'active' so that writes may
 * proceed without blocking on a metadata update.  It is important to call
 * this before attempting a GFP_KERNEL allocation while holding the mddev
 * lock.
 */
8513void md_allow_write(struct mddev *mddev)
8514{
8515 if (!mddev->pers)
8516 return;
8517 if (mddev->ro)
8518 return;
8519 if (!mddev->pers->sync_request)
8520 return;
8521
8522 spin_lock(&mddev->lock);
8523 if (mddev->in_sync) {
8524 mddev->in_sync = 0;
8525 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8526 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8527 if (mddev->safemode_delay &&
8528 mddev->safemode == 0)
8529 mddev->safemode = 1;
8530 spin_unlock(&mddev->lock);
8531 md_update_sb(mddev, 0);
8532 sysfs_notify_dirent_safe(mddev->sysfs_state);
8533
8534 wait_event(mddev->sb_wait,
8535 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8536 } else
8537 spin_unlock(&mddev->lock);
8538}
8539EXPORT_SYMBOL_GPL(md_allow_write);
8540
8541#define SYNC_MARKS 10
8542#define SYNC_MARK_STEP (3*HZ)
8543#define UPDATE_FREQUENCY (5*60*HZ)
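/*
 * md_do_sync() is the body of the resync thread.  It arbitrates with other
 * arrays sharing the same devices, then repeatedly calls the personality's
 * sync_request() while throttling between the configured minimum and maximum
 * speeds, checkpointing progress as it goes, and finally records the result
 * before waking the main array thread.
 */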
8544void md_do_sync(struct md_thread *thread)
8545{
8546 struct mddev *mddev = thread->mddev;
8547 struct mddev *mddev2;
8548 unsigned int currspeed = 0, window;
8549 sector_t max_sectors,j, io_sectors, recovery_done;
8550 unsigned long mark[SYNC_MARKS];
8551 unsigned long update_time;
8552 sector_t mark_cnt[SYNC_MARKS];
8553 int last_mark,m;
8554 struct list_head *tmp;
8555 sector_t last_check;
8556 int skipped = 0;
8557 struct md_rdev *rdev;
8558 char *desc, *action = NULL;
8559 struct blk_plug plug;
8560 int ret;
8561
8562
8563 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8564 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8565 return;
8566 if (mddev->ro) {
8567 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8568 return;
8569 }
8570
8571 if (mddev_is_clustered(mddev)) {
8572 ret = md_cluster_ops->resync_start(mddev);
8573 if (ret)
8574 goto skip;
8575
8576 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8577 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8578 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8579 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8580 && ((unsigned long long)mddev->curr_resync_completed
8581 < (unsigned long long)mddev->resync_max_sectors))
8582 goto skip;
8583 }
8584
8585 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8586 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8587 desc = "data-check";
8588 action = "check";
8589 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8590 desc = "requested-resync";
8591 action = "repair";
8592 } else
8593 desc = "resync";
8594 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8595 desc = "reshape";
8596 else
8597 desc = "recovery";
8598
8599 mddev->last_sync_action = action ?: desc;
8600
	/*
	 * curr_resync is overloaded while checking for conflicting resyncs:
	 *  0 == not engaged in resync at all
	 *  2 == checking that no other array sharing our devices is resyncing
	 *  1 == like 2, but we have yielded to let the conflicting resync run
	 *  other == actively resyncing, at that sector
	 * We only proceed once every conflicting array has a lower
	 * curr_resync than ours; the comparison of mddev pointers is the
	 * arbitrary tie-breaker that decides who yields.
	 */
8617 do {
8618 int mddev2_minor = -1;
8619 mddev->curr_resync = 2;
8620
8621 try_again:
8622 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8623 goto skip;
8624 for_each_mddev(mddev2, tmp) {
8625 if (mddev2 == mddev)
8626 continue;
8627 if (!mddev->parallel_resync
8628 && mddev2->curr_resync
8629 && match_mddev_units(mddev, mddev2)) {
8630 DEFINE_WAIT(wq);
8631 if (mddev < mddev2 && mddev->curr_resync == 2) {
8632
8633 mddev->curr_resync = 1;
8634 wake_up(&resync_wait);
8635 }
8636 if (mddev > mddev2 && mddev->curr_resync == 1)
8637
8638
8639
8640 continue;
8641
8642
8643
8644
8645 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8646 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8647 mddev2->curr_resync >= mddev->curr_resync) {
8648 if (mddev2_minor != mddev2->md_minor) {
8649 mddev2_minor = mddev2->md_minor;
8650 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8651 desc, mdname(mddev),
8652 mdname(mddev2));
8653 }
8654 mddev_put(mddev2);
8655 if (signal_pending(current))
8656 flush_signals(current);
8657 schedule();
8658 finish_wait(&resync_wait, &wq);
8659 goto try_again;
8660 }
8661 finish_wait(&resync_wait, &wq);
8662 }
8663 }
8664 } while (mddev->curr_resync < 2);
8665
8666 j = 0;
8667 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8668
8669
8670
8671 max_sectors = mddev->resync_max_sectors;
8672 atomic64_set(&mddev->resync_mismatches, 0);
8673
8674 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8675 j = mddev->resync_min;
8676 else if (!mddev->bitmap)
8677 j = mddev->recovery_cp;
8678
8679 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8680 max_sectors = mddev->resync_max_sectors;
8681
8682
8683
8684
8685
8686 if (mddev_is_clustered(mddev) &&
8687 mddev->reshape_position != MaxSector)
8688 j = mddev->reshape_position;
8689 } else {
8690
8691 max_sectors = mddev->dev_sectors;
8692 j = MaxSector;
8693 rcu_read_lock();
8694 rdev_for_each_rcu(rdev, mddev)
8695 if (rdev->raid_disk >= 0 &&
8696 !test_bit(Journal, &rdev->flags) &&
8697 !test_bit(Faulty, &rdev->flags) &&
8698 !test_bit(In_sync, &rdev->flags) &&
8699 rdev->recovery_offset < j)
8700 j = rdev->recovery_offset;
8701 rcu_read_unlock();
		/*
		 * If there is a bitmap, make sure writes that started before
		 * the spare was added have completed before recovery starts;
		 * otherwise such a write could clear a bitmap bit that the
		 * recovery has already checked and skipped.  The
		 * quiesce/unquiesce pair drains those writes.
		 */
8711 if (mddev->bitmap) {
8712 mddev->pers->quiesce(mddev, 1);
8713 mddev->pers->quiesce(mddev, 0);
8714 }
8715 }
8716
8717 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8718 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8719 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8720 speed_max(mddev), desc);
8721
8722 is_mddev_idle(mddev, 1);
8723
8724 io_sectors = 0;
8725 for (m = 0; m < SYNC_MARKS; m++) {
8726 mark[m] = jiffies;
8727 mark_cnt[m] = io_sectors;
8728 }
8729 last_mark = 0;
8730 mddev->resync_mark = mark[last_mark];
8731 mddev->resync_mark_cnt = mark_cnt[last_mark];
8732
8733
8734
8735
8736 window = 32 * (PAGE_SIZE / 512);
8737 pr_debug("md: using %dk window, over a total of %lluk.\n",
8738 window/2, (unsigned long long)max_sectors/2);
8739
8740 atomic_set(&mddev->recovery_active, 0);
8741 last_check = 0;
8742
8743 if (j>2) {
8744 pr_debug("md: resuming %s of %s from checkpoint.\n",
8745 desc, mdname(mddev));
8746 mddev->curr_resync = j;
8747 } else
8748 mddev->curr_resync = 3;
8749 mddev->curr_resync_completed = j;
8750 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8751 md_new_event(mddev);
8752 update_time = jiffies;
8753
8754 blk_start_plug(&plug);
8755 while (j < max_sectors) {
8756 sector_t sectors;
8757
8758 skipped = 0;
8759
8760 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8761 ((mddev->curr_resync > mddev->curr_resync_completed &&
8762 (mddev->curr_resync - mddev->curr_resync_completed)
8763 > (max_sectors >> 4)) ||
8764 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8765 (j - mddev->curr_resync_completed)*2
8766 >= mddev->resync_max - mddev->curr_resync_completed ||
8767 mddev->curr_resync_completed > mddev->resync_max
8768 )) {
8769
8770 wait_event(mddev->recovery_wait,
8771 atomic_read(&mddev->recovery_active) == 0);
8772 mddev->curr_resync_completed = j;
8773 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8774 j > mddev->recovery_cp)
8775 mddev->recovery_cp = j;
8776 update_time = jiffies;
8777 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8778 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8779 }
8780
8781 while (j >= mddev->resync_max &&
8782 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8783
8784
8785
8786
8787 flush_signals(current);
8788 wait_event_interruptible(mddev->recovery_wait,
8789 mddev->resync_max > j
8790 || test_bit(MD_RECOVERY_INTR,
8791 &mddev->recovery));
8792 }
8793
8794 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8795 break;
8796
8797 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8798 if (sectors == 0) {
8799 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8800 break;
8801 }
8802
8803 if (!skipped) {
8804 io_sectors += sectors;
8805 atomic_add(sectors, &mddev->recovery_active);
8806 }
8807
8808 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8809 break;
8810
8811 j += sectors;
8812 if (j > max_sectors)
8813
8814 j = max_sectors;
8815 if (j > 2)
8816 mddev->curr_resync = j;
8817 mddev->curr_mark_cnt = io_sectors;
8818 if (last_check == 0)
8819
8820
8821
8822 md_new_event(mddev);
8823
8824 if (last_check + window > io_sectors || j == max_sectors)
8825 continue;
8826
8827 last_check = io_sectors;
8828 repeat:
8829 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8830
8831 int next = (last_mark+1) % SYNC_MARKS;
8832
8833 mddev->resync_mark = mark[next];
8834 mddev->resync_mark_cnt = mark_cnt[next];
8835 mark[next] = jiffies;
8836 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8837 last_mark = next;
8838 }
8839
8840 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8841 break;
8842
		/*
		 * Throttle: above the maximum speed we sleep and retry; above
		 * the minimum speed we also back off while the member devices
		 * are seeing other I/O, so normal traffic is not starved.
		 */
8851 cond_resched();
8852
8853 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8854 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8855 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8856
8857 if (currspeed > speed_min(mddev)) {
8858 if (currspeed > speed_max(mddev)) {
8859 msleep(500);
8860 goto repeat;
8861 }
8862 if (!is_mddev_idle(mddev, 0)) {
8863
8864
8865
8866
8867 wait_event(mddev->recovery_wait,
8868 !atomic_read(&mddev->recovery_active));
8869 }
8870 }
8871 }
8872 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8873 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8874 ? "interrupted" : "done");
8875
8876
8877
8878 blk_finish_plug(&plug);
8879 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8880
8881 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8882 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8883 mddev->curr_resync > 3) {
8884 mddev->curr_resync_completed = mddev->curr_resync;
8885 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8886 }
8887 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8888
8889 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8890 mddev->curr_resync > 3) {
8891 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8892 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8893 if (mddev->curr_resync >= mddev->recovery_cp) {
8894 pr_debug("md: checkpointing %s of %s.\n",
8895 desc, mdname(mddev));
8896 if (test_bit(MD_RECOVERY_ERROR,
8897 &mddev->recovery))
8898 mddev->recovery_cp =
8899 mddev->curr_resync_completed;
8900 else
8901 mddev->recovery_cp =
8902 mddev->curr_resync;
8903 }
8904 } else
8905 mddev->recovery_cp = MaxSector;
8906 } else {
8907 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8908 mddev->curr_resync = MaxSector;
8909 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8910 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8911 rcu_read_lock();
8912 rdev_for_each_rcu(rdev, mddev)
8913 if (rdev->raid_disk >= 0 &&
8914 mddev->delta_disks >= 0 &&
8915 !test_bit(Journal, &rdev->flags) &&
8916 !test_bit(Faulty, &rdev->flags) &&
8917 !test_bit(In_sync, &rdev->flags) &&
8918 rdev->recovery_offset < mddev->curr_resync)
8919 rdev->recovery_offset = mddev->curr_resync;
8920 rcu_read_unlock();
8921 }
8922 }
8923 }
8924 skip:
8925
8926
8927
8928 set_mask_bits(&mddev->sb_flags, 0,
8929 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8930
8931 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8932 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8933 mddev->delta_disks > 0 &&
8934 mddev->pers->finish_reshape &&
8935 mddev->pers->size &&
8936 mddev->queue) {
8937 mddev_lock_nointr(mddev);
8938 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8939 mddev_unlock(mddev);
8940 if (!mddev_is_clustered(mddev)) {
8941 set_capacity(mddev->gendisk, mddev->array_sectors);
8942 revalidate_disk(mddev->gendisk);
8943 }
8944 }
8945
8946 spin_lock(&mddev->lock);
8947 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8948
8949 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8950 mddev->resync_min = 0;
8951 mddev->resync_max = MaxSector;
8952 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8953 mddev->resync_min = mddev->curr_resync_completed;
8954 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8955 mddev->curr_resync = 0;
8956 spin_unlock(&mddev->lock);
8957
8958 wake_up(&resync_wait);
8959 md_wakeup_thread(mddev->thread);
8960 return;
8961}
8962EXPORT_SYMBOL_GPL(md_do_sync);
8963
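/*
 * Remove failed or unwanted devices from the array and hot-add any suitable
 * spares.  If 'this' is non-NULL only that one device is considered.
 * Returns the number of spares that are (or are being) rebuilt into the
 * array.
 */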
8964static int remove_and_add_spares(struct mddev *mddev,
8965 struct md_rdev *this)
8966{
8967 struct md_rdev *rdev;
8968 int spares = 0;
8969 int removed = 0;
8970 bool remove_some = false;
8971
8972 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
8973
8974 return 0;
8975
8976 rdev_for_each(rdev, mddev) {
8977 if ((this == NULL || rdev == this) &&
8978 rdev->raid_disk >= 0 &&
8979 !test_bit(Blocked, &rdev->flags) &&
8980 test_bit(Faulty, &rdev->flags) &&
8981 atomic_read(&rdev->nr_pending)==0) {
			/*
			 * A Faulty, non-Blocked device with no pending I/O
			 * will never have nr_pending raised or Faulty cleared
			 * again, so it can be removed once synchronize_rcu()
			 * below guarantees no-one still holds an RCU
			 * reference to it.
			 */
8987 remove_some = true;
8988 set_bit(RemoveSynchronized, &rdev->flags);
8989 }
8990 }
8991
8992 if (remove_some)
8993 synchronize_rcu();
8994 rdev_for_each(rdev, mddev) {
8995 if ((this == NULL || rdev == this) &&
8996 rdev->raid_disk >= 0 &&
8997 !test_bit(Blocked, &rdev->flags) &&
8998 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8999 (!test_bit(In_sync, &rdev->flags) &&
9000 !test_bit(Journal, &rdev->flags))) &&
9001 atomic_read(&rdev->nr_pending)==0)) {
9002 if (mddev->pers->hot_remove_disk(
9003 mddev, rdev) == 0) {
9004 sysfs_unlink_rdev(mddev, rdev);
9005 rdev->saved_raid_disk = rdev->raid_disk;
9006 rdev->raid_disk = -1;
9007 removed++;
9008 }
9009 }
9010 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9011 clear_bit(RemoveSynchronized, &rdev->flags);
9012 }
9013
9014 if (removed && mddev->kobj.sd)
9015 sysfs_notify(&mddev->kobj, NULL, "degraded");
9016
9017 if (this && removed)
9018 goto no_add;
9019
9020 rdev_for_each(rdev, mddev) {
9021 if (this && this != rdev)
9022 continue;
9023 if (test_bit(Candidate, &rdev->flags))
9024 continue;
9025 if (rdev->raid_disk >= 0 &&
9026 !test_bit(In_sync, &rdev->flags) &&
9027 !test_bit(Journal, &rdev->flags) &&
9028 !test_bit(Faulty, &rdev->flags))
9029 spares++;
9030 if (rdev->raid_disk >= 0)
9031 continue;
9032 if (test_bit(Faulty, &rdev->flags))
9033 continue;
9034 if (!test_bit(Journal, &rdev->flags)) {
9035 if (mddev->ro &&
9036 ! (rdev->saved_raid_disk >= 0 &&
9037 !test_bit(Bitmap_sync, &rdev->flags)))
9038 continue;
9039
9040 rdev->recovery_offset = 0;
9041 }
9042 if (mddev->pers->
9043 hot_add_disk(mddev, rdev) == 0) {
9044 if (sysfs_link_rdev(mddev, rdev))
9045 /* failure here is OK */;
9046 if (!test_bit(Journal, &rdev->flags))
9047 spares++;
9048 md_new_event(mddev);
9049 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9050 }
9051 }
9052no_add:
9053 if (removed)
9054 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9055 return spares;
9056}
9057
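/*
 * Workqueue callback that actually starts a resync/recovery: register the
 * sync thread and wake it, or clean up the recovery flags if the thread
 * could not be created.
 */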
9058static void md_start_sync(struct work_struct *ws)
9059{
9060 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9061
9062 mddev->sync_thread = md_register_thread(md_do_sync,
9063 mddev,
9064 "resync");
9065 if (!mddev->sync_thread) {
9066 pr_warn("%s: could not start resync thread...\n",
9067 mdname(mddev));
9068
9069 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9070 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9071 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9072 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9073 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9074 wake_up(&resync_wait);
9075 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9076 &mddev->recovery))
9077 if (mddev->sysfs_action)
9078 sysfs_notify_dirent_safe(mddev->sysfs_action);
9079 } else
9080 md_wakeup_thread(mddev->sync_thread);
9081 sysfs_notify_dirent_safe(mddev->sysfs_action);
9082 md_new_event(mddev);
9083}
9084
/*
 * This routine is regularly called by all per-raid-array threads to deal
 * with generic issues like resync and super-block updates.  Raid
 * personalities that don't have a thread (linear/raid0) do not need it, as
 * they never do any recovery or update the superblock.
 *
 * It does not do any resync itself; when resync is needed it sets
 * MD_RECOVERY_RUNNING and forks off ->sync_thread to do the work, and reaps
 * that thread once it sets MD_RECOVERY_DONE.  It also removes faulty devices
 * (with no pending I/O) and activates spares.
 */
9107void md_check_recovery(struct mddev *mddev)
9108{
9109 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
 /* Write the superblock - the thread that called
 * mddev_suspend() holds reconfig_mutex for us.
 */
9113 set_bit(MD_UPDATING_SB, &mddev->flags);
9114 smp_mb__after_atomic();
9115 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9116 md_update_sb(mddev, 0);
9117 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9118 wake_up(&mddev->sb_wait);
9119 }
9120
9121 if (mddev->suspended)
9122 return;
9123
9124 if (mddev->bitmap)
9125 md_bitmap_daemon_work(mddev);
9126
9127 if (signal_pending(current)) {
9128 if (mddev->pers->sync_request && !mddev->external) {
9129 pr_debug("md: %s in immediate safe mode\n",
9130 mdname(mddev));
9131 mddev->safemode = 2;
9132 }
9133 flush_signals(current);
9134 }
9135
9136 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9137 return;
9138 if ( ! (
9139 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9140 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9141 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9142 (mddev->external == 0 && mddev->safemode == 1) ||
9143 (mddev->safemode == 2
9144 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9145 ))
9146 return;
9147
9148 if (mddev_trylock(mddev)) {
9149 int spares = 0;
9150 bool try_set_sync = mddev->safemode != 0;
9151
9152 if (!mddev->external && mddev->safemode == 1)
9153 mddev->safemode = 0;
9154
9155 if (mddev->ro) {
9156 struct md_rdev *rdev;
9157 if (!mddev->external && mddev->in_sync)
 /* 'Blocked' flag not needed as failed devices
 * will be recorded if the array is switched to read/write.
 * Leaving it set will prevent the device
 * from being removed.
 */
9163 rdev_for_each(rdev, mddev)
9164 clear_bit(Blocked, &rdev->flags);
9165
 /* On a read-only array we can:
 * - remove failed devices
 * - add already-in_sync devices if the array itself
 *   is in-sync.
 * As we only add devices that are already in-sync,
 * we can activate the spares immediately.
 */
9172 remove_and_add_spares(mddev, NULL);
 /* There is no thread, but we need to call
 * ->spare_active and clear saved_raid_disk
 */
9176 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9177 md_reap_sync_thread(mddev);
9178 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9179 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9180 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9181 goto unlock;
9182 }
9183
9184 if (mddev_is_clustered(mddev)) {
9185 struct md_rdev *rdev;
 /* kick the device if another node issued a
 * remove disk.
 */
9189 rdev_for_each(rdev, mddev) {
9190 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9191 rdev->raid_disk < 0)
9192 md_kick_rdev_from_array(rdev);
9193 }
9194 }
9195
9196 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9197 spin_lock(&mddev->lock);
9198 set_in_sync(mddev);
9199 spin_unlock(&mddev->lock);
9200 }
9201
9202 if (mddev->sb_flags)
9203 md_update_sb(mddev, 0);
9204
9205 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9206 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
 /* resync/recovery still happening */
9208 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9209 goto unlock;
9210 }
9211 if (mddev->sync_thread) {
9212 md_reap_sync_thread(mddev);
9213 goto unlock;
9214 }
9215
 /* Set RUNNING before clearing NEEDED to avoid
 * any transients in the value of "sync_action". */
9218 mddev->curr_resync_completed = 0;
9219 spin_lock(&mddev->lock);
9220 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9221 spin_unlock(&mddev->lock);
9222
 /* Clear some bits that don't mean anything, but
 * might be left set. */
9225 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9226 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9227
9228 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9229 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9230 goto not_running;
9231
 /* No recovery is running.
 * Remove any failed drives, then
 * add spares if possible.
 * Spares are also removed and re-added, to allow
 * the personality to fail the re-add.
 */
9238 if (mddev->reshape_position != MaxSector) {
9239 if (mddev->pers->check_reshape == NULL ||
9240 mddev->pers->check_reshape(mddev) != 0)
 /* Cannot proceed */
9242 goto not_running;
9243 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9244 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9245 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9246 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9247 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9248 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9249 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9250 } else if (mddev->recovery_cp < MaxSector) {
9251 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9252 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9253 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
 /* nothing to be done ... */
9255 goto not_running;
9256
9257 if (mddev->pers->sync_request) {
9258 if (spares) {
 /* We are adding a device or devices to an array
 * which has the bitmap stored on all devices.
 * So make sure all bitmap pages get written.
 */
9263 md_bitmap_write_all(mddev->bitmap);
9264 }
9265 INIT_WORK(&mddev->del_work, md_start_sync);
9266 queue_work(md_misc_wq, &mddev->del_work);
9267 goto unlock;
9268 }
9269 not_running:
9270 if (!mddev->sync_thread) {
9271 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9272 wake_up(&resync_wait);
9273 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9274 &mddev->recovery))
9275 if (mddev->sysfs_action)
9276 sysfs_notify_dirent_safe(mddev->sysfs_action);
9277 }
9278 unlock:
9279 wake_up(&mddev->sb_wait);
9280 mddev_unlock(mddev);
9281 }
9282}
9283EXPORT_SYMBOL(md_check_recovery);
9284
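/*
 * Reap a finished resync/recovery thread: unregister it, activate any
 * spares on success, finish a reshape if one was running, write the
 * superblock and clear the MD_RECOVERY_* state bits.
 */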
9285void md_reap_sync_thread(struct mddev *mddev)
9286{
9287 struct md_rdev *rdev;
9288 sector_t old_dev_sectors = mddev->dev_sectors;
9289 bool is_reshaped = false;
9290
 /* resync has finished, collect result */
9292 md_unregister_thread(&mddev->sync_thread);
9293 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9294 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9295 mddev->degraded != mddev->raid_disks) {
 /* Resync/recovery finished without interruption and was not a
 * user-requested check/repair, so activate any spares. */
9298 if (mddev->pers->spare_active(mddev)) {
9299 sysfs_notify(&mddev->kobj, NULL,
9300 "degraded");
9301 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9302 }
9303 }
9304 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9305 mddev->pers->finish_reshape) {
9306 mddev->pers->finish_reshape(mddev);
9307 if (mddev_is_clustered(mddev))
9308 is_reshaped = true;
9309 }
9310
 /* If the array is no longer degraded, then any saved_raid_disk
 * information must be scrapped.
 */
9314 if (!mddev->degraded)
9315 rdev_for_each(rdev, mddev)
9316 rdev->saved_raid_disk = -1;
9317
9318 md_update_sb(mddev, 1);
9319
 /* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set. */
9322 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9323 md_cluster_ops->resync_finish(mddev);
9324 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9325 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9326 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9327 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9328 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9329 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9330
 /* We call md_cluster_ops->update_size here because sync_size could
 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
 * so it is time to update the size across the cluster.
 */
9335 if (mddev_is_clustered(mddev) && is_reshaped
9336 && !test_bit(MD_CLOSING, &mddev->flags))
9337 md_cluster_ops->update_size(mddev, old_dev_sectors);
9338 wake_up(&resync_wait);
 /* flag recovery needed just to double check */
9340 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9341 sysfs_notify_dirent_safe(mddev->sysfs_action);
9342 md_new_event(mddev);
9343 if (mddev->event_work.func)
9344 queue_work(md_misc_wq, &mddev->event_work);
9345}
9346EXPORT_SYMBOL(md_reap_sync_thread);
9347
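/*
 * Wait (for up to five seconds) until @rdev is no longer Blocked or
 * BlockedBadBlocks, then drop the pending reference the caller holds.
 */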
9348void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9349{
9350 sysfs_notify_dirent_safe(rdev->sysfs_state);
9351 wait_event_timeout(rdev->blocked_wait,
9352 !test_bit(Blocked, &rdev->flags) &&
9353 !test_bit(BlockedBadBlocks, &rdev->flags),
9354 msecs_to_jiffies(5000));
9355 rdev_dec_pending(rdev, mddev);
9356}
9357EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9358
9359void md_finish_reshape(struct mddev *mddev)
9360{
 /* called by the personality module when a reshape completes. */
9362 struct md_rdev *rdev;
9363
9364 rdev_for_each(rdev, mddev) {
9365 if (rdev->data_offset > rdev->new_data_offset)
9366 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9367 else
9368 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9369 rdev->data_offset = rdev->new_data_offset;
9370 }
9371}
9372EXPORT_SYMBOL(md_finish_reshape);
9373
/* Bad block management */

/* Returns 1 on success, 0 on failure */
9377int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9378 int is_new)
9379{
9380 struct mddev *mddev = rdev->mddev;
9381 int rv;
9382 if (is_new)
9383 s += rdev->new_data_offset;
9384 else
9385 s += rdev->data_offset;
9386 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9387 if (rv == 0) {
 /* Make sure they get written out promptly */
9389 if (test_bit(ExternalBbl, &rdev->flags))
9390 sysfs_notify(&rdev->kobj, NULL,
9391 "unacknowledged_bad_blocks");
9392 sysfs_notify_dirent_safe(rdev->sysfs_state);
9393 set_mask_bits(&mddev->sb_flags, 0,
9394 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9395 md_wakeup_thread(rdev->mddev->thread);
9396 return 1;
9397 } else
9398 return 0;
9399}
9400EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9401
9402int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9403 int is_new)
9404{
9405 int rv;
9406 if (is_new)
9407 s += rdev->new_data_offset;
9408 else
9409 s += rdev->data_offset;
9410 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9411 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9412 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
9413 return rv;
9414}
9415EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9416
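/*
 * Reboot notifier: stop writes on every running array so the superblocks
 * are consistent before the machine goes down.
 */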
9417static int md_notify_reboot(struct notifier_block *this,
9418 unsigned long code, void *x)
9419{
9420 struct list_head *tmp;
9421 struct mddev *mddev;
9422 int need_delay = 0;
9423
9424 for_each_mddev(mddev, tmp) {
9425 if (mddev_trylock(mddev)) {
9426 if (mddev->pers)
9427 __md_stop_writes(mddev);
9428 if (mddev->persistent)
9429 mddev->safemode = 2;
9430 mddev_unlock(mddev);
9431 }
9432 need_delay = 1;
9433 }
9434
 /* certain more exotic SCSI devices are known to be
 * volatile wrt too early system reboots. While the
 * right place to handle this issue is the given
 * driver, we do want to have a safe RAID driver ...
 */
9440 if (need_delay)
9441 mdelay(1000*1);
9442
9443 return NOTIFY_DONE;
9444}
9445
9446static struct notifier_block md_notifier = {
9447 .notifier_call = md_notify_reboot,
9448 .next = NULL,
9449 .priority = INT_MAX,
9450};
9451
9452static void md_geninit(void)
9453{
9454 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9455
9456 proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
9457}
9458
9459static int __init md_init(void)
9460{
9461 int ret = -ENOMEM;
9462
9463 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9464 if (!md_wq)
9465 goto err_wq;
9466
9467 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9468 if (!md_misc_wq)
9469 goto err_misc_wq;
9470
9471 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
9472 goto err_md;
9473
9474 if ((ret = register_blkdev(0, "mdp")) < 0)
9475 goto err_mdp;
9476 mdp_major = ret;
9477
9478 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
9479 md_probe, NULL, NULL);
9480 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
9481 md_probe, NULL, NULL);
9482
9483 register_reboot_notifier(&md_notifier);
9484 raid_table_header = register_sysctl_table(raid_root_table);
9485
9486 md_geninit();
9487 return 0;
9488
9489err_mdp:
9490 unregister_blkdev(MD_MAJOR, "md");
9491err_md:
9492 destroy_workqueue(md_misc_wq);
9493err_misc_wq:
9494 destroy_workqueue(md_wq);
9495err_wq:
9496 return ret;
9497}
9498
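/*
 * Called on a cluster node after another node changed the metadata: apply
 * any size, role, raid_disks or reshape changes recorded in the freshly
 * re-read superblock of @rdev to the local mddev.
 */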
9499static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9500{
9501 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9502 struct md_rdev *rdev2;
9503 int role, ret;
9504 char b[BDEVNAME_SIZE];
9505
 /*
 * If the size was changed on another node then we need to
 * do the resize here as well.
 */
9510 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9511 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9512 if (ret)
9513 pr_info("md-cluster: resize failed\n");
9514 else
9515 md_bitmap_update_sb(mddev->bitmap);
9516 }
9517
 /* Check for a change of roles in the active devices */
9519 rdev_for_each(rdev2, mddev) {
9520 if (test_bit(Faulty, &rdev2->flags))
9521 continue;
9522
 /* Check if the role changed */
9524 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9525
9526 if (test_bit(Candidate, &rdev2->flags)) {
9527 if (role == 0xfffe) {
9528 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9529 md_kick_rdev_from_array(rdev2);
9530 continue;
9531 }
9532 else
9533 clear_bit(Candidate, &rdev2->flags);
9534 }
9535
9536 if (role != rdev2->raid_disk) {
 /* A spare was activated on another node: activate it
 * here too, unless a reshape is currently active.
 */
9540 if (rdev2->raid_disk == -1 && role != 0xffff &&
9541 !(le32_to_cpu(sb->feature_map) &
9542 MD_FEATURE_RESHAPE_ACTIVE)) {
9543 rdev2->saved_raid_disk = role;
9544 ret = remove_and_add_spares(mddev, rdev2);
9545 pr_info("Activated spare: %s\n",
9546 bdevname(rdev2->bdev,b));
 /* wake up mddev->thread here, so the array can
 * perform resync with the newly activated disk */
9549 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9550 md_wakeup_thread(mddev->thread);
9551 }
 /* device faulty:
 * we just want to do the minimum to mark the disk
 * as faulty. The recovery is performed by the
 * node that initiated the error.
 */
9557 if ((role == 0xfffe) || (role == 0xfffd)) {
9558 md_error(mddev, rdev2);
9559 clear_bit(Blocked, &rdev2->flags);
9560 }
9561 }
9562 }
9563
9564 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
9565 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9566
 /*
 * Since mddev->delta_disks has already been updated in
 * update_raid_disks, it is now time to check the reshape.
 */
9571 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9572 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
 /*
 * reshape is happening in the remote node, we need to
 * update reshape_position and call start_reshape.
 */
9577 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9578 if (mddev->pers->update_reshape_pos)
9579 mddev->pers->update_reshape_pos(mddev);
9580 if (mddev->pers->start_reshape)
9581 mddev->pers->start_reshape(mddev);
9582 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9583 mddev->reshape_position != MaxSector &&
9584 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
 /* reshape has just finished on another node. */
9586 mddev->reshape_position = MaxSector;
9587 if (mddev->pers->update_reshape_pos)
9588 mddev->pers->update_reshape_pos(mddev);
9589 }
9590
 /* Finally set the event counter to be up to date */
9592 mddev->events = le64_to_cpu(sb->events);
9593}
9594
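/*
 * Re-read the superblock of @rdev into a freshly allocated page, restoring
 * the old page (and old state) if the read fails.
 */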
9595static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9596{
9597 int err;
9598 struct page *swapout = rdev->sb_page;
9599 struct mdp_superblock_1 *sb;
9600
 /* Store the sb page of the rdev in the swapout temporary
 * variable, in case we err in the future.
 */
9604 rdev->sb_page = NULL;
9605 err = alloc_disk_sb(rdev);
9606 if (err == 0) {
9607 ClearPageUptodate(rdev->sb_page);
9608 rdev->sb_loaded = 0;
9609 err = super_types[mddev->major_version].
9610 load_super(rdev, NULL, mddev->minor_version);
9611 }
9612 if (err < 0) {
9613 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9614 __func__, __LINE__, rdev->desc_nr, err);
9615 if (rdev->sb_page)
9616 put_page(rdev->sb_page);
9617 rdev->sb_page = swapout;
9618 rdev->sb_loaded = 1;
9619 return err;
9620 }
9621
9622 sb = page_address(rdev->sb_page);
9623
 /* Read the offset unconditionally, even if
 * MD_FEATURE_RECOVERY_OFFSET is not set.
 */
9627 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9628 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9629
 /* The other node finished recovery, call spare_active to set
 * the device In_sync and update mddev->degraded.
 */
9633 if (rdev->recovery_offset == MaxSector &&
9634 !test_bit(In_sync, &rdev->flags) &&
9635 mddev->pers->spare_active(mddev))
9636 sysfs_notify(&mddev->kobj, NULL, "degraded");
9637
9638 put_page(swapout);
9639 return 0;
9640}
9641
9642void md_reload_sb(struct mddev *mddev, int nr)
9643{
9644 struct md_rdev *rdev;
9645 int err;
9646
 /* Find the rdev */
9648 rdev_for_each_rcu(rdev, mddev) {
9649 if (rdev->desc_nr == nr)
9650 break;
9651 }
9652
9653 if (!rdev || rdev->desc_nr != nr) {
9654 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9655 return;
9656 }
9657
9658 err = read_rdev(mddev, rdev);
9659 if (err < 0)
9660 return;
9661
9662 check_sb_changes(mddev, rdev);
9663
 /* Read all rdevs to update recovery_offset */
9665 rdev_for_each_rcu(rdev, mddev) {
9666 if (!test_bit(Faulty, &rdev->flags))
9667 read_rdev(mddev, rdev);
9668 }
9669}
9670EXPORT_SYMBOL(md_reload_sb);
9671
9672#ifndef MODULE
9673
/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */
9678
9679static DEFINE_MUTEX(detected_devices_mutex);
9680static LIST_HEAD(all_detected_devices);
9681struct detected_devices_node {
9682 struct list_head list;
9683 dev_t dev;
9684};
9685
9686void md_autodetect_dev(dev_t dev)
9687{
9688 struct detected_devices_node *node_detected_dev;
9689
9690 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9691 if (node_detected_dev) {
9692 node_detected_dev->dev = dev;
9693 mutex_lock(&detected_devices_mutex);
9694 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9695 mutex_unlock(&detected_devices_mutex);
9696 }
9697}
9698
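/*
 * Import every device queued by md_autodetect_dev() and hand the resulting
 * list to autorun_devices() so boot-time arrays get assembled.
 */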
9699static void autostart_arrays(int part)
9700{
9701 struct md_rdev *rdev;
9702 struct detected_devices_node *node_detected_dev;
9703 dev_t dev;
9704 int i_scanned, i_passed;
9705
9706 i_scanned = 0;
9707 i_passed = 0;
9708
9709 pr_info("md: Autodetecting RAID arrays.\n");
9710
9711 mutex_lock(&detected_devices_mutex);
9712 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9713 i_scanned++;
9714 node_detected_dev = list_entry(all_detected_devices.next,
9715 struct detected_devices_node, list);
9716 list_del(&node_detected_dev->list);
9717 dev = node_detected_dev->dev;
9718 kfree(node_detected_dev);
9719 mutex_unlock(&detected_devices_mutex);
9720 rdev = md_import_device(dev,0, 90);
9721 mutex_lock(&detected_devices_mutex);
9722 if (IS_ERR(rdev))
9723 continue;
9724
9725 if (test_bit(Faulty, &rdev->flags))
9726 continue;
9727
9728 set_bit(AutoDetected, &rdev->flags);
9729 list_add(&rdev->same_set, &pending_raid_disks);
9730 i_passed++;
9731 }
9732 mutex_unlock(&detected_devices_mutex);
9733
9734 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9735
9736 autorun_devices(part);
9737}
9738
9739#endif
9740
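/*
 * Module unload: unregister block regions, notifiers and sysctls, wake any
 * /proc/mdstat pollers, then tear down all arrays and the workqueues.
 */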
9741static __exit void md_exit(void)
9742{
9743 struct mddev *mddev;
9744 struct list_head *tmp;
9745 int delay = 1;
9746
9747 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
9748 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
9749
9750 unregister_blkdev(MD_MAJOR,"md");
9751 unregister_blkdev(mdp_major, "mdp");
9752 unregister_reboot_notifier(&md_notifier);
9753 unregister_sysctl_table(raid_table_header);
9754
 /* We cannot unload the module while some process is
 * waiting for us in select() or poll() - wake them up
 */
9758 md_unloading = 1;
9759 while (waitqueue_active(&md_event_waiters)) {
 /* not safe to leave yet */
9761 wake_up(&md_event_waiters);
9762 msleep(delay);
9763 delay += delay;
9764 }
9765 remove_proc_entry("mdstat", NULL);
9766
9767 for_each_mddev(mddev, tmp) {
9768 export_array(mddev);
9769 mddev->ctime = 0;
9770 mddev->hold_active = 0;
 /*
 * for_each_mddev() will call mddev_put() at the end of each
 * iteration.  As the mddev is now fully clear, this will
 * schedule the mddev for destruction by a workqueue, and the
 * destroy_workqueue() below will wait for that to complete.
 */
9777 }
9778 destroy_workqueue(md_misc_wq);
9779 destroy_workqueue(md_wq);
9780}
9781
9782subsys_initcall(md_init);
9783module_exit(md_exit)
9784
9785static int get_ro(char *buffer, const struct kernel_param *kp)
9786{
9787 return sprintf(buffer, "%d", start_readonly);
9788}
9789static int set_ro(const char *val, const struct kernel_param *kp)
9790{
9791 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9792}
9793
9794module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9795module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9796module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9797module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9798
9799MODULE_LICENSE("GPL");
9800MODULE_DESCRIPTION("MD RAID framework");
9801MODULE_ALIAS("md");
9802MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9803