/*
 * md.c : Multiple Devices driver for Linux
 */

#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is a list of registered personalities protected
 * by pers_lock.
 * pers_lock does extra service to protect accesses to
 * mddev->thread when the mutex cannot be held.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/* Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static int rdev_init_wb(struct md_rdev *rdev)
{
	if (rdev->bdev->bd_queue->nr_hw_queues == 1)
		return 0;

	spin_lock_init(&rdev->wb_list_lock);
	INIT_LIST_HEAD(&rdev->wb_list);
	init_waitqueue_head(&rdev->wb_io_wait);
	set_bit(WBCollisionCheck, &rdev->flags);

	return 1;
}

/*
 * Create wb_info_pool if rdev is the first multi-queue device flagged
 * with writemostly.
 */
void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
			  bool is_suspend)
{
	if (mddev->bitmap_info.max_write_behind == 0)
		return;

	if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev))
		return;

	if (mddev->wb_info_pool == NULL) {
		unsigned int noio_flag;

		if (!is_suspend)
			mddev_suspend(mddev);
		noio_flag = memalloc_noio_save();
		mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS,
							sizeof(struct wb_info));
		memalloc_noio_restore(noio_flag);
		if (!mddev->wb_info_pool)
			pr_err("can't alloc memory pool for writemostly\n");
		if (!is_suspend)
			mddev_resume(mddev);
	}
}
EXPORT_SYMBOL_GPL(mddev_create_wb_pool);

/*
 * Destroy wb_info_pool if rdev is the last device flagged with
 * WBCollisionCheck.
 */
static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev)
{
	if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags))
		return;

	if (mddev->wb_info_pool) {
		struct md_rdev *temp;
		int num = 0;

		/*
		 * Check if other rdevs still need wb_info_pool.
		 */
		rdev_for_each(temp, mddev)
			if (temp != rdev &&
			    test_bit(WBCollisionCheck, &temp->flags))
				num++;
		if (!num) {
			mddev_suspend(rdev->mddev);
			mempool_destroy(mddev->wb_info_pool);
			mddev->wb_info_pool = NULL;
			mddev_resume(rdev->mddev);
		}
	}
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * Historically an md device was created as a side effect of opening its
 * block device node.  create_on_open preserves that behaviour; arrays can
 * also be created explicitly through the newer sysfs interface.
 */
static bool create_on_open = true;

struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->bio_set))
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->sync_set))
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * will respond to increments of the count (we use poll/select).
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables to iterate over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding a
 * reference to the current mddev must mddev_put it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device
 * is being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	const int sgrp = op_stat_group(bio_op(bio));
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;

	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	blk_queue_split(q, &bio);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	part_stat_lock();
	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}

/*
 * Generic flush handling for md
 */

static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to avoid a
	 * deadlock, because other bios passed md_handle_request suspend check
	 * could wait for this and below md_handle_request could wait for those
	 * bios because of suspend check
	 */
	mddev->last_flush = mddev->start_flush;
	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0) {
		/* an empty barrier - all done */
		bio_endio(bio);
	} else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_after(mddev->last_flush, start),
			    mddev->lock);
	if (!ktime_after(mddev->last_flush, start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		/* flush was performed for some other bio while we waited. */
		if (bio->bi_iter.bi_size == 0)
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			mddev->pers->make_request(mddev, bio);
		}
	}
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);

		/*
		 * Call queue_work inside the spinlock so that
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);
760
761struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
762{
763 struct md_rdev *rdev;
764
765 rdev_for_each_rcu(rdev, mddev)
766 if (rdev->desc_nr == nr)
767 return rdev;
768
769 return NULL;
770}
771EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
772
773static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
774{
775 struct md_rdev *rdev;
776
777 rdev_for_each(rdev, mddev)
778 if (rdev->bdev->bd_dev == dev)
779 return rdev;
780
781 return NULL;
782}
783
784struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
785{
786 struct md_rdev *rdev;
787
788 rdev_for_each_rcu(rdev, mddev)
789 if (rdev->bdev->bd_dev == dev)
790 return rdev;
791
792 return NULL;
793}
794EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
795
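/*
 * Look up a registered personality that matches either the numeric level
 * or the level name.  Returns NULL if no such personality is registered.
 */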
796static struct md_personality *find_pers(int level, char *clevel)
797{
798 struct md_personality *pers;
799 list_for_each_entry(pers, &pers_list, list) {
800 if (level != LEVEL_NONE && pers->level == level)
801 return pers;
802 if (strcmp(pers->name, clevel)==0)
803 return pers;
804 }
805 return NULL;
806}
807
808
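/*
 * The 0.90 superblock lives near the end of the device;
 * MD_NEW_SIZE_SECTORS() rounds the device size down to the reserved
 * superblock boundary, giving the superblock offset in 512-byte sectors.
 */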
809static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
810{
811 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
812 return MD_NEW_SIZE_SECTORS(num_sectors);
813}
814
815static int alloc_disk_sb(struct md_rdev *rdev)
816{
817 rdev->sb_page = alloc_page(GFP_KERNEL);
818 if (!rdev->sb_page)
819 return -ENOMEM;
820 return 0;
821}
822
823void md_rdev_clear(struct md_rdev *rdev)
824{
825 if (rdev->sb_page) {
826 put_page(rdev->sb_page);
827 rdev->sb_loaded = 0;
828 rdev->sb_page = NULL;
829 rdev->sb_start = 0;
830 rdev->sectors = 0;
831 }
832 if (rdev->bb_page) {
833 put_page(rdev->bb_page);
834 rdev->bb_page = NULL;
835 }
836 badblocks_exit(&rdev->badblocks);
837}
838EXPORT_SYMBOL_GPL(md_rdev_clear);
839
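/*
 * bi_end_io callback for superblock writes issued by md_super_write():
 * a failed failfast write is flagged for rewrite via MD_SB_NEED_REWRITE;
 * the pending-write count is dropped and md_super_wait() callers woken.
 */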
840static void super_written(struct bio *bio)
841{
842 struct md_rdev *rdev = bio->bi_private;
843 struct mddev *mddev = rdev->mddev;
844
845 if (bio->bi_status) {
846 pr_err("md: super_written gets error=%d\n", bio->bi_status);
847 md_error(mddev, rdev);
848 if (!test_bit(Faulty, &rdev->flags)
849 && (bio->bi_opf & MD_FAILFAST)) {
850 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
851 set_bit(LastDev, &rdev->flags);
852 }
853 } else
854 clear_bit(LastDev, &rdev->flags);
855
856 if (atomic_dec_and_test(&mddev->pending_writes))
857 wake_up(&mddev->sb_wait);
858 rdev_dec_pending(rdev, mddev);
859 bio_put(bio);
860}
861
862void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
863 sector_t sector, int size, struct page *page)
864{
865
866
867
868
869
870
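	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */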
871 struct bio *bio;
872 int ff = 0;
873
874 if (!page)
875 return;
876
877 if (test_bit(Faulty, &rdev->flags))
878 return;
879
880 bio = md_bio_alloc_sync(mddev);
881
882 atomic_inc(&rdev->nr_pending);
883
884 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
885 bio->bi_iter.bi_sector = sector;
886 bio_add_page(bio, page, size, 0);
887 bio->bi_private = rdev;
888 bio->bi_end_io = super_written;
889
890 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
891 test_bit(FailFast, &rdev->flags) &&
892 !test_bit(LastDev, &rdev->flags))
893 ff = MD_FAILFAST;
894 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
895
896 atomic_inc(&mddev->pending_writes);
897 submit_bio(bio);
898}
899
900int md_super_wait(struct mddev *mddev)
901{
	/* wait for all superblock writes that were scheduled to complete */
903 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
904 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
905 return -EAGAIN;
906 return 0;
907}
908
909int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
910 struct page *page, int op, int op_flags, bool metadata_op)
911{
912 struct bio *bio = md_bio_alloc_sync(rdev->mddev);
913 int ret;
914
915 if (metadata_op && rdev->meta_bdev)
916 bio_set_dev(bio, rdev->meta_bdev);
917 else
918 bio_set_dev(bio, rdev->bdev);
919 bio_set_op_attrs(bio, op, op_flags);
920 if (metadata_op)
921 bio->bi_iter.bi_sector = sector + rdev->sb_start;
922 else if (rdev->mddev->reshape_position != MaxSector &&
923 (rdev->mddev->reshape_backwards ==
924 (sector >= rdev->mddev->reshape_position)))
925 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
926 else
927 bio->bi_iter.bi_sector = sector + rdev->data_offset;
928 bio_add_page(bio, page, size, 0);
929
930 submit_bio_wait(bio);
931
932 ret = !bio->bi_status;
933 bio_put(bio);
934 return ret;
935}
936EXPORT_SYMBOL_GPL(sync_page_io);
937
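/*
 * Read the first 'size' bytes of the superblock into rdev->sb_page,
 * caching the result in rdev->sb_loaded so repeat calls are no-ops.
 */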
938static int read_disk_sb(struct md_rdev *rdev, int size)
939{
940 char b[BDEVNAME_SIZE];
941
942 if (rdev->sb_loaded)
943 return 0;
944
945 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
946 goto fail;
947 rdev->sb_loaded = 1;
948 return 0;
949
950fail:
951 pr_err("md: disabled device %s, could not read superblock.\n",
952 bdevname(rdev->bdev,b));
953 return -EINVAL;
954}
955
956static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
957{
958 return sb1->set_uuid0 == sb2->set_uuid0 &&
959 sb1->set_uuid1 == sb2->set_uuid1 &&
960 sb1->set_uuid2 == sb2->set_uuid2 &&
961 sb1->set_uuid3 == sb2->set_uuid3;
962}
963
964static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
965{
966 int ret;
967 mdp_super_t *tmp1, *tmp2;
968
969 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
970 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
971
972 if (!tmp1 || !tmp2) {
973 ret = 0;
974 goto abort;
975 }
976
977 *tmp1 = *sb1;
978 *tmp2 = *sb2;
979
980
981
982
983 tmp1->nr_disks = 0;
984 tmp2->nr_disks = 0;
985
986 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
987abort:
988 kfree(tmp1);
989 kfree(tmp2);
990 return ret;
991}
992
993static u32 md_csum_fold(u32 csum)
994{
995 csum = (csum & 0xffff) + (csum >> 16);
996 return (csum & 0xffff) + (csum >> 16);
997}
998
999static unsigned int calc_sb_csum(mdp_super_t *sb)
1000{
1001 u64 newcsum = 0;
1002 u32 *sb32 = (u32*)sb;
1003 int i;
1004 unsigned int disk_csum, csum;
1005
1006 disk_csum = sb->sb_csum;
1007 sb->sb_csum = 0;
1008
1009 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1010 newcsum += sb32[i];
1011 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1012
1013#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * using), but we must try to keep the same value over time,
	 * so the same architecture gets the same value across kernel
	 * versions.
	 */
1022 sb->sb_csum = md_csum_fold(disk_csum);
1023#else
1024 sb->sb_csum = disk_csum;
1025#endif
1026 return csum;
1027}
1028
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * for adding/removing devices on running arrays.
 *
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates the superblock on dev.
 *      Returns 1 if the superblock is newer than that on refdev, 0 if it is
 *      not newer, or a negative error code if it is unacceptable.
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev, and update mddev state
 *      as appropriate.
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *      Update the superblock for rdev with data in mddev.
 *
 *   unsigned long long rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
 *      Change the size of the underlying device and update the superblock
 *      to match; returns the new number of sectors, or 0 on failure.
 *
 *   int allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
 *      Check if the given data offset is acceptable for this device.
 */
1059struct super_type {
1060 char *name;
1061 struct module *owner;
1062 int (*load_super)(struct md_rdev *rdev,
1063 struct md_rdev *refdev,
1064 int minor_version);
1065 int (*validate_super)(struct mddev *mddev,
1066 struct md_rdev *rdev);
1067 void (*sync_super)(struct mddev *mddev,
1068 struct md_rdev *rdev);
1069 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1070 sector_t num_sectors);
1071 int (*allow_new_offset)(struct md_rdev *rdev,
1072 unsigned long long new_offset);
1073};
1074
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 *
 */
1083int md_check_no_bitmap(struct mddev *mddev)
1084{
1085 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1086 return 0;
1087 pr_warn("%s: bitmaps are not supported for %s\n",
1088 mdname(mddev), mddev->pers->name);
1089 return 1;
1090}
1091EXPORT_SYMBOL(md_check_no_bitmap);
1092
1093
1094
1095
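/*
 * load_super for 0.90.0
 */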
1096static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1097{
1098 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1099 mdp_super_t *sb;
1100 int ret;
1101
1102
1103
1104
1105
1106
1107
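	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */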
1108 rdev->sb_start = calc_dev_sboffset(rdev);
1109
1110 ret = read_disk_sb(rdev, MD_SB_BYTES);
1111 if (ret)
1112 return ret;
1113
1114 ret = -EINVAL;
1115
1116 bdevname(rdev->bdev, b);
1117 sb = page_address(rdev->sb_page);
1118
1119 if (sb->md_magic != MD_SB_MAGIC) {
1120 pr_warn("md: invalid raid superblock magic on %s\n", b);
1121 goto abort;
1122 }
1123
1124 if (sb->major_version != 0 ||
1125 sb->minor_version < 90 ||
1126 sb->minor_version > 91) {
1127 pr_warn("Bad version number %d.%d on %s\n",
1128 sb->major_version, sb->minor_version, b);
1129 goto abort;
1130 }
1131
1132 if (sb->raid_disks <= 0)
1133 goto abort;
1134
1135 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1136 pr_warn("md: invalid superblock checksum on %s\n", b);
1137 goto abort;
1138 }
1139
1140 rdev->preferred_minor = sb->md_minor;
1141 rdev->data_offset = 0;
1142 rdev->new_data_offset = 0;
1143 rdev->sb_size = MD_SB_BYTES;
1144 rdev->badblocks.shift = -1;
1145
1146 if (sb->level == LEVEL_MULTIPATH)
1147 rdev->desc_nr = -1;
1148 else
1149 rdev->desc_nr = sb->this_disk.number;
1150
1151 if (!refdev) {
1152 ret = 1;
1153 } else {
1154 __u64 ev1, ev2;
1155 mdp_super_t *refsb = page_address(refdev->sb_page);
1156 if (!md_uuid_equal(refsb, sb)) {
1157 pr_warn("md: %s has different UUID to %s\n",
1158 b, bdevname(refdev->bdev,b2));
1159 goto abort;
1160 }
1161 if (!md_sb_equal(refsb, sb)) {
1162 pr_warn("md: %s has same UUID but different superblock to %s\n",
1163 b, bdevname(refdev->bdev, b2));
1164 goto abort;
1165 }
1166 ev1 = md_event(sb);
1167 ev2 = md_event(refsb);
1168 if (ev1 > ev2)
1169 ret = 1;
1170 else
1171 ret = 0;
1172 }
1173 rdev->sectors = rdev->sb_start;
1174
1175
1176
1177
1178 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1179 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1180
1181 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1182
1183 ret = -EINVAL;
1184
1185 abort:
1186 return ret;
1187}
1188
1189
1190
1191
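/*
 * validate_super for 0.90.0
 */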
1192static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1193{
1194 mdp_disk_t *desc;
1195 mdp_super_t *sb = page_address(rdev->sb_page);
1196 __u64 ev1 = md_event(sb);
1197
1198 rdev->raid_disk = -1;
1199 clear_bit(Faulty, &rdev->flags);
1200 clear_bit(In_sync, &rdev->flags);
1201 clear_bit(Bitmap_sync, &rdev->flags);
1202 clear_bit(WriteMostly, &rdev->flags);
1203
1204 if (mddev->raid_disks == 0) {
1205 mddev->major_version = 0;
1206 mddev->minor_version = sb->minor_version;
1207 mddev->patch_version = sb->patch_version;
1208 mddev->external = 0;
1209 mddev->chunk_sectors = sb->chunk_size >> 9;
1210 mddev->ctime = sb->ctime;
1211 mddev->utime = sb->utime;
1212 mddev->level = sb->level;
1213 mddev->clevel[0] = 0;
1214 mddev->layout = sb->layout;
1215 mddev->raid_disks = sb->raid_disks;
1216 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1217 mddev->events = ev1;
1218 mddev->bitmap_info.offset = 0;
1219 mddev->bitmap_info.space = 0;
1220
1221 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1222 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1223 mddev->reshape_backwards = 0;
1224
1225 if (mddev->minor_version >= 91) {
1226 mddev->reshape_position = sb->reshape_position;
1227 mddev->delta_disks = sb->delta_disks;
1228 mddev->new_level = sb->new_level;
1229 mddev->new_layout = sb->new_layout;
1230 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1231 if (mddev->delta_disks < 0)
1232 mddev->reshape_backwards = 1;
1233 } else {
1234 mddev->reshape_position = MaxSector;
1235 mddev->delta_disks = 0;
1236 mddev->new_level = mddev->level;
1237 mddev->new_layout = mddev->layout;
1238 mddev->new_chunk_sectors = mddev->chunk_sectors;
1239 }
1240 if (mddev->level == 0)
1241 mddev->layout = -1;
1242
1243 if (sb->state & (1<<MD_SB_CLEAN))
1244 mddev->recovery_cp = MaxSector;
1245 else {
1246 if (sb->events_hi == sb->cp_events_hi &&
1247 sb->events_lo == sb->cp_events_lo) {
1248 mddev->recovery_cp = sb->recovery_cp;
1249 } else
1250 mddev->recovery_cp = 0;
1251 }
1252
1253 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1254 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1255 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1256 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1257
1258 mddev->max_disks = MD_SB_DISKS;
1259
1260 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1261 mddev->bitmap_info.file == NULL) {
1262 mddev->bitmap_info.offset =
1263 mddev->bitmap_info.default_offset;
1264 mddev->bitmap_info.space =
1265 mddev->bitmap_info.default_space;
1266 }
1267
1268 } else if (mddev->pers == NULL) {
1269
1270
1271 ++ev1;
1272 if (sb->disks[rdev->desc_nr].state & (
1273 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1274 if (ev1 < mddev->events)
1275 return -EINVAL;
1276 } else if (mddev->bitmap) {
1277
1278
1279
1280 if (ev1 < mddev->bitmap->events_cleared)
1281 return 0;
1282 if (ev1 < mddev->events)
1283 set_bit(Bitmap_sync, &rdev->flags);
1284 } else {
1285 if (ev1 < mddev->events)
1286
1287 return 0;
1288 }
1289
1290 if (mddev->level != LEVEL_MULTIPATH) {
1291 desc = sb->disks + rdev->desc_nr;
1292
1293 if (desc->state & (1<<MD_DISK_FAULTY))
1294 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
1297 set_bit(In_sync, &rdev->flags);
1298 rdev->raid_disk = desc->raid_disk;
1299 rdev->saved_raid_disk = desc->raid_disk;
1300 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1301
1302
1303
1304 if (mddev->minor_version >= 91) {
1305 rdev->recovery_offset = 0;
1306 rdev->raid_disk = desc->raid_disk;
1307 }
1308 }
1309 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1310 set_bit(WriteMostly, &rdev->flags);
1311 if (desc->state & (1<<MD_DISK_FAILFAST))
1312 set_bit(FailFast, &rdev->flags);
1313 } else
1314 set_bit(In_sync, &rdev->flags);
1315 return 0;
1316}
1317
1318
1319
1320
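/*
 * sync_super for 0.90.0
 */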
1321static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1322{
1323 mdp_super_t *sb;
1324 struct md_rdev *rdev2;
1325 int next_spare = mddev->raid_disks;
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337 int i;
1338 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1339
1340 rdev->sb_size = MD_SB_BYTES;
1341
1342 sb = page_address(rdev->sb_page);
1343
1344 memset(sb, 0, sizeof(*sb));
1345
1346 sb->md_magic = MD_SB_MAGIC;
1347 sb->major_version = mddev->major_version;
1348 sb->patch_version = mddev->patch_version;
1349 sb->gvalid_words = 0;
1350 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1351 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1352 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1353 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1354
1355 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1356 sb->level = mddev->level;
1357 sb->size = mddev->dev_sectors / 2;
1358 sb->raid_disks = mddev->raid_disks;
1359 sb->md_minor = mddev->md_minor;
1360 sb->not_persistent = 0;
1361 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1362 sb->state = 0;
1363 sb->events_hi = (mddev->events>>32);
1364 sb->events_lo = (u32)mddev->events;
1365
1366 if (mddev->reshape_position == MaxSector)
1367 sb->minor_version = 90;
1368 else {
1369 sb->minor_version = 91;
1370 sb->reshape_position = mddev->reshape_position;
1371 sb->new_level = mddev->new_level;
1372 sb->delta_disks = mddev->delta_disks;
1373 sb->new_layout = mddev->new_layout;
1374 sb->new_chunk = mddev->new_chunk_sectors << 9;
1375 }
1376 mddev->minor_version = sb->minor_version;
1377 if (mddev->in_sync)
1378 {
1379 sb->recovery_cp = mddev->recovery_cp;
1380 sb->cp_events_hi = (mddev->events>>32);
1381 sb->cp_events_lo = (u32)mddev->events;
1382 if (mddev->recovery_cp == MaxSector)
1383 sb->state = (1<< MD_SB_CLEAN);
1384 } else
1385 sb->recovery_cp = 0;
1386
1387 sb->layout = mddev->layout;
1388 sb->chunk_size = mddev->chunk_sectors << 9;
1389
1390 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1391 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1392
1393 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1394 rdev_for_each(rdev2, mddev) {
1395 mdp_disk_t *d;
1396 int desc_nr;
1397 int is_active = test_bit(In_sync, &rdev2->flags);
1398
1399 if (rdev2->raid_disk >= 0 &&
1400 sb->minor_version >= 91)
1401
1402
1403
1404
1405 is_active = 1;
1406 if (rdev2->raid_disk < 0 ||
1407 test_bit(Faulty, &rdev2->flags))
1408 is_active = 0;
1409 if (is_active)
1410 desc_nr = rdev2->raid_disk;
1411 else
1412 desc_nr = next_spare++;
1413 rdev2->desc_nr = desc_nr;
1414 d = &sb->disks[rdev2->desc_nr];
1415 nr_disks++;
1416 d->number = rdev2->desc_nr;
1417 d->major = MAJOR(rdev2->bdev->bd_dev);
1418 d->minor = MINOR(rdev2->bdev->bd_dev);
1419 if (is_active)
1420 d->raid_disk = rdev2->raid_disk;
1421 else
1422 d->raid_disk = rdev2->desc_nr;
1423 if (test_bit(Faulty, &rdev2->flags))
1424 d->state = (1<<MD_DISK_FAULTY);
1425 else if (is_active) {
1426 d->state = (1<<MD_DISK_ACTIVE);
1427 if (test_bit(In_sync, &rdev2->flags))
1428 d->state |= (1<<MD_DISK_SYNC);
1429 active++;
1430 working++;
1431 } else {
1432 d->state = 0;
1433 spare++;
1434 working++;
1435 }
1436 if (test_bit(WriteMostly, &rdev2->flags))
1437 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1438 if (test_bit(FailFast, &rdev2->flags))
1439 d->state |= (1<<MD_DISK_FAILFAST);
1440 }
1441
1442 for (i=0 ; i < mddev->raid_disks ; i++) {
1443 mdp_disk_t *d = &sb->disks[i];
1444 if (d->state == 0 && d->number == 0) {
1445 d->number = i;
1446 d->raid_disk = i;
1447 d->state = (1<<MD_DISK_REMOVED);
1448 d->state |= (1<<MD_DISK_FAULTY);
1449 failed++;
1450 }
1451 }
1452 sb->nr_disks = nr_disks;
1453 sb->active_disks = active;
1454 sb->working_disks = working;
1455 sb->failed_disks = failed;
1456 sb->spare_disks = spare;
1457
1458 sb->this_disk = sb->disks[rdev->desc_nr];
1459 sb->sb_csum = calc_sb_csum(sb);
1460}
1461
1462
1463
1464
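/*
 * rdev_size_change for 0.90.0
 */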
1465static unsigned long long
1466super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1467{
1468 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1469 return 0;
1470 if (rdev->mddev->bitmap_info.offset)
1471 return 0;
1472 rdev->sb_start = calc_dev_sboffset(rdev);
1473 if (!num_sectors || num_sectors > rdev->sb_start)
1474 num_sectors = rdev->sb_start;
1475
1476
1477
1478 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1479 num_sectors = (sector_t)(2ULL << 32) - 2;
1480 do {
1481 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1482 rdev->sb_page);
1483 } while (md_super_wait(rdev->mddev) < 0);
1484 return num_sectors;
1485}
1486
1487static int
1488super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1489{
1490
1491 return new_offset == 0;
1492}
1493
1494
1495
1496
1497
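/*
 * version 1 superblock
 */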
1498static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1499{
1500 __le32 disk_csum;
1501 u32 csum;
1502 unsigned long long newcsum;
1503 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1504 __le32 *isuper = (__le32*)sb;
1505
1506 disk_csum = sb->sb_csum;
1507 sb->sb_csum = 0;
1508 newcsum = 0;
1509 for (; size >= 4; size -= 4)
1510 newcsum += le32_to_cpu(*isuper++);
1511
1512 if (size == 2)
1513 newcsum += le16_to_cpu(*(__le16*) isuper);
1514
1515 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1516 sb->sb_csum = disk_csum;
1517 return cpu_to_le32(csum);
1518}
1519
1520static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1521{
1522 struct mdp_superblock_1 *sb;
1523 int ret;
1524 sector_t sb_start;
1525 sector_t sectors;
1526 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1527 int bmask;
1528
1529
1530
1531
1532
1533
1534
1535
1536
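	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device.
	 * 2: 4K from start of device.
	 */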
1537 switch(minor_version) {
1538 case 0:
1539 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1540 sb_start -= 8*2;
1541 sb_start &= ~(sector_t)(4*2-1);
1542 break;
1543 case 1:
1544 sb_start = 0;
1545 break;
1546 case 2:
1547 sb_start = 8;
1548 break;
1549 default:
1550 return -EINVAL;
1551 }
1552 rdev->sb_start = sb_start;
1553
1554
1555
1556
1557 ret = read_disk_sb(rdev, 4096);
1558 if (ret) return ret;
1559
1560 sb = page_address(rdev->sb_page);
1561
1562 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1563 sb->major_version != cpu_to_le32(1) ||
1564 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1565 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1566 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1567 return -EINVAL;
1568
1569 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1570 pr_warn("md: invalid superblock checksum on %s\n",
1571 bdevname(rdev->bdev,b));
1572 return -EINVAL;
1573 }
1574 if (le64_to_cpu(sb->data_size) < 10) {
1575 pr_warn("md: data_size too small on %s\n",
1576 bdevname(rdev->bdev,b));
1577 return -EINVAL;
1578 }
1579 if (sb->pad0 ||
1580 sb->pad3[0] ||
1581 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
1583 return -EINVAL;
1584
1585 rdev->preferred_minor = 0xffff;
1586 rdev->data_offset = le64_to_cpu(sb->data_offset);
1587 rdev->new_data_offset = rdev->data_offset;
1588 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1589 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1590 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1591 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1592
1593 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1594 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1595 if (rdev->sb_size & bmask)
1596 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1597
1598 if (minor_version
1599 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1600 return -EINVAL;
1601 if (minor_version
1602 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1603 return -EINVAL;
1604
1605 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1606 rdev->desc_nr = -1;
1607 else
1608 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1609
1610 if (!rdev->bb_page) {
1611 rdev->bb_page = alloc_page(GFP_KERNEL);
1612 if (!rdev->bb_page)
1613 return -ENOMEM;
1614 }
1615 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1616 rdev->badblocks.count == 0) {
1617
1618
1619
1620 s32 offset;
1621 sector_t bb_sector;
1622 __le64 *bbp;
1623 int i;
1624 int sectors = le16_to_cpu(sb->bblog_size);
1625 if (sectors > (PAGE_SIZE / 512))
1626 return -EINVAL;
1627 offset = le32_to_cpu(sb->bblog_offset);
1628 if (offset == 0)
1629 return -EINVAL;
1630 bb_sector = (long long)offset;
1631 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1632 rdev->bb_page, REQ_OP_READ, 0, true))
1633 return -EIO;
1634 bbp = (__le64 *)page_address(rdev->bb_page);
1635 rdev->badblocks.shift = sb->bblog_shift;
1636 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1637 u64 bb = le64_to_cpu(*bbp);
1638 int count = bb & (0x3ff);
1639 u64 sector = bb >> 10;
1640 sector <<= sb->bblog_shift;
1641 count <<= sb->bblog_shift;
1642 if (bb + 1 == 0)
1643 break;
1644 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1645 return -EINVAL;
1646 }
1647 } else if (sb->bblog_offset != 0)
1648 rdev->badblocks.shift = 0;
1649
1650 if ((le32_to_cpu(sb->feature_map) &
1651 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1652 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1653 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1654 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1655 }
1656
1657 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1658 sb->level != 0)
1659 return -EINVAL;
1660
1661 if (!refdev) {
1662 ret = 1;
1663 } else {
1664 __u64 ev1, ev2;
1665 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1666
1667 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1668 sb->level != refsb->level ||
1669 sb->layout != refsb->layout ||
1670 sb->chunksize != refsb->chunksize) {
1671 pr_warn("md: %s has strangely different superblock to %s\n",
1672 bdevname(rdev->bdev,b),
1673 bdevname(refdev->bdev,b2));
1674 return -EINVAL;
1675 }
1676 ev1 = le64_to_cpu(sb->events);
1677 ev2 = le64_to_cpu(refsb->events);
1678
1679 if (ev1 > ev2)
1680 ret = 1;
1681 else
1682 ret = 0;
1683 }
1684 if (minor_version) {
1685 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1686 sectors -= rdev->data_offset;
1687 } else
1688 sectors = rdev->sb_start;
1689 if (sectors < le64_to_cpu(sb->data_size))
1690 return -EINVAL;
1691 rdev->sectors = le64_to_cpu(sb->data_size);
1692 return ret;
1693}
1694
1695static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1696{
1697 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1698 __u64 ev1 = le64_to_cpu(sb->events);
1699
1700 rdev->raid_disk = -1;
1701 clear_bit(Faulty, &rdev->flags);
1702 clear_bit(In_sync, &rdev->flags);
1703 clear_bit(Bitmap_sync, &rdev->flags);
1704 clear_bit(WriteMostly, &rdev->flags);
1705
1706 if (mddev->raid_disks == 0) {
1707 mddev->major_version = 1;
1708 mddev->patch_version = 0;
1709 mddev->external = 0;
1710 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1711 mddev->ctime = le64_to_cpu(sb->ctime);
1712 mddev->utime = le64_to_cpu(sb->utime);
1713 mddev->level = le32_to_cpu(sb->level);
1714 mddev->clevel[0] = 0;
1715 mddev->layout = le32_to_cpu(sb->layout);
1716 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1717 mddev->dev_sectors = le64_to_cpu(sb->size);
1718 mddev->events = ev1;
1719 mddev->bitmap_info.offset = 0;
1720 mddev->bitmap_info.space = 0;
1721
1722
1723
1724 mddev->bitmap_info.default_offset = 1024 >> 9;
1725 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1726 mddev->reshape_backwards = 0;
1727
1728 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1729 memcpy(mddev->uuid, sb->set_uuid, 16);
1730
1731 mddev->max_disks = (4096-256)/2;
1732
1733 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1734 mddev->bitmap_info.file == NULL) {
1735 mddev->bitmap_info.offset =
1736 (__s32)le32_to_cpu(sb->bitmap_offset);
1737
1738
1739
1740
1741
1742 if (mddev->minor_version > 0)
1743 mddev->bitmap_info.space = 0;
1744 else if (mddev->bitmap_info.offset > 0)
1745 mddev->bitmap_info.space =
1746 8 - mddev->bitmap_info.offset;
1747 else
1748 mddev->bitmap_info.space =
1749 -mddev->bitmap_info.offset;
1750 }
1751
1752 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1753 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1754 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1755 mddev->new_level = le32_to_cpu(sb->new_level);
1756 mddev->new_layout = le32_to_cpu(sb->new_layout);
1757 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1758 if (mddev->delta_disks < 0 ||
1759 (mddev->delta_disks == 0 &&
1760 (le32_to_cpu(sb->feature_map)
1761 & MD_FEATURE_RESHAPE_BACKWARDS)))
1762 mddev->reshape_backwards = 1;
1763 } else {
1764 mddev->reshape_position = MaxSector;
1765 mddev->delta_disks = 0;
1766 mddev->new_level = mddev->level;
1767 mddev->new_layout = mddev->layout;
1768 mddev->new_chunk_sectors = mddev->chunk_sectors;
1769 }
1770
1771 if (mddev->level == 0 &&
1772 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1773 mddev->layout = -1;
1774
1775 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1776 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1777
1778 if (le32_to_cpu(sb->feature_map) &
1779 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1780 if (le32_to_cpu(sb->feature_map) &
1781 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1782 return -EINVAL;
1783 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1784 (le32_to_cpu(sb->feature_map) &
1785 MD_FEATURE_MULTIPLE_PPLS))
1786 return -EINVAL;
1787 set_bit(MD_HAS_PPL, &mddev->flags);
1788 }
1789 } else if (mddev->pers == NULL) {
1790
1791
1792 ++ev1;
1793 if (rdev->desc_nr >= 0 &&
1794 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1795 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1796 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1797 if (ev1 < mddev->events)
1798 return -EINVAL;
1799 } else if (mddev->bitmap) {
1800
1801
1802
1803 if (ev1 < mddev->bitmap->events_cleared)
1804 return 0;
1805 if (ev1 < mddev->events)
1806 set_bit(Bitmap_sync, &rdev->flags);
1807 } else {
1808 if (ev1 < mddev->events)
1809
1810 return 0;
1811 }
1812 if (mddev->level != LEVEL_MULTIPATH) {
1813 int role;
1814 if (rdev->desc_nr < 0 ||
1815 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1816 role = MD_DISK_ROLE_SPARE;
1817 rdev->desc_nr = -1;
1818 } else
1819 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1820 switch(role) {
1821 case MD_DISK_ROLE_SPARE:
1822 break;
1823 case MD_DISK_ROLE_FAULTY:
1824 set_bit(Faulty, &rdev->flags);
1825 break;
1826 case MD_DISK_ROLE_JOURNAL:
1827 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
1829 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1830 return -EINVAL;
1831 }
1832 set_bit(Journal, &rdev->flags);
1833 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1834 rdev->raid_disk = 0;
1835 break;
1836 default:
1837 rdev->saved_raid_disk = role;
1838 if ((le32_to_cpu(sb->feature_map) &
1839 MD_FEATURE_RECOVERY_OFFSET)) {
1840 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1841 if (!(le32_to_cpu(sb->feature_map) &
1842 MD_FEATURE_RECOVERY_BITMAP))
1843 rdev->saved_raid_disk = -1;
1844 } else {
			/*
			 * If the array is FROZEN, then the device can't
			 * be in_sync with the rest of the array.
			 */
1849 if (!test_bit(MD_RECOVERY_FROZEN,
1850 &mddev->recovery))
1851 set_bit(In_sync, &rdev->flags);
1852 }
1853 rdev->raid_disk = role;
1854 break;
1855 }
1856 if (sb->devflags & WriteMostly1)
1857 set_bit(WriteMostly, &rdev->flags);
1858 if (sb->devflags & FailFast1)
1859 set_bit(FailFast, &rdev->flags);
1860 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1861 set_bit(Replacement, &rdev->flags);
1862 } else
1863 set_bit(In_sync, &rdev->flags);
1864
1865 return 0;
1866}
1867
1868static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1869{
1870 struct mdp_superblock_1 *sb;
1871 struct md_rdev *rdev2;
1872 int max_dev, i;
1873
1874
1875 sb = page_address(rdev->sb_page);
1876
1877 sb->feature_map = 0;
1878 sb->pad0 = 0;
1879 sb->recovery_offset = cpu_to_le64(0);
1880 memset(sb->pad3, 0, sizeof(sb->pad3));
1881
1882 sb->utime = cpu_to_le64((__u64)mddev->utime);
1883 sb->events = cpu_to_le64(mddev->events);
1884 if (mddev->in_sync)
1885 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1886 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1887 sb->resync_offset = cpu_to_le64(MaxSector);
1888 else
1889 sb->resync_offset = cpu_to_le64(0);
1890
1891 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1892
1893 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1894 sb->size = cpu_to_le64(mddev->dev_sectors);
1895 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1896 sb->level = cpu_to_le32(mddev->level);
1897 sb->layout = cpu_to_le32(mddev->layout);
1898 if (test_bit(FailFast, &rdev->flags))
1899 sb->devflags |= FailFast1;
1900 else
1901 sb->devflags &= ~FailFast1;
1902
1903 if (test_bit(WriteMostly, &rdev->flags))
1904 sb->devflags |= WriteMostly1;
1905 else
1906 sb->devflags &= ~WriteMostly1;
1907 sb->data_offset = cpu_to_le64(rdev->data_offset);
1908 sb->data_size = cpu_to_le64(rdev->sectors);
1909
1910 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1911 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1912 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1913 }
1914
1915 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
1916 !test_bit(In_sync, &rdev->flags)) {
1917 sb->feature_map |=
1918 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1919 sb->recovery_offset =
1920 cpu_to_le64(rdev->recovery_offset);
1921 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1922 sb->feature_map |=
1923 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1924 }
1925
1926 if (test_bit(Journal, &rdev->flags))
1927 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
1928 if (test_bit(Replacement, &rdev->flags))
1929 sb->feature_map |=
1930 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1931
1932 if (mddev->reshape_position != MaxSector) {
1933 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1934 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1935 sb->new_layout = cpu_to_le32(mddev->new_layout);
1936 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1937 sb->new_level = cpu_to_le32(mddev->new_level);
1938 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1939 if (mddev->delta_disks == 0 &&
1940 mddev->reshape_backwards)
1941 sb->feature_map
1942 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1943 if (rdev->new_data_offset != rdev->data_offset) {
1944 sb->feature_map
1945 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1946 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1947 - rdev->data_offset));
1948 }
1949 }
1950
1951 if (mddev_is_clustered(mddev))
1952 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
1953
1954 if (rdev->badblocks.count == 0)
1955 ;
1956 else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
1958 md_error(mddev, rdev);
1959 else {
1960 struct badblocks *bb = &rdev->badblocks;
1961 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
1962 u64 *p = bb->page;
1963 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1964 if (bb->changed) {
1965 unsigned seq;
1966
1967retry:
1968 seq = read_seqbegin(&bb->lock);
1969
1970 memset(bbp, 0xff, PAGE_SIZE);
1971
1972 for (i = 0 ; i < bb->count ; i++) {
1973 u64 internal_bb = p[i];
1974 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1975 | BB_LEN(internal_bb));
1976 bbp[i] = cpu_to_le64(store_bb);
1977 }
1978 bb->changed = 0;
1979 if (read_seqretry(&bb->lock, seq))
1980 goto retry;
1981
1982 bb->sector = (rdev->sb_start +
1983 (int)le32_to_cpu(sb->bblog_offset));
1984 bb->size = le16_to_cpu(sb->bblog_size);
1985 }
1986 }
1987
1988 max_dev = 0;
1989 rdev_for_each(rdev2, mddev)
1990 if (rdev2->desc_nr+1 > max_dev)
1991 max_dev = rdev2->desc_nr+1;
1992
1993 if (max_dev > le32_to_cpu(sb->max_dev)) {
1994 int bmask;
1995 sb->max_dev = cpu_to_le32(max_dev);
1996 rdev->sb_size = max_dev * 2 + 256;
1997 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1998 if (rdev->sb_size & bmask)
1999 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2000 } else
2001 max_dev = le32_to_cpu(sb->max_dev);
2002
2003 for (i=0; i<max_dev;i++)
2004 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2005
2006 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2007 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2008
2009 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2010 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2011 sb->feature_map |=
2012 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2013 else
2014 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2015 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2016 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2017 }
2018
2019 rdev_for_each(rdev2, mddev) {
2020 i = rdev2->desc_nr;
2021 if (test_bit(Faulty, &rdev2->flags))
2022 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2023 else if (test_bit(In_sync, &rdev2->flags))
2024 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2025 else if (test_bit(Journal, &rdev2->flags))
2026 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2027 else if (rdev2->raid_disk >= 0)
2028 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2029 else
2030 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2031 }
2032
2033 sb->sb_csum = calc_sb_1_csum(sb);
2034}
2035
2036static unsigned long long
2037super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2038{
2039 struct mdp_superblock_1 *sb;
2040 sector_t max_sectors;
2041 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2042 return 0;
2043 if (rdev->data_offset != rdev->new_data_offset)
2044 return 0;
2045 if (rdev->sb_start < rdev->data_offset) {
2046
2047 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2048 max_sectors -= rdev->data_offset;
2049 if (!num_sectors || num_sectors > max_sectors)
2050 num_sectors = max_sectors;
2051 } else if (rdev->mddev->bitmap_info.offset) {
2052
2053 return 0;
2054 } else {
2055
2056 sector_t sb_start;
2057 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
2058 sb_start &= ~(sector_t)(4*2 - 1);
2059 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
2060 if (!num_sectors || num_sectors > max_sectors)
2061 num_sectors = max_sectors;
2062 rdev->sb_start = sb_start;
2063 }
2064 sb = page_address(rdev->sb_page);
2065 sb->data_size = cpu_to_le64(num_sectors);
2066 sb->super_offset = cpu_to_le64(rdev->sb_start);
2067 sb->sb_csum = calc_sb_1_csum(sb);
2068 do {
2069 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2070 rdev->sb_page);
2071 } while (md_super_wait(rdev->mddev) < 0);
2072 return num_sectors;
2073
2074}
2075
2076static int
2077super_1_allow_new_offset(struct md_rdev *rdev,
2078 unsigned long long new_offset)
2079{
2080
2081 struct bitmap *bitmap;
2082 if (new_offset >= rdev->data_offset)
2083 return 1;
2084
2085
2086
2087 if (rdev->mddev->minor_version == 0)
2088 return 1;
2089
2090
2091
2092
2093
2094
2095
2096 if (rdev->sb_start + (32+4)*2 > new_offset)
2097 return 0;
2098 bitmap = rdev->mddev->bitmap;
2099 if (bitmap && !rdev->mddev->bitmap_info.file &&
2100 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2101 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2102 return 0;
2103 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2104 return 0;
2105
2106 return 1;
2107}
2108
2109static struct super_type super_types[] = {
2110 [0] = {
2111 .name = "0.90.0",
2112 .owner = THIS_MODULE,
2113 .load_super = super_90_load,
2114 .validate_super = super_90_validate,
2115 .sync_super = super_90_sync,
2116 .rdev_size_change = super_90_rdev_size_change,
2117 .allow_new_offset = super_90_allow_new_offset,
2118 },
2119 [1] = {
2120 .name = "md-1",
2121 .owner = THIS_MODULE,
2122 .load_super = super_1_load,
2123 .validate_super = super_1_validate,
2124 .sync_super = super_1_sync,
2125 .rdev_size_change = super_1_rdev_size_change,
2126 .allow_new_offset = super_1_allow_new_offset,
2127 },
2128};
2129
2130static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2131{
2132 if (mddev->sync_super) {
2133 mddev->sync_super(mddev, rdev);
2134 return;
2135 }
2136
2137 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2138
2139 super_types[mddev->major_version].sync_super(mddev, rdev);
2140}
2141
2142static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2143{
2144 struct md_rdev *rdev, *rdev2;
2145
2146 rcu_read_lock();
2147 rdev_for_each_rcu(rdev, mddev1) {
2148 if (test_bit(Faulty, &rdev->flags) ||
2149 test_bit(Journal, &rdev->flags) ||
2150 rdev->raid_disk == -1)
2151 continue;
2152 rdev_for_each_rcu(rdev2, mddev2) {
2153 if (test_bit(Faulty, &rdev2->flags) ||
2154 test_bit(Journal, &rdev2->flags) ||
2155 rdev2->raid_disk == -1)
2156 continue;
2157 if (rdev->bdev->bd_contains ==
2158 rdev2->bdev->bd_contains) {
2159 rcu_read_unlock();
2160 return 1;
2161 }
2162 }
2163 }
2164 rcu_read_unlock();
2165 return 0;
2166}
2167
2168static LIST_HEAD(pending_raid_disks);
2169
2170
2171
2172
2173
2174
2175
2176
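/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */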
2177int md_integrity_register(struct mddev *mddev)
2178{
2179 struct md_rdev *rdev, *reference = NULL;
2180
2181 if (list_empty(&mddev->disks))
2182 return 0;
2183 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2184 return 0;
2185 rdev_for_each(rdev, mddev) {
2186
2187 if (test_bit(Faulty, &rdev->flags))
2188 continue;
2189 if (rdev->raid_disk < 0)
2190 continue;
2191 if (!reference) {
2192
2193 reference = rdev;
2194 continue;
2195 }
2196
2197 if (blk_integrity_compare(reference->bdev->bd_disk,
2198 rdev->bdev->bd_disk) < 0)
2199 return -EINVAL;
2200 }
2201 if (!reference || !bdev_get_integrity(reference->bdev))
2202 return 0;
2203
2204
2205
2206
2207 blk_integrity_register(mddev->gendisk,
2208 bdev_get_integrity(reference->bdev));
2209
2210 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2211 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2212 pr_err("md: failed to create integrity pool for %s\n",
2213 mdname(mddev));
2214 return -EINVAL;
2215 }
2216 return 0;
2217}
2218EXPORT_SYMBOL(md_integrity_register);
2219
2220
2221
2222
2223
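/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */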
2224int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2225{
2226 struct blk_integrity *bi_mddev;
2227 char name[BDEVNAME_SIZE];
2228
2229 if (!mddev->gendisk)
2230 return 0;
2231
2232 bi_mddev = blk_get_integrity(mddev->gendisk);
2233
2234 if (!bi_mddev)
2235 return 0;
2236
2237 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2238 pr_err("%s: incompatible integrity profile for %s\n",
2239 mdname(mddev), bdevname(rdev->bdev, name));
2240 return -ENXIO;
2241 }
2242
2243 return 0;
2244}
2245EXPORT_SYMBOL(md_integrity_add_rdev);
2246
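/*
 * Bind rdev into an array: pick or validate its desc_nr, register its
 * kobject under the array's kobject and link it onto mddev->disks.
 */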
2247static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2248{
2249 char b[BDEVNAME_SIZE];
2250 struct kobject *ko;
2251 int err;
2252
2253
2254 if (find_rdev(mddev, rdev->bdev->bd_dev))
2255 return -EEXIST;
2256
2257 if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
2258 mddev->pers)
2259 return -EROFS;
2260
2261
2262 if (!test_bit(Journal, &rdev->flags) &&
2263 rdev->sectors &&
2264 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2265 if (mddev->pers) {
2266
2267
2268
2269
2270 if (mddev->level > 0)
2271 return -ENOSPC;
2272 } else
2273 mddev->dev_sectors = rdev->sectors;
2274 }
2275
2276
2277
2278
2279
2280 rcu_read_lock();
2281 if (rdev->desc_nr < 0) {
2282 int choice = 0;
2283 if (mddev->pers)
2284 choice = mddev->raid_disks;
2285 while (md_find_rdev_nr_rcu(mddev, choice))
2286 choice++;
2287 rdev->desc_nr = choice;
2288 } else {
2289 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2290 rcu_read_unlock();
2291 return -EBUSY;
2292 }
2293 }
2294 rcu_read_unlock();
2295 if (!test_bit(Journal, &rdev->flags) &&
2296 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2297 pr_warn("md: %s: array is limited to %d devices\n",
2298 mdname(mddev), mddev->max_disks);
2299 return -EBUSY;
2300 }
2301 bdevname(rdev->bdev,b);
2302 strreplace(b, '/', '!');
2303
2304 rdev->mddev = mddev;
2305 pr_debug("md: bind<%s>\n", b);
2306
2307 if (mddev->raid_disks)
2308 mddev_create_wb_pool(mddev, rdev, false);
2309
2310 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2311 goto fail;
2312
2313 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2314 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2315 ; /* failure to create the "block" symlink is deliberately ignored */
2316 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2317
2318 list_add_rcu(&rdev->same_set, &mddev->disks);
2319 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2320
2321
2322 mddev->recovery_disabled++;
2323
2324 return 0;
2325
2326 fail:
2327 pr_warn("md: failed to register dev-%s for %s\n",
2328 b, mdname(mddev));
2329 return err;
2330}
2331
2332static void md_delayed_delete(struct work_struct *ws)
2333{
2334 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2335 kobject_del(&rdev->kobj);
2336 kobject_put(&rdev->kobj);
2337}
2338
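/*
 * Detach an rdev from its array.  Removal of the sysfs kobject is
 * deferred to md_misc_wq via md_delayed_delete after an RCU grace
 * period, once the device has been unlinked from mddev->disks.
 */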
2339static void unbind_rdev_from_array(struct md_rdev *rdev)
2340{
2341 char b[BDEVNAME_SIZE];
2342
2343 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2344 list_del_rcu(&rdev->same_set);
2345 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2346 mddev_destroy_wb_pool(rdev->mddev, rdev);
2347 rdev->mddev = NULL;
2348 sysfs_remove_link(&rdev->kobj, "block");
2349 sysfs_put(rdev->sysfs_state);
2350 rdev->sysfs_state = NULL;
2351 rdev->badblocks.count = 0;
2352
2353
2354
2355
2356 synchronize_rcu();
2357 INIT_WORK(&rdev->del_work, md_delayed_delete);
2358 kobject_get(&rdev->kobj);
2359 queue_work(md_misc_wq, &rdev->del_work);
2360}
2361
2362
2363 /*
2364  * Claim the underlying block device exclusively (FMODE_EXCL) so it
2365  * cannot be reused elsewhere while it belongs to an md array.
2366  */
2367static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2368{
2369 int err = 0;
2370 struct block_device *bdev;
2371 char b[BDEVNAME_SIZE];
2372
2373 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2374 shared ? (struct md_rdev *)lock_rdev : rdev);
2375 if (IS_ERR(bdev)) {
2376 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2377 return PTR_ERR(bdev);
2378 }
2379 rdev->bdev = bdev;
2380 return err;
2381}
2382
2383static void unlock_rdev(struct md_rdev *rdev)
2384{
2385 struct block_device *bdev = rdev->bdev;
2386 rdev->bdev = NULL;
2387 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2388}
2389
2390void md_autodetect_dev(dev_t dev);
2391
2392static void export_rdev(struct md_rdev *rdev)
2393{
2394 char b[BDEVNAME_SIZE];
2395
2396 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2397 md_rdev_clear(rdev);
2398#ifndef MODULE
2399 if (test_bit(AutoDetected, &rdev->flags))
2400 md_autodetect_dev(rdev->bdev->bd_dev);
2401#endif
2402 unlock_rdev(rdev);
2403 kobject_put(&rdev->kobj);
2404}
2405
2406void md_kick_rdev_from_array(struct md_rdev *rdev)
2407{
2408 unbind_rdev_from_array(rdev);
2409 export_rdev(rdev);
2410}
2411EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2412
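/* Remove every remaining device from the array and reset its geometry. */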
2413static void export_array(struct mddev *mddev)
2414{
2415 struct md_rdev *rdev;
2416
2417 while (!list_empty(&mddev->disks)) {
2418 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2419 same_set);
2420 md_kick_rdev_from_array(rdev);
2421 }
2422 mddev->raid_disks = 0;
2423 mddev->major_version = 0;
2424}
2425
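/*
 * Try to mark the array clean.  Temporarily drops mddev->lock while
 * switching writes_pending to atomic mode; returns the resulting value
 * of mddev->in_sync.
 */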
2426static bool set_in_sync(struct mddev *mddev)
2427{
2428 lockdep_assert_held(&mddev->lock);
2429 if (!mddev->in_sync) {
2430 mddev->sync_checkers++;
2431 spin_unlock(&mddev->lock);
2432 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2433 spin_lock(&mddev->lock);
2434 if (!mddev->in_sync &&
2435 percpu_ref_is_zero(&mddev->writes_pending)) {
2436 mddev->in_sync = 1;
2437
2438
2439
2440
2441 smp_mb();
2442 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2443 sysfs_notify_dirent_safe(mddev->sysfs_state);
2444 }
2445 if (--mddev->sync_checkers == 0)
2446 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2447 }
2448 if (mddev->safemode == 1)
2449 mddev->safemode = 0;
2450 return mddev->in_sync;
2451}
2452
2453static void sync_sbs(struct mddev *mddev, int nospares)
2454{
2455 /*
2456  * Refresh the in-memory superblock image of every member device.
2457  * Devices whose recorded event count already matches the array's are
2458  * skipped, as are spares exactly one event behind when 'nospares' is
2459  * set; everything else is re-synced and flagged for writing.
2460  */
2461 struct md_rdev *rdev;
2462 rdev_for_each(rdev, mddev) {
2463 if (rdev->sb_events == mddev->events ||
2464 (nospares &&
2465 rdev->raid_disk < 0 &&
2466 rdev->sb_events+1 == mddev->events)) {
2467
2468 rdev->sb_loaded = 2;
2469 } else {
2470 sync_super(mddev, rdev);
2471 rdev->sb_loaded = 1;
2472 }
2473 }
2474}
2475
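/*
 * Used on the clustered update path to decide whether the on-disk
 * superblock actually needs rewriting, by comparing a current member's
 * superblock image with the array's device roles and geometry.
 */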
2476static bool does_sb_need_changing(struct mddev *mddev)
2477{
2478 struct md_rdev *rdev;
2479 struct mdp_superblock_1 *sb;
2480 int role;
2481
2482
2483 rdev_for_each(rdev, mddev)
2484 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2485 break;
2486
2487
2488 if (!rdev)
2489 return false;
2490
2491 sb = page_address(rdev->sb_page);
2492
2493 rdev_for_each(rdev, mddev) {
2494 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2495
2496 if (role == 0xffff && rdev->raid_disk >=0 &&
2497 !test_bit(Faulty, &rdev->flags))
2498 return true;
2499
2500 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2501 return true;
2502 }
2503
2504
2505 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2506 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2507 (mddev->layout != le32_to_cpu(sb->layout)) ||
2508 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2509 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2510 return true;
2511
2512 return false;
2513}
2514
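/*
 * Write the array metadata (superblocks and bitmap superblock) out to all
 * member devices, repeating if the array state changes while the writes
 * are in flight.  force_change requests an update even when nothing
 * obvious has changed.
 */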
2515void md_update_sb(struct mddev *mddev, int force_change)
2516{
2517 struct md_rdev *rdev;
2518 int sync_req;
2519 int nospares = 0;
2520 int any_badblocks_changed = 0;
2521 int ret = -1;
2522
2523 if (mddev->ro) {
2524 if (force_change)
2525 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2526 return;
2527 }
2528
2529repeat:
2530 if (mddev_is_clustered(mddev)) {
2531 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2532 force_change = 1;
2533 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2534 nospares = 1;
2535 ret = md_cluster_ops->metadata_update_start(mddev);
2536
2537 if (!does_sb_need_changing(mddev)) {
2538 if (ret == 0)
2539 md_cluster_ops->metadata_update_cancel(mddev);
2540 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2541 BIT(MD_SB_CHANGE_DEVS) |
2542 BIT(MD_SB_CHANGE_CLEAN));
2543 return;
2544 }
2545 }
2546
2547 /*
2548  * First make sure each device's recovery_offset is up to date.
2549  * curr_resync_completed can only be trusted during a recovery pass,
2550  * not during reshape or resync, hence the checks on the recovery
2551  * flags below.
2552  */
2553 rdev_for_each(rdev, mddev) {
2554 if (rdev->raid_disk >= 0 &&
2555 mddev->delta_disks >= 0 &&
2556 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2557 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2558 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2559 !test_bit(Journal, &rdev->flags) &&
2560 !test_bit(In_sync, &rdev->flags) &&
2561 mddev->curr_resync_completed > rdev->recovery_offset)
2562 rdev->recovery_offset = mddev->curr_resync_completed;
2563
2564 }
2565 if (!mddev->persistent) {
2566 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2567 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2568 if (!mddev->external) {
2569 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2570 rdev_for_each(rdev, mddev) {
2571 if (rdev->badblocks.changed) {
2572 rdev->badblocks.changed = 0;
2573 ack_all_badblocks(&rdev->badblocks);
2574 md_error(mddev, rdev);
2575 }
2576 clear_bit(Blocked, &rdev->flags);
2577 clear_bit(BlockedBadBlocks, &rdev->flags);
2578 wake_up(&rdev->blocked_wait);
2579 }
2580 }
2581 wake_up(&mddev->sb_wait);
2582 return;
2583 }
2584
2585 spin_lock(&mddev->lock);
2586
2587 mddev->utime = ktime_get_real_seconds();
2588
2589 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2590 force_change = 1;
2591 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2592 /*
2593  * Only a clean <-> dirty transition is being recorded, so the
2594  * spare superblocks can potentially be left untouched.
2595  */
2596 nospares = 1;
2597 if (force_change)
2598 nospares = 0;
2599 if (mddev->degraded)
2600
2601
2602
2603
2604 /*
2605  * If the array is degraded, skipping spare superblock updates is both
2606  * dangerous and pointless: a removed device could otherwise keep an
2607  * event count that still looks current and be wrongly re-accepted.
2608  */
2609 nospares = 0;
2610
2611 sync_req = mddev->in_sync;
2612
2613
2614
2615 if (nospares
2616 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2617 && mddev->can_decrease_events
2618 && mddev->events != 1) {
2619 mddev->events--;
2620 mddev->can_decrease_events = 0;
2621 } else {
2622
2623 mddev->events ++;
2624 mddev->can_decrease_events = nospares;
2625 }
2626
2627
2628
2629
2630
2631
2632 WARN_ON(mddev->events == 0);
2633
2634 rdev_for_each(rdev, mddev) {
2635 if (rdev->badblocks.changed)
2636 any_badblocks_changed++;
2637 if (test_bit(Faulty, &rdev->flags))
2638 set_bit(FaultRecorded, &rdev->flags);
2639 }
2640
2641 sync_sbs(mddev, nospares);
2642 spin_unlock(&mddev->lock);
2643
2644 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2645 mdname(mddev), mddev->in_sync);
2646
2647 if (mddev->queue)
2648 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2649rewrite:
2650 md_bitmap_update_sb(mddev->bitmap);
2651 rdev_for_each(rdev, mddev) {
2652 char b[BDEVNAME_SIZE];
2653
2654 if (rdev->sb_loaded != 1)
2655 continue;
2656
2657 if (!test_bit(Faulty, &rdev->flags)) {
2658 md_super_write(mddev,rdev,
2659 rdev->sb_start, rdev->sb_size,
2660 rdev->sb_page);
2661 pr_debug("md: (write) %s's sb offset: %llu\n",
2662 bdevname(rdev->bdev, b),
2663 (unsigned long long)rdev->sb_start);
2664 rdev->sb_events = mddev->events;
2665 if (rdev->badblocks.size) {
2666 md_super_write(mddev, rdev,
2667 rdev->badblocks.sector,
2668 rdev->badblocks.size << 9,
2669 rdev->bb_page);
2670 rdev->badblocks.size = 0;
2671 }
2672
2673 } else
2674 pr_debug("md: %s (skipping faulty)\n",
2675 bdevname(rdev->bdev, b));
2676
2677 if (mddev->level == LEVEL_MULTIPATH)
2678
2679 break;
2680 }
2681 if (md_super_wait(mddev) < 0)
2682 goto rewrite;
2683
2684
2685 if (mddev_is_clustered(mddev) && ret == 0)
2686 md_cluster_ops->metadata_update_finish(mddev);
2687
2688 if (mddev->in_sync != sync_req ||
2689 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2690 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2691
2692 goto repeat;
2693 wake_up(&mddev->sb_wait);
2694 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2695 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2696
2697 rdev_for_each(rdev, mddev) {
2698 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2699 clear_bit(Blocked, &rdev->flags);
2700
2701 if (any_badblocks_changed)
2702 ack_all_badblocks(&rdev->badblocks);
2703 clear_bit(BlockedBadBlocks, &rdev->flags);
2704 wake_up(&rdev->blocked_wait);
2705 }
2706}
2707EXPORT_SYMBOL(md_update_sb);
2708
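/* Hot-add an already-bound rdev into a running array and schedule recovery. */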
2709static int add_bound_rdev(struct md_rdev *rdev)
2710{
2711 struct mddev *mddev = rdev->mddev;
2712 int err = 0;
2713 bool add_journal = test_bit(Journal, &rdev->flags);
2714
2715 if (!mddev->pers->hot_remove_disk || add_journal) {
2716 /*
2717  * Personalities without a hot_remove_disk method, and journal
2718  * devices, have the device validated and hot-added immediately.
2719  */
2720 super_types[mddev->major_version].
2721 validate_super(mddev, rdev);
2722 if (add_journal)
2723 mddev_suspend(mddev);
2724 err = mddev->pers->hot_add_disk(mddev, rdev);
2725 if (add_journal)
2726 mddev_resume(mddev);
2727 if (err) {
2728 md_kick_rdev_from_array(rdev);
2729 return err;
2730 }
2731 }
2732 sysfs_notify_dirent_safe(rdev->sysfs_state);
2733
2734 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2735 if (mddev->degraded)
2736 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2737 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2738 md_new_event(mddev);
2739 md_wakeup_thread(mddev->thread);
2740 return 0;
2741}
2742
2743 /* Compare a command written to a sysfs file against an expected keyword.
2744  * The command may carry a trailing newline; return 1 only on an exact,
2745  * full-length match, otherwise 0. */
2746static int cmd_match(const char *cmd, const char *str)
2747{
2748
2749
2750
2751
2752 while (*cmd && *str && *cmd == *str) {
2753 cmd++;
2754 str++;
2755 }
2756 if (*cmd == '\n')
2757 cmd++;
2758 if (*str || *cmd)
2759 return 0;
2760 return 1;
2761}
2762
2763struct rdev_sysfs_entry {
2764 struct attribute attr;
2765 ssize_t (*show)(struct md_rdev *, char *);
2766 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2767};
2768
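/* Report the device flags as a comma-separated list, e.g. "in_sync,failfast". */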
2769static ssize_t
2770state_show(struct md_rdev *rdev, char *page)
2771{
2772 char *sep = ",";
2773 size_t len = 0;
2774 unsigned long flags = READ_ONCE(rdev->flags);
2775
2776 if (test_bit(Faulty, &flags) ||
2777 (!test_bit(ExternalBbl, &flags) &&
2778 rdev->badblocks.unacked_exist))
2779 len += sprintf(page+len, "faulty%s", sep);
2780 if (test_bit(In_sync, &flags))
2781 len += sprintf(page+len, "in_sync%s", sep);
2782 if (test_bit(Journal, &flags))
2783 len += sprintf(page+len, "journal%s", sep);
2784 if (test_bit(WriteMostly, &flags))
2785 len += sprintf(page+len, "write_mostly%s", sep);
2786 if (test_bit(Blocked, &flags) ||
2787 (rdev->badblocks.unacked_exist
2788 && !test_bit(Faulty, &flags)))
2789 len += sprintf(page+len, "blocked%s", sep);
2790 if (!test_bit(Faulty, &flags) &&
2791 !test_bit(Journal, &flags) &&
2792 !test_bit(In_sync, &flags))
2793 len += sprintf(page+len, "spare%s", sep);
2794 if (test_bit(WriteErrorSeen, &flags))
2795 len += sprintf(page+len, "write_error%s", sep);
2796 if (test_bit(WantReplacement, &flags))
2797 len += sprintf(page+len, "want_replacement%s", sep);
2798 if (test_bit(Replacement, &flags))
2799 len += sprintf(page+len, "replacement%s", sep);
2800 if (test_bit(ExternalBbl, &flags))
2801 len += sprintf(page+len, "external_bbl%s", sep);
2802 if (test_bit(FailFast, &flags))
2803 len += sprintf(page+len, "failfast%s", sep);
2804
2805 if (len)
2806 len -= strlen(sep);
2807
2808 return len+sprintf(page+len, "\n");
2809}
2810
2811static ssize_t
2812state_store(struct md_rdev *rdev, const char *buf, size_t len)
2813{
2814 /*
2815  * Commands accepted here, mirroring the handling below:
2816  *   faulty            - simulate an I/O error on this device
2817  *   remove            - disconnect the device from the array
2818  *   writemostly       - set the WriteMostly flag ("-writemostly" clears it)
2819  *   blocked           - set the Blocked flag ("-blocked" clears it)
2820  *   insync / -insync  - mark in-sync, or revert a stopped array's member to spare
2821  *   failfast          - set the FailFast flag ("-failfast" clears it)
2822  *   write_error       - set WriteErrorSeen ("-write_error" clears it)
2823  *   want_replacement  - request a replacement for this device ("-" cancels)
2824  *   replacement       - mark the device as a replacement ("-" unmarks)
2825  *   re-add            - re-add a previously removed device
2826  *   external_bbl      - externally managed bad-block list ("-" reverts)
2827  */
2828 int err = -EINVAL;
2829 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2830 md_error(rdev->mddev, rdev);
2831 if (test_bit(Faulty, &rdev->flags))
2832 err = 0;
2833 else
2834 err = -EBUSY;
2835 } else if (cmd_match(buf, "remove")) {
2836 if (rdev->mddev->pers) {
2837 clear_bit(Blocked, &rdev->flags);
2838 remove_and_add_spares(rdev->mddev, rdev);
2839 }
2840 if (rdev->raid_disk >= 0)
2841 err = -EBUSY;
2842 else {
2843 struct mddev *mddev = rdev->mddev;
2844 err = 0;
2845 if (mddev_is_clustered(mddev))
2846 err = md_cluster_ops->remove_disk(mddev, rdev);
2847
2848 if (err == 0) {
2849 md_kick_rdev_from_array(rdev);
2850 if (mddev->pers) {
2851 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2852 md_wakeup_thread(mddev->thread);
2853 }
2854 md_new_event(mddev);
2855 }
2856 }
2857 } else if (cmd_match(buf, "writemostly")) {
2858 set_bit(WriteMostly, &rdev->flags);
2859 mddev_create_wb_pool(rdev->mddev, rdev, false);
2860 err = 0;
2861 } else if (cmd_match(buf, "-writemostly")) {
2862 mddev_destroy_wb_pool(rdev->mddev, rdev);
2863 clear_bit(WriteMostly, &rdev->flags);
2864 err = 0;
2865 } else if (cmd_match(buf, "blocked")) {
2866 set_bit(Blocked, &rdev->flags);
2867 err = 0;
2868 } else if (cmd_match(buf, "-blocked")) {
2869 if (!test_bit(Faulty, &rdev->flags) &&
2870 !test_bit(ExternalBbl, &rdev->flags) &&
2871 rdev->badblocks.unacked_exist) {
2872
2873
2874
2875 md_error(rdev->mddev, rdev);
2876 }
2877 clear_bit(Blocked, &rdev->flags);
2878 clear_bit(BlockedBadBlocks, &rdev->flags);
2879 wake_up(&rdev->blocked_wait);
2880 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2881 md_wakeup_thread(rdev->mddev->thread);
2882
2883 err = 0;
2884 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2885 set_bit(In_sync, &rdev->flags);
2886 err = 0;
2887 } else if (cmd_match(buf, "failfast")) {
2888 set_bit(FailFast, &rdev->flags);
2889 err = 0;
2890 } else if (cmd_match(buf, "-failfast")) {
2891 clear_bit(FailFast, &rdev->flags);
2892 err = 0;
2893 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2894 !test_bit(Journal, &rdev->flags)) {
2895 if (rdev->mddev->pers == NULL) {
2896 clear_bit(In_sync, &rdev->flags);
2897 rdev->saved_raid_disk = rdev->raid_disk;
2898 rdev->raid_disk = -1;
2899 err = 0;
2900 }
2901 } else if (cmd_match(buf, "write_error")) {
2902 set_bit(WriteErrorSeen, &rdev->flags);
2903 err = 0;
2904 } else if (cmd_match(buf, "-write_error")) {
2905 clear_bit(WriteErrorSeen, &rdev->flags);
2906 err = 0;
2907 } else if (cmd_match(buf, "want_replacement")) {
2908
2909
2910
2911
2912 if (rdev->raid_disk >= 0 &&
2913 !test_bit(Journal, &rdev->flags) &&
2914 !test_bit(Replacement, &rdev->flags))
2915 set_bit(WantReplacement, &rdev->flags);
2916 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2917 md_wakeup_thread(rdev->mddev->thread);
2918 err = 0;
2919 } else if (cmd_match(buf, "-want_replacement")) {
2920
2921
2922
2923 err = 0;
2924 clear_bit(WantReplacement, &rdev->flags);
2925 } else if (cmd_match(buf, "replacement")) {
2926
2927
2928
2929
2930 if (rdev->mddev->pers)
2931 err = -EBUSY;
2932 else {
2933 set_bit(Replacement, &rdev->flags);
2934 err = 0;
2935 }
2936 } else if (cmd_match(buf, "-replacement")) {
2937
2938 if (rdev->mddev->pers)
2939 err = -EBUSY;
2940 else {
2941 clear_bit(Replacement, &rdev->flags);
2942 err = 0;
2943 }
2944 } else if (cmd_match(buf, "re-add")) {
2945 if (!rdev->mddev->pers)
2946 err = -EINVAL;
2947 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
2948 rdev->saved_raid_disk >= 0) {
2949
2950
2951
2952
2953
2954
2955 if (!mddev_is_clustered(rdev->mddev) ||
2956 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2957 clear_bit(Faulty, &rdev->flags);
2958 err = add_bound_rdev(rdev);
2959 }
2960 } else
2961 err = -EBUSY;
2962 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2963 set_bit(ExternalBbl, &rdev->flags);
2964 rdev->badblocks.shift = 0;
2965 err = 0;
2966 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2967 clear_bit(ExternalBbl, &rdev->flags);
2968 err = 0;
2969 }
2970 if (!err)
2971 sysfs_notify_dirent_safe(rdev->sysfs_state);
2972 return err ? err : len;
2973}
2974static struct rdev_sysfs_entry rdev_state =
2975__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2976
2977static ssize_t
2978errors_show(struct md_rdev *rdev, char *page)
2979{
2980 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2981}
2982
2983static ssize_t
2984errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2985{
2986 unsigned int n;
2987 int rv;
2988
2989 rv = kstrtouint(buf, 10, &n);
2990 if (rv < 0)
2991 return rv;
2992 atomic_set(&rdev->corrected_errors, n);
2993 return len;
2994}
2995static struct rdev_sysfs_entry rdev_errors =
2996__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2997
2998static ssize_t
2999slot_show(struct md_rdev *rdev, char *page)
3000{
3001 if (test_bit(Journal, &rdev->flags))
3002 return sprintf(page, "journal\n");
3003 else if (rdev->raid_disk < 0)
3004 return sprintf(page, "none\n");
3005 else
3006 return sprintf(page, "%d\n", rdev->raid_disk);
3007}
3008
3009static ssize_t
3010slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3011{
3012 int slot;
3013 int err;
3014
3015 if (test_bit(Journal, &rdev->flags))
3016 return -EBUSY;
3017 if (strncmp(buf, "none", 4)==0)
3018 slot = -1;
3019 else {
3020 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3021 if (err < 0)
3022 return err;
3023 }
3024 if (rdev->mddev->pers && slot == -1) {
3025
3026
3027
3028
3029
3030
3031
3032 if (rdev->raid_disk == -1)
3033 return -EEXIST;
3034
3035 if (rdev->mddev->pers->hot_remove_disk == NULL)
3036 return -EINVAL;
3037 clear_bit(Blocked, &rdev->flags);
3038 remove_and_add_spares(rdev->mddev, rdev);
3039 if (rdev->raid_disk >= 0)
3040 return -EBUSY;
3041 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3042 md_wakeup_thread(rdev->mddev->thread);
3043 } else if (rdev->mddev->pers) {
3044
3045
3046
3047 int err;
3048
3049 if (rdev->raid_disk != -1)
3050 return -EBUSY;
3051
3052 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3053 return -EBUSY;
3054
3055 if (rdev->mddev->pers->hot_add_disk == NULL)
3056 return -EINVAL;
3057
3058 if (slot >= rdev->mddev->raid_disks &&
3059 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3060 return -ENOSPC;
3061
3062 rdev->raid_disk = slot;
3063 if (test_bit(In_sync, &rdev->flags))
3064 rdev->saved_raid_disk = slot;
3065 else
3066 rdev->saved_raid_disk = -1;
3067 clear_bit(In_sync, &rdev->flags);
3068 clear_bit(Bitmap_sync, &rdev->flags);
3069 err = rdev->mddev->pers->
3070 hot_add_disk(rdev->mddev, rdev);
3071 if (err) {
3072 rdev->raid_disk = -1;
3073 return err;
3074 } else
3075 sysfs_notify_dirent_safe(rdev->sysfs_state);
3076 if (sysfs_link_rdev(rdev->mddev, rdev))
3077 ; /* failure to create the sysfs rdev link is deliberately ignored */
3078
3079 } else {
3080 if (slot >= rdev->mddev->raid_disks &&
3081 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3082 return -ENOSPC;
3083 rdev->raid_disk = slot;
3084
3085 clear_bit(Faulty, &rdev->flags);
3086 clear_bit(WriteMostly, &rdev->flags);
3087 set_bit(In_sync, &rdev->flags);
3088 sysfs_notify_dirent_safe(rdev->sysfs_state);
3089 }
3090 return len;
3091}
3092
3093static struct rdev_sysfs_entry rdev_slot =
3094__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3095
3096static ssize_t
3097offset_show(struct md_rdev *rdev, char *page)
3098{
3099 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3100}
3101
3102static ssize_t
3103offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3104{
3105 unsigned long long offset;
3106 if (kstrtoull(buf, 10, &offset) < 0)
3107 return -EINVAL;
3108 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3109 return -EBUSY;
3110 if (rdev->sectors && rdev->mddev->external)
3111
3112
3113 return -EBUSY;
3114 rdev->data_offset = offset;
3115 rdev->new_data_offset = offset;
3116 return len;
3117}
3118
3119static struct rdev_sysfs_entry rdev_offset =
3120__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3121
3122static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3123{
3124 return sprintf(page, "%llu\n",
3125 (unsigned long long)rdev->new_data_offset);
3126}
3127
3128static ssize_t new_offset_store(struct md_rdev *rdev,
3129 const char *buf, size_t len)
3130{
3131 unsigned long long new_offset;
3132 struct mddev *mddev = rdev->mddev;
3133
3134 if (kstrtoull(buf, 10, &new_offset) < 0)
3135 return -EINVAL;
3136
3137 if (mddev->sync_thread ||
3138 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3139 return -EBUSY;
3140 if (new_offset == rdev->data_offset)
3141
3142 ;
3143 else if (new_offset > rdev->data_offset) {
3144
3145 if (new_offset - rdev->data_offset
3146 + mddev->dev_sectors > rdev->sectors)
3147 return -E2BIG;
3148 }
3149
3150
3151
3152
3153
3154 if (new_offset < rdev->data_offset &&
3155 mddev->reshape_backwards)
3156 return -EINVAL;
3157
3158
3159
3160
3161 if (new_offset > rdev->data_offset &&
3162 !mddev->reshape_backwards)
3163 return -EINVAL;
3164
3165 if (mddev->pers && mddev->persistent &&
3166 !super_types[mddev->major_version]
3167 .allow_new_offset(rdev, new_offset))
3168 return -E2BIG;
3169 rdev->new_data_offset = new_offset;
3170 if (new_offset > rdev->data_offset)
3171 mddev->reshape_backwards = 1;
3172 else if (new_offset < rdev->data_offset)
3173 mddev->reshape_backwards = 0;
3174
3175 return len;
3176}
3177static struct rdev_sysfs_entry rdev_new_offset =
3178__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3179
3180static ssize_t
3181rdev_size_show(struct md_rdev *rdev, char *page)
3182{
3183 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3184}
3185
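/* Return 1 if the sector ranges [s1, s1+l1) and [s2, s2+l2) intersect. */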
3186static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3187{
3188
3189 if (s1+l1 <= s2)
3190 return 0;
3191 if (s2+l2 <= s1)
3192 return 0;
3193 return 1;
3194}
3195
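/*
 * Convert a size in 1K blocks (as written to sysfs) into sectors,
 * rejecting values that would overflow a sector_t.
 */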
3196static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3197{
3198 unsigned long long blocks;
3199 sector_t new;
3200
3201 if (kstrtoull(buf, 10, &blocks) < 0)
3202 return -EINVAL;
3203
3204 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3205 return -EINVAL;
3206
3207 new = blocks * 2;
3208 if (new != blocks * 2)
3209 return -EINVAL;
3210
3211 *sectors = new;
3212 return 0;
3213}
3214
3215static ssize_t
3216rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3217{
3218 struct mddev *my_mddev = rdev->mddev;
3219 sector_t oldsectors = rdev->sectors;
3220 sector_t sectors;
3221
3222 if (test_bit(Journal, &rdev->flags))
3223 return -EBUSY;
3224 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3225 return -EINVAL;
3226 if (rdev->data_offset != rdev->new_data_offset)
3227 return -EINVAL;
3228 if (my_mddev->pers && rdev->raid_disk >= 0) {
3229 if (my_mddev->persistent) {
3230 sectors = super_types[my_mddev->major_version].
3231 rdev_size_change(rdev, sectors);
3232 if (!sectors)
3233 return -EBUSY;
3234 } else if (!sectors)
3235 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3236 rdev->data_offset;
3237 if (!my_mddev->pers->resize)
3238
3239 return -EINVAL;
3240 }
3241 if (sectors < my_mddev->dev_sectors)
3242 return -EINVAL;
3243
3244 rdev->sectors = sectors;
3245 if (sectors > oldsectors && my_mddev->external) {
3246
3247
3248
3249
3250
3251
3252 struct mddev *mddev;
3253 int overlap = 0;
3254 struct list_head *tmp;
3255
3256 rcu_read_lock();
3257 for_each_mddev(mddev, tmp) {
3258 struct md_rdev *rdev2;
3259
3260 rdev_for_each(rdev2, mddev)
3261 if (rdev->bdev == rdev2->bdev &&
3262 rdev != rdev2 &&
3263 overlaps(rdev->data_offset, rdev->sectors,
3264 rdev2->data_offset,
3265 rdev2->sectors)) {
3266 overlap = 1;
3267 break;
3268 }
3269 if (overlap) {
3270 mddev_put(mddev);
3271 break;
3272 }
3273 }
3274 rcu_read_unlock();
3275 if (overlap) {
3276
3277
3278
3279
3280
3281
3282 rdev->sectors = oldsectors;
3283 return -EBUSY;
3284 }
3285 }
3286 return len;
3287}
3288
3289static struct rdev_sysfs_entry rdev_size =
3290__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3291
3292static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3293{
3294 unsigned long long recovery_start = rdev->recovery_offset;
3295
3296 if (test_bit(In_sync, &rdev->flags) ||
3297 recovery_start == MaxSector)
3298 return sprintf(page, "none\n");
3299
3300 return sprintf(page, "%llu\n", recovery_start);
3301}
3302
3303static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3304{
3305 unsigned long long recovery_start;
3306
3307 if (cmd_match(buf, "none"))
3308 recovery_start = MaxSector;
3309 else if (kstrtoull(buf, 10, &recovery_start))
3310 return -EINVAL;
3311
3312 if (rdev->mddev->pers &&
3313 rdev->raid_disk >= 0)
3314 return -EBUSY;
3315
3316 rdev->recovery_offset = recovery_start;
3317 if (recovery_start == MaxSector)
3318 set_bit(In_sync, &rdev->flags);
3319 else
3320 clear_bit(In_sync, &rdev->flags);
3321 return len;
3322}
3323
3324static struct rdev_sysfs_entry rdev_recovery_start =
3325__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338static ssize_t bb_show(struct md_rdev *rdev, char *page)
3339{
3340 return badblocks_show(&rdev->badblocks, page, 0);
3341}
3342static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3343{
3344 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3345
3346 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3347 wake_up(&rdev->blocked_wait);
3348 return rv;
3349}
3350static struct rdev_sysfs_entry rdev_bad_blocks =
3351__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3352
3353static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3354{
3355 return badblocks_show(&rdev->badblocks, page, 1);
3356}
3357static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3358{
3359 return badblocks_store(&rdev->badblocks, page, len, 1);
3360}
3361static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3362__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3363
3364static ssize_t
3365ppl_sector_show(struct md_rdev *rdev, char *page)
3366{
3367 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3368}
3369
3370static ssize_t
3371ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3372{
3373 unsigned long long sector;
3374
3375 if (kstrtoull(buf, 10, &sector) < 0)
3376 return -EINVAL;
3377 if (sector != (sector_t)sector)
3378 return -EINVAL;
3379
3380 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3381 rdev->raid_disk >= 0)
3382 return -EBUSY;
3383
3384 if (rdev->mddev->persistent) {
3385 if (rdev->mddev->major_version == 0)
3386 return -EINVAL;
3387 if ((sector > rdev->sb_start &&
3388 sector - rdev->sb_start > S16_MAX) ||
3389 (sector < rdev->sb_start &&
3390 rdev->sb_start - sector > -S16_MIN))
3391 return -EINVAL;
3392 rdev->ppl.offset = sector - rdev->sb_start;
3393 } else if (!rdev->mddev->external) {
3394 return -EBUSY;
3395 }
3396 rdev->ppl.sector = sector;
3397 return len;
3398}
3399
3400static struct rdev_sysfs_entry rdev_ppl_sector =
3401__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3402
3403static ssize_t
3404ppl_size_show(struct md_rdev *rdev, char *page)
3405{
3406 return sprintf(page, "%u\n", rdev->ppl.size);
3407}
3408
3409static ssize_t
3410ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3411{
3412 unsigned int size;
3413
3414 if (kstrtouint(buf, 10, &size) < 0)
3415 return -EINVAL;
3416
3417 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3418 rdev->raid_disk >= 0)
3419 return -EBUSY;
3420
3421 if (rdev->mddev->persistent) {
3422 if (rdev->mddev->major_version == 0)
3423 return -EINVAL;
3424 if (size > U16_MAX)
3425 return -EINVAL;
3426 } else if (!rdev->mddev->external) {
3427 return -EBUSY;
3428 }
3429 rdev->ppl.size = size;
3430 return len;
3431}
3432
3433static struct rdev_sysfs_entry rdev_ppl_size =
3434__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3435
3436static struct attribute *rdev_default_attrs[] = {
3437 &rdev_state.attr,
3438 &rdev_errors.attr,
3439 &rdev_slot.attr,
3440 &rdev_offset.attr,
3441 &rdev_new_offset.attr,
3442 &rdev_size.attr,
3443 &rdev_recovery_start.attr,
3444 &rdev_bad_blocks.attr,
3445 &rdev_unack_bad_blocks.attr,
3446 &rdev_ppl_sector.attr,
3447 &rdev_ppl_size.attr,
3448 NULL,
3449};
3450static ssize_t
3451rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3452{
3453 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3454 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3455
3456 if (!entry->show)
3457 return -EIO;
3458 if (!rdev->mddev)
3459 return -ENODEV;
3460 return entry->show(rdev, page);
3461}
3462
3463static ssize_t
3464rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3465 const char *page, size_t length)
3466{
3467 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3468 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3469 ssize_t rv;
3470 struct mddev *mddev = rdev->mddev;
3471
3472 if (!entry->store)
3473 return -EIO;
3474 if (!capable(CAP_SYS_ADMIN))
3475 return -EACCES;
3476 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3477 if (!rv) {
3478 if (rdev->mddev == NULL)
3479 rv = -ENODEV;
3480 else
3481 rv = entry->store(rdev, page, length);
3482 mddev_unlock(mddev);
3483 }
3484 return rv;
3485}
3486
3487static void rdev_free(struct kobject *ko)
3488{
3489 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3490 kfree(rdev);
3491}
3492static const struct sysfs_ops rdev_sysfs_ops = {
3493 .show = rdev_attr_show,
3494 .store = rdev_attr_store,
3495};
3496static struct kobj_type rdev_ktype = {
3497 .release = rdev_free,
3498 .sysfs_ops = &rdev_sysfs_ops,
3499 .default_attrs = rdev_default_attrs,
3500};
3501
3502int md_rdev_init(struct md_rdev *rdev)
3503{
3504 rdev->desc_nr = -1;
3505 rdev->saved_raid_disk = -1;
3506 rdev->raid_disk = -1;
3507 rdev->flags = 0;
3508 rdev->data_offset = 0;
3509 rdev->new_data_offset = 0;
3510 rdev->sb_events = 0;
3511 rdev->last_read_error = 0;
3512 rdev->sb_loaded = 0;
3513 rdev->bb_page = NULL;
3514 atomic_set(&rdev->nr_pending, 0);
3515 atomic_set(&rdev->read_errors, 0);
3516 atomic_set(&rdev->corrected_errors, 0);
3517
3518 INIT_LIST_HEAD(&rdev->same_set);
3519 init_waitqueue_head(&rdev->blocked_wait);
3520
3521
3522
3523
3524
3525 return badblocks_init(&rdev->badblocks, 0);
3526}
3527EXPORT_SYMBOL_GPL(md_rdev_init);
3528
3529
3530 /*
3531  * Import a device into md: allocate a struct md_rdev, claim the block
3532  * device, and, when super_format >= 0, load and sanity check its
3533  * superblock.  On failure the device is released and an ERR_PTR is
3534  * returned.  super_format == -2 requests a "shared" claim, used for
3535  * arrays whose metadata is managed externally.
3536  */
3537
3538static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3539{
3540 char b[BDEVNAME_SIZE];
3541 int err;
3542 struct md_rdev *rdev;
3543 sector_t size;
3544
3545 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3546 if (!rdev)
3547 return ERR_PTR(-ENOMEM);
3548
3549 err = md_rdev_init(rdev);
3550 if (err)
3551 goto abort_free;
3552 err = alloc_disk_sb(rdev);
3553 if (err)
3554 goto abort_free;
3555
3556 err = lock_rdev(rdev, newdev, super_format == -2);
3557 if (err)
3558 goto abort_free;
3559
3560 kobject_init(&rdev->kobj, &rdev_ktype);
3561
3562 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3563 if (!size) {
3564 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3565 bdevname(rdev->bdev,b));
3566 err = -EINVAL;
3567 goto abort_free;
3568 }
3569
3570 if (super_format >= 0) {
3571 err = super_types[super_format].
3572 load_super(rdev, NULL, super_minor);
3573 if (err == -EINVAL) {
3574 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3575 bdevname(rdev->bdev,b),
3576 super_format, super_minor);
3577 goto abort_free;
3578 }
3579 if (err < 0) {
3580 pr_warn("md: could not read %s's sb, not importing!\n",
3581 bdevname(rdev->bdev,b));
3582 goto abort_free;
3583 }
3584 }
3585
3586 return rdev;
3587
3588abort_free:
3589 if (rdev->bdev)
3590 unlock_rdev(rdev);
3591 md_rdev_clear(rdev);
3592 kfree(rdev);
3593 return ERR_PTR(err);
3594}
3595
3596
3597
3598
3599
3600static void analyze_sbs(struct mddev *mddev)
3601{
3602 int i;
3603 struct md_rdev *rdev, *freshest, *tmp;
3604 char b[BDEVNAME_SIZE];
3605
3606 freshest = NULL;
3607 rdev_for_each_safe(rdev, tmp, mddev)
3608 switch (super_types[mddev->major_version].
3609 load_super(rdev, freshest, mddev->minor_version)) {
3610 case 1:
3611 freshest = rdev;
3612 break;
3613 case 0:
3614 break;
3615 default:
3616 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3617 bdevname(rdev->bdev,b));
3618 md_kick_rdev_from_array(rdev);
3619 }
3620
3621 super_types[mddev->major_version].
3622 validate_super(mddev, freshest);
3623
3624 i = 0;
3625 rdev_for_each_safe(rdev, tmp, mddev) {
3626 if (mddev->max_disks &&
3627 (rdev->desc_nr >= mddev->max_disks ||
3628 i > mddev->max_disks)) {
3629 pr_warn("md: %s: %s: only %d devices permitted\n",
3630 mdname(mddev), bdevname(rdev->bdev, b),
3631 mddev->max_disks);
3632 md_kick_rdev_from_array(rdev);
3633 continue;
3634 }
3635 if (rdev != freshest) {
3636 if (super_types[mddev->major_version].
3637 validate_super(mddev, rdev)) {
3638 pr_warn("md: kicking non-fresh %s from array!\n",
3639 bdevname(rdev->bdev,b));
3640 md_kick_rdev_from_array(rdev);
3641 continue;
3642 }
3643 }
3644 if (mddev->level == LEVEL_MULTIPATH) {
3645 rdev->desc_nr = i++;
3646 rdev->raid_disk = rdev->desc_nr;
3647 set_bit(In_sync, &rdev->flags);
3648 } else if (rdev->raid_disk >=
3649 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3650 !test_bit(Journal, &rdev->flags)) {
3651 rdev->raid_disk = -1;
3652 clear_bit(In_sync, &rdev->flags);
3653 }
3654 }
3655}
3656
3657
3658
3659
3660
3661 /*
3662  * Parse a decimal string such as "1.25" into an unsigned long scaled by
3663  * 10^scale.  At most 'scale' digits after the decimal point are honoured
3664  * (any further digits are ignored) and a trailing newline is permitted.
3665  * Returns 0 on success or -EINVAL on malformed input.
3666  */
3667int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3668{
3669 unsigned long result = 0;
3670 long decimals = -1;
3671 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3672 if (*cp == '.')
3673 decimals = 0;
3674 else if (decimals < scale) {
3675 unsigned int value;
3676 value = *cp - '0';
3677 result = result * 10 + value;
3678 if (decimals >= 0)
3679 decimals++;
3680 }
3681 cp++;
3682 }
3683 if (*cp == '\n')
3684 cp++;
3685 if (*cp)
3686 return -EINVAL;
3687 if (decimals < 0)
3688 decimals = 0;
3689 *res = result * int_pow(10, scale - decimals);
3690 return 0;
3691}
3692
3693static ssize_t
3694safe_delay_show(struct mddev *mddev, char *page)
3695{
3696 int msec = (mddev->safemode_delay*1000)/HZ;
3697 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3698}
3699static ssize_t
3700safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3701{
3702 unsigned long msec;
3703
3704 if (mddev_is_clustered(mddev)) {
3705 pr_warn("md: Safemode is disabled for clustered mode\n");
3706 return -EINVAL;
3707 }
3708
3709 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3710 return -EINVAL;
3711 if (msec == 0)
3712 mddev->safemode_delay = 0;
3713 else {
3714 unsigned long old_delay = mddev->safemode_delay;
3715 unsigned long new_delay = (msec*HZ)/1000;
3716
3717 if (new_delay == 0)
3718 new_delay = 1;
3719 mddev->safemode_delay = new_delay;
3720 if (new_delay < old_delay || old_delay == 0)
3721 mod_timer(&mddev->safemode_timer, jiffies+1);
3722 }
3723 return len;
3724}
3725static struct md_sysfs_entry md_safe_delay =
3726__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3727
3728static ssize_t
3729level_show(struct mddev *mddev, char *page)
3730{
3731 struct md_personality *p;
3732 int ret;
3733 spin_lock(&mddev->lock);
3734 p = mddev->pers;
3735 if (p)
3736 ret = sprintf(page, "%s\n", p->name);
3737 else if (mddev->clevel[0])
3738 ret = sprintf(page, "%s\n", mddev->clevel);
3739 else if (mddev->level != LEVEL_NONE)
3740 ret = sprintf(page, "%d\n", mddev->level);
3741 else
3742 ret = 0;
3743 spin_unlock(&mddev->lock);
3744 return ret;
3745}
3746
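/*
 * Change the personality of an array.  For a stopped array this only
 * records the requested level; for a running array the new personality's
 * takeover() method is used to convert the array in place.
 */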
3747static ssize_t
3748level_store(struct mddev *mddev, const char *buf, size_t len)
3749{
3750 char clevel[16];
3751 ssize_t rv;
3752 size_t slen = len;
3753 struct md_personality *pers, *oldpers;
3754 long level;
3755 void *priv, *oldpriv;
3756 struct md_rdev *rdev;
3757
3758 if (slen == 0 || slen >= sizeof(clevel))
3759 return -EINVAL;
3760
3761 rv = mddev_lock(mddev);
3762 if (rv)
3763 return rv;
3764
3765 if (mddev->pers == NULL) {
3766 strncpy(mddev->clevel, buf, slen);
3767 if (mddev->clevel[slen-1] == '\n')
3768 slen--;
3769 mddev->clevel[slen] = 0;
3770 mddev->level = LEVEL_NONE;
3771 rv = len;
3772 goto out_unlock;
3773 }
3774 rv = -EROFS;
3775 if (mddev->ro)
3776 goto out_unlock;
3777
3778
3779
3780
3781
3782
3783
3784 rv = -EBUSY;
3785 if (mddev->sync_thread ||
3786 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3787 mddev->reshape_position != MaxSector ||
3788 mddev->sysfs_active)
3789 goto out_unlock;
3790
3791 rv = -EINVAL;
3792 if (!mddev->pers->quiesce) {
3793 pr_warn("md: %s: %s does not support online personality change\n",
3794 mdname(mddev), mddev->pers->name);
3795 goto out_unlock;
3796 }
3797
3798
3799 strncpy(clevel, buf, slen);
3800 if (clevel[slen-1] == '\n')
3801 slen--;
3802 clevel[slen] = 0;
3803 if (kstrtol(clevel, 10, &level))
3804 level = LEVEL_NONE;
3805
3806 if (request_module("md-%s", clevel) != 0)
3807 request_module("md-level-%s", clevel);
3808 spin_lock(&pers_lock);
3809 pers = find_pers(level, clevel);
3810 if (!pers || !try_module_get(pers->owner)) {
3811 spin_unlock(&pers_lock);
3812 pr_warn("md: personality %s not loaded\n", clevel);
3813 rv = -EINVAL;
3814 goto out_unlock;
3815 }
3816 spin_unlock(&pers_lock);
3817
3818 if (pers == mddev->pers) {
3819
3820 module_put(pers->owner);
3821 rv = len;
3822 goto out_unlock;
3823 }
3824 if (!pers->takeover) {
3825 module_put(pers->owner);
3826 pr_warn("md: %s: %s does not support personality takeover\n",
3827 mdname(mddev), clevel);
3828 rv = -EINVAL;
3829 goto out_unlock;
3830 }
3831
3832 rdev_for_each(rdev, mddev)
3833 rdev->new_raid_disk = rdev->raid_disk;
3834
3835
3836
3837
3838 priv = pers->takeover(mddev);
3839 if (IS_ERR(priv)) {
3840 mddev->new_level = mddev->level;
3841 mddev->new_layout = mddev->layout;
3842 mddev->new_chunk_sectors = mddev->chunk_sectors;
3843 mddev->raid_disks -= mddev->delta_disks;
3844 mddev->delta_disks = 0;
3845 mddev->reshape_backwards = 0;
3846 module_put(pers->owner);
3847 pr_warn("md: %s: %s would not accept array\n",
3848 mdname(mddev), clevel);
3849 rv = PTR_ERR(priv);
3850 goto out_unlock;
3851 }
3852
3853
3854 mddev_suspend(mddev);
3855 mddev_detach(mddev);
3856
3857 spin_lock(&mddev->lock);
3858 oldpers = mddev->pers;
3859 oldpriv = mddev->private;
3860 mddev->pers = pers;
3861 mddev->private = priv;
3862 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3863 mddev->level = mddev->new_level;
3864 mddev->layout = mddev->new_layout;
3865 mddev->chunk_sectors = mddev->new_chunk_sectors;
3866 mddev->delta_disks = 0;
3867 mddev->reshape_backwards = 0;
3868 mddev->degraded = 0;
3869 spin_unlock(&mddev->lock);
3870
3871 if (oldpers->sync_request == NULL &&
3872 mddev->external) {
3873
3874
3875
3876
3877
3878
3879
3880 mddev->in_sync = 0;
3881 mddev->safemode_delay = 0;
3882 mddev->safemode = 0;
3883 }
3884
3885 oldpers->free(mddev, oldpriv);
3886
3887 if (oldpers->sync_request == NULL &&
3888 pers->sync_request != NULL) {
3889
3890 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3891 pr_warn("md: cannot register extra attributes for %s\n",
3892 mdname(mddev));
3893 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3894 }
3895 if (oldpers->sync_request != NULL &&
3896 pers->sync_request == NULL) {
3897
3898 if (mddev->to_remove == NULL)
3899 mddev->to_remove = &md_redundancy_group;
3900 }
3901
3902 module_put(oldpers->owner);
3903
3904 rdev_for_each(rdev, mddev) {
3905 if (rdev->raid_disk < 0)
3906 continue;
3907 if (rdev->new_raid_disk >= mddev->raid_disks)
3908 rdev->new_raid_disk = -1;
3909 if (rdev->new_raid_disk == rdev->raid_disk)
3910 continue;
3911 sysfs_unlink_rdev(mddev, rdev);
3912 }
3913 rdev_for_each(rdev, mddev) {
3914 if (rdev->raid_disk < 0)
3915 continue;
3916 if (rdev->new_raid_disk == rdev->raid_disk)
3917 continue;
3918 rdev->raid_disk = rdev->new_raid_disk;
3919 if (rdev->raid_disk < 0)
3920 clear_bit(In_sync, &rdev->flags);
3921 else {
3922 if (sysfs_link_rdev(mddev, rdev))
3923 pr_warn("md: cannot register rd%d for %s after level change\n",
3924 rdev->raid_disk, mdname(mddev));
3925 }
3926 }
3927
3928 if (pers->sync_request == NULL) {
3929
3930
3931
3932 mddev->in_sync = 1;
3933 del_timer_sync(&mddev->safemode_timer);
3934 }
3935 blk_set_stacking_limits(&mddev->queue->limits);
3936 pers->run(mddev);
3937 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3938 mddev_resume(mddev);
3939 if (!mddev->thread)
3940 md_update_sb(mddev, 1);
3941 sysfs_notify(&mddev->kobj, NULL, "level");
3942 md_new_event(mddev);
3943 rv = len;
3944out_unlock:
3945 mddev_unlock(mddev);
3946 return rv;
3947}
3948
3949static struct md_sysfs_entry md_level =
3950__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3951
3952static ssize_t
3953layout_show(struct mddev *mddev, char *page)
3954{
3955
3956 if (mddev->reshape_position != MaxSector &&
3957 mddev->layout != mddev->new_layout)
3958 return sprintf(page, "%d (%d)\n",
3959 mddev->new_layout, mddev->layout);
3960 return sprintf(page, "%d\n", mddev->layout);
3961}
3962
3963static ssize_t
3964layout_store(struct mddev *mddev, const char *buf, size_t len)
3965{
3966 unsigned int n;
3967 int err;
3968
3969 err = kstrtouint(buf, 10, &n);
3970 if (err < 0)
3971 return err;
3972 err = mddev_lock(mddev);
3973 if (err)
3974 return err;
3975
3976 if (mddev->pers) {
3977 if (mddev->pers->check_reshape == NULL)
3978 err = -EBUSY;
3979 else if (mddev->ro)
3980 err = -EROFS;
3981 else {
3982 mddev->new_layout = n;
3983 err = mddev->pers->check_reshape(mddev);
3984 if (err)
3985 mddev->new_layout = mddev->layout;
3986 }
3987 } else {
3988 mddev->new_layout = n;
3989 if (mddev->reshape_position == MaxSector)
3990 mddev->layout = n;
3991 }
3992 mddev_unlock(mddev);
3993 return err ?: len;
3994}
3995static struct md_sysfs_entry md_layout =
3996__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3997
3998static ssize_t
3999raid_disks_show(struct mddev *mddev, char *page)
4000{
4001 if (mddev->raid_disks == 0)
4002 return 0;
4003 if (mddev->reshape_position != MaxSector &&
4004 mddev->delta_disks != 0)
4005 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4006 mddev->raid_disks - mddev->delta_disks);
4007 return sprintf(page, "%d\n", mddev->raid_disks);
4008}
4009
4010static int update_raid_disks(struct mddev *mddev, int raid_disks);
4011
4012static ssize_t
4013raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4014{
4015 unsigned int n;
4016 int err;
4017
4018 err = kstrtouint(buf, 10, &n);
4019 if (err < 0)
4020 return err;
4021
4022 err = mddev_lock(mddev);
4023 if (err)
4024 return err;
4025 if (mddev->pers)
4026 err = update_raid_disks(mddev, n);
4027 else if (mddev->reshape_position != MaxSector) {
4028 struct md_rdev *rdev;
4029 int olddisks = mddev->raid_disks - mddev->delta_disks;
4030
4031 err = -EINVAL;
4032 rdev_for_each(rdev, mddev) {
4033 if (olddisks < n &&
4034 rdev->data_offset < rdev->new_data_offset)
4035 goto out_unlock;
4036 if (olddisks > n &&
4037 rdev->data_offset > rdev->new_data_offset)
4038 goto out_unlock;
4039 }
4040 err = 0;
4041 mddev->delta_disks = n - olddisks;
4042 mddev->raid_disks = n;
4043 mddev->reshape_backwards = (mddev->delta_disks < 0);
4044 } else
4045 mddev->raid_disks = n;
4046out_unlock:
4047 mddev_unlock(mddev);
4048 return err ? err : len;
4049}
4050static struct md_sysfs_entry md_raid_disks =
4051__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4052
4053static ssize_t
4054chunk_size_show(struct mddev *mddev, char *page)
4055{
4056 if (mddev->reshape_position != MaxSector &&
4057 mddev->chunk_sectors != mddev->new_chunk_sectors)
4058 return sprintf(page, "%d (%d)\n",
4059 mddev->new_chunk_sectors << 9,
4060 mddev->chunk_sectors << 9);
4061 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4062}
4063
4064static ssize_t
4065chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4066{
4067 unsigned long n;
4068 int err;
4069
4070 err = kstrtoul(buf, 10, &n);
4071 if (err < 0)
4072 return err;
4073
4074 err = mddev_lock(mddev);
4075 if (err)
4076 return err;
4077 if (mddev->pers) {
4078 if (mddev->pers->check_reshape == NULL)
4079 err = -EBUSY;
4080 else if (mddev->ro)
4081 err = -EROFS;
4082 else {
4083 mddev->new_chunk_sectors = n >> 9;
4084 err = mddev->pers->check_reshape(mddev);
4085 if (err)
4086 mddev->new_chunk_sectors = mddev->chunk_sectors;
4087 }
4088 } else {
4089 mddev->new_chunk_sectors = n >> 9;
4090 if (mddev->reshape_position == MaxSector)
4091 mddev->chunk_sectors = n >> 9;
4092 }
4093 mddev_unlock(mddev);
4094 return err ?: len;
4095}
4096static struct md_sysfs_entry md_chunk_size =
4097__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4098
4099static ssize_t
4100resync_start_show(struct mddev *mddev, char *page)
4101{
4102 if (mddev->recovery_cp == MaxSector)
4103 return sprintf(page, "none\n");
4104 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4105}
4106
4107static ssize_t
4108resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4109{
4110 unsigned long long n;
4111 int err;
4112
4113 if (cmd_match(buf, "none"))
4114 n = MaxSector;
4115 else {
4116 err = kstrtoull(buf, 10, &n);
4117 if (err < 0)
4118 return err;
4119 if (n != (sector_t)n)
4120 return -EINVAL;
4121 }
4122
4123 err = mddev_lock(mddev);
4124 if (err)
4125 return err;
4126 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4127 err = -EBUSY;
4128
4129 if (!err) {
4130 mddev->recovery_cp = n;
4131 if (mddev->pers)
4132 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4133 }
4134 mddev_unlock(mddev);
4135 return err ?: len;
4136}
4137static struct md_sysfs_entry md_resync_start =
4138__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4139 resync_start_show, resync_start_store);
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168 /*
4169  * Array states reported/accepted by the array_state attribute:
4170  *   clear         - no devices, no size, no level
4171  *   inactive      - array not started (devices may be present)
4172  *   suspended     - not settable through this file
4173  *   readonly      - array started read-only; no metadata updates
4174  *   read-auto     - read-only until the first write request arrives
4175  *   clean         - started read-write and currently in-sync
4176  *   active        - started read-write; writes may be outstanding
4177  *   write-pending - a superblock update is pending before writes proceed
4178  *   active-idle   - like active, but the safe-mode timer has expired
4179  *   broken        - array detected as broken (reported in place of clean)
4180  */
4181
4182enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4183 write_pending, active_idle, broken, bad_word};
4184static char *array_states[] = {
4185 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4186 "write-pending", "active-idle", "broken", NULL };
4187
4188static int match_word(const char *word, char **list)
4189{
4190 int n;
4191 for (n=0; list[n]; n++)
4192 if (cmd_match(word, list[n]))
4193 break;
4194 return n;
4195}
4196
4197static ssize_t
4198array_state_show(struct mddev *mddev, char *page)
4199{
4200 enum array_state st = inactive;
4201
4202 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4203 switch(mddev->ro) {
4204 case 1:
4205 st = readonly;
4206 break;
4207 case 2:
4208 st = read_auto;
4209 break;
4210 case 0:
4211 spin_lock(&mddev->lock);
4212 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4213 st = write_pending;
4214 else if (mddev->in_sync)
4215 st = clean;
4216 else if (mddev->safemode)
4217 st = active_idle;
4218 else
4219 st = active;
4220 spin_unlock(&mddev->lock);
4221 }
4222
4223 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4224 st = broken;
4225 } else {
4226 if (list_empty(&mddev->disks) &&
4227 mddev->raid_disks == 0 &&
4228 mddev->dev_sectors == 0)
4229 st = clear;
4230 else
4231 st = inactive;
4232 }
4233 return sprintf(page, "%s\n", array_states[st]);
4234}
4235
4236static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4237static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4238static int do_md_run(struct mddev *mddev);
4239static int restart_array(struct mddev *mddev);
4240
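/*
 * Change the array state.  "active"/"clean" on a running read-write array
 * take a fast path under mddev->lock; everything else takes mddev_lock()
 * and may start, stop or switch the array read-only.
 */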
4241static ssize_t
4242array_state_store(struct mddev *mddev, const char *buf, size_t len)
4243{
4244 int err = 0;
4245 enum array_state st = match_word(buf, array_states);
4246
4247 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4248
4249
4250
4251 spin_lock(&mddev->lock);
4252 if (st == active) {
4253 restart_array(mddev);
4254 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4255 md_wakeup_thread(mddev->thread);
4256 wake_up(&mddev->sb_wait);
4257 } else {
4258 restart_array(mddev);
4259 if (!set_in_sync(mddev))
4260 err = -EBUSY;
4261 }
4262 if (!err)
4263 sysfs_notify_dirent_safe(mddev->sysfs_state);
4264 spin_unlock(&mddev->lock);
4265 return err ?: len;
4266 }
4267 err = mddev_lock(mddev);
4268 if (err)
4269 return err;
4270 err = -EINVAL;
4271 switch(st) {
4272 case bad_word:
4273 break;
4274 case clear:
4275
4276 err = do_md_stop(mddev, 0, NULL);
4277 break;
4278 case inactive:
4279
4280 if (mddev->pers)
4281 err = do_md_stop(mddev, 2, NULL);
4282 else
4283 err = 0;
4284 break;
4285 case suspended:
4286 break;
4287 case readonly:
4288 if (mddev->pers)
4289 err = md_set_readonly(mddev, NULL);
4290 else {
4291 mddev->ro = 1;
4292 set_disk_ro(mddev->gendisk, 1);
4293 err = do_md_run(mddev);
4294 }
4295 break;
4296 case read_auto:
4297 if (mddev->pers) {
4298 if (mddev->ro == 0)
4299 err = md_set_readonly(mddev, NULL);
4300 else if (mddev->ro == 1)
4301 err = restart_array(mddev);
4302 if (err == 0) {
4303 mddev->ro = 2;
4304 set_disk_ro(mddev->gendisk, 0);
4305 }
4306 } else {
4307 mddev->ro = 2;
4308 err = do_md_run(mddev);
4309 }
4310 break;
4311 case clean:
4312 if (mddev->pers) {
4313 err = restart_array(mddev);
4314 if (err)
4315 break;
4316 spin_lock(&mddev->lock);
4317 if (!set_in_sync(mddev))
4318 err = -EBUSY;
4319 spin_unlock(&mddev->lock);
4320 } else
4321 err = -EINVAL;
4322 break;
4323 case active:
4324 if (mddev->pers) {
4325 err = restart_array(mddev);
4326 if (err)
4327 break;
4328 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4329 wake_up(&mddev->sb_wait);
4330 err = 0;
4331 } else {
4332 mddev->ro = 0;
4333 set_disk_ro(mddev->gendisk, 0);
4334 err = do_md_run(mddev);
4335 }
4336 break;
4337 case write_pending:
4338 case active_idle:
4339 case broken:
4340
4341 break;
4342 }
4343
4344 if (!err) {
4345 if (mddev->hold_active == UNTIL_IOCTL)
4346 mddev->hold_active = 0;
4347 sysfs_notify_dirent_safe(mddev->sysfs_state);
4348 }
4349 mddev_unlock(mddev);
4350 return err ?: len;
4351}
4352static struct md_sysfs_entry md_array_state =
4353__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4354
4355static ssize_t
4356max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4357 return sprintf(page, "%d\n",
4358 atomic_read(&mddev->max_corr_read_errors));
4359}
4360
4361static ssize_t
4362max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4363{
4364 unsigned int n;
4365 int rv;
4366
4367 rv = kstrtouint(buf, 10, &n);
4368 if (rv < 0)
4369 return rv;
4370 atomic_set(&mddev->max_corr_read_errors, n);
4371 return len;
4372}
4373
4374static struct md_sysfs_entry max_corr_read_errors =
4375__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4376 max_corrected_read_errors_store);
4377
4378static ssize_t
4379null_show(struct mddev *mddev, char *page)
4380{
4381 return -EINVAL;
4382}
4383
4384static ssize_t
4385new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4386{
4387 /*
4388  * The buffer must be of the form "major:minor" (optionally newline
4389  * terminated).  For arrays with persistent metadata the new device's
4390  * superblock is loaded and checked against an existing member before
4391  * binding; external and non-persistent arrays import the device without
4392  * reading a superblock.
4393  */
4394 char *e;
4395 int major = simple_strtoul(buf, &e, 10);
4396 int minor;
4397 dev_t dev;
4398 struct md_rdev *rdev;
4399 int err;
4400
4401 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4402 return -EINVAL;
4403 minor = simple_strtoul(e+1, &e, 10);
4404 if (*e && *e != '\n')
4405 return -EINVAL;
4406 dev = MKDEV(major, minor);
4407 if (major != MAJOR(dev) ||
4408 minor != MINOR(dev))
4409 return -EOVERFLOW;
4410
4411 flush_workqueue(md_misc_wq);
4412
4413 err = mddev_lock(mddev);
4414 if (err)
4415 return err;
4416 if (mddev->persistent) {
4417 rdev = md_import_device(dev, mddev->major_version,
4418 mddev->minor_version);
4419 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4420 struct md_rdev *rdev0
4421 = list_entry(mddev->disks.next,
4422 struct md_rdev, same_set);
4423 err = super_types[mddev->major_version]
4424 .load_super(rdev, rdev0, mddev->minor_version);
4425 if (err < 0)
4426 goto out;
4427 }
4428 } else if (mddev->external)
4429 rdev = md_import_device(dev, -2, -1);
4430 else
4431 rdev = md_import_device(dev, -1, -1);
4432
4433 if (IS_ERR(rdev)) {
4434 mddev_unlock(mddev);
4435 return PTR_ERR(rdev);
4436 }
4437 err = bind_rdev_to_array(rdev, mddev);
4438 out:
4439 if (err)
4440 export_rdev(rdev);
4441 mddev_unlock(mddev);
4442 if (!err)
4443 md_new_event(mddev);
4444 return err ? err : len;
4445}
4446
4447static struct md_sysfs_entry md_new_device =
4448__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4449
4450static ssize_t
4451bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4452{
4453 char *end;
4454 unsigned long chunk, end_chunk;
4455 int err;
4456
4457 err = mddev_lock(mddev);
4458 if (err)
4459 return err;
4460 if (!mddev->bitmap)
4461 goto out;
4462
4463 while (*buf) {
4464 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4465 if (buf == end) break;
4466 if (*end == '-') {
4467 buf = end + 1;
4468 end_chunk = simple_strtoul(buf, &end, 0);
4469 if (buf == end) break;
4470 }
4471 if (*end && !isspace(*end)) break;
4472 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4473 buf = skip_spaces(end);
4474 }
4475 md_bitmap_unplug(mddev->bitmap);
4476out:
4477 mddev_unlock(mddev);
4478 return len;
4479}
4480
4481static struct md_sysfs_entry md_bitmap =
4482__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4483
4484static ssize_t
4485size_show(struct mddev *mddev, char *page)
4486{
4487 return sprintf(page, "%llu\n",
4488 (unsigned long long)mddev->dev_sectors / 2);
4489}
4490
4491static int update_size(struct mddev *mddev, sector_t num_sectors);
4492
4493static ssize_t
4494size_store(struct mddev *mddev, const char *buf, size_t len)
4495{
4496
4497
4498
4499
4500 sector_t sectors;
4501 int err = strict_blocks_to_sectors(buf, &sectors);
4502
4503 if (err < 0)
4504 return err;
4505 err = mddev_lock(mddev);
4506 if (err)
4507 return err;
4508 if (mddev->pers) {
4509 err = update_size(mddev, sectors);
4510 if (err == 0)
4511 md_update_sb(mddev, 1);
4512 } else {
4513 if (mddev->dev_sectors == 0 ||
4514 mddev->dev_sectors > sectors)
4515 mddev->dev_sectors = sectors;
4516 else
4517 err = -ENOSPC;
4518 }
4519 mddev_unlock(mddev);
4520 return err ? err : len;
4521}
4522
4523static struct md_sysfs_entry md_size =
4524__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4525
4526
4527
4528
4529
4530
4531
4532static ssize_t
4533metadata_show(struct mddev *mddev, char *page)
4534{
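	/*
	 * Report the metadata version: "N.M" for an internally managed
	 * superblock, "external:<type>" for externally managed metadata,
	 * or "none".
	 */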
4535 if (mddev->persistent)
4536 return sprintf(page, "%d.%d\n",
4537 mddev->major_version, mddev->minor_version);
4538 else if (mddev->external)
4539 return sprintf(page, "external:%s\n", mddev->metadata_type);
4540 else
4541 return sprintf(page, "none\n");
4542}
4543
4544static ssize_t
4545metadata_store(struct mddev *mddev, const char *buf, size_t len)
4546{
4547 int major, minor;
4548 char *e;
4549 int err;
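	/*
	 * Changing the details of "external" metadata is always permitted;
	 * otherwise the array must have no devices attached.
	 */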
4550
4551
4552
4553
4554
4555 err = mddev_lock(mddev);
4556 if (err)
4557 return err;
4558 err = -EBUSY;
4559 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4560 ;
4561 else if (!list_empty(&mddev->disks))
4562 goto out_unlock;
4563
4564 err = 0;
4565 if (cmd_match(buf, "none")) {
4566 mddev->persistent = 0;
4567 mddev->external = 0;
4568 mddev->major_version = 0;
4569 mddev->minor_version = 90;
4570 goto out_unlock;
4571 }
4572 if (strncmp(buf, "external:", 9) == 0) {
4573 size_t namelen = len-9;
4574 if (namelen >= sizeof(mddev->metadata_type))
4575 namelen = sizeof(mddev->metadata_type)-1;
4576 strncpy(mddev->metadata_type, buf+9, namelen);
4577 mddev->metadata_type[namelen] = 0;
4578 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4579 mddev->metadata_type[--namelen] = 0;
4580 mddev->persistent = 0;
4581 mddev->external = 1;
4582 mddev->major_version = 0;
4583 mddev->minor_version = 90;
4584 goto out_unlock;
4585 }
4586 major = simple_strtoul(buf, &e, 10);
4587 err = -EINVAL;
4588 if (e==buf || *e != '.')
4589 goto out_unlock;
4590 buf = e+1;
4591 minor = simple_strtoul(buf, &e, 10);
4592 if (e==buf || (*e && *e != '\n') )
4593 goto out_unlock;
4594 err = -ENOENT;
4595 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4596 goto out_unlock;
4597 mddev->major_version = major;
4598 mddev->minor_version = minor;
4599 mddev->persistent = 1;
4600 mddev->external = 0;
4601 err = 0;
4602out_unlock:
4603 mddev_unlock(mddev);
4604 return err ?: len;
4605}
4606
4607static struct md_sysfs_entry md_metadata =
4608__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4609
4610static ssize_t
4611action_show(struct mddev *mddev, char *page)
4612{
4613 char *type = "idle";
4614 unsigned long recovery = mddev->recovery;
4615 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4616 type = "frozen";
4617 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4618 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4619 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4620 type = "reshape";
4621 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4622 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4623 type = "resync";
4624 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4625 type = "check";
4626 else
4627 type = "repair";
4628 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4629 type = "recover";
4630 else if (mddev->reshape_position != MaxSector)
4631 type = "reshape";
4632 }
4633 return sprintf(page, "%s\n", type);
4634}
4635
4636static ssize_t
4637action_store(struct mddev *mddev, const char *page, size_t len)
4638{
4639 if (!mddev->pers || !mddev->pers->sync_request)
4640 return -EINVAL;
4641
4642
4643 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4644 if (cmd_match(page, "frozen"))
4645 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4646 else
4647 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4648 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4649 mddev_lock(mddev) == 0) {
4650 flush_workqueue(md_misc_wq);
4651 if (mddev->sync_thread) {
4652 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4653 md_reap_sync_thread(mddev);
4654 }
4655 mddev_unlock(mddev);
4656 }
4657 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4658 return -EBUSY;
4659 else if (cmd_match(page, "resync"))
4660 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4661 else if (cmd_match(page, "recover")) {
4662 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4663 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4664 } else if (cmd_match(page, "reshape")) {
4665 int err;
4666 if (mddev->pers->start_reshape == NULL)
4667 return -EINVAL;
4668 err = mddev_lock(mddev);
4669 if (!err) {
4670 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4671 err = -EBUSY;
4672 else {
4673 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4674 err = mddev->pers->start_reshape(mddev);
4675 }
4676 mddev_unlock(mddev);
4677 }
4678 if (err)
4679 return err;
4680 sysfs_notify(&mddev->kobj, NULL, "degraded");
4681 } else {
4682 if (cmd_match(page, "check"))
4683 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4684 else if (!cmd_match(page, "repair"))
4685 return -EINVAL;
4686 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4687 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4688 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4689 }
4690 if (mddev->ro == 2) {
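		/*
		 * A write to sync_action is enough to justify clearing
		 * read-auto mode.
		 */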
4691
4692
4693
4694 mddev->ro = 0;
4695 md_wakeup_thread(mddev->sync_thread);
4696 }
4697 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4698 md_wakeup_thread(mddev->thread);
4699 sysfs_notify_dirent_safe(mddev->sysfs_action);
4700 return len;
4701}
4702
4703static struct md_sysfs_entry md_scan_mode =
4704__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4705
4706static ssize_t
4707last_sync_action_show(struct mddev *mddev, char *page)
4708{
4709 return sprintf(page, "%s\n", mddev->last_sync_action);
4710}
4711
4712static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4713
4714static ssize_t
4715mismatch_cnt_show(struct mddev *mddev, char *page)
4716{
4717 return sprintf(page, "%llu\n",
4718 (unsigned long long)
4719 atomic64_read(&mddev->resync_mismatches));
4720}
4721
4722static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4723
4724static ssize_t
4725sync_min_show(struct mddev *mddev, char *page)
4726{
4727 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4728 mddev->sync_speed_min ? "local": "system");
4729}
4730
4731static ssize_t
4732sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4733{
4734 unsigned int min;
4735 int rv;
4736
4737 if (strncmp(buf, "system", 6)==0) {
4738 min = 0;
4739 } else {
4740 rv = kstrtouint(buf, 10, &min);
4741 if (rv < 0)
4742 return rv;
4743 if (min == 0)
4744 return -EINVAL;
4745 }
4746 mddev->sync_speed_min = min;
4747 return len;
4748}
4749
4750static struct md_sysfs_entry md_sync_min =
4751__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4752
4753static ssize_t
4754sync_max_show(struct mddev *mddev, char *page)
4755{
4756 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4757 mddev->sync_speed_max ? "local": "system");
4758}
4759
4760static ssize_t
4761sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4762{
4763 unsigned int max;
4764 int rv;
4765
4766 if (strncmp(buf, "system", 6)==0) {
4767 max = 0;
4768 } else {
4769 rv = kstrtouint(buf, 10, &max);
4770 if (rv < 0)
4771 return rv;
4772 if (max == 0)
4773 return -EINVAL;
4774 }
4775 mddev->sync_speed_max = max;
4776 return len;
4777}
4778
4779static struct md_sysfs_entry md_sync_max =
4780__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4781
4782static ssize_t
4783degraded_show(struct mddev *mddev, char *page)
4784{
4785 return sprintf(page, "%d\n", mddev->degraded);
4786}
4787static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4788
4789static ssize_t
4790sync_force_parallel_show(struct mddev *mddev, char *page)
4791{
4792 return sprintf(page, "%d\n", mddev->parallel_resync);
4793}
4794
4795static ssize_t
4796sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4797{
4798 long n;
4799
4800 if (kstrtol(buf, 10, &n))
4801 return -EINVAL;
4802
4803 if (n != 0 && n != 1)
4804 return -EINVAL;
4805
4806 mddev->parallel_resync = n;
4807
4808 if (mddev->sync_thread)
4809 wake_up(&resync_wait);
4810
4811 return len;
4812}
4813
4814
4815static struct md_sysfs_entry md_sync_force_parallel =
4816__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4817 sync_force_parallel_show, sync_force_parallel_store);
4818
4819static ssize_t
4820sync_speed_show(struct mddev *mddev, char *page)
4821{
4822 unsigned long resync, dt, db;
4823 if (mddev->curr_resync == 0)
4824 return sprintf(page, "none\n");
4825 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4826 dt = (jiffies - mddev->resync_mark) / HZ;
4827 if (!dt) dt++;
4828 db = resync - mddev->resync_mark_cnt;
4829 return sprintf(page, "%lu\n", db/dt/2);
4830}
4831
4832static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4833
4834static ssize_t
4835sync_completed_show(struct mddev *mddev, char *page)
4836{
4837 unsigned long long max_sectors, resync;
4838
4839 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4840 return sprintf(page, "none\n");
4841
4842 if (mddev->curr_resync == 1 ||
4843 mddev->curr_resync == 2)
4844 return sprintf(page, "delayed\n");
4845
4846 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4847 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4848 max_sectors = mddev->resync_max_sectors;
4849 else
4850 max_sectors = mddev->dev_sectors;
4851
4852 resync = mddev->curr_resync_completed;
4853 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4854}
4855
4856static struct md_sysfs_entry md_sync_completed =
4857 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4858
4859static ssize_t
4860min_sync_show(struct mddev *mddev, char *page)
4861{
4862 return sprintf(page, "%llu\n",
4863 (unsigned long long)mddev->resync_min);
4864}
4865static ssize_t
4866min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4867{
4868 unsigned long long min;
4869 int err;
4870
4871 if (kstrtoull(buf, 10, &min))
4872 return -EINVAL;
4873
4874 spin_lock(&mddev->lock);
4875 err = -EINVAL;
4876 if (min > mddev->resync_max)
4877 goto out_unlock;
4878
4879 err = -EBUSY;
4880 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4881 goto out_unlock;
4882
4883
4884 mddev->resync_min = round_down(min, 8);
4885 err = 0;
4886
4887out_unlock:
4888 spin_unlock(&mddev->lock);
4889 return err ?: len;
4890}
4891
4892static struct md_sysfs_entry md_min_sync =
4893__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4894
4895static ssize_t
4896max_sync_show(struct mddev *mddev, char *page)
4897{
4898 if (mddev->resync_max == MaxSector)
4899 return sprintf(page, "max\n");
4900 else
4901 return sprintf(page, "%llu\n",
4902 (unsigned long long)mddev->resync_max);
4903}
4904static ssize_t
4905max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4906{
4907 int err;
4908 spin_lock(&mddev->lock);
4909 if (strncmp(buf, "max", 3) == 0)
4910 mddev->resync_max = MaxSector;
4911 else {
4912 unsigned long long max;
4913 int chunk;
4914
4915 err = -EINVAL;
4916 if (kstrtoull(buf, 10, &max))
4917 goto out_unlock;
4918 if (max < mddev->resync_min)
4919 goto out_unlock;
4920
4921 err = -EBUSY;
4922 if (max < mddev->resync_max &&
4923 mddev->ro == 0 &&
4924 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4925 goto out_unlock;
4926
4927
4928 chunk = mddev->chunk_sectors;
4929 if (chunk) {
4930 sector_t temp = max;
4931
4932 err = -EINVAL;
4933 if (sector_div(temp, chunk))
4934 goto out_unlock;
4935 }
4936 mddev->resync_max = max;
4937 }
4938 wake_up(&mddev->recovery_wait);
4939 err = 0;
4940out_unlock:
4941 spin_unlock(&mddev->lock);
4942 return err ?: len;
4943}
4944
4945static struct md_sysfs_entry md_max_sync =
4946__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4947
4948static ssize_t
4949suspend_lo_show(struct mddev *mddev, char *page)
4950{
4951 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4952}
4953
4954static ssize_t
4955suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4956{
4957 unsigned long long new;
4958 int err;
4959
4960 err = kstrtoull(buf, 10, &new);
4961 if (err < 0)
4962 return err;
4963 if (new != (sector_t)new)
4964 return -EINVAL;
4965
4966 err = mddev_lock(mddev);
4967 if (err)
4968 return err;
4969 err = -EINVAL;
4970 if (mddev->pers == NULL ||
4971 mddev->pers->quiesce == NULL)
4972 goto unlock;
4973 mddev_suspend(mddev);
4974 mddev->suspend_lo = new;
4975 mddev_resume(mddev);
4976
4977 err = 0;
4978unlock:
4979 mddev_unlock(mddev);
4980 return err ?: len;
4981}
4982static struct md_sysfs_entry md_suspend_lo =
4983__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4984
4985static ssize_t
4986suspend_hi_show(struct mddev *mddev, char *page)
4987{
4988 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4989}
4990
4991static ssize_t
4992suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4993{
4994 unsigned long long new;
4995 int err;
4996
4997 err = kstrtoull(buf, 10, &new);
4998 if (err < 0)
4999 return err;
5000 if (new != (sector_t)new)
5001 return -EINVAL;
5002
5003 err = mddev_lock(mddev);
5004 if (err)
5005 return err;
5006 err = -EINVAL;
5007 if (mddev->pers == NULL)
5008 goto unlock;
5009
5010 mddev_suspend(mddev);
5011 mddev->suspend_hi = new;
5012 mddev_resume(mddev);
5013
5014 err = 0;
5015unlock:
5016 mddev_unlock(mddev);
5017 return err ?: len;
5018}
5019static struct md_sysfs_entry md_suspend_hi =
5020__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5021
5022static ssize_t
5023reshape_position_show(struct mddev *mddev, char *page)
5024{
5025 if (mddev->reshape_position != MaxSector)
5026 return sprintf(page, "%llu\n",
5027 (unsigned long long)mddev->reshape_position);
5028 strcpy(page, "none\n");
5029 return 5;
5030}
5031
5032static ssize_t
5033reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5034{
5035 struct md_rdev *rdev;
5036 unsigned long long new;
5037 int err;
5038
5039 err = kstrtoull(buf, 10, &new);
5040 if (err < 0)
5041 return err;
5042 if (new != (sector_t)new)
5043 return -EINVAL;
5044 err = mddev_lock(mddev);
5045 if (err)
5046 return err;
5047 err = -EBUSY;
5048 if (mddev->pers)
5049 goto unlock;
5050 mddev->reshape_position = new;
5051 mddev->delta_disks = 0;
5052 mddev->reshape_backwards = 0;
5053 mddev->new_level = mddev->level;
5054 mddev->new_layout = mddev->layout;
5055 mddev->new_chunk_sectors = mddev->chunk_sectors;
5056 rdev_for_each(rdev, mddev)
5057 rdev->new_data_offset = rdev->data_offset;
5058 err = 0;
5059unlock:
5060 mddev_unlock(mddev);
5061 return err ?: len;
5062}
5063
5064static struct md_sysfs_entry md_reshape_position =
5065__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5066 reshape_position_store);
5067
5068static ssize_t
5069reshape_direction_show(struct mddev *mddev, char *page)
5070{
5071 return sprintf(page, "%s\n",
5072 mddev->reshape_backwards ? "backwards" : "forwards");
5073}
5074
5075static ssize_t
5076reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5077{
5078 int backwards = 0;
5079 int err;
5080
5081 if (cmd_match(buf, "forwards"))
5082 backwards = 0;
5083 else if (cmd_match(buf, "backwards"))
5084 backwards = 1;
5085 else
5086 return -EINVAL;
5087 if (mddev->reshape_backwards == backwards)
5088 return len;
5089
5090 err = mddev_lock(mddev);
5091 if (err)
5092 return err;
5093
5094 if (mddev->delta_disks)
5095 err = -EBUSY;
5096 else if (mddev->persistent &&
5097 mddev->major_version == 0)
5098 err = -EINVAL;
5099 else
5100 mddev->reshape_backwards = backwards;
5101 mddev_unlock(mddev);
5102 return err ?: len;
5103}
5104
5105static struct md_sysfs_entry md_reshape_direction =
5106__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5107 reshape_direction_store);
5108
5109static ssize_t
5110array_size_show(struct mddev *mddev, char *page)
5111{
5112 if (mddev->external_size)
5113 return sprintf(page, "%llu\n",
5114 (unsigned long long)mddev->array_sectors/2);
5115 else
5116 return sprintf(page, "default\n");
5117}
5118
5119static ssize_t
5120array_size_store(struct mddev *mddev, const char *buf, size_t len)
5121{
5122 sector_t sectors;
5123 int err;
5124
5125 err = mddev_lock(mddev);
5126 if (err)
5127 return err;
5128
5129
5130 if (mddev_is_clustered(mddev)) {
5131 mddev_unlock(mddev);
5132 return -EINVAL;
5133 }
5134
5135 if (strncmp(buf, "default", 7) == 0) {
5136 if (mddev->pers)
5137 sectors = mddev->pers->size(mddev, 0, 0);
5138 else
5139 sectors = mddev->array_sectors;
5140
5141 mddev->external_size = 0;
5142 } else {
5143 if (strict_blocks_to_sectors(buf, &sectors) < 0)
5144 err = -EINVAL;
5145 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5146 err = -E2BIG;
5147 else
5148 mddev->external_size = 1;
5149 }
5150
5151 if (!err) {
5152 mddev->array_sectors = sectors;
5153 if (mddev->pers) {
5154 set_capacity(mddev->gendisk, mddev->array_sectors);
5155 revalidate_disk(mddev->gendisk);
5156 }
5157 }
5158 mddev_unlock(mddev);
5159 return err ?: len;
5160}
5161
5162static struct md_sysfs_entry md_array_size =
5163__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5164 array_size_store);
5165
5166static ssize_t
5167consistency_policy_show(struct mddev *mddev, char *page)
5168{
5169 int ret;
5170
5171 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5172 ret = sprintf(page, "journal\n");
5173 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5174 ret = sprintf(page, "ppl\n");
5175 } else if (mddev->bitmap) {
5176 ret = sprintf(page, "bitmap\n");
5177 } else if (mddev->pers) {
5178 if (mddev->pers->sync_request)
5179 ret = sprintf(page, "resync\n");
5180 else
5181 ret = sprintf(page, "none\n");
5182 } else {
5183 ret = sprintf(page, "unknown\n");
5184 }
5185
5186 return ret;
5187}
5188
5189static ssize_t
5190consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5191{
5192 int err = 0;
5193
5194 if (mddev->pers) {
5195 if (mddev->pers->change_consistency_policy)
5196 err = mddev->pers->change_consistency_policy(mddev, buf);
5197 else
5198 err = -EBUSY;
5199 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5200 set_bit(MD_HAS_PPL, &mddev->flags);
5201 } else {
5202 err = -EINVAL;
5203 }
5204
5205 return err ? err : len;
5206}
5207
5208static struct md_sysfs_entry md_consistency_policy =
5209__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5210 consistency_policy_store);
5211
5212static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5213{
5214 return sprintf(page, "%d\n", mddev->fail_last_dev);
5215}
5216
5217
5218
5219
5220
5221static ssize_t
5222fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5223{
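	/*
	 * When set, the last remaining working device in the array is
	 * allowed to be failed (forcibly removed).
	 */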
5224 int ret;
5225 bool value;
5226
5227 ret = kstrtobool(buf, &value);
5228 if (ret)
5229 return ret;
5230
5231 if (value != mddev->fail_last_dev)
5232 mddev->fail_last_dev = value;
5233
5234 return len;
5235}
5236static struct md_sysfs_entry md_fail_last_dev =
5237__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5238 fail_last_dev_store);
5239
5240static struct attribute *md_default_attrs[] = {
5241 &md_level.attr,
5242 &md_layout.attr,
5243 &md_raid_disks.attr,
5244 &md_chunk_size.attr,
5245 &md_size.attr,
5246 &md_resync_start.attr,
5247 &md_metadata.attr,
5248 &md_new_device.attr,
5249 &md_safe_delay.attr,
5250 &md_array_state.attr,
5251 &md_reshape_position.attr,
5252 &md_reshape_direction.attr,
5253 &md_array_size.attr,
5254 &max_corr_read_errors.attr,
5255 &md_consistency_policy.attr,
5256 &md_fail_last_dev.attr,
5257 NULL,
5258};
5259
5260static struct attribute *md_redundancy_attrs[] = {
5261 &md_scan_mode.attr,
5262 &md_last_scan_mode.attr,
5263 &md_mismatches.attr,
5264 &md_sync_min.attr,
5265 &md_sync_max.attr,
5266 &md_sync_speed.attr,
5267 &md_sync_force_parallel.attr,
5268 &md_sync_completed.attr,
5269 &md_min_sync.attr,
5270 &md_max_sync.attr,
5271 &md_suspend_lo.attr,
5272 &md_suspend_hi.attr,
5273 &md_bitmap.attr,
5274 &md_degraded.attr,
5275 NULL,
5276};
5277static struct attribute_group md_redundancy_group = {
5278 .name = NULL,
5279 .attrs = md_redundancy_attrs,
5280};
5281
5282static ssize_t
5283md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5284{
5285 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5286 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5287 ssize_t rv;
5288
5289 if (!entry->show)
5290 return -EIO;
5291 spin_lock(&all_mddevs_lock);
5292 if (list_empty(&mddev->all_mddevs)) {
5293 spin_unlock(&all_mddevs_lock);
5294 return -EBUSY;
5295 }
5296 mddev_get(mddev);
5297 spin_unlock(&all_mddevs_lock);
5298
5299 rv = entry->show(mddev, page);
5300 mddev_put(mddev);
5301 return rv;
5302}
5303
5304static ssize_t
5305md_attr_store(struct kobject *kobj, struct attribute *attr,
5306 const char *page, size_t length)
5307{
5308 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5309 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5310 ssize_t rv;
5311
5312 if (!entry->store)
5313 return -EIO;
5314 if (!capable(CAP_SYS_ADMIN))
5315 return -EACCES;
5316 spin_lock(&all_mddevs_lock);
5317 if (list_empty(&mddev->all_mddevs)) {
5318 spin_unlock(&all_mddevs_lock);
5319 return -EBUSY;
5320 }
5321 mddev_get(mddev);
5322 spin_unlock(&all_mddevs_lock);
5323 rv = entry->store(mddev, page, length);
5324 mddev_put(mddev);
5325 return rv;
5326}
5327
5328static void md_free(struct kobject *ko)
5329{
5330 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5331
5332 if (mddev->sysfs_state)
5333 sysfs_put(mddev->sysfs_state);
5334
5335 if (mddev->gendisk)
5336 del_gendisk(mddev->gendisk);
5337 if (mddev->queue)
5338 blk_cleanup_queue(mddev->queue);
5339 if (mddev->gendisk)
5340 put_disk(mddev->gendisk);
5341 percpu_ref_exit(&mddev->writes_pending);
5342
5343 bioset_exit(&mddev->bio_set);
5344 bioset_exit(&mddev->sync_set);
5345 kfree(mddev);
5346}
5347
5348static const struct sysfs_ops md_sysfs_ops = {
5349 .show = md_attr_show,
5350 .store = md_attr_store,
5351};
5352static struct kobj_type md_ktype = {
5353 .release = md_free,
5354 .sysfs_ops = &md_sysfs_ops,
5355 .default_attrs = md_default_attrs,
5356};
5357
5358int mdp_major = 0;
5359
5360static void mddev_delayed_delete(struct work_struct *ws)
5361{
5362 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5363
5364 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5365 kobject_del(&mddev->kobj);
5366 kobject_put(&mddev->kobj);
5367}
5368
5369static void no_op(struct percpu_ref *r) {}
5370
5371int mddev_init_writes_pending(struct mddev *mddev)
5372{
5373 if (mddev->writes_pending.percpu_count_ptr)
5374 return 0;
5375 if (percpu_ref_init(&mddev->writes_pending, no_op,
5376 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5377 return -ENOMEM;
5378
5379 percpu_ref_put(&mddev->writes_pending);
5380 return 0;
5381}
5382EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5383
5384static int md_alloc(dev_t dev, char *name)
5385{
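	/*
	 * If dev is zero, "name" names the device to allocate with an
	 * arbitrary minor number ("md_..." style).  If dev is non-zero it
	 * must carry a major of MD_MAJOR or mdp_major; then a NULL "name"
	 * means the device is being created by opening a node in /dev,
	 * while a non-NULL "name" means it is being created by writing to
	 * /sys/module/md_mod/parameters/new_array.
	 */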
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395 static DEFINE_MUTEX(disks_mutex);
5396 struct mddev *mddev = mddev_find(dev);
5397 struct gendisk *disk;
5398 int partitioned;
5399 int shift;
5400 int unit;
5401 int error;
5402
5403 if (!mddev)
5404 return -ENODEV;
5405
5406 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5407 shift = partitioned ? MdpMinorShift : 0;
5408 unit = MINOR(mddev->unit) >> shift;
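	/*
	 * md_misc_wq runs mddev_delayed_delete(); flush it below so that
	 * any previous instance of this device is completely removed
	 * before a new one is set up.
	 */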
5409
5410
5411
5412
5413 flush_workqueue(md_misc_wq);
5414
5415 mutex_lock(&disks_mutex);
5416 error = -EEXIST;
5417 if (mddev->gendisk)
5418 goto abort;
5419
5420 if (name && !dev) {
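		/* Need to ensure that "name" is not a duplicate. */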
5421
5422
5423 struct mddev *mddev2;
5424 spin_lock(&all_mddevs_lock);
5425
5426 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5427 if (mddev2->gendisk &&
5428 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5429 spin_unlock(&all_mddevs_lock);
5430 goto abort;
5431 }
5432 spin_unlock(&all_mddevs_lock);
5433 }
5434 if (name && dev)
5435
5436
5437
5438 mddev->hold_active = UNTIL_STOP;
5439
5440 error = -ENOMEM;
5441 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5442 if (!mddev->queue)
5443 goto abort;
5444 mddev->queue->queuedata = mddev;
5445
5446 blk_queue_make_request(mddev->queue, md_make_request);
5447 blk_set_stacking_limits(&mddev->queue->limits);
5448
5449 disk = alloc_disk(1 << shift);
5450 if (!disk) {
5451 blk_cleanup_queue(mddev->queue);
5452 mddev->queue = NULL;
5453 goto abort;
5454 }
5455 disk->major = MAJOR(mddev->unit);
5456 disk->first_minor = unit << shift;
5457 if (name)
5458 strcpy(disk->disk_name, name);
5459 else if (partitioned)
5460 sprintf(disk->disk_name, "md_d%d", unit);
5461 else
5462 sprintf(disk->disk_name, "md%d", unit);
5463 disk->fops = &md_fops;
5464 disk->private_data = mddev;
5465 disk->queue = mddev->queue;
5466 blk_queue_write_cache(mddev->queue, true, true);
5467
5468
5469
5470
5471 disk->flags |= GENHD_FL_EXT_DEVT;
5472 mddev->gendisk = disk;
5473
5474
5475
5476 mutex_lock(&mddev->open_mutex);
5477 add_disk(disk);
5478
5479 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5480 if (error) {
5481
5482
5483
5484 pr_debug("md: cannot register %s/md - name in use\n",
5485 disk->disk_name);
5486 error = 0;
5487 }
5488 if (mddev->kobj.sd &&
5489 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5490 pr_debug("pointless warning\n");
5491 mutex_unlock(&mddev->open_mutex);
5492 abort:
5493 mutex_unlock(&disks_mutex);
5494 if (!error && mddev->kobj.sd) {
5495 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5496 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5497 }
5498 mddev_put(mddev);
5499 return error;
5500}
5501
5502static struct kobject *md_probe(dev_t dev, int *part, void *data)
5503{
5504 if (create_on_open)
5505 md_alloc(dev, NULL);
5506 return NULL;
5507}
5508
5509static int add_named_array(const char *val, const struct kernel_param *kp)
5510{
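	/*
	 * "val" must be "md_*" or "mdNNN".  "md_*" allocates an array with
	 * an arbitrary free minor number and uses val as the name; "mdNNN"
	 * allocates the array with minor number NNN, which must not already
	 * be in use.
	 */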
5511
5512
5513
5514
5515
5516
5517
5518 int len = strlen(val);
5519 char buf[DISK_NAME_LEN];
5520 unsigned long devnum;
5521
5522 while (len && val[len-1] == '\n')
5523 len--;
5524 if (len >= DISK_NAME_LEN)
5525 return -E2BIG;
5526 strlcpy(buf, val, len+1);
5527 if (strncmp(buf, "md_", 3) == 0)
5528 return md_alloc(0, buf);
5529 if (strncmp(buf, "md", 2) == 0 &&
5530 isdigit(buf[2]) &&
5531 kstrtoul(buf+2, 10, &devnum) == 0 &&
5532 devnum <= MINORMASK)
5533 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5534
5535 return -EINVAL;
5536}
5537
5538static void md_safemode_timeout(struct timer_list *t)
5539{
5540 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5541
5542 mddev->safemode = 1;
5543 if (mddev->external)
5544 sysfs_notify_dirent_safe(mddev->sysfs_state);
5545
5546 md_wakeup_thread(mddev->thread);
5547}
5548
5549static int start_dirty_degraded;
5550
5551int md_run(struct mddev *mddev)
5552{
5553 int err;
5554 struct md_rdev *rdev;
5555 struct md_personality *pers;
5556
5557 if (list_empty(&mddev->disks))
5558
5559 return -EINVAL;
5560
5561 if (mddev->pers)
5562 return -EBUSY;
5563
5564 if (mddev->sysfs_active)
5565 return -EBUSY;
5566
5567
5568
5569
5570 if (!mddev->raid_disks) {
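		/*
		 * No geometry recorded yet: for a persistent array, analyse
		 * the member superblocks to discover the configuration.
		 */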
5571 if (!mddev->persistent)
5572 return -EINVAL;
5573 analyze_sbs(mddev);
5574 }
5575
5576 if (mddev->level != LEVEL_NONE)
5577 request_module("md-level-%d", mddev->level);
5578 else if (mddev->clevel[0])
5579 request_module("md-%s", mddev->clevel);
5580
5581
5582
5583
5584
5585
5586 mddev->has_superblocks = false;
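	/*
	 * Drop any cached buffers on the member devices - from now on the
	 * only valid external interface is through the md device - and run
	 * some basic sanity checks on each rdev.
	 */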
5587 rdev_for_each(rdev, mddev) {
5588 if (test_bit(Faulty, &rdev->flags))
5589 continue;
5590 sync_blockdev(rdev->bdev);
5591 invalidate_bdev(rdev->bdev);
5592 if (mddev->ro != 1 &&
5593 (bdev_read_only(rdev->bdev) ||
5594 bdev_read_only(rdev->meta_bdev))) {
5595 mddev->ro = 1;
5596 if (mddev->gendisk)
5597 set_disk_ro(mddev->gendisk, 1);
5598 }
5599
5600 if (rdev->sb_page)
5601 mddev->has_superblocks = true;
5602
5603
5604
5605
5606
5607 if (rdev->meta_bdev) {
5608 ;
5609 } else if (rdev->data_offset < rdev->sb_start) {
5610 if (mddev->dev_sectors &&
5611 rdev->data_offset + mddev->dev_sectors
5612 > rdev->sb_start) {
5613 pr_warn("md: %s: data overlaps metadata\n",
5614 mdname(mddev));
5615 return -EINVAL;
5616 }
5617 } else {
5618 if (rdev->sb_start + rdev->sb_size/512
5619 > rdev->data_offset) {
5620 pr_warn("md: %s: metadata overlaps data\n",
5621 mdname(mddev));
5622 return -EINVAL;
5623 }
5624 }
5625 sysfs_notify_dirent_safe(rdev->sysfs_state);
5626 }
5627
5628 if (!bioset_initialized(&mddev->bio_set)) {
5629 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5630 if (err)
5631 return err;
5632 }
5633 if (!bioset_initialized(&mddev->sync_set)) {
5634 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5635 if (err)
5636 return err;
5637 }
5638
5639 spin_lock(&pers_lock);
5640 pers = find_pers(mddev->level, mddev->clevel);
5641 if (!pers || !try_module_get(pers->owner)) {
5642 spin_unlock(&pers_lock);
5643 if (mddev->level != LEVEL_NONE)
5644 pr_warn("md: personality for level %d is not loaded!\n",
5645 mddev->level);
5646 else
5647 pr_warn("md: personality for level %s is not loaded!\n",
5648 mddev->clevel);
5649 err = -EINVAL;
5650 goto abort;
5651 }
5652 spin_unlock(&pers_lock);
5653 if (mddev->level != pers->level) {
5654 mddev->level = pers->level;
5655 mddev->new_level = pers->level;
5656 }
5657 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5658
5659 if (mddev->reshape_position != MaxSector &&
5660 pers->start_reshape == NULL) {
5661
5662 module_put(pers->owner);
5663 err = -EINVAL;
5664 goto abort;
5665 }
5666
5667 if (pers->sync_request) {
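		/*
		 * Warn if this looks like a silly configuration: two member
		 * devices sharing the same underlying physical disk.
		 */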
5668
5669
5670
5671 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5672 struct md_rdev *rdev2;
5673 int warned = 0;
5674
5675 rdev_for_each(rdev, mddev)
5676 rdev_for_each(rdev2, mddev) {
5677 if (rdev < rdev2 &&
5678 rdev->bdev->bd_contains ==
5679 rdev2->bdev->bd_contains) {
5680 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5681 mdname(mddev),
5682 bdevname(rdev->bdev,b),
5683 bdevname(rdev2->bdev,b2));
5684 warned = 1;
5685 }
5686 }
5687
5688 if (warned)
5689 pr_warn("True protection against single-disk failure might be compromised.\n");
5690 }
5691
5692 mddev->recovery = 0;
5693
5694 mddev->resync_max_sectors = mddev->dev_sectors;
5695
5696 mddev->ok_start_degraded = start_dirty_degraded;
5697
5698 if (start_readonly && mddev->ro == 0)
5699 mddev->ro = 2;
5700
5701 err = pers->run(mddev);
5702 if (err)
5703 pr_warn("md: pers->run() failed ...\n");
5704 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5705 WARN_ONCE(!mddev->external_size,
5706 "%s: default size too small, but 'external_size' not in effect?\n",
5707 __func__);
5708 pr_warn("md: invalid array_size %llu > default size %llu\n",
5709 (unsigned long long)mddev->array_sectors / 2,
5710 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5711 err = -EINVAL;
5712 }
5713 if (err == 0 && pers->sync_request &&
5714 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5715 struct bitmap *bitmap;
5716
5717 bitmap = md_bitmap_create(mddev, -1);
5718 if (IS_ERR(bitmap)) {
5719 err = PTR_ERR(bitmap);
5720 pr_warn("%s: failed to create bitmap (%d)\n",
5721 mdname(mddev), err);
5722 } else
5723 mddev->bitmap = bitmap;
5724
5725 }
5726 if (err)
5727 goto bitmap_abort;
5728
5729 if (mddev->bitmap_info.max_write_behind > 0) {
5730 bool creat_pool = false;
5731
5732 rdev_for_each(rdev, mddev) {
5733 if (test_bit(WriteMostly, &rdev->flags) &&
5734 rdev_init_wb(rdev))
5735 creat_pool = true;
5736 }
5737 if (creat_pool && mddev->wb_info_pool == NULL) {
5738 mddev->wb_info_pool =
5739 mempool_create_kmalloc_pool(NR_WB_INFOS,
5740 sizeof(struct wb_info));
5741 if (!mddev->wb_info_pool) {
5742 err = -ENOMEM;
5743 goto bitmap_abort;
5744 }
5745 }
5746 }
5747
5748 if (mddev->queue) {
5749 bool nonrot = true;
5750
5751 rdev_for_each(rdev, mddev) {
5752 if (rdev->raid_disk >= 0 &&
5753 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5754 nonrot = false;
5755 break;
5756 }
5757 }
5758 if (mddev->degraded)
5759 nonrot = false;
5760 if (nonrot)
5761 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5762 else
5763 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5764 mddev->queue->backing_dev_info->congested_data = mddev;
5765 mddev->queue->backing_dev_info->congested_fn = md_congested;
5766 }
5767 if (pers->sync_request) {
5768 if (mddev->kobj.sd &&
5769 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5770 pr_warn("md: cannot register extra attributes for %s\n",
5771 mdname(mddev));
5772 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5773 } else if (mddev->ro == 2)
5774 mddev->ro = 0;
5775
5776 atomic_set(&mddev->max_corr_read_errors,
5777 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5778 mddev->safemode = 0;
5779 if (mddev_is_clustered(mddev))
5780 mddev->safemode_delay = 0;
5781 else
5782 mddev->safemode_delay = (200 * HZ)/1000 +1;
5783 mddev->in_sync = 1;
5784 smp_wmb();
5785 spin_lock(&mddev->lock);
5786 mddev->pers = pers;
5787 spin_unlock(&mddev->lock);
5788 rdev_for_each(rdev, mddev)
5789 if (rdev->raid_disk >= 0)
5790 sysfs_link_rdev(mddev, rdev);
5791
5792 if (mddev->degraded && !mddev->ro)
5793
5794
5795
5796 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5797 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5798
5799 if (mddev->sb_flags)
5800 md_update_sb(mddev, 0);
5801
5802 md_new_event(mddev);
5803 return 0;
5804
5805bitmap_abort:
5806 mddev_detach(mddev);
5807 if (mddev->private)
5808 pers->free(mddev, mddev->private);
5809 mddev->private = NULL;
5810 module_put(pers->owner);
5811 md_bitmap_destroy(mddev);
5812abort:
5813 bioset_exit(&mddev->bio_set);
5814 bioset_exit(&mddev->sync_set);
5815 return err;
5816}
5817EXPORT_SYMBOL_GPL(md_run);
5818
5819static int do_md_run(struct mddev *mddev)
5820{
5821 int err;
5822
5823 set_bit(MD_NOT_READY, &mddev->flags);
5824 err = md_run(mddev);
5825 if (err)
5826 goto out;
5827 err = md_bitmap_load(mddev);
5828 if (err) {
5829 md_bitmap_destroy(mddev);
5830 goto out;
5831 }
5832
5833 if (mddev_is_clustered(mddev))
5834 md_allow_write(mddev);
5835
5836
5837 md_start(mddev);
5838
5839 md_wakeup_thread(mddev->thread);
5840 md_wakeup_thread(mddev->sync_thread);
5841
5842 set_capacity(mddev->gendisk, mddev->array_sectors);
5843 revalidate_disk(mddev->gendisk);
5844 clear_bit(MD_NOT_READY, &mddev->flags);
5845 mddev->changed = 1;
5846 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5847 sysfs_notify_dirent_safe(mddev->sysfs_state);
5848 sysfs_notify_dirent_safe(mddev->sysfs_action);
5849 sysfs_notify(&mddev->kobj, NULL, "degraded");
5850out:
5851 clear_bit(MD_NOT_READY, &mddev->flags);
5852 return err;
5853}
5854
5855int md_start(struct mddev *mddev)
5856{
5857 int ret = 0;
5858
5859 if (mddev->pers->start) {
5860 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5861 md_wakeup_thread(mddev->thread);
5862 ret = mddev->pers->start(mddev);
5863 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5864 md_wakeup_thread(mddev->sync_thread);
5865 }
5866 return ret;
5867}
5868EXPORT_SYMBOL_GPL(md_start);
5869
5870static int restart_array(struct mddev *mddev)
5871{
5872 struct gendisk *disk = mddev->gendisk;
5873 struct md_rdev *rdev;
5874 bool has_journal = false;
5875 bool has_readonly = false;
5876
5877
5878 if (list_empty(&mddev->disks))
5879 return -ENXIO;
5880 if (!mddev->pers)
5881 return -EINVAL;
5882 if (!mddev->ro)
5883 return -EBUSY;
5884
5885 rcu_read_lock();
5886 rdev_for_each_rcu(rdev, mddev) {
5887 if (test_bit(Journal, &rdev->flags) &&
5888 !test_bit(Faulty, &rdev->flags))
5889 has_journal = true;
5890 if (bdev_read_only(rdev->bdev))
5891 has_readonly = true;
5892 }
5893 rcu_read_unlock();
5894 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
5895
5896 return -EINVAL;
5897 if (has_readonly)
5898 return -EROFS;
5899
5900 mddev->safemode = 0;
5901 mddev->ro = 0;
5902 set_disk_ro(disk, 0);
5903 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5904
5905 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5906 md_wakeup_thread(mddev->thread);
5907 md_wakeup_thread(mddev->sync_thread);
5908 sysfs_notify_dirent_safe(mddev->sysfs_state);
5909 return 0;
5910}
5911
5912static void md_clean(struct mddev *mddev)
5913{
5914 mddev->array_sectors = 0;
5915 mddev->external_size = 0;
5916 mddev->dev_sectors = 0;
5917 mddev->raid_disks = 0;
5918 mddev->recovery_cp = 0;
5919 mddev->resync_min = 0;
5920 mddev->resync_max = MaxSector;
5921 mddev->reshape_position = MaxSector;
5922 mddev->external = 0;
5923 mddev->persistent = 0;
5924 mddev->level = LEVEL_NONE;
5925 mddev->clevel[0] = 0;
5926 mddev->flags = 0;
5927 mddev->sb_flags = 0;
5928 mddev->ro = 0;
5929 mddev->metadata_type[0] = 0;
5930 mddev->chunk_sectors = 0;
5931 mddev->ctime = mddev->utime = 0;
5932 mddev->layout = 0;
5933 mddev->max_disks = 0;
5934 mddev->events = 0;
5935 mddev->can_decrease_events = 0;
5936 mddev->delta_disks = 0;
5937 mddev->reshape_backwards = 0;
5938 mddev->new_level = LEVEL_NONE;
5939 mddev->new_layout = 0;
5940 mddev->new_chunk_sectors = 0;
5941 mddev->curr_resync = 0;
5942 atomic64_set(&mddev->resync_mismatches, 0);
5943 mddev->suspend_lo = mddev->suspend_hi = 0;
5944 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5945 mddev->recovery = 0;
5946 mddev->in_sync = 0;
5947 mddev->changed = 0;
5948 mddev->degraded = 0;
5949 mddev->safemode = 0;
5950 mddev->private = NULL;
5951 mddev->cluster_info = NULL;
5952 mddev->bitmap_info.offset = 0;
5953 mddev->bitmap_info.default_offset = 0;
5954 mddev->bitmap_info.default_space = 0;
5955 mddev->bitmap_info.chunksize = 0;
5956 mddev->bitmap_info.daemon_sleep = 0;
5957 mddev->bitmap_info.max_write_behind = 0;
5958 mddev->bitmap_info.nodes = 0;
5959}
5960
5961static void __md_stop_writes(struct mddev *mddev)
5962{
5963 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5964 flush_workqueue(md_misc_wq);
5965 if (mddev->sync_thread) {
5966 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5967 md_reap_sync_thread(mddev);
5968 }
5969
5970 del_timer_sync(&mddev->safemode_timer);
5971
5972 if (mddev->pers && mddev->pers->quiesce) {
5973 mddev->pers->quiesce(mddev, 1);
5974 mddev->pers->quiesce(mddev, 0);
5975 }
5976 md_bitmap_flush(mddev);
5977
5978 if (mddev->ro == 0 &&
5979 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5980 mddev->sb_flags)) {
5981
5982 if (!mddev_is_clustered(mddev))
5983 mddev->in_sync = 1;
5984 md_update_sb(mddev, 1);
5985 }
5986 mempool_destroy(mddev->wb_info_pool);
5987 mddev->wb_info_pool = NULL;
5988}
5989
5990void md_stop_writes(struct mddev *mddev)
5991{
5992 mddev_lock_nointr(mddev);
5993 __md_stop_writes(mddev);
5994 mddev_unlock(mddev);
5995}
5996EXPORT_SYMBOL_GPL(md_stop_writes);
5997
5998static void mddev_detach(struct mddev *mddev)
5999{
6000 md_bitmap_wait_behind_writes(mddev);
6001 if (mddev->pers && mddev->pers->quiesce) {
6002 mddev->pers->quiesce(mddev, 1);
6003 mddev->pers->quiesce(mddev, 0);
6004 }
6005 md_unregister_thread(&mddev->thread);
6006 if (mddev->queue)
6007 blk_sync_queue(mddev->queue);
6008}
6009
6010static void __md_stop(struct mddev *mddev)
6011{
6012 struct md_personality *pers = mddev->pers;
6013 md_bitmap_destroy(mddev);
6014 mddev_detach(mddev);
6015
6016 flush_workqueue(md_misc_wq);
6017 spin_lock(&mddev->lock);
6018 mddev->pers = NULL;
6019 spin_unlock(&mddev->lock);
6020 pers->free(mddev, mddev->private);
6021 mddev->private = NULL;
6022 if (pers->sync_request && mddev->to_remove == NULL)
6023 mddev->to_remove = &md_redundancy_group;
6024 module_put(pers->owner);
6025 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6026}
6027
6028void md_stop(struct mddev *mddev)
6029{
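	/*
	 * Stop the array and free the attached data structures.  This
	 * entry point is used by dm-raid.
	 */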
6030
6031
6032
6033 __md_stop(mddev);
6034 bioset_exit(&mddev->bio_set);
6035 bioset_exit(&mddev->sync_set);
6036}
6037
6038EXPORT_SYMBOL_GPL(md_stop);
6039
6040static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6041{
6042 int err = 0;
6043 int did_freeze = 0;
6044
6045 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6046 did_freeze = 1;
6047 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6048 md_wakeup_thread(mddev->thread);
6049 }
6050 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6051 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6052 if (mddev->sync_thread)
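		/*
		 * The thread might be blocked waiting for a metadata update
		 * which will now never happen; wake it so it can see
		 * MD_RECOVERY_INTR.
		 */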
6053
6054
6055 wake_up_process(mddev->sync_thread->tsk);
6056
6057 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6058 return -EBUSY;
6059 mddev_unlock(mddev);
6060 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6061 &mddev->recovery));
6062 wait_event(mddev->sb_wait,
6063 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6064 mddev_lock_nointr(mddev);
6065
6066 mutex_lock(&mddev->open_mutex);
6067 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6068 mddev->sync_thread ||
6069 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6070 pr_warn("md: %s still in use.\n",mdname(mddev));
6071 if (did_freeze) {
6072 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6073 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6074 md_wakeup_thread(mddev->thread);
6075 }
6076 err = -EBUSY;
6077 goto out;
6078 }
6079 if (mddev->pers) {
6080 __md_stop_writes(mddev);
6081
6082 err = -ENXIO;
6083 if (mddev->ro==1)
6084 goto out;
6085 mddev->ro = 1;
6086 set_disk_ro(mddev->gendisk, 1);
6087 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6088 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6089 md_wakeup_thread(mddev->thread);
6090 sysfs_notify_dirent_safe(mddev->sysfs_state);
6091 err = 0;
6092 }
6093out:
6094 mutex_unlock(&mddev->open_mutex);
6095 return err;
6096}
6097
6098
6099
6100
6101
6102static int do_md_stop(struct mddev *mddev, int mode,
6103 struct block_device *bdev)
6104{
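	/*
	 * mode 0 - completely stop and disassemble the array; any other
	 * mode stops the array without tearing it down.
	 */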
6105 struct gendisk *disk = mddev->gendisk;
6106 struct md_rdev *rdev;
6107 int did_freeze = 0;
6108
6109 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6110 did_freeze = 1;
6111 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6112 md_wakeup_thread(mddev->thread);
6113 }
6114 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6115 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6116 if (mddev->sync_thread)
6117
6118
6119 wake_up_process(mddev->sync_thread->tsk);
6120
6121 mddev_unlock(mddev);
6122 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6123 !test_bit(MD_RECOVERY_RUNNING,
6124 &mddev->recovery)));
6125 mddev_lock_nointr(mddev);
6126
6127 mutex_lock(&mddev->open_mutex);
6128 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6129 mddev->sysfs_active ||
6130 mddev->sync_thread ||
6131 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6132 pr_warn("md: %s still in use.\n",mdname(mddev));
6133 mutex_unlock(&mddev->open_mutex);
6134 if (did_freeze) {
6135 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6136 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6137 md_wakeup_thread(mddev->thread);
6138 }
6139 return -EBUSY;
6140 }
6141 if (mddev->pers) {
6142 if (mddev->ro)
6143 set_disk_ro(disk, 0);
6144
6145 __md_stop_writes(mddev);
6146 __md_stop(mddev);
6147 mddev->queue->backing_dev_info->congested_fn = NULL;
6148
6149
6150 sysfs_notify_dirent_safe(mddev->sysfs_state);
6151
6152 rdev_for_each(rdev, mddev)
6153 if (rdev->raid_disk >= 0)
6154 sysfs_unlink_rdev(mddev, rdev);
6155
6156 set_capacity(disk, 0);
6157 mutex_unlock(&mddev->open_mutex);
6158 mddev->changed = 1;
6159 revalidate_disk(disk);
6160
6161 if (mddev->ro)
6162 mddev->ro = 0;
6163 } else
6164 mutex_unlock(&mddev->open_mutex);
6165
6166
6167
6168 if (mode == 0) {
6169 pr_info("md: %s stopped.\n", mdname(mddev));
6170
6171 if (mddev->bitmap_info.file) {
6172 struct file *f = mddev->bitmap_info.file;
6173 spin_lock(&mddev->lock);
6174 mddev->bitmap_info.file = NULL;
6175 spin_unlock(&mddev->lock);
6176 fput(f);
6177 }
6178 mddev->bitmap_info.offset = 0;
6179
6180 export_array(mddev);
6181
6182 md_clean(mddev);
6183 if (mddev->hold_active == UNTIL_STOP)
6184 mddev->hold_active = 0;
6185 }
6186 md_new_event(mddev);
6187 sysfs_notify_dirent_safe(mddev->sysfs_state);
6188 return 0;
6189}
6190
6191#ifndef MODULE
6192static void autorun_array(struct mddev *mddev)
6193{
6194 struct md_rdev *rdev;
6195 int err;
6196
6197 if (list_empty(&mddev->disks))
6198 return;
6199
6200 pr_info("md: running: ");
6201
6202 rdev_for_each(rdev, mddev) {
6203 char b[BDEVNAME_SIZE];
6204 pr_cont("<%s>", bdevname(rdev->bdev,b));
6205 }
6206 pr_cont("\n");
6207
6208 err = do_md_run(mddev);
6209 if (err) {
6210 pr_warn("md: do_md_run() returned %d\n", err);
6211 do_md_stop(mddev, 0, NULL);
6212 }
6213}
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227static void autorun_devices(int part)
6228{
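	/*
	 * Try to assemble arrays from all disks collected so far in
	 * pending_raid_disks: take the first pending disk, gather every
	 * disk whose 0.90 superblock matches it, create the corresponding
	 * mddev and try to run it.
	 */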
6229 struct md_rdev *rdev0, *rdev, *tmp;
6230 struct mddev *mddev;
6231 char b[BDEVNAME_SIZE];
6232
6233 pr_info("md: autorun ...\n");
6234 while (!list_empty(&pending_raid_disks)) {
6235 int unit;
6236 dev_t dev;
6237 LIST_HEAD(candidates);
6238 rdev0 = list_entry(pending_raid_disks.next,
6239 struct md_rdev, same_set);
6240
6241 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6242 INIT_LIST_HEAD(&candidates);
6243 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6244 if (super_90_load(rdev, rdev0, 0) >= 0) {
6245 pr_debug("md: adding %s ...\n",
6246 bdevname(rdev->bdev,b));
6247 list_move(&rdev->same_set, &candidates);
6248 }
6249
6250
6251
6252
6253
6254 if (part) {
6255 dev = MKDEV(mdp_major,
6256 rdev0->preferred_minor << MdpMinorShift);
6257 unit = MINOR(dev) >> MdpMinorShift;
6258 } else {
6259 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6260 unit = MINOR(dev);
6261 }
6262 if (rdev0->preferred_minor != unit) {
6263 pr_warn("md: unit number in %s is bad: %d\n",
6264 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6265 break;
6266 }
6267
6268 md_probe(dev, NULL, NULL);
6269 mddev = mddev_find(dev);
6270 if (!mddev || !mddev->gendisk) {
6271 if (mddev)
6272 mddev_put(mddev);
6273 break;
6274 }
6275 if (mddev_lock(mddev))
6276 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6277 else if (mddev->raid_disks || mddev->major_version
6278 || !list_empty(&mddev->disks)) {
6279 pr_warn("md: %s already running, cannot run %s\n",
6280 mdname(mddev), bdevname(rdev0->bdev,b));
6281 mddev_unlock(mddev);
6282 } else {
6283 pr_debug("md: created %s\n", mdname(mddev));
6284 mddev->persistent = 1;
6285 rdev_for_each_list(rdev, tmp, &candidates) {
6286 list_del_init(&rdev->same_set);
6287 if (bind_rdev_to_array(rdev, mddev))
6288 export_rdev(rdev);
6289 }
6290 autorun_array(mddev);
6291 mddev_unlock(mddev);
6292 }
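		/*
		 * On success "candidates" is empty; on error it still holds
		 * devices that could not be bound, so export them here.
		 */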
6293
6294
6295
6296 rdev_for_each_list(rdev, tmp, &candidates) {
6297 list_del_init(&rdev->same_set);
6298 export_rdev(rdev);
6299 }
6300 mddev_put(mddev);
6301 }
6302 pr_info("md: ... autorun DONE.\n");
6303}
6304#endif
6305
6306static int get_version(void __user *arg)
6307{
6308 mdu_version_t ver;
6309
6310 ver.major = MD_MAJOR_VERSION;
6311 ver.minor = MD_MINOR_VERSION;
6312 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6313
6314 if (copy_to_user(arg, &ver, sizeof(ver)))
6315 return -EFAULT;
6316
6317 return 0;
6318}
6319
6320static int get_array_info(struct mddev *mddev, void __user *arg)
6321{
6322 mdu_array_info_t info;
6323 int nr,working,insync,failed,spare;
6324 struct md_rdev *rdev;
6325
6326 nr = working = insync = failed = spare = 0;
6327 rcu_read_lock();
6328 rdev_for_each_rcu(rdev, mddev) {
6329 nr++;
6330 if (test_bit(Faulty, &rdev->flags))
6331 failed++;
6332 else {
6333 working++;
6334 if (test_bit(In_sync, &rdev->flags))
6335 insync++;
6336 else if (test_bit(Journal, &rdev->flags))
6337
6338 ;
6339 else
6340 spare++;
6341 }
6342 }
6343 rcu_read_unlock();
6344
6345 info.major_version = mddev->major_version;
6346 info.minor_version = mddev->minor_version;
6347 info.patch_version = MD_PATCHLEVEL_VERSION;
6348 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6349 info.level = mddev->level;
6350 info.size = mddev->dev_sectors / 2;
6351 if (info.size != mddev->dev_sectors / 2)
6352 info.size = -1;
6353 info.nr_disks = nr;
6354 info.raid_disks = mddev->raid_disks;
6355 info.md_minor = mddev->md_minor;
6356 info.not_persistent= !mddev->persistent;
6357
6358 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6359 info.state = 0;
6360 if (mddev->in_sync)
6361 info.state = (1<<MD_SB_CLEAN);
6362 if (mddev->bitmap && mddev->bitmap_info.offset)
6363 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6364 if (mddev_is_clustered(mddev))
6365 info.state |= (1<<MD_SB_CLUSTERED);
6366 info.active_disks = insync;
6367 info.working_disks = working;
6368 info.failed_disks = failed;
6369 info.spare_disks = spare;
6370
6371 info.layout = mddev->layout;
6372 info.chunk_size = mddev->chunk_sectors << 9;
6373
6374 if (copy_to_user(arg, &info, sizeof(info)))
6375 return -EFAULT;
6376
6377 return 0;
6378}
6379
6380static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6381{
6382 mdu_bitmap_file_t *file = NULL;
6383 char *ptr;
6384 int err;
6385
6386 file = kzalloc(sizeof(*file), GFP_NOIO);
6387 if (!file)
6388 return -ENOMEM;
6389
6390 err = 0;
6391 spin_lock(&mddev->lock);
6392
6393 if (mddev->bitmap_info.file) {
6394 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6395 sizeof(file->pathname));
6396 if (IS_ERR(ptr))
6397 err = PTR_ERR(ptr);
6398 else
6399 memmove(file->pathname, ptr,
6400 sizeof(file->pathname)-(ptr-file->pathname));
6401 }
6402 spin_unlock(&mddev->lock);
6403
6404 if (err == 0 &&
6405 copy_to_user(arg, file, sizeof(*file)))
6406 err = -EFAULT;
6407
6408 kfree(file);
6409 return err;
6410}
6411
6412static int get_disk_info(struct mddev *mddev, void __user * arg)
6413{
6414 mdu_disk_info_t info;
6415 struct md_rdev *rdev;
6416
6417 if (copy_from_user(&info, arg, sizeof(info)))
6418 return -EFAULT;
6419
6420 rcu_read_lock();
6421 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6422 if (rdev) {
6423 info.major = MAJOR(rdev->bdev->bd_dev);
6424 info.minor = MINOR(rdev->bdev->bd_dev);
6425 info.raid_disk = rdev->raid_disk;
6426 info.state = 0;
6427 if (test_bit(Faulty, &rdev->flags))
6428 info.state |= (1<<MD_DISK_FAULTY);
6429 else if (test_bit(In_sync, &rdev->flags)) {
6430 info.state |= (1<<MD_DISK_ACTIVE);
6431 info.state |= (1<<MD_DISK_SYNC);
6432 }
6433 if (test_bit(Journal, &rdev->flags))
6434 info.state |= (1<<MD_DISK_JOURNAL);
6435 if (test_bit(WriteMostly, &rdev->flags))
6436 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6437 if (test_bit(FailFast, &rdev->flags))
6438 info.state |= (1<<MD_DISK_FAILFAST);
6439 } else {
6440 info.major = info.minor = 0;
6441 info.raid_disk = -1;
6442 info.state = (1<<MD_DISK_REMOVED);
6443 }
6444 rcu_read_unlock();
6445
6446 if (copy_to_user(arg, &info, sizeof(info)))
6447 return -EFAULT;
6448
6449 return 0;
6450}
6451
6452static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6453{
6454 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6455 struct md_rdev *rdev;
6456 dev_t dev = MKDEV(info->major,info->minor);
6457
6458 if (mddev_is_clustered(mddev) &&
6459 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6460 pr_warn("%s: Cannot add to clustered mddev.\n",
6461 mdname(mddev));
6462 return -EINVAL;
6463 }
6464
6465 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6466 return -EOVERFLOW;
6467
6468 if (!mddev->raid_disks) {
6469 int err;
6470
6471 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6472 if (IS_ERR(rdev)) {
6473 pr_warn("md: md_import_device returned %ld\n",
6474 PTR_ERR(rdev));
6475 return PTR_ERR(rdev);
6476 }
6477 if (!list_empty(&mddev->disks)) {
6478 struct md_rdev *rdev0
6479 = list_entry(mddev->disks.next,
6480 struct md_rdev, same_set);
6481 err = super_types[mddev->major_version]
6482 .load_super(rdev, rdev0, mddev->minor_version);
6483 if (err < 0) {
6484 pr_warn("md: %s has different UUID to %s\n",
6485 bdevname(rdev->bdev,b),
6486 bdevname(rdev0->bdev,b2));
6487 export_rdev(rdev);
6488 return -EINVAL;
6489 }
6490 }
6491 err = bind_rdev_to_array(rdev, mddev);
6492 if (err)
6493 export_rdev(rdev);
6494 return err;
6495 }
6496
6497
6498
6499
6500
6501
6502 if (mddev->pers) {
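		/*
		 * add_new_disk can be used once the array is assembled to
		 * add "hot spares"; they must already have a superblock
		 * written.
		 */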
6503 int err;
6504 if (!mddev->pers->hot_add_disk) {
6505 pr_warn("%s: personality does not support diskops!\n",
6506 mdname(mddev));
6507 return -EINVAL;
6508 }
6509 if (mddev->persistent)
6510 rdev = md_import_device(dev, mddev->major_version,
6511 mddev->minor_version);
6512 else
6513 rdev = md_import_device(dev, -1, -1);
6514 if (IS_ERR(rdev)) {
6515 pr_warn("md: md_import_device returned %ld\n",
6516 PTR_ERR(rdev));
6517 return PTR_ERR(rdev);
6518 }
6519
6520 if (!mddev->persistent) {
6521 if (info->state & (1<<MD_DISK_SYNC) &&
6522 info->raid_disk < mddev->raid_disks) {
6523 rdev->raid_disk = info->raid_disk;
6524 set_bit(In_sync, &rdev->flags);
6525 clear_bit(Bitmap_sync, &rdev->flags);
6526 } else
6527 rdev->raid_disk = -1;
6528 rdev->saved_raid_disk = rdev->raid_disk;
6529 } else
6530 super_types[mddev->major_version].
6531 validate_super(mddev, rdev);
6532 if ((info->state & (1<<MD_DISK_SYNC)) &&
6533 rdev->raid_disk != info->raid_disk) {
6534
6535
6536
6537 export_rdev(rdev);
6538 return -EINVAL;
6539 }
6540
6541 clear_bit(In_sync, &rdev->flags);
6542 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6543 set_bit(WriteMostly, &rdev->flags);
6544 else
6545 clear_bit(WriteMostly, &rdev->flags);
6546 if (info->state & (1<<MD_DISK_FAILFAST))
6547 set_bit(FailFast, &rdev->flags);
6548 else
6549 clear_bit(FailFast, &rdev->flags);
6550
6551 if (info->state & (1<<MD_DISK_JOURNAL)) {
6552 struct md_rdev *rdev2;
6553 bool has_journal = false;
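			/*
			 * A journal device cannot be added if one already
			 * exists or if the array uses a bitmap.
			 */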
6554
6555
6556 rdev_for_each(rdev2, mddev) {
6557 if (test_bit(Journal, &rdev2->flags)) {
6558 has_journal = true;
6559 break;
6560 }
6561 }
6562 if (has_journal || mddev->bitmap) {
6563 export_rdev(rdev);
6564 return -EBUSY;
6565 }
6566 set_bit(Journal, &rdev->flags);
6567 }
6568
6569
6570
6571 if (mddev_is_clustered(mddev)) {
6572 if (info->state & (1 << MD_DISK_CANDIDATE))
6573 set_bit(Candidate, &rdev->flags);
6574 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6575
6576 err = md_cluster_ops->add_new_disk(mddev, rdev);
6577 if (err) {
6578 export_rdev(rdev);
6579 return err;
6580 }
6581 }
6582 }
6583
6584 rdev->raid_disk = -1;
6585 err = bind_rdev_to_array(rdev, mddev);
6586
6587 if (err)
6588 export_rdev(rdev);
6589
6590 if (mddev_is_clustered(mddev)) {
6591 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6592 if (!err) {
6593 err = md_cluster_ops->new_disk_ack(mddev,
6594 err == 0);
6595 if (err)
6596 md_kick_rdev_from_array(rdev);
6597 }
6598 } else {
6599 if (err)
6600 md_cluster_ops->add_new_disk_cancel(mddev);
6601 else
6602 err = add_bound_rdev(rdev);
6603 }
6604
6605 } else if (!err)
6606 err = add_bound_rdev(rdev);
6607
6608 return err;
6609 }
6610
6611
6612
6613
6614 if (mddev->major_version != 0) {
6615 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6616 return -EINVAL;
6617 }
6618
6619 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6620 int err;
6621 rdev = md_import_device(dev, -1, 0);
6622 if (IS_ERR(rdev)) {
6623 pr_warn("md: error, md_import_device() returned %ld\n",
6624 PTR_ERR(rdev));
6625 return PTR_ERR(rdev);
6626 }
6627 rdev->desc_nr = info->number;
6628 if (info->raid_disk < mddev->raid_disks)
6629 rdev->raid_disk = info->raid_disk;
6630 else
6631 rdev->raid_disk = -1;
6632
6633 if (rdev->raid_disk < mddev->raid_disks)
6634 if (info->state & (1<<MD_DISK_SYNC))
6635 set_bit(In_sync, &rdev->flags);
6636
6637 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6638 set_bit(WriteMostly, &rdev->flags);
6639 if (info->state & (1<<MD_DISK_FAILFAST))
6640 set_bit(FailFast, &rdev->flags);
6641
6642 if (!mddev->persistent) {
6643 pr_debug("md: nonpersistent superblock ...\n");
6644 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6645 } else
6646 rdev->sb_start = calc_dev_sboffset(rdev);
6647 rdev->sectors = rdev->sb_start;
6648
6649 err = bind_rdev_to_array(rdev, mddev);
6650 if (err) {
6651 export_rdev(rdev);
6652 return err;
6653 }
6654 }
6655
6656 return 0;
6657}
6658
6659static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6660{
6661 char b[BDEVNAME_SIZE];
6662 struct md_rdev *rdev;
6663
6664 if (!mddev->pers)
6665 return -ENODEV;
6666
6667 rdev = find_rdev(mddev, dev);
6668 if (!rdev)
6669 return -ENXIO;
6670
6671 if (rdev->raid_disk < 0)
6672 goto kick_rdev;
6673
6674 clear_bit(Blocked, &rdev->flags);
6675 remove_and_add_spares(mddev, rdev);
6676
6677 if (rdev->raid_disk >= 0)
6678 goto busy;
6679
6680kick_rdev:
6681 if (mddev_is_clustered(mddev))
6682 md_cluster_ops->remove_disk(mddev, rdev);
6683
6684 md_kick_rdev_from_array(rdev);
6685 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6686 if (mddev->thread)
6687 md_wakeup_thread(mddev->thread);
6688 else
6689 md_update_sb(mddev, 1);
6690 md_new_event(mddev);
6691
6692 return 0;
6693busy:
6694 pr_debug("md: cannot remove active disk %s from %s ...\n",
6695 bdevname(rdev->bdev,b), mdname(mddev));
6696 return -EBUSY;
6697}
6698
6699static int hot_add_disk(struct mddev *mddev, dev_t dev)
6700{
6701 char b[BDEVNAME_SIZE];
6702 int err;
6703 struct md_rdev *rdev;
6704
6705 if (!mddev->pers)
6706 return -ENODEV;
6707
6708 if (mddev->major_version != 0) {
6709 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6710 mdname(mddev));
6711 return -EINVAL;
6712 }
6713 if (!mddev->pers->hot_add_disk) {
6714 pr_warn("%s: personality does not support diskops!\n",
6715 mdname(mddev));
6716 return -EINVAL;
6717 }
6718
6719 rdev = md_import_device(dev, -1, 0);
6720 if (IS_ERR(rdev)) {
6721 pr_warn("md: error, md_import_device() returned %ld\n",
6722 PTR_ERR(rdev));
6723 return -EINVAL;
6724 }
6725
6726 if (mddev->persistent)
6727 rdev->sb_start = calc_dev_sboffset(rdev);
6728 else
6729 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6730
6731 rdev->sectors = rdev->sb_start;
6732
6733 if (test_bit(Faulty, &rdev->flags)) {
6734 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6735 bdevname(rdev->bdev,b), mdname(mddev));
6736 err = -EINVAL;
6737 goto abort_export;
6738 }
6739
6740 clear_bit(In_sync, &rdev->flags);
6741 rdev->desc_nr = -1;
6742 rdev->saved_raid_disk = -1;
6743 err = bind_rdev_to_array(rdev, mddev);
6744 if (err)
6745 goto abort_export;
6746
6747
6748
6749
6750
6751
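/*
 * The device is bound but has no raid slot yet; it sits as a spare
 * until recovery places it.
 */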
6752 rdev->raid_disk = -1;
6753
6754 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6755 if (!mddev->thread)
6756 md_update_sb(mddev, 1);
6757
6758
6759
6760
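/*
 * Kick recovery: this spare may need to be added to the array
 * immediately.
 */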
6761 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6762 md_wakeup_thread(mddev->thread);
6763 md_new_event(mddev);
6764 return 0;
6765
6766abort_export:
6767 export_rdev(rdev);
6768 return err;
6769}
6770
6771static int set_bitmap_file(struct mddev *mddev, int fd)
6772{
6773 int err = 0;
6774
6775 if (mddev->pers) {
6776 if (!mddev->pers->quiesce || !mddev->thread)
6777 return -EBUSY;
6778 if (mddev->recovery || mddev->sync_thread)
6779 return -EBUSY;
6780
6781 }
6782
6783 if (fd >= 0) {
6784 struct inode *inode;
6785 struct file *f;
6786
6787 if (mddev->bitmap || mddev->bitmap_info.file)
6788 return -EEXIST;
6789 f = fget(fd);
6790
6791 if (f == NULL) {
6792 pr_warn("%s: error: failed to get bitmap file\n",
6793 mdname(mddev));
6794 return -EBADF;
6795 }
6796
6797 inode = f->f_mapping->host;
6798 if (!S_ISREG(inode->i_mode)) {
6799 pr_warn("%s: error: bitmap file must be a regular file\n",
6800 mdname(mddev));
6801 err = -EBADF;
6802 } else if (!(f->f_mode & FMODE_WRITE)) {
6803 pr_warn("%s: error: bitmap file must open for write\n",
6804 mdname(mddev));
6805 err = -EBADF;
6806 } else if (atomic_read(&inode->i_writecount) != 1) {
6807 pr_warn("%s: error: bitmap file is already in use\n",
6808 mdname(mddev));
6809 err = -EBUSY;
6810 }
6811 if (err) {
6812 fput(f);
6813 return err;
6814 }
6815 mddev->bitmap_info.file = f;
6816 mddev->bitmap_info.offset = 0;
6817 } else if (mddev->bitmap == NULL)
6818 return -ENOENT;
6819 err = 0;
6820 if (mddev->pers) {
6821 if (fd >= 0) {
6822 struct bitmap *bitmap;
6823
6824 bitmap = md_bitmap_create(mddev, -1);
6825 mddev_suspend(mddev);
6826 if (!IS_ERR(bitmap)) {
6827 mddev->bitmap = bitmap;
6828 err = md_bitmap_load(mddev);
6829 } else
6830 err = PTR_ERR(bitmap);
6831 if (err) {
6832 md_bitmap_destroy(mddev);
6833 fd = -1;
6834 }
6835 mddev_resume(mddev);
6836 } else if (fd < 0) {
6837 mddev_suspend(mddev);
6838 md_bitmap_destroy(mddev);
6839 mddev_resume(mddev);
6840 }
6841 }
6842 if (fd < 0) {
6843 struct file *f = mddev->bitmap_info.file;
6844 if (f) {
6845 spin_lock(&mddev->lock);
6846 mddev->bitmap_info.file = NULL;
6847 spin_unlock(&mddev->lock);
6848 fput(f);
6849 }
6850 }
6851
6852 return err;
6853}
6854
6855
6867
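/*
 * set_array_info is used in two ways:
 *  - raid_disks == 0: an existing array is being assembled and only
 *    the superblock version fields are recorded.
 *  - raid_disks  > 0: a new array is being created and level, size,
 *    layout, chunk size etc. describe its shape (always written with
 *    a 0.90.0 style superblock).
 */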
6868static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6869{
6870
6871 if (info->raid_disks == 0) {
6872
6873 if (info->major_version < 0 ||
6874 info->major_version >= ARRAY_SIZE(super_types) ||
6875 super_types[info->major_version].name == NULL) {
6876
6877 pr_warn("md: superblock version %d not known\n",
6878 info->major_version);
6879 return -EINVAL;
6880 }
6881 mddev->major_version = info->major_version;
6882 mddev->minor_version = info->minor_version;
6883 mddev->patch_version = info->patch_version;
6884 mddev->persistent = !info->not_persistent;
6885
6886
6887
6888 mddev->ctime = ktime_get_real_seconds();
6889 return 0;
6890 }
6891 mddev->major_version = MD_MAJOR_VERSION;
6892 mddev->minor_version = MD_MINOR_VERSION;
6893 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6894 mddev->ctime = ktime_get_real_seconds();
6895
6896 mddev->level = info->level;
6897 mddev->clevel[0] = 0;
6898 mddev->dev_sectors = 2 * (sector_t)info->size;
6899 mddev->raid_disks = info->raid_disks;
6900
6901
6902
6903 if (info->state & (1<<MD_SB_CLEAN))
6904 mddev->recovery_cp = MaxSector;
6905 else
6906 mddev->recovery_cp = 0;
6907 mddev->persistent = !info->not_persistent;
6908 mddev->external = 0;
6909
6910 mddev->layout = info->layout;
6911 if (mddev->level == 0)
6912
6913 mddev->layout = -1;
6914 mddev->chunk_sectors = info->chunk_size >> 9;
6915
6916 if (mddev->persistent) {
6917 mddev->max_disks = MD_SB_DISKS;
6918 mddev->flags = 0;
6919 mddev->sb_flags = 0;
6920 }
6921 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6922
6923 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6924 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6925 mddev->bitmap_info.offset = 0;
6926
6927 mddev->reshape_position = MaxSector;
6928
6929
6930
6931
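/* generate a 128 bit UUID for the new array */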
6932 get_random_bytes(mddev->uuid, 16);
6933
6934 mddev->new_level = mddev->level;
6935 mddev->new_chunk_sectors = mddev->chunk_sectors;
6936 mddev->new_layout = mddev->layout;
6937 mddev->delta_disks = 0;
6938 mddev->reshape_backwards = 0;
6939
6940 return 0;
6941}
6942
6943void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6944{
6945 lockdep_assert_held(&mddev->reconfig_mutex);
6946
6947 if (mddev->external_size)
6948 return;
6949
6950 mddev->array_sectors = array_sectors;
6951}
6952EXPORT_SYMBOL(md_set_array_sectors);
6953
6954static int update_size(struct mddev *mddev, sector_t num_sectors)
6955{
6956 struct md_rdev *rdev;
6957 int rv;
6958 int fit = (num_sectors == 0);
6959 sector_t old_dev_sectors = mddev->dev_sectors;
6960
6961 if (mddev->pers->resize == NULL)
6962 return -EINVAL;
6963
6964
6965
6966
6967
6968
6969
6970
6971
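/*
 * "num_sectors" is the amount of each device to use.  It can only be
 * changed while no resync or reconstruction is running and the array
 * is writable; num_sectors == 0 means "use the largest size that fits
 * on every device".
 */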
6972 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6973 mddev->sync_thread)
6974 return -EBUSY;
6975 if (mddev->ro)
6976 return -EROFS;
6977
6978 rdev_for_each(rdev, mddev) {
6979 sector_t avail = rdev->sectors;
6980
6981 if (fit && (num_sectors == 0 || num_sectors > avail))
6982 num_sectors = avail;
6983 if (avail < num_sectors)
6984 return -ENOSPC;
6985 }
6986 rv = mddev->pers->resize(mddev, num_sectors);
6987 if (!rv) {
6988 if (mddev_is_clustered(mddev))
6989 md_cluster_ops->update_size(mddev, old_dev_sectors);
6990 else if (mddev->queue) {
6991 set_capacity(mddev->gendisk, mddev->array_sectors);
6992 revalidate_disk(mddev->gendisk);
6993 }
6994 }
6995 return rv;
6996}
6997
6998static int update_raid_disks(struct mddev *mddev, int raid_disks)
6999{
7000 int rv;
7001 struct md_rdev *rdev;
7002
7003 if (mddev->pers->check_reshape == NULL)
7004 return -EINVAL;
7005 if (mddev->ro)
7006 return -EROFS;
7007 if (raid_disks <= 0 ||
7008 (mddev->max_disks && raid_disks >= mddev->max_disks))
7009 return -EINVAL;
7010 if (mddev->sync_thread ||
7011 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7012 mddev->reshape_position != MaxSector)
7013 return -EBUSY;
7014
7015 rdev_for_each(rdev, mddev) {
7016 if (mddev->raid_disks < raid_disks &&
7017 rdev->data_offset < rdev->new_data_offset)
7018 return -EINVAL;
7019 if (mddev->raid_disks > raid_disks &&
7020 rdev->data_offset > rdev->new_data_offset)
7021 return -EINVAL;
7022 }
7023
7024 mddev->delta_disks = raid_disks - mddev->raid_disks;
7025 if (mddev->delta_disks < 0)
7026 mddev->reshape_backwards = 1;
7027 else if (mddev->delta_disks > 0)
7028 mddev->reshape_backwards = 0;
7029
7030 rv = mddev->pers->check_reshape(mddev);
7031 if (rv < 0) {
7032 mddev->delta_disks = 0;
7033 mddev->reshape_backwards = 0;
7034 }
7035 return rv;
7036}
7037
7038
7039
7040
7041
7042
7043
7044
7045
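/*
 * update_array_info() changes the configuration of an active array.
 * The version, ctime, level, persistence and chunk size cannot be
 * changed, and normally only one of size, raid_disks, layout or
 * bitmap presence may change per call.
 */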
7046static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7047{
7048 int rv = 0;
7049 int cnt = 0;
7050 int state = 0;
7051
7052
7053 if (mddev->bitmap && mddev->bitmap_info.offset)
7054 state |= (1 << MD_SB_BITMAP_PRESENT);
7055
7056 if (mddev->major_version != info->major_version ||
7057 mddev->minor_version != info->minor_version ||
7058
7059 mddev->ctime != info->ctime ||
7060 mddev->level != info->level ||
7061
7062 mddev->persistent != !info->not_persistent ||
7063 mddev->chunk_sectors != info->chunk_size >> 9 ||
7064
7065 ((state^info->state) & 0xfffffe00)
7066 )
7067 return -EINVAL;
7068
7069 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7070 cnt++;
7071 if (mddev->raid_disks != info->raid_disks)
7072 cnt++;
7073 if (mddev->layout != info->layout)
7074 cnt++;
7075 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7076 cnt++;
7077 if (cnt == 0)
7078 return 0;
7079 if (cnt > 1)
7080 return -EINVAL;
7081
7082 if (mddev->layout != info->layout) {
7083
7084
7085
7086
7087 if (mddev->pers->check_reshape == NULL)
7088 return -EINVAL;
7089 else {
7090 mddev->new_layout = info->layout;
7091 rv = mddev->pers->check_reshape(mddev);
7092 if (rv)
7093 mddev->new_layout = mddev->layout;
7094 return rv;
7095 }
7096 }
7097 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7098 rv = update_size(mddev, (sector_t)info->size * 2);
7099
7100 if (mddev->raid_disks != info->raid_disks)
7101 rv = update_raid_disks(mddev, info->raid_disks);
7102
7103 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7104 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7105 rv = -EINVAL;
7106 goto err;
7107 }
7108 if (mddev->recovery || mddev->sync_thread) {
7109 rv = -EBUSY;
7110 goto err;
7111 }
7112 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7113 struct bitmap *bitmap;
7114
7115 if (mddev->bitmap) {
7116 rv = -EEXIST;
7117 goto err;
7118 }
7119 if (mddev->bitmap_info.default_offset == 0) {
7120 rv = -EINVAL;
7121 goto err;
7122 }
7123 mddev->bitmap_info.offset =
7124 mddev->bitmap_info.default_offset;
7125 mddev->bitmap_info.space =
7126 mddev->bitmap_info.default_space;
7127 bitmap = md_bitmap_create(mddev, -1);
7128 mddev_suspend(mddev);
7129 if (!IS_ERR(bitmap)) {
7130 mddev->bitmap = bitmap;
7131 rv = md_bitmap_load(mddev);
7132 } else
7133 rv = PTR_ERR(bitmap);
7134 if (rv)
7135 md_bitmap_destroy(mddev);
7136 mddev_resume(mddev);
7137 } else {
7138
7139 if (!mddev->bitmap) {
7140 rv = -ENOENT;
7141 goto err;
7142 }
7143 if (mddev->bitmap->storage.file) {
7144 rv = -EINVAL;
7145 goto err;
7146 }
7147 if (mddev->bitmap_info.nodes) {
7148
7149 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7150 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7151 rv = -EPERM;
7152 md_cluster_ops->unlock_all_bitmaps(mddev);
7153 goto err;
7154 }
7155
7156 mddev->bitmap_info.nodes = 0;
7157 md_cluster_ops->leave(mddev);
7158 }
7159 mddev_suspend(mddev);
7160 md_bitmap_destroy(mddev);
7161 mddev_resume(mddev);
7162 mddev->bitmap_info.offset = 0;
7163 }
7164 }
7165 md_update_sb(mddev, 1);
7166 return rv;
7167err:
7168 return rv;
7169}
7170
7171static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7172{
7173 struct md_rdev *rdev;
7174 int err = 0;
7175
7176 if (mddev->pers == NULL)
7177 return -ENODEV;
7178
7179 rcu_read_lock();
7180 rdev = md_find_rdev_rcu(mddev, dev);
7181 if (!rdev)
7182 err = -ENODEV;
7183 else {
7184 md_error(mddev, rdev);
7185 if (!test_bit(Faulty, &rdev->flags))
7186 err = -EBUSY;
7187 }
7188 rcu_read_unlock();
7189 return err;
7190}
7191
7192
7193
7194
7195
7196
7197
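/*
 * There is no natural CHS geometry for an MD array, so pretend we
 * have 2 heads and 4 sectors per track and derive the cylinder count
 * from the array size.
 */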
7198static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7199{
7200 struct mddev *mddev = bdev->bd_disk->private_data;
7201
7202 geo->heads = 2;
7203 geo->sectors = 4;
7204 geo->cylinders = mddev->array_sectors / 8;
7205 return 0;
7206}
7207
7208static inline bool md_ioctl_valid(unsigned int cmd)
7209{
7210 switch (cmd) {
7211 case ADD_NEW_DISK:
7212 case BLKROSET:
7213 case GET_ARRAY_INFO:
7214 case GET_BITMAP_FILE:
7215 case GET_DISK_INFO:
7216 case HOT_ADD_DISK:
7217 case HOT_REMOVE_DISK:
7218 case RAID_AUTORUN:
7219 case RAID_VERSION:
7220 case RESTART_ARRAY_RW:
7221 case RUN_ARRAY:
7222 case SET_ARRAY_INFO:
7223 case SET_BITMAP_FILE:
7224 case SET_DISK_FAULTY:
7225 case STOP_ARRAY:
7226 case STOP_ARRAY_RO:
7227 case CLUSTERED_DISK_NACK:
7228 return true;
7229 default:
7230 return false;
7231 }
7232}
7233
7234static int md_ioctl(struct block_device *bdev, fmode_t mode,
7235 unsigned int cmd, unsigned long arg)
7236{
7237 int err = 0;
7238 void __user *argp = (void __user *)arg;
7239 struct mddev *mddev = NULL;
7240 int ro;
7241 bool did_set_md_closing = false;
7242
7243 if (!md_ioctl_valid(cmd))
7244 return -ENOTTY;
7245
7246 switch (cmd) {
7247 case RAID_VERSION:
7248 case GET_ARRAY_INFO:
7249 case GET_DISK_INFO:
7250 break;
7251 default:
7252 if (!capable(CAP_SYS_ADMIN))
7253 return -EACCES;
7254 }
7255
7256
7257
7258
7259
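/* commands that deal with the RAID driver but not any particular array */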
7260 switch (cmd) {
7261 case RAID_VERSION:
7262 err = get_version(argp);
7263 goto out;
7264
7265#ifndef MODULE
7266 case RAID_AUTORUN:
7267 err = 0;
7268 autostart_arrays(arg);
7269 goto out;
7270#endif
7271 default:;
7272 }
7273
7274
7275
7276
7277
7278 mddev = bdev->bd_disk->private_data;
7279
7280 if (!mddev) {
7281 BUG();
7282 goto out;
7283 }
7284
7285
7286 switch (cmd) {
7287 case GET_ARRAY_INFO:
7288 if (!mddev->raid_disks && !mddev->external)
7289 err = -ENODEV;
7290 else
7291 err = get_array_info(mddev, argp);
7292 goto out;
7293
7294 case GET_DISK_INFO:
7295 if (!mddev->raid_disks && !mddev->external)
7296 err = -ENODEV;
7297 else
7298 err = get_disk_info(mddev, argp);
7299 goto out;
7300
7301 case SET_DISK_FAULTY:
7302 err = set_disk_faulty(mddev, new_decode_dev(arg));
7303 goto out;
7304
7305 case GET_BITMAP_FILE:
7306 err = get_bitmap_file(mddev, argp);
7307 goto out;
7308
7309 }
7310
7311 if (cmd == ADD_NEW_DISK)
7312
7313 flush_workqueue(md_misc_wq);
7314
7315 if (cmd == HOT_REMOVE_DISK)
7316
7317 wait_event_interruptible_timeout(mddev->sb_wait,
7318 !test_bit(MD_RECOVERY_NEEDED,
7319 &mddev->recovery),
7320 msecs_to_jiffies(5000));
7321 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
7322
7323
7324
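/*
 * Make sure nobody else has the array open, mark it as closing and
 * flush the page cache before it is stopped.
 */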
7325 mutex_lock(&mddev->open_mutex);
7326 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7327 mutex_unlock(&mddev->open_mutex);
7328 err = -EBUSY;
7329 goto out;
7330 }
7331 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7332 set_bit(MD_CLOSING, &mddev->flags);
7333 did_set_md_closing = true;
7334 mutex_unlock(&mddev->open_mutex);
7335 sync_blockdev(bdev);
7336 }
7337 err = mddev_lock(mddev);
7338 if (err) {
7339 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7340 err, cmd);
7341 goto out;
7342 }
7343
7344 if (cmd == SET_ARRAY_INFO) {
7345 mdu_array_info_t info;
7346 if (!arg)
7347 memset(&info, 0, sizeof(info));
7348 else if (copy_from_user(&info, argp, sizeof(info))) {
7349 err = -EFAULT;
7350 goto unlock;
7351 }
7352 if (mddev->pers) {
7353 err = update_array_info(mddev, &info);
7354 if (err) {
7355 pr_warn("md: couldn't update array info. %d\n", err);
7356 goto unlock;
7357 }
7358 goto unlock;
7359 }
7360 if (!list_empty(&mddev->disks)) {
7361 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7362 err = -EBUSY;
7363 goto unlock;
7364 }
7365 if (mddev->raid_disks) {
7366 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7367 err = -EBUSY;
7368 goto unlock;
7369 }
7370 err = set_array_info(mddev, &info);
7371 if (err) {
7372 pr_warn("md: couldn't set array info. %d\n", err);
7373 goto unlock;
7374 }
7375 goto unlock;
7376 }
7377
7378
7379
7380
7381
7382
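/*
 * Commands querying/configuring an existing array.  If we are not
 * initialised yet, only ADD_NEW_DISK, STOP_ARRAY, RUN_ARRAY and the
 * bitmap-file ioctls are allowed.
 */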
7383 if ((!mddev->raid_disks && !mddev->external)
7384 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7385 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7386 && cmd != GET_BITMAP_FILE) {
7387 err = -ENODEV;
7388 goto unlock;
7389 }
7390
7391
7392
7393
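/* commands even a read-only array can execute */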
7394 switch (cmd) {
7395 case RESTART_ARRAY_RW:
7396 err = restart_array(mddev);
7397 goto unlock;
7398
7399 case STOP_ARRAY:
7400 err = do_md_stop(mddev, 0, bdev);
7401 goto unlock;
7402
7403 case STOP_ARRAY_RO:
7404 err = md_set_readonly(mddev, bdev);
7405 goto unlock;
7406
7407 case HOT_REMOVE_DISK:
7408 err = hot_remove_disk(mddev, new_decode_dev(arg));
7409 goto unlock;
7410
7411 case ADD_NEW_DISK:
7412
7413
7414
7415
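/*
 * ADD_NEW_DISK can be served here (possibly read-only) only when
 * re-adding a device that is already in sync; anything else falls
 * through to the read-write path below.
 */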
7416 if (mddev->pers) {
7417 mdu_disk_info_t info;
7418 if (copy_from_user(&info, argp, sizeof(info)))
7419 err = -EFAULT;
7420 else if (!(info.state & (1<<MD_DISK_SYNC)))
7421
7422 break;
7423 else
7424 err = add_new_disk(mddev, &info);
7425 goto unlock;
7426 }
7427 break;
7428
7429 case BLKROSET:
7430 if (get_user(ro, (int __user *)(arg))) {
7431 err = -EFAULT;
7432 goto unlock;
7433 }
7434 err = -EINVAL;
7435
7436
7437
7438
7439 if (ro)
7440 goto unlock;
7441
7442
7443 if (mddev->ro != 1)
7444 goto unlock;
7445
7446
7447
7448
7449 if (mddev->pers) {
7450 err = restart_array(mddev);
7451 if (err == 0) {
7452 mddev->ro = 2;
7453 set_disk_ro(mddev->gendisk, 0);
7454 }
7455 }
7456 goto unlock;
7457 }
7458
7459
7460
7461
7462
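/*
 * The remaining ioctls change the state of the array, so a read-only
 * array must either be switched to read-write first or fail with
 * -EROFS.
 */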
7463 if (mddev->ro && mddev->pers) {
7464 if (mddev->ro == 2) {
7465 mddev->ro = 0;
7466 sysfs_notify_dirent_safe(mddev->sysfs_state);
7467 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7468
7469
7470
7471
7472 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7473 mddev_unlock(mddev);
7474 wait_event(mddev->sb_wait,
7475 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7476 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7477 mddev_lock_nointr(mddev);
7478 }
7479 } else {
7480 err = -EROFS;
7481 goto unlock;
7482 }
7483 }
7484
7485 switch (cmd) {
7486 case ADD_NEW_DISK:
7487 {
7488 mdu_disk_info_t info;
7489 if (copy_from_user(&info, argp, sizeof(info)))
7490 err = -EFAULT;
7491 else
7492 err = add_new_disk(mddev, &info);
7493 goto unlock;
7494 }
7495
7496 case CLUSTERED_DISK_NACK:
7497 if (mddev_is_clustered(mddev))
7498 md_cluster_ops->new_disk_ack(mddev, false);
7499 else
7500 err = -EINVAL;
7501 goto unlock;
7502
7503 case HOT_ADD_DISK:
7504 err = hot_add_disk(mddev, new_decode_dev(arg));
7505 goto unlock;
7506
7507 case RUN_ARRAY:
7508 err = do_md_run(mddev);
7509 goto unlock;
7510
7511 case SET_BITMAP_FILE:
7512 err = set_bitmap_file(mddev, (int)arg);
7513 goto unlock;
7514
7515 default:
7516 err = -EINVAL;
7517 goto unlock;
7518 }
7519
7520unlock:
7521 if (mddev->hold_active == UNTIL_IOCTL &&
7522 err != -EINVAL)
7523 mddev->hold_active = 0;
7524 mddev_unlock(mddev);
7525out:
7526 if (did_set_md_closing)
7527 clear_bit(MD_CLOSING, &mddev->flags);
7528 return err;
7529}
7530#ifdef CONFIG_COMPAT
7531static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7532 unsigned int cmd, unsigned long arg)
7533{
7534 switch (cmd) {
7535 case HOT_REMOVE_DISK:
7536 case HOT_ADD_DISK:
7537 case SET_DISK_FAULTY:
7538 case SET_BITMAP_FILE:
7539
7540 break;
7541 default:
7542 arg = (unsigned long)compat_ptr(arg);
7543 break;
7544 }
7545
7546 return md_ioctl(bdev, mode, cmd, arg);
7547}
7548#endif
7549
7550static int md_open(struct block_device *bdev, fmode_t mode)
7551{
7552
7553
7554
7555
7556 struct mddev *mddev = mddev_find(bdev->bd_dev);
7557 int err;
7558
7559 if (!mddev)
7560 return -ENODEV;
7561
7562 if (mddev->gendisk != bdev->bd_disk) {
7563
7564
7565
7566 mddev_put(mddev);
7567
7568 flush_workqueue(md_misc_wq);
7569
7570 return -ERESTARTSYS;
7571 }
7572 BUG_ON(mddev != bdev->bd_disk->private_data);
7573
7574 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7575 goto out;
7576
7577 if (test_bit(MD_CLOSING, &mddev->flags)) {
7578 mutex_unlock(&mddev->open_mutex);
7579 err = -ENODEV;
7580 goto out;
7581 }
7582
7583 err = 0;
7584 atomic_inc(&mddev->openers);
7585 mutex_unlock(&mddev->open_mutex);
7586
7587 check_disk_change(bdev);
7588 out:
7589 if (err)
7590 mddev_put(mddev);
7591 return err;
7592}
7593
7594static void md_release(struct gendisk *disk, fmode_t mode)
7595{
7596 struct mddev *mddev = disk->private_data;
7597
7598 BUG_ON(!mddev);
7599 atomic_dec(&mddev->openers);
7600 mddev_put(mddev);
7601}
7602
7603static int md_media_changed(struct gendisk *disk)
7604{
7605 struct mddev *mddev = disk->private_data;
7606
7607 return mddev->changed;
7608}
7609
7610static int md_revalidate(struct gendisk *disk)
7611{
7612 struct mddev *mddev = disk->private_data;
7613
7614 mddev->changed = 0;
7615 return 0;
7616}
7617static const struct block_device_operations md_fops =
7618{
7619 .owner = THIS_MODULE,
7620 .open = md_open,
7621 .release = md_release,
7622 .ioctl = md_ioctl,
7623#ifdef CONFIG_COMPAT
7624 .compat_ioctl = md_compat_ioctl,
7625#endif
7626 .getgeo = md_getgeo,
7627 .media_changed = md_media_changed,
7628 .revalidate_disk= md_revalidate,
7629};
7630
7631static int md_thread(void *arg)
7632{
7633 struct md_thread *thread = arg;
7634
7635
7636
7637
7638
7639
7640
7641
7642
7643
7644
7645
7646
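/*
 * md_thread is a "system thread": it runs the work handed to it by
 * md_wakeup_thread() and must wait interruptibly while idle so that
 * it does not contribute to the load average.
 */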
7647 allow_signal(SIGKILL);
7648 while (!kthread_should_stop()) {
7649
7650
7651
7652
7653
7654
7655 if (signal_pending(current))
7656 flush_signals(current);
7657
7658 wait_event_interruptible_timeout
7659 (thread->wqueue,
7660 test_bit(THREAD_WAKEUP, &thread->flags)
7661 || kthread_should_stop() || kthread_should_park(),
7662 thread->timeout);
7663
7664 clear_bit(THREAD_WAKEUP, &thread->flags);
7665 if (kthread_should_park())
7666 kthread_parkme();
7667 if (!kthread_should_stop())
7668 thread->run(thread);
7669 }
7670
7671 return 0;
7672}
7673
7674void md_wakeup_thread(struct md_thread *thread)
7675{
7676 if (thread) {
7677 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7678 set_bit(THREAD_WAKEUP, &thread->flags);
7679 wake_up(&thread->wqueue);
7680 }
7681}
7682EXPORT_SYMBOL(md_wakeup_thread);
7683
7684struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7685 struct mddev *mddev, const char *name)
7686{
7687 struct md_thread *thread;
7688
7689 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7690 if (!thread)
7691 return NULL;
7692
7693 init_waitqueue_head(&thread->wqueue);
7694
7695 thread->run = run;
7696 thread->mddev = mddev;
7697 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7698 thread->tsk = kthread_run(md_thread, thread,
7699 "%s_%s",
7700 mdname(thread->mddev),
7701 name);
7702 if (IS_ERR(thread->tsk)) {
7703 kfree(thread);
7704 return NULL;
7705 }
7706 return thread;
7707}
7708EXPORT_SYMBOL(md_register_thread);
7709
7710void md_unregister_thread(struct md_thread **threadp)
7711{
7712 struct md_thread *thread = *threadp;
7713 if (!thread)
7714 return;
7715 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7716
7717
7718
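/*
 * Locking ensures that mddev_unlock does not wake up a non-existent
 * thread.
 */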
7719 spin_lock(&pers_lock);
7720 *threadp = NULL;
7721 spin_unlock(&pers_lock);
7722
7723 kthread_stop(thread->tsk);
7724 kfree(thread);
7725}
7726EXPORT_SYMBOL(md_unregister_thread);
7727
7728void md_error(struct mddev *mddev, struct md_rdev *rdev)
7729{
7730 if (!rdev || test_bit(Faulty, &rdev->flags))
7731 return;
7732
7733 if (!mddev->pers || !mddev->pers->error_handler)
7734 return;
7735 mddev->pers->error_handler(mddev, rdev);
7736 if (mddev->degraded)
7737 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7738 sysfs_notify_dirent_safe(rdev->sysfs_state);
7739 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7740 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7741 md_wakeup_thread(mddev->thread);
7742 if (mddev->event_work.func)
7743 queue_work(md_misc_wq, &mddev->event_work);
7744 md_new_event(mddev);
7745}
7746EXPORT_SYMBOL(md_error);
7747
7748
7749
7750static void status_unused(struct seq_file *seq)
7751{
7752 int i = 0;
7753 struct md_rdev *rdev;
7754
7755 seq_printf(seq, "unused devices: ");
7756
7757 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7758 char b[BDEVNAME_SIZE];
7759 i++;
7760 seq_printf(seq, "%s ",
7761 bdevname(rdev->bdev,b));
7762 }
7763 if (!i)
7764 seq_printf(seq, "<none>");
7765
7766 seq_printf(seq, "\n");
7767}
7768
7769static int status_resync(struct seq_file *seq, struct mddev *mddev)
7770{
7771 sector_t max_sectors, resync, res;
7772 unsigned long dt, db = 0;
7773 sector_t rt, curr_mark_cnt, resync_mark_cnt;
7774 int scale, recovery_active;
7775 unsigned int per_milli;
7776
7777 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7778 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7779 max_sectors = mddev->resync_max_sectors;
7780 else
7781 max_sectors = mddev->dev_sectors;
7782
7783 resync = mddev->curr_resync;
7784 if (resync <= 3) {
7785 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7786
7787 resync = max_sectors;
7788 } else if (resync > max_sectors)
7789 resync = max_sectors;
7790 else
7791 resync -= atomic_read(&mddev->recovery_active);
7792
7793 if (resync == 0) {
7794 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7795 struct md_rdev *rdev;
7796
7797 rdev_for_each(rdev, mddev)
7798 if (rdev->raid_disk >= 0 &&
7799 !test_bit(Faulty, &rdev->flags) &&
7800 rdev->recovery_offset != MaxSector &&
7801 rdev->recovery_offset) {
7802 seq_printf(seq, "\trecover=REMOTE");
7803 return 1;
7804 }
7805 if (mddev->reshape_position != MaxSector)
7806 seq_printf(seq, "\treshape=REMOTE");
7807 else
7808 seq_printf(seq, "\tresync=REMOTE");
7809 return 1;
7810 }
7811 if (mddev->recovery_cp < MaxSector) {
7812 seq_printf(seq, "\tresync=PENDING");
7813 return 1;
7814 }
7815 return 0;
7816 }
7817 if (resync < 3) {
7818 seq_printf(seq, "\tresync=DELAYED");
7819 return 1;
7820 }
7821
7822 WARN_ON(max_sectors == 0);
7823
7824
7825
7826
7827
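/*
 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t and
 * (max_sectors>>scale) fits in the u32 that sector_div() requires.
 */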
7828 scale = 10;
7829 if (sizeof(sector_t) > sizeof(unsigned long)) {
7830 while ( max_sectors/2 > (1ULL<<(scale+32)))
7831 scale++;
7832 }
7833 res = (resync>>scale)*1000;
7834 sector_div(res, (u32)((max_sectors>>scale)+1));
7835
7836 per_milli = res;
7837 {
7838 int i, x = per_milli/50, y = 20-x;
7839 seq_printf(seq, "[");
7840 for (i = 0; i < x; i++)
7841 seq_printf(seq, "=");
7842 seq_printf(seq, ">");
7843 for (i = 0; i < y; i++)
7844 seq_printf(seq, ".");
7845 seq_printf(seq, "] ");
7846 }
7847 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7848 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7849 "reshape" :
7850 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7851 "check" :
7852 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7853 "resync" : "recovery"))),
7854 per_milli/10, per_milli % 10,
7855 (unsigned long long) resync/2,
7856 (unsigned long long) max_sectors/2);
7857
7858
7874
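/*
 * dt: seconds since the last mark
 * db: sectors completed since the last mark (excluding in-flight IO)
 * rt: estimated remaining time; db is scaled by 32 in the division
 *     and the result shifted back to avoid losing precision.
 */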
7875 dt = ((jiffies - mddev->resync_mark) / HZ);
7876 if (!dt) dt++;
7877
7878 curr_mark_cnt = mddev->curr_mark_cnt;
7879 recovery_active = atomic_read(&mddev->recovery_active);
7880 resync_mark_cnt = mddev->resync_mark_cnt;
7881
7882 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
7883 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
7884
7885 rt = max_sectors - resync;
7886 rt = div64_u64(rt, db/32+1);
7887 rt *= dt;
7888 rt >>= 5;
7889
7890 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7891 ((unsigned long)rt % 60)/6);
7892
7893 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7894 return 1;
7895}
7896
7897static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7898{
7899 struct list_head *tmp;
7900 loff_t l = *pos;
7901 struct mddev *mddev;
7902
7903 if (l >= 0x10000)
7904 return NULL;
7905 if (!l--)
7906
7907 return (void*)1;
7908
7909 spin_lock(&all_mddevs_lock);
7910 list_for_each(tmp,&all_mddevs)
7911 if (!l--) {
7912 mddev = list_entry(tmp, struct mddev, all_mddevs);
7913 mddev_get(mddev);
7914 spin_unlock(&all_mddevs_lock);
7915 return mddev;
7916 }
7917 spin_unlock(&all_mddevs_lock);
7918 if (!l--)
7919 return (void*)2;
7920 return NULL;
7921}
7922
7923static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7924{
7925 struct list_head *tmp;
7926 struct mddev *next_mddev, *mddev = v;
7927
7928 ++*pos;
7929 if (v == (void*)2)
7930 return NULL;
7931
7932 spin_lock(&all_mddevs_lock);
7933 if (v == (void*)1)
7934 tmp = all_mddevs.next;
7935 else
7936 tmp = mddev->all_mddevs.next;
7937 if (tmp != &all_mddevs)
7938 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7939 else {
7940 next_mddev = (void*)2;
7941 *pos = 0x10000;
7942 }
7943 spin_unlock(&all_mddevs_lock);
7944
7945 if (v != (void*)1)
7946 mddev_put(mddev);
7947 return next_mddev;
7948
7949}
7950
7951static void md_seq_stop(struct seq_file *seq, void *v)
7952{
7953 struct mddev *mddev = v;
7954
7955 if (mddev && v != (void*)1 && v != (void*)2)
7956 mddev_put(mddev);
7957}
7958
7959static int md_seq_show(struct seq_file *seq, void *v)
7960{
7961 struct mddev *mddev = v;
7962 sector_t sectors;
7963 struct md_rdev *rdev;
7964
7965 if (v == (void*)1) {
7966 struct md_personality *pers;
7967 seq_printf(seq, "Personalities : ");
7968 spin_lock(&pers_lock);
7969 list_for_each_entry(pers, &pers_list, list)
7970 seq_printf(seq, "[%s] ", pers->name);
7971
7972 spin_unlock(&pers_lock);
7973 seq_printf(seq, "\n");
7974 seq->poll_event = atomic_read(&md_event_count);
7975 return 0;
7976 }
7977 if (v == (void*)2) {
7978 status_unused(seq);
7979 return 0;
7980 }
7981
7982 spin_lock(&mddev->lock);
7983 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7984 seq_printf(seq, "%s : %sactive", mdname(mddev),
7985 mddev->pers ? "" : "in");
7986 if (mddev->pers) {
7987 if (mddev->ro==1)
7988 seq_printf(seq, " (read-only)");
7989 if (mddev->ro==2)
7990 seq_printf(seq, " (auto-read-only)");
7991 seq_printf(seq, " %s", mddev->pers->name);
7992 }
7993
7994 sectors = 0;
7995 rcu_read_lock();
7996 rdev_for_each_rcu(rdev, mddev) {
7997 char b[BDEVNAME_SIZE];
7998 seq_printf(seq, " %s[%d]",
7999 bdevname(rdev->bdev,b), rdev->desc_nr);
8000 if (test_bit(WriteMostly, &rdev->flags))
8001 seq_printf(seq, "(W)");
8002 if (test_bit(Journal, &rdev->flags))
8003 seq_printf(seq, "(J)");
8004 if (test_bit(Faulty, &rdev->flags)) {
8005 seq_printf(seq, "(F)");
8006 continue;
8007 }
8008 if (rdev->raid_disk < 0)
8009 seq_printf(seq, "(S)");
8010 if (test_bit(Replacement, &rdev->flags))
8011 seq_printf(seq, "(R)");
8012 sectors += rdev->sectors;
8013 }
8014 rcu_read_unlock();
8015
8016 if (!list_empty(&mddev->disks)) {
8017 if (mddev->pers)
8018 seq_printf(seq, "\n %llu blocks",
8019 (unsigned long long)
8020 mddev->array_sectors / 2);
8021 else
8022 seq_printf(seq, "\n %llu blocks",
8023 (unsigned long long)sectors / 2);
8024 }
8025 if (mddev->persistent) {
8026 if (mddev->major_version != 0 ||
8027 mddev->minor_version != 90) {
8028 seq_printf(seq," super %d.%d",
8029 mddev->major_version,
8030 mddev->minor_version);
8031 }
8032 } else if (mddev->external)
8033 seq_printf(seq, " super external:%s",
8034 mddev->metadata_type);
8035 else
8036 seq_printf(seq, " super non-persistent");
8037
8038 if (mddev->pers) {
8039 mddev->pers->status(seq, mddev);
8040 seq_printf(seq, "\n ");
8041 if (mddev->pers->sync_request) {
8042 if (status_resync(seq, mddev))
8043 seq_printf(seq, "\n ");
8044 }
8045 } else
8046 seq_printf(seq, "\n ");
8047
8048 md_bitmap_status(seq, mddev->bitmap);
8049
8050 seq_printf(seq, "\n");
8051 }
8052 spin_unlock(&mddev->lock);
8053
8054 return 0;
8055}
8056
8057static const struct seq_operations md_seq_ops = {
8058 .start = md_seq_start,
8059 .next = md_seq_next,
8060 .stop = md_seq_stop,
8061 .show = md_seq_show,
8062};
8063
8064static int md_seq_open(struct inode *inode, struct file *file)
8065{
8066 struct seq_file *seq;
8067 int error;
8068
8069 error = seq_open(file, &md_seq_ops);
8070 if (error)
8071 return error;
8072
8073 seq = file->private_data;
8074 seq->poll_event = atomic_read(&md_event_count);
8075 return error;
8076}
8077
8078static int md_unloading;
8079static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8080{
8081 struct seq_file *seq = filp->private_data;
8082 __poll_t mask;
8083
8084 if (md_unloading)
8085 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8086 poll_wait(filp, &md_event_waiters, wait);
8087
8088
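/* always allow read */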
8089 mask = EPOLLIN | EPOLLRDNORM;
8090
8091 if (seq->poll_event != atomic_read(&md_event_count))
8092 mask |= EPOLLERR | EPOLLPRI;
8093 return mask;
8094}
8095
8096static const struct file_operations md_seq_fops = {
8097 .owner = THIS_MODULE,
8098 .open = md_seq_open,
8099 .read = seq_read,
8100 .llseek = seq_lseek,
8101 .release = seq_release,
8102 .poll = mdstat_poll,
8103};
8104
8105int register_md_personality(struct md_personality *p)
8106{
8107 pr_debug("md: %s personality registered for level %d\n",
8108 p->name, p->level);
8109 spin_lock(&pers_lock);
8110 list_add_tail(&p->list, &pers_list);
8111 spin_unlock(&pers_lock);
8112 return 0;
8113}
8114EXPORT_SYMBOL(register_md_personality);
8115
8116int unregister_md_personality(struct md_personality *p)
8117{
8118 pr_debug("md: %s personality unregistered\n", p->name);
8119 spin_lock(&pers_lock);
8120 list_del_init(&p->list);
8121 spin_unlock(&pers_lock);
8122 return 0;
8123}
8124EXPORT_SYMBOL(unregister_md_personality);
8125
8126int register_md_cluster_operations(struct md_cluster_operations *ops,
8127 struct module *module)
8128{
8129 int ret = 0;
8130 spin_lock(&pers_lock);
8131 if (md_cluster_ops != NULL)
8132 ret = -EALREADY;
8133 else {
8134 md_cluster_ops = ops;
8135 md_cluster_mod = module;
8136 }
8137 spin_unlock(&pers_lock);
8138 return ret;
8139}
8140EXPORT_SYMBOL(register_md_cluster_operations);
8141
8142int unregister_md_cluster_operations(void)
8143{
8144 spin_lock(&pers_lock);
8145 md_cluster_ops = NULL;
8146 spin_unlock(&pers_lock);
8147 return 0;
8148}
8149EXPORT_SYMBOL(unregister_md_cluster_operations);
8150
8151int md_setup_cluster(struct mddev *mddev, int nodes)
8152{
8153 if (!md_cluster_ops)
8154 request_module("md-cluster");
8155 spin_lock(&pers_lock);
8156
8157 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
8158 pr_warn("can't find md-cluster module or get it's reference.\n");
8159 spin_unlock(&pers_lock);
8160 return -ENOENT;
8161 }
8162 spin_unlock(&pers_lock);
8163
8164 return md_cluster_ops->join(mddev, nodes);
8165}
8166
8167void md_cluster_stop(struct mddev *mddev)
8168{
8169 if (!md_cluster_ops)
8170 return;
8171 md_cluster_ops->leave(mddev);
8172 module_put(md_cluster_mod);
8173}
8174
8175static int is_mddev_idle(struct mddev *mddev, int init)
8176{
8177 struct md_rdev *rdev;
8178 int idle;
8179 int curr_events;
8180
8181 idle = 1;
8182 rcu_read_lock();
8183 rdev_for_each_rcu(rdev, mddev) {
8184 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8185 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8186 atomic_read(&disk->sync_io);
8187
8188
8208
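/*
 * sync_io is counted when a sync request starts while the disk stats
 * are counted on completion, so resync activity makes curr_events
 * smaller and normal IO makes it grow.  Only a difference well above
 * the noise (64 sectors) marks the array as non-idle and throttles
 * resync.
 */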
8209 if (init || curr_events - rdev->last_events > 64) {
8210 rdev->last_events = curr_events;
8211 idle = 0;
8212 }
8213 }
8214 rcu_read_unlock();
8215 return idle;
8216}
8217
8218void md_done_sync(struct mddev *mddev, int blocks, int ok)
8219{
8220
8221 atomic_sub(blocks, &mddev->recovery_active);
8222 wake_up(&mddev->recovery_wait);
8223 if (!ok) {
8224 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8225 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8226 md_wakeup_thread(mddev->thread);
8227
8228 }
8229}
8230EXPORT_SYMBOL(md_done_sync);
8231
8232
8233
8234
8235
8236
8237
8238
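/*
 * md_write_start(mddev, bi)
 * If some array metadata (e.g. the 'active' flag in the superblock)
 * needs updating before the write can proceed, schedule the update
 * and wait for it.  A return value of false means the metadata could
 * not be committed (e.g. the array was suspended) and the write
 * cannot safely proceed now.
 */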
8239bool md_write_start(struct mddev *mddev, struct bio *bi)
8240{
8241 int did_change = 0;
8242
8243 if (bio_data_dir(bi) != WRITE)
8244 return true;
8245
8246 BUG_ON(mddev->ro == 1);
8247 if (mddev->ro == 2) {
8248
8249 mddev->ro = 0;
8250 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8251 md_wakeup_thread(mddev->thread);
8252 md_wakeup_thread(mddev->sync_thread);
8253 did_change = 1;
8254 }
8255 rcu_read_lock();
8256 percpu_ref_get(&mddev->writes_pending);
8257 smp_mb();
8258 if (mddev->safemode == 1)
8259 mddev->safemode = 0;
8260
8261 if (mddev->in_sync || mddev->sync_checkers) {
8262 spin_lock(&mddev->lock);
8263 if (mddev->in_sync) {
8264 mddev->in_sync = 0;
8265 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8266 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8267 md_wakeup_thread(mddev->thread);
8268 did_change = 1;
8269 }
8270 spin_unlock(&mddev->lock);
8271 }
8272 rcu_read_unlock();
8273 if (did_change)
8274 sysfs_notify_dirent_safe(mddev->sysfs_state);
8275 if (!mddev->has_superblocks)
8276 return true;
8277 wait_event(mddev->sb_wait,
8278 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8279 mddev->suspended);
8280 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8281 percpu_ref_put(&mddev->writes_pending);
8282 return false;
8283 }
8284 return true;
8285}
8286EXPORT_SYMBOL(md_write_start);
8287
8288
8289
8290
8291
8292
8293
8294
8295
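/*
 * md_write_inc() may only be used after md_write_start() has already
 * been called for the current request.  It takes an extra reference
 * on writes_pending when a request is split; each part needs a
 * matching md_write_end().
 */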
8296void md_write_inc(struct mddev *mddev, struct bio *bi)
8297{
8298 if (bio_data_dir(bi) != WRITE)
8299 return;
8300 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8301 percpu_ref_get(&mddev->writes_pending);
8302}
8303EXPORT_SYMBOL(md_write_inc);
8304
8305void md_write_end(struct mddev *mddev)
8306{
8307 percpu_ref_put(&mddev->writes_pending);
8308
8309 if (mddev->safemode == 2)
8310 md_wakeup_thread(mddev->thread);
8311 else if (mddev->safemode_delay)
8312
8313
8314
8315 mod_timer(&mddev->safemode_timer,
8316 roundup(jiffies, mddev->safemode_delay) +
8317 mddev->safemode_delay);
8318}
8319
8320EXPORT_SYMBOL(md_write_end);
8321
8322
8323
8324
8325
8326
8327
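/*
 * md_allow_write(mddev)
 * Mark the array 'active' so that writes may proceed without blocking
 * on a metadata update.  Must be called with the mddev lock held.
 */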
8328void md_allow_write(struct mddev *mddev)
8329{
8330 if (!mddev->pers)
8331 return;
8332 if (mddev->ro)
8333 return;
8334 if (!mddev->pers->sync_request)
8335 return;
8336
8337 spin_lock(&mddev->lock);
8338 if (mddev->in_sync) {
8339 mddev->in_sync = 0;
8340 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8341 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8342 if (mddev->safemode_delay &&
8343 mddev->safemode == 0)
8344 mddev->safemode = 1;
8345 spin_unlock(&mddev->lock);
8346 md_update_sb(mddev, 0);
8347 sysfs_notify_dirent_safe(mddev->sysfs_state);
8348
8349 wait_event(mddev->sb_wait,
8350 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8351 } else
8352 spin_unlock(&mddev->lock);
8353}
8354EXPORT_SYMBOL_GPL(md_allow_write);
8355
8356#define SYNC_MARKS 10
8357#define SYNC_MARK_STEP (3*HZ)
8358#define UPDATE_FREQUENCY (5*60*HZ)
8359void md_do_sync(struct md_thread *thread)
8360{
8361 struct mddev *mddev = thread->mddev;
8362 struct mddev *mddev2;
8363 unsigned int currspeed = 0, window;
8364 sector_t max_sectors,j, io_sectors, recovery_done;
8365 unsigned long mark[SYNC_MARKS];
8366 unsigned long update_time;
8367 sector_t mark_cnt[SYNC_MARKS];
8368 int last_mark,m;
8369 struct list_head *tmp;
8370 sector_t last_check;
8371 int skipped = 0;
8372 struct md_rdev *rdev;
8373 char *desc, *action = NULL;
8374 struct blk_plug plug;
8375 int ret;
8376
8377
8378 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8379 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8380 return;
8381 if (mddev->ro) {
8382 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8383 return;
8384 }
8385
8386 if (mddev_is_clustered(mddev)) {
8387 ret = md_cluster_ops->resync_start(mddev);
8388 if (ret)
8389 goto skip;
8390
8391 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8392 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8393 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8394 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8395 && ((unsigned long long)mddev->curr_resync_completed
8396 < (unsigned long long)mddev->resync_max_sectors))
8397 goto skip;
8398 }
8399
8400 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8401 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8402 desc = "data-check";
8403 action = "check";
8404 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8405 desc = "requested-resync";
8406 action = "repair";
8407 } else
8408 desc = "resync";
8409 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8410 desc = "reshape";
8411 else
8412 desc = "recovery";
8413
8414 mddev->last_sync_action = action ?: desc;
8415
8416
8431
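/*
 * mddev->curr_resync is overloaded here:
 *   0 = not resyncing, 2 = checking for conflicting resyncs,
 *   1 = yielded to a conflicting resync, 3 or more = resync active.
 * Before starting, set it to 2 and wait until every array that shares
 * devices with this one has curr_resync >= ours.
 */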
8432 do {
8433 int mddev2_minor = -1;
8434 mddev->curr_resync = 2;
8435
8436 try_again:
8437 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8438 goto skip;
8439 for_each_mddev(mddev2, tmp) {
8440 if (mddev2 == mddev)
8441 continue;
8442 if (!mddev->parallel_resync
8443 && mddev2->curr_resync
8444 && match_mddev_units(mddev, mddev2)) {
8445 DEFINE_WAIT(wq);
8446 if (mddev < mddev2 && mddev->curr_resync == 2) {
8447
8448 mddev->curr_resync = 1;
8449 wake_up(&resync_wait);
8450 }
8451 if (mddev > mddev2 && mddev->curr_resync == 1)
8452
8453
8454
8455 continue;
8456
8457
8458
8459
8460 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8461 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8462 mddev2->curr_resync >= mddev->curr_resync) {
8463 if (mddev2_minor != mddev2->md_minor) {
8464 mddev2_minor = mddev2->md_minor;
8465 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8466 desc, mdname(mddev),
8467 mdname(mddev2));
8468 }
8469 mddev_put(mddev2);
8470 if (signal_pending(current))
8471 flush_signals(current);
8472 schedule();
8473 finish_wait(&resync_wait, &wq);
8474 goto try_again;
8475 }
8476 finish_wait(&resync_wait, &wq);
8477 }
8478 }
8479 } while (mddev->curr_resync < 2);
8480
8481 j = 0;
8482 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8483
8484
8485
8486 max_sectors = mddev->resync_max_sectors;
8487 atomic64_set(&mddev->resync_mismatches, 0);
8488
8489 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8490 j = mddev->resync_min;
8491 else if (!mddev->bitmap)
8492 j = mddev->recovery_cp;
8493
8494 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8495 max_sectors = mddev->resync_max_sectors;
8496
8497
8498
8499
8500
8501 if (mddev_is_clustered(mddev) &&
8502 mddev->reshape_position != MaxSector)
8503 j = mddev->reshape_position;
8504 } else {
8505
8506 max_sectors = mddev->dev_sectors;
8507 j = MaxSector;
8508 rcu_read_lock();
8509 rdev_for_each_rcu(rdev, mddev)
8510 if (rdev->raid_disk >= 0 &&
8511 !test_bit(Journal, &rdev->flags) &&
8512 !test_bit(Faulty, &rdev->flags) &&
8513 !test_bit(In_sync, &rdev->flags) &&
8514 rdev->recovery_offset < j)
8515 j = rdev->recovery_offset;
8516 rcu_read_unlock();
8517
8518
8519
8520
8521
8522
8523
8524
8525
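/*
 * If there is a bitmap, make sure all writes that started before the
 * spare was added have completed, otherwise a bit could be set in the
 * bitmap after recovery has already checked and skipped that region.
 */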
8526 if (mddev->bitmap) {
8527 mddev->pers->quiesce(mddev, 1);
8528 mddev->pers->quiesce(mddev, 0);
8529 }
8530 }
8531
8532 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8533 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8534 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8535 speed_max(mddev), desc);
8536
8537 is_mddev_idle(mddev, 1);
8538
8539 io_sectors = 0;
8540 for (m = 0; m < SYNC_MARKS; m++) {
8541 mark[m] = jiffies;
8542 mark_cnt[m] = io_sectors;
8543 }
8544 last_mark = 0;
8545 mddev->resync_mark = mark[last_mark];
8546 mddev->resync_mark_cnt = mark_cnt[last_mark];
8547
8548
8549
8550
8551 window = 32 * (PAGE_SIZE / 512);
8552 pr_debug("md: using %dk window, over a total of %lluk.\n",
8553 window/2, (unsigned long long)max_sectors/2);
8554
8555 atomic_set(&mddev->recovery_active, 0);
8556 last_check = 0;
8557
8558 if (j > 2) {
8559 pr_debug("md: resuming %s of %s from checkpoint.\n",
8560 desc, mdname(mddev));
8561 mddev->curr_resync = j;
8562 } else
8563 mddev->curr_resync = 3;
8564 mddev->curr_resync_completed = j;
8565 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8566 md_new_event(mddev);
8567 update_time = jiffies;
8568
8569 blk_start_plug(&plug);
8570 while (j < max_sectors) {
8571 sector_t sectors;
8572
8573 skipped = 0;
8574
8575 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8576 ((mddev->curr_resync > mddev->curr_resync_completed &&
8577 (mddev->curr_resync - mddev->curr_resync_completed)
8578 > (max_sectors >> 4)) ||
8579 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8580 (j - mddev->curr_resync_completed)*2
8581 >= mddev->resync_max - mddev->curr_resync_completed ||
8582 mddev->curr_resync_completed > mddev->resync_max
8583 )) {
8584
8585 wait_event(mddev->recovery_wait,
8586 atomic_read(&mddev->recovery_active) == 0);
8587 mddev->curr_resync_completed = j;
8588 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8589 j > mddev->recovery_cp)
8590 mddev->recovery_cp = j;
8591 update_time = jiffies;
8592 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8593 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8594 }
8595
8596 while (j >= mddev->resync_max &&
8597 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8598
8599
8600
8601
8602 flush_signals(current);
8603 wait_event_interruptible(mddev->recovery_wait,
8604 mddev->resync_max > j
8605 || test_bit(MD_RECOVERY_INTR,
8606 &mddev->recovery));
8607 }
8608
8609 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8610 break;
8611
8612 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8613 if (sectors == 0) {
8614 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8615 break;
8616 }
8617
8618 if (!skipped) {
8619 io_sectors += sectors;
8620 atomic_add(sectors, &mddev->recovery_active);
8621 }
8622
8623 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8624 break;
8625
8626 j += sectors;
8627 if (j > max_sectors)
8628
8629 j = max_sectors;
8630 if (j > 2)
8631 mddev->curr_resync = j;
8632 mddev->curr_mark_cnt = io_sectors;
8633 if (last_check == 0)
8634
8635
8636
8637 md_new_event(mddev);
8638
8639 if (last_check + window > io_sectors || j == max_sectors)
8640 continue;
8641
8642 last_check = io_sectors;
8643 repeat:
8644 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8645
8646 int next = (last_mark+1) % SYNC_MARKS;
8647
8648 mddev->resync_mark = mark[next];
8649 mddev->resync_mark_cnt = mark_cnt[next];
8650 mark[next] = jiffies;
8651 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8652 last_mark = next;
8653 }
8654
8655 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8656 break;
8657
8658
8659
8660
8661
8662
8663
8664
8665
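/*
 * Throttling: if we are faster than the minimum speed and the devices
 * are busy with other IO, back off to give that IO a chance; only the
 * IO subsystem matters here, not CPU load.
 */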
8666 cond_resched();
8667
8668 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8669 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8670 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8671
8672 if (currspeed > speed_min(mddev)) {
8673 if (currspeed > speed_max(mddev)) {
8674 msleep(500);
8675 goto repeat;
8676 }
8677 if (!is_mddev_idle(mddev, 0)) {
8678
8679
8680
8681
8682 wait_event(mddev->recovery_wait,
8683 !atomic_read(&mddev->recovery_active));
8684 }
8685 }
8686 }
8687 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8688 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8689 ? "interrupted" : "done");
8690
8691
8692
8693 blk_finish_plug(&plug);
8694 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8695
8696 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8697 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8698 mddev->curr_resync > 3) {
8699 mddev->curr_resync_completed = mddev->curr_resync;
8700 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8701 }
8702 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8703
8704 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8705 mddev->curr_resync > 3) {
8706 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8707 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8708 if (mddev->curr_resync >= mddev->recovery_cp) {
8709 pr_debug("md: checkpointing %s of %s.\n",
8710 desc, mdname(mddev));
8711 if (test_bit(MD_RECOVERY_ERROR,
8712 &mddev->recovery))
8713 mddev->recovery_cp =
8714 mddev->curr_resync_completed;
8715 else
8716 mddev->recovery_cp =
8717 mddev->curr_resync;
8718 }
8719 } else
8720 mddev->recovery_cp = MaxSector;
8721 } else {
8722 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8723 mddev->curr_resync = MaxSector;
8724 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8725 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8726 rcu_read_lock();
8727 rdev_for_each_rcu(rdev, mddev)
8728 if (rdev->raid_disk >= 0 &&
8729 mddev->delta_disks >= 0 &&
8730 !test_bit(Journal, &rdev->flags) &&
8731 !test_bit(Faulty, &rdev->flags) &&
8732 !test_bit(In_sync, &rdev->flags) &&
8733 rdev->recovery_offset < mddev->curr_resync)
8734 rdev->recovery_offset = mddev->curr_resync;
8735 rcu_read_unlock();
8736 }
8737 }
8738 }
8739 skip:
8740
8741
8742
8743 set_mask_bits(&mddev->sb_flags, 0,
8744 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8745
8746 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8747 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8748 mddev->delta_disks > 0 &&
8749 mddev->pers->finish_reshape &&
8750 mddev->pers->size &&
8751 mddev->queue) {
8752 mddev_lock_nointr(mddev);
8753 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8754 mddev_unlock(mddev);
8755 if (!mddev_is_clustered(mddev)) {
8756 set_capacity(mddev->gendisk, mddev->array_sectors);
8757 revalidate_disk(mddev->gendisk);
8758 }
8759 }
8760
8761 spin_lock(&mddev->lock);
8762 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8763
8764 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8765 mddev->resync_min = 0;
8766 mddev->resync_max = MaxSector;
8767 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8768 mddev->resync_min = mddev->curr_resync_completed;
8769 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8770 mddev->curr_resync = 0;
8771 spin_unlock(&mddev->lock);
8772
8773 wake_up(&resync_wait);
8774 md_wakeup_thread(mddev->thread);
8775 return;
8776}
8777EXPORT_SYMBOL_GPL(md_do_sync);
8778
8779static int remove_and_add_spares(struct mddev *mddev,
8780 struct md_rdev *this)
8781{
8782 struct md_rdev *rdev;
8783 int spares = 0;
8784 int removed = 0;
8785 bool remove_some = false;
8786
8787 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
8788
8789 return 0;
8790
8791 rdev_for_each(rdev, mddev) {
8792 if ((this == NULL || rdev == this) &&
8793 rdev->raid_disk >= 0 &&
8794 !test_bit(Blocked, &rdev->flags) &&
8795 test_bit(Faulty, &rdev->flags) &&
8796 atomic_read(&rdev->nr_pending)==0) {
8797
8798
8799
8800
8801
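/*
 * Faulty, non-Blocked devices with nr_pending == 0 can never gain new
 * references, so a single synchronize_rcu() after this scan is enough
 * before they are actually removed.
 */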
8802 remove_some = true;
8803 set_bit(RemoveSynchronized, &rdev->flags);
8804 }
8805 }
8806
8807 if (remove_some)
8808 synchronize_rcu();
8809 rdev_for_each(rdev, mddev) {
8810 if ((this == NULL || rdev == this) &&
8811 rdev->raid_disk >= 0 &&
8812 !test_bit(Blocked, &rdev->flags) &&
8813 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8814 (!test_bit(In_sync, &rdev->flags) &&
8815 !test_bit(Journal, &rdev->flags))) &&
8816 atomic_read(&rdev->nr_pending)==0)) {
8817 if (mddev->pers->hot_remove_disk(
8818 mddev, rdev) == 0) {
8819 sysfs_unlink_rdev(mddev, rdev);
8820 rdev->saved_raid_disk = rdev->raid_disk;
8821 rdev->raid_disk = -1;
8822 removed++;
8823 }
8824 }
8825 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8826 clear_bit(RemoveSynchronized, &rdev->flags);
8827 }
8828
8829 if (removed && mddev->kobj.sd)
8830 sysfs_notify(&mddev->kobj, NULL, "degraded");
8831
8832 if (this && removed)
8833 goto no_add;
8834
8835 rdev_for_each(rdev, mddev) {
8836 if (this && this != rdev)
8837 continue;
8838 if (test_bit(Candidate, &rdev->flags))
8839 continue;
8840 if (rdev->raid_disk >= 0 &&
8841 !test_bit(In_sync, &rdev->flags) &&
8842 !test_bit(Journal, &rdev->flags) &&
8843 !test_bit(Faulty, &rdev->flags))
8844 spares++;
8845 if (rdev->raid_disk >= 0)
8846 continue;
8847 if (test_bit(Faulty, &rdev->flags))
8848 continue;
8849 if (!test_bit(Journal, &rdev->flags)) {
8850 if (mddev->ro &&
8851 ! (rdev->saved_raid_disk >= 0 &&
8852 !test_bit(Bitmap_sync, &rdev->flags)))
8853 continue;
8854
8855 rdev->recovery_offset = 0;
8856 }
8857 if (mddev->pers->
8858 hot_add_disk(mddev, rdev) == 0) {
8859 if (sysfs_link_rdev(mddev, rdev))
8860 ;
8861 if (!test_bit(Journal, &rdev->flags))
8862 spares++;
8863 md_new_event(mddev);
8864 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8865 }
8866 }
8867no_add:
8868 if (removed)
8869 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8870 return spares;
8871}
8872
8873static void md_start_sync(struct work_struct *ws)
8874{
8875 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8876
8877 mddev->sync_thread = md_register_thread(md_do_sync,
8878 mddev,
8879 "resync");
8880 if (!mddev->sync_thread) {
8881 pr_warn("%s: could not start resync thread...\n",
8882 mdname(mddev));
8883
8884 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8885 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8886 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8887 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8888 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8889 wake_up(&resync_wait);
8890 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8891 &mddev->recovery))
8892 if (mddev->sysfs_action)
8893 sysfs_notify_dirent_safe(mddev->sysfs_action);
8894 } else
8895 md_wakeup_thread(mddev->sync_thread);
8896 sysfs_notify_dirent_safe(mddev->sysfs_action);
8897 md_new_event(mddev);
8898}
8899
8900
8921
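/*
 * md_check_recovery() is called regularly by each array's thread to
 * handle generic housekeeping: superblock updates, reaping a finished
 * sync thread, removing failed devices, adding spares and starting a
 * new resync/recovery thread when needed.  It never does the resync
 * itself; that is forked off via md_start_sync()/md_do_sync().
 */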
8922void md_check_recovery(struct mddev *mddev)
8923{
8924 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
8925
8926
8927
8928 set_bit(MD_UPDATING_SB, &mddev->flags);
8929 smp_mb__after_atomic();
8930 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8931 md_update_sb(mddev, 0);
8932 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8933 wake_up(&mddev->sb_wait);
8934 }
8935
8936 if (mddev->suspended)
8937 return;
8938
8939 if (mddev->bitmap)
8940 md_bitmap_daemon_work(mddev);
8941
8942 if (signal_pending(current)) {
8943 if (mddev->pers->sync_request && !mddev->external) {
8944 pr_debug("md: %s in immediate safe mode\n",
8945 mdname(mddev));
8946 mddev->safemode = 2;
8947 }
8948 flush_signals(current);
8949 }
8950
8951 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8952 return;
8953 if ( ! (
8954 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8955 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8956 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8957 (mddev->external == 0 && mddev->safemode == 1) ||
8958 (mddev->safemode == 2
8959 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8960 ))
8961 return;
8962
8963 if (mddev_trylock(mddev)) {
8964 int spares = 0;
8965 bool try_set_sync = mddev->safemode != 0;
8966
8967 if (!mddev->external && mddev->safemode == 1)
8968 mddev->safemode = 0;
8969
8970 if (mddev->ro) {
8971 struct md_rdev *rdev;
8972 if (!mddev->external && mddev->in_sync)
8973
8974
8975
8976
8977
8978 rdev_for_each(rdev, mddev)
8979 clear_bit(Blocked, &rdev->flags);
8980
8981
8982
8983
8984
8985
8986
8987 remove_and_add_spares(mddev, NULL);
8988
8989
8990
8991 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8992 md_reap_sync_thread(mddev);
8993 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8994 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8995 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8996 goto unlock;
8997 }
8998
8999 if (mddev_is_clustered(mddev)) {
9000 struct md_rdev *rdev;
9001
9002
9003
9004 rdev_for_each(rdev, mddev) {
9005 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9006 rdev->raid_disk < 0)
9007 md_kick_rdev_from_array(rdev);
9008 }
9009 }
9010
9011 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9012 spin_lock(&mddev->lock);
9013 set_in_sync(mddev);
9014 spin_unlock(&mddev->lock);
9015 }
9016
9017 if (mddev->sb_flags)
9018 md_update_sb(mddev, 0);
9019
9020 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9021 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9022
9023 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9024 goto unlock;
9025 }
9026 if (mddev->sync_thread) {
9027 md_reap_sync_thread(mddev);
9028 goto unlock;
9029 }
9030
9031
9032
9033 mddev->curr_resync_completed = 0;
9034 spin_lock(&mddev->lock);
9035 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9036 spin_unlock(&mddev->lock);
9037
9038
9039
9040 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9041 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9042
9043 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9044 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9045 goto not_running;
9046
9047
9048
9049
9050
9051
9052
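/*
 * No recovery is running: remove any failed drives, then add spares
 * if possible.  Spares are also removed and re-added so the
 * personality can refuse the re-add.
 */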
9053 if (mddev->reshape_position != MaxSector) {
9054 if (mddev->pers->check_reshape == NULL ||
9055 mddev->pers->check_reshape(mddev) != 0)
9056
9057 goto not_running;
9058 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9059 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9060 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9061 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9062 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9063 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9064 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9065 } else if (mddev->recovery_cp < MaxSector) {
9066 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9067 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9068 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9069
9070 goto not_running;
9071
9072 if (mddev->pers->sync_request) {
9073 if (spares) {
9074
9075
9076
9077
9078 md_bitmap_write_all(mddev->bitmap);
9079 }
9080 INIT_WORK(&mddev->del_work, md_start_sync);
9081 queue_work(md_misc_wq, &mddev->del_work);
9082 goto unlock;
9083 }
9084 not_running:
9085 if (!mddev->sync_thread) {
9086 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9087 wake_up(&resync_wait);
9088 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9089 &mddev->recovery))
9090 if (mddev->sysfs_action)
9091 sysfs_notify_dirent_safe(mddev->sysfs_action);
9092 }
9093 unlock:
9094 wake_up(&mddev->sb_wait);
9095 mddev_unlock(mddev);
9096 }
9097}
9098EXPORT_SYMBOL(md_check_recovery);
9099
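/*
 * Reap a finished (or interrupted) sync thread.  Unregisters the thread,
 * activates spares if the resync/recovery completed cleanly, lets the
 * personality finish a reshape, writes the superblocks out, then clears
 * the MD_RECOVERY_* state bits and wakes anyone waiting in resync_wait.
 * Typically called with the mddev reconfig mutex held.
 */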
9100void md_reap_sync_thread(struct mddev *mddev)
9101{
9102 struct md_rdev *rdev;
9103 sector_t old_dev_sectors = mddev->dev_sectors;
9104 bool is_reshaped = false;
9105
	/* resync has finished, collect result */
9107 md_unregister_thread(&mddev->sync_thread);
9108 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9109 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9110 mddev->degraded != mddev->raid_disks) {
		/* The sync thread finished without being interrupted:
		 * activate any spares that are now in_sync.
		 */
9113 if (mddev->pers->spare_active(mddev)) {
9114 sysfs_notify(&mddev->kobj, NULL,
9115 "degraded");
9116 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9117 }
9118 }
9119 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9120 mddev->pers->finish_reshape) {
9121 mddev->pers->finish_reshape(mddev);
9122 if (mddev_is_clustered(mddev))
9123 is_reshaped = true;
9124 }
	/* If the array is no longer degraded, then any saved_raid_disk
	 * information must be scrapped.
	 */

9129 if (!mddev->degraded)
9130 rdev_for_each(rdev, mddev)
9131 rdev->saved_raid_disk = -1;
9132
9133 md_update_sb(mddev, 1);
	/* MD_SB_CHANGE_PENDING should be clear after md_update_sb(), so it
	 * is safe to let clustered raid release its resync lock here.
	 */
9137 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9138 md_cluster_ops->resync_finish(mddev);
9139 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9140 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9141 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9142 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9143 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9144 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * Call update_size here because sync_size could have been changed
	 * by md_update_sb, and MD_RECOVERY_RESHAPE is cleared above, so it
	 * is time to update the size across the cluster.
	 */
9150 if (mddev_is_clustered(mddev) && is_reshaped
9151 && !test_bit(MD_CLOSING, &mddev->flags))
9152 md_cluster_ops->update_size(mddev, old_dev_sectors);
9153 wake_up(&resync_wait);
	/* flag recovery needed just to double check */
9155 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9156 sysfs_notify_dirent_safe(mddev->sysfs_action);
9157 md_new_event(mddev);
9158 if (mddev->event_work.func)
9159 queue_work(md_misc_wq, &mddev->event_work);
9160}
9161EXPORT_SYMBOL(md_reap_sync_thread);
9162
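/*
 * Wait (for at most five seconds) for a device to stop being Blocked or
 * BlockedBadBlocks, typically because md_check_recovery() has updated the
 * superblock to record new bad blocks.  The reference the caller holds on
 * @rdev (nr_pending) is dropped before returning.
 */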
9163void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9164{
9165 sysfs_notify_dirent_safe(rdev->sysfs_state);
9166 wait_event_timeout(rdev->blocked_wait,
9167 !test_bit(Blocked, &rdev->flags) &&
9168 !test_bit(BlockedBadBlocks, &rdev->flags),
9169 msecs_to_jiffies(5000));
9170 rdev_dec_pending(rdev, mddev);
9171}
9172EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9173
9174void md_finish_reshape(struct mddev *mddev)
9175{
	/* called by the personality module when a reshape completes */
9177 struct md_rdev *rdev;
9178
9179 rdev_for_each(rdev, mddev) {
9180 if (rdev->data_offset > rdev->new_data_offset)
9181 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9182 else
9183 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9184 rdev->data_offset = rdev->new_data_offset;
9185 }
9186}
9187EXPORT_SYMBOL(md_finish_reshape);
9188

/* Bad block management */

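/*
 * Record @sectors sectors starting at @s (relative to the data area that
 * @is_new selects) as bad on @rdev.  On success the change is pushed to
 * sysfs and the superblock is marked for an update; returns 1 if the range
 * was recorded, 0 if it could not be.
 *
 * A minimal sketch of how a personality's write-error path might use it
 * (illustrative only; the variable names are placeholders, not taken from
 * any specific caller).  If the range cannot be recorded, the device is
 * failed instead:
 *
 *	if (!rdev_set_badblocks(rdev, sector, nr_sectors, 0))
 *		md_error(mddev, rdev);
 */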
9192int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9193 int is_new)
9194{
9195 struct mddev *mddev = rdev->mddev;
9196 int rv;
9197 if (is_new)
9198 s += rdev->new_data_offset;
9199 else
9200 s += rdev->data_offset;
9201 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9202 if (rv == 0) {
9203
9204 if (test_bit(ExternalBbl, &rdev->flags))
9205 sysfs_notify(&rdev->kobj, NULL,
9206 "unacknowledged_bad_blocks");
9207 sysfs_notify_dirent_safe(rdev->sysfs_state);
9208 set_mask_bits(&mddev->sb_flags, 0,
9209 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9210 md_wakeup_thread(rdev->mddev->thread);
9211 return 1;
9212 } else
9213 return 0;
9214}
9215EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9216
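/*
 * Remove a previously recorded bad-block range, e.g. after the blocks have
 * been successfully rewritten.  Returns the result of badblocks_clear().
 */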
9217int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9218 int is_new)
9219{
9220 int rv;
9221 if (is_new)
9222 s += rdev->new_data_offset;
9223 else
9224 s += rdev->data_offset;
9225 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9226 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9227 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
9228 return rv;
9229}
9230EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9231
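/*
 * Reboot notifier: on shutdown/restart, stop writes on every array we can
 * lock and put persistent arrays into immediate-safemode so the superblocks
 * are marked clean before the machine goes down.
 */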
9232static int md_notify_reboot(struct notifier_block *this,
9233 unsigned long code, void *x)
9234{
9235 struct list_head *tmp;
9236 struct mddev *mddev;
9237 int need_delay = 0;
9238
9239 for_each_mddev(mddev, tmp) {
9240 if (mddev_trylock(mddev)) {
9241 if (mddev->pers)
9242 __md_stop_writes(mddev);
9243 if (mddev->persistent)
9244 mddev->safemode = 2;
9245 mddev_unlock(mddev);
9246 }
9247 need_delay = 1;
9248 }
	/*
	 * Certain more exotic SCSI devices are known to be volatile with
	 * respect to too-early system reboots.  While the right place to
	 * handle this is the individual driver, we want md to play it safe
	 * and delay the reboot a little.
	 */
9255 if (need_delay)
9256 mdelay(1000*1);
9257
9258 return NOTIFY_DONE;
9259}
9260
9261static struct notifier_block md_notifier = {
9262 .notifier_call = md_notify_reboot,
9263 .next = NULL,
9264 .priority = INT_MAX,
9265};
9266
9267static void md_geninit(void)
9268{
9269 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9270
9271 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
9272}
9273
9274static int __init md_init(void)
9275{
9276 int ret = -ENOMEM;
9277
9278 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9279 if (!md_wq)
9280 goto err_wq;
9281
9282 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9283 if (!md_misc_wq)
9284 goto err_misc_wq;
9285
9286 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
9287 goto err_md;
9288
9289 if ((ret = register_blkdev(0, "mdp")) < 0)
9290 goto err_mdp;
9291 mdp_major = ret;
9292
9293 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
9294 md_probe, NULL, NULL);
9295 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
9296 md_probe, NULL, NULL);
9297
9298 register_reboot_notifier(&md_notifier);
9299 raid_table_header = register_sysctl_table(raid_root_table);
9300
9301 md_geninit();
9302 return 0;
9303
9304err_mdp:
9305 unregister_blkdev(MD_MAJOR, "md");
9306err_md:
9307 destroy_workqueue(md_misc_wq);
9308err_misc_wq:
9309 destroy_workqueue(md_wq);
9310err_wq:
9311 return ret;
9312}
9313
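/*
 * Clustered MD: apply superblock changes made by another node.  This covers
 * resizing, kicking out devices whose addition failed, activating spares,
 * failing devices marked faulty remotely, raid_disks/reshape updates, and
 * finally bringing the local event count up to date.
 */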
9314static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9315{
9316 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9317 struct md_rdev *rdev2;
9318 int role, ret;
9319 char b[BDEVNAME_SIZE];
9320
	/*
	 * If the size was changed on another node then we need to
	 * do the resize here as well.
	 */
9325 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9326 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9327 if (ret)
9328 pr_info("md-cluster: resize failed\n");
9329 else
9330 md_bitmap_update_sb(mddev->bitmap);
9331 }

	/* Check for a change of roles in the active devices */
9334 rdev_for_each(rdev2, mddev) {
9335 if (test_bit(Faulty, &rdev2->flags))
9336 continue;

		/* Check if this device's role changed */
9339 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9340
9341 if (test_bit(Candidate, &rdev2->flags)) {
9342 if (role == 0xfffe) {
9343 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9344 md_kick_rdev_from_array(rdev2);
9345 continue;
9346 }
9347 else
9348 clear_bit(Candidate, &rdev2->flags);
9349 }
9350
9351 if (role != rdev2->raid_disk) {
			/* A device that was a spare here has been activated
			 * on the other node (unless a reshape is in progress).
			 */
9355 if (rdev2->raid_disk == -1 && role != 0xffff &&
9356 !(le32_to_cpu(sb->feature_map) &
9357 MD_FEATURE_RESHAPE_ACTIVE)) {
9358 rdev2->saved_raid_disk = role;
9359 ret = remove_and_add_spares(mddev, rdev2);
9360 pr_info("Activated spare: %s\n",
9361 bdevname(rdev2->bdev,b));
				/* Wake up mddev->thread so the array can start
				 * resync with the newly activated disk. */
9364 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9365 md_wakeup_thread(mddev->thread);
9366 }
			/*
			 * We just want to do the minimum to mark the disk
			 * as faulty.  The recovery is performed by the
			 * node that initiated the error.
			 */
9372 if ((role == 0xfffe) || (role == 0xfffd)) {
9373 md_error(mddev, rdev2);
9374 clear_bit(Blocked, &rdev2->flags);
9375 }
9376 }
9377 }
9378
9379 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
9380 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/*
	 * Since mddev->delta_disks has already been updated in
	 * update_raid_disks, it is now time to check the reshape state.
	 */
9386 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9387 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * A reshape is happening on the remote node: update
		 * reshape_position and kick off start_reshape locally.
		 */
9392 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9393 if (mddev->pers->update_reshape_pos)
9394 mddev->pers->update_reshape_pos(mddev);
9395 if (mddev->pers->start_reshape)
9396 mddev->pers->start_reshape(mddev);
9397 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9398 mddev->reshape_position != MaxSector &&
9399 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* The reshape has just finished on another node */
9401 mddev->reshape_position = MaxSector;
9402 if (mddev->pers->update_reshape_pos)
9403 mddev->pers->update_reshape_pos(mddev);
9404 }

	/* Finally bring the event count up to date */
9407 mddev->events = le64_to_cpu(sb->events);
9408}
9409
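/*
 * Re-read the superblock of @rdev into a freshly allocated page.  On failure
 * the previous superblock page is restored; on success any recovery offset
 * recorded by the other node is picked up and spares are activated if that
 * node finished recovering this device.
 */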
9410static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9411{
9412 int err;
9413 struct page *swapout = rdev->sb_page;
9414 struct mdp_superblock_1 *sb;
	/*
	 * Stash the current sb page in 'swapout' so it can be restored
	 * if re-reading the superblock fails below.
	 */
9419 rdev->sb_page = NULL;
9420 err = alloc_disk_sb(rdev);
9421 if (err == 0) {
9422 ClearPageUptodate(rdev->sb_page);
9423 rdev->sb_loaded = 0;
9424 err = super_types[mddev->major_version].
9425 load_super(rdev, NULL, mddev->minor_version);
9426 }
9427 if (err < 0) {
9428 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9429 __func__, __LINE__, rdev->desc_nr, err);
9430 if (rdev->sb_page)
9431 put_page(rdev->sb_page);
9432 rdev->sb_page = swapout;
9433 rdev->sb_loaded = 1;
9434 return err;
9435 }
9436
9437 sb = page_address(rdev->sb_page);

	/* Pick up any recovery offset recorded by the other node */

9442 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9443 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery: call spare_active to update
	 * In_sync on the device and mddev->degraded.
	 */
9448 if (rdev->recovery_offset == MaxSector &&
9449 !test_bit(In_sync, &rdev->flags) &&
9450 mddev->pers->spare_active(mddev))
9451 sysfs_notify(&mddev->kobj, NULL, "degraded");
9452
9453 put_page(swapout);
9454 return 0;
9455}
9456
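/*
 * Used by md-cluster when another node reports a metadata update: re-read
 * the superblock of the device with descriptor number @nr, apply any
 * changes, then refresh the remaining (non-faulty) devices.
 */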
9457void md_reload_sb(struct mddev *mddev, int nr)
9458{
9459 struct md_rdev *rdev;
9460 int err;

	/* Find the rdev with descriptor number 'nr' */
9463 rdev_for_each_rcu(rdev, mddev) {
9464 if (rdev->desc_nr == nr)
9465 break;
9466 }
9467
9468 if (!rdev || rdev->desc_nr != nr) {
9469 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9470 return;
9471 }
9472
9473 err = read_rdev(mddev, rdev);
9474 if (err < 0)
9475 return;
9476
9477 check_sb_changes(mddev, rdev);

	/* Re-read all rdevs to pick up any changed recovery offsets */
9480 rdev_for_each_rcu(rdev, mddev) {
9481 if (!test_bit(Faulty, &rdev->flags))
9482 read_rdev(mddev, rdev);
9483 }
9484}
9485EXPORT_SYMBOL(md_reload_sb);
9486
9487#ifndef MODULE

/*
 * Support for booting: scan the devices recorded by md_autodetect_dev()
 * and autostart any RAID arrays found on them.
 */

9494static DEFINE_MUTEX(detected_devices_mutex);
9495static LIST_HEAD(all_detected_devices);
9496struct detected_devices_node {
9497 struct list_head list;
9498 dev_t dev;
9499};
9500
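/*
 * Called during boot for each partition flagged for RAID autodetection;
 * the device is queued on all_detected_devices for autostart_arrays().
 */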
9501void md_autodetect_dev(dev_t dev)
9502{
9503 struct detected_devices_node *node_detected_dev;
9504
9505 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9506 if (node_detected_dev) {
9507 node_detected_dev->dev = dev;
9508 mutex_lock(&detected_devices_mutex);
9509 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9510 mutex_unlock(&detected_devices_mutex);
9511 }
9512}
9513
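/*
 * Import every device queued by md_autodetect_dev() and try to assemble
 * and run the arrays they belong to.
 */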
9514static void autostart_arrays(int part)
9515{
9516 struct md_rdev *rdev;
9517 struct detected_devices_node *node_detected_dev;
9518 dev_t dev;
9519 int i_scanned, i_passed;
9520
9521 i_scanned = 0;
9522 i_passed = 0;
9523
9524 pr_info("md: Autodetecting RAID arrays.\n");
9525
9526 mutex_lock(&detected_devices_mutex);
9527 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9528 i_scanned++;
9529 node_detected_dev = list_entry(all_detected_devices.next,
9530 struct detected_devices_node, list);
9531 list_del(&node_detected_dev->list);
9532 dev = node_detected_dev->dev;
9533 kfree(node_detected_dev);
9534 mutex_unlock(&detected_devices_mutex);
9535 rdev = md_import_device(dev,0, 90);
9536 mutex_lock(&detected_devices_mutex);
9537 if (IS_ERR(rdev))
9538 continue;
9539
9540 if (test_bit(Faulty, &rdev->flags))
9541 continue;
9542
9543 set_bit(AutoDetected, &rdev->flags);
9544 list_add(&rdev->same_set, &pending_raid_disks);
9545 i_passed++;
9546 }
9547 mutex_unlock(&detected_devices_mutex);
9548
9549 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9550
9551 autorun_devices(part);
9552}
9553
9554#endif
9555
9556static __exit void md_exit(void)
9557{
9558 struct mddev *mddev;
9559 struct list_head *tmp;
9560 int delay = 1;
9561
9562 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
9563 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
9564
9565 unregister_blkdev(MD_MAJOR,"md");
9566 unregister_blkdev(mdp_major, "mdp");
9567 unregister_reboot_notifier(&md_notifier);
9568 unregister_sysctl_table(raid_table_header);
9569
	/* We cannot unload the module while some process is waiting for us
	 * in select()/poll() on /proc/mdstat - wake such waiters up first.
	 */
9573 md_unloading = 1;
9574 while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
9576 wake_up(&md_event_waiters);
9577 msleep(delay);
9578 delay += delay;
9579 }
9580 remove_proc_entry("mdstat", NULL);
9581
9582 for_each_mddev(mddev, tmp) {
9583 export_array(mddev);
9584 mddev->ctime = 0;
9585 mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this schedules
		 * the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() calls below will wait for that to
		 * complete.
		 */
9592 }
9593 destroy_workqueue(md_misc_wq);
9594 destroy_workqueue(md_wq);
9595}
9596
9597subsys_initcall(md_init);
9598module_exit(md_exit)
9599
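/*
 * 'start_ro' module parameter: when set, newly started arrays stay
 * read-only until the first write request arrives.
 */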
9600static int get_ro(char *buffer, const struct kernel_param *kp)
9601{
9602 return sprintf(buffer, "%d", start_readonly);
9603}
9604static int set_ro(const char *val, const struct kernel_param *kp)
9605{
9606 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9607}
9608
9609module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9610module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9611module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9612module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9613
9614MODULE_LICENSE("GPL");
9615MODULE_DESCRIPTION("MD RAID framework");
9616MODULE_ALIAS("md");
9617MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9618