/*
   md.c : Multiple Devices driver for Linux
     Copyright (C) 1998, 1999, 2000 Ingo Molnar

     completely rewritten, based on the MD driver code from Marc Zyngier

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/* pers_list is a list of registered personalities protected
 * by pers_lock.
 * pers_lock does extra service to protect accesses to
 * mddev->thread when the mutex cannot be held.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static void * flush_info_alloc(gfp_t gfp_flags, void *data)
{
	return kzalloc(sizeof(struct flush_info), gfp_flags);
}
static void flush_info_free(void *flush_info, void *data)
{
	kfree(flush_info);
}

static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
{
	return kzalloc(sizeof(struct flush_bio), gfp_flags);
}
static void flush_bio_free(void *flush_bio, void *data)
{
	kfree(flush_bio);
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter.
 * This can avoid races.
 * Setting create_on_open to false disables the original mechanism
 * so all the races disappear.
 */
static bool create_on_open = true;

struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->bio_set))
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

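/* Allocate a single-segment GFP_NOIO bio from the mddev's sync_set
 * bioset, falling back to a plain allocation while the array's biosets
 * are not (or are no longer) initialized.
 */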
static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->sync_set))
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables iteration over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while owning
 * a reference to the current mddev must mddev_put it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

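/* Pass a bio to the personality's make_request function, sleeping
 * (with the RCU read lock dropped) while the affected range is
 * suspended.  active_io is raised across the call so that
 * mddev_suspend() can wait for all in-flight requests to drain.
 */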
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	const int sgrp = op_stat_group(bio_op(bio));
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;

	blk_queue_split(q, &bio);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	part_stat_lock();
	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}

/*
 * Generic flush handling for md
 */
static void submit_flushes(struct work_struct *ws)
{
	struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
	struct mddev *mddev = fi->mddev;
	struct bio *bio = fi->bio;

	bio->bi_opf &= ~REQ_PREFLUSH;
	md_handle_request(mddev, bio);

	mempool_free(fi, mddev->flush_pool);
}

static void md_end_flush(struct bio *fbio)
{
	struct flush_bio *fb = fbio->bi_private;
	struct md_rdev *rdev = fb->rdev;
	struct flush_info *fi = fb->fi;
	struct bio *bio = fi->bio;
	struct mddev *mddev = fi->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&fi->flush_pending)) {
		if (bio->bi_iter.bi_size == 0) {
			/* an empty barrier - all done */
			bio_endio(bio);
			mempool_free(fi, mddev->flush_pool);
		} else {
			INIT_WORK(&fi->flush_work, submit_flushes);
			queue_work(md_wq, &fi->flush_work);
		}
	}

	mempool_free(fb, mddev->flush_bio_pool);
	bio_put(fbio);
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	struct md_rdev *rdev;
	struct flush_info *fi;

	fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);

	fi->bio = bio;
	fi->mddev = mddev;
	atomic_set(&fi->flush_pending, 1);

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			struct flush_bio *fb;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();

			fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
			fb->fi = fi;
			fb->rdev = rdev;

			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bio_set_dev(bi, rdev->bdev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = fb;
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

			atomic_inc(&fi->flush_pending);
			submit_bio(bi);

			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();

	if (atomic_dec_and_test(&fi->flush_pending)) {
		if (bio->bi_iter.bi_size == 0) {
			/* an empty barrier - all done */
			bio_endio(bio);
			mempool_free(fi, mddev->flush_pool);
		} else {
			INIT_WORK(&fi->flush_work, submit_flushes);
			queue_work(md_wq, &fi->flush_work);
		}
	}
}
EXPORT_SYMBOL(md_flush_request);

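/* mddev reference counting: mddev_get() takes a reference, mddev_put()
 * drops it.  When the last reference goes away and the array was never
 * configured (no disks, no ctime) and is not held active, the mddev is
 * unlinked from all_mddevs and freed from a workqueue.
 */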
static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);

		/*
		 * Call queue_work inside the spinlock so that
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

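/* Find the mddev for the given unit number, creating and initialising
 * one if it does not exist yet.  With unit == 0 a currently unused
 * dynamic minor (starting at 512) is allocated instead.  The lookup is
 * retried after dropping all_mddevs_lock to allocate memory.
 */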
static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So we set sysfs_active while the remove is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

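/* Look up a personality by numeric level or by name.  pers_list is
 * protected by pers_lock, which callers are expected to hold.
 */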
static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel)==0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: super_written gets error=%d\n", bio->bi_status);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = md_bio_alloc_sync(mddev);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}

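/* Synchronously read or write one chunk at the given sector of an rdev.
 * For metadata the sector is relative to sb_start; for data it is offset
 * by data_offset (or new_data_offset once a reshape has passed this
 * point).  Returns 1 on success and 0 on failure.
 */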
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = md_bio_alloc_sync(rdev->mddev);
	int ret;

	if (metadata_op && rdev->meta_bdev)
		bio_set_dev(bio, rdev->meta_bdev);
	else
		bio_set_dev(bio, rdev->bdev);
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_status;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev,b));
	return -EINVAL;
}

static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

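/* Fold a 32-bit checksum down to 16 bits, adding in the carry from the
 * first fold so the result matches the historic csum_fold() behaviour.
 */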
static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32*)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * using), but it is good to never change the result.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *      Update the superblock for rdev with data in mddev
 *      This does not write to disc.
 *
 */

struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 *
 */
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!md_uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev,b2));
			goto abort;
		}
		if (!md_sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
	    sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC) /* &&
			    desc->raid_disk < mddev->raid_disks */) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words  = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
	    rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}

/*
 * version 1 superblock
 */

static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32*)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16*) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

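/* Load a 1.x superblock.  minor_version selects where the superblock
 * lives: 0 = at the end of the device, 1 = at the start, 2 = 4K from
 * the start.  Returns 1 if this device should become the new reference,
 * 0 if the reference is newer, or a negative errno on failure.
 */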
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */

	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks */ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
			    cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;

}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata, so stay
	 * 36K ((32+4)*2 sectors) beyond the start of the superblock,
	 * beyond the end of the badblocks list, and beyond any
	 * write-intent bitmap stored on the device.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};

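/* Write mddev state into rdev's in-memory superblock image, using the
 * mddev's private sync_super hook if one is installed (external metadata
 * users such as dm-raid set one), otherwise dispatching on the array's
 * major_version.
 */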
static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

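/* Return 1 if the two arrays share a component device, i.e. any rdev of
 * mddev1 lives on the same underlying whole disk as any rdev of mddev2.
 * Spares, journal devices and failed devices are ignored.
 */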
static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);

/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
		pr_err("md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		pr_err("%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
	    mddev->pers)
		return -EROFS;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
	rcu_read_lock();
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers)
			choice = mddev->raid_disks;
		while (md_find_rdev_nr_rcu(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
			rcu_read_unlock();
			return -EBUSY;
		}
	}
	rcu_read_unlock();
	if (!test_bit(Journal, &rdev->flags) &&
	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		pr_warn("md: %s: array is limited to %d devices\n",
			mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev,b);
	strreplace(b, '/', '!');

	rdev->mddev = mddev;
	pr_debug("md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled++;

	return 0;

 fail:
	pr_warn("md: failed to register dev-%s for %s\n",
		b, mdname(mddev));
	return err;
}

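/* Deleting the rdev kobject is deferred to a workqueue: doing it
 * synchronously could deadlock when the removal is triggered from a
 * sysfs write to one of the rdev's own attribute files.
 */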
2248static void md_delayed_delete(struct work_struct *ws)
2249{
2250 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2251 kobject_del(&rdev->kobj);
2252 kobject_put(&rdev->kobj);
2253}
2254
2255static void unbind_rdev_from_array(struct md_rdev *rdev)
2256{
2257 char b[BDEVNAME_SIZE];
2258
2259 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2260 list_del_rcu(&rdev->same_set);
2261 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2262 rdev->mddev = NULL;
2263 sysfs_remove_link(&rdev->kobj, "block");
2264 sysfs_put(rdev->sysfs_state);
2265 rdev->sysfs_state = NULL;
2266 rdev->badblocks.count = 0;

	/*
	 * We need to delay the final kobject removal, otherwise we can
	 * deadlock when a write to 'dev/state' is what triggered this
	 * unbind.  We also need synchronize_rcu() so that RCU list
	 * walkers are done with this rdev before it can be freed.
	 */
	synchronize_rcu();
2272 INIT_WORK(&rdev->del_work, md_delayed_delete);
2273 kobject_get(&rdev->kobj);
2274 queue_work(md_misc_wq, &rdev->del_work);
2275}
2276
/*
 * Lock a device: it is claimed exclusively (FMODE_EXCL) so that it
 * cannot be mounted, repartitioned, or grabbed by another array while
 * it belongs to this rdev.
 */
2282static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2283{
2284 int err = 0;
2285 struct block_device *bdev;
2286 char b[BDEVNAME_SIZE];
2287
2288 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2289 shared ? (struct md_rdev *)lock_rdev : rdev);
2290 if (IS_ERR(bdev)) {
2291 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2292 return PTR_ERR(bdev);
2293 }
2294 rdev->bdev = bdev;
2295 return err;
2296}
2297
2298static void unlock_rdev(struct md_rdev *rdev)
2299{
2300 struct block_device *bdev = rdev->bdev;
2301 rdev->bdev = NULL;
2302 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2303}
2304
2305void md_autodetect_dev(dev_t dev);
2306
2307static void export_rdev(struct md_rdev *rdev)
2308{
2309 char b[BDEVNAME_SIZE];
2310
2311 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2312 md_rdev_clear(rdev);
2313#ifndef MODULE
2314 if (test_bit(AutoDetected, &rdev->flags))
2315 md_autodetect_dev(rdev->bdev->bd_dev);
2316#endif
2317 unlock_rdev(rdev);
2318 kobject_put(&rdev->kobj);
2319}
2320
2321void md_kick_rdev_from_array(struct md_rdev *rdev)
2322{
2323 unbind_rdev_from_array(rdev);
2324 export_rdev(rdev);
2325}
2326EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2327
2328static void export_array(struct mddev *mddev)
2329{
2330 struct md_rdev *rdev;
2331
2332 while (!list_empty(&mddev->disks)) {
2333 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2334 same_set);
2335 md_kick_rdev_from_array(rdev);
2336 }
2337 mddev->raid_disks = 0;
2338 mddev->major_version = 0;
2339}
2340
2341static bool set_in_sync(struct mddev *mddev)
2342{
2343 lockdep_assert_held(&mddev->lock);
2344 if (!mddev->in_sync) {
2345 mddev->sync_checkers++;
2346 spin_unlock(&mddev->lock);
2347 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2348 spin_lock(&mddev->lock);
2349 if (!mddev->in_sync &&
2350 percpu_ref_is_zero(&mddev->writes_pending)) {
2351 mddev->in_sync = 1;
			/*
			 * Ensure ->in_sync is visible before we clear
			 * ->sync_checkers.
			 */
			smp_mb();
2357 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2358 sysfs_notify_dirent_safe(mddev->sysfs_state);
2359 }
2360 if (--mddev->sync_checkers == 0)
2361 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2362 }
2363 if (mddev->safemode == 1)
2364 mddev->safemode = 0;
2365 return mddev->in_sync;
2366}
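
/*
 * Note added for clarity (not in the original source): set_in_sync()
 * briefly flips writes_pending into atomic mode so that
 * percpu_ref_is_zero() gives an exact answer; sync_checkers counts
 * nested callers so that only the last one out switches the ref back
 * to fast percpu mode.
 */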
2367
2368static void sync_sbs(struct mddev *mddev, int nospares)
2369{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2376 struct md_rdev *rdev;
2377 rdev_for_each(rdev, mddev) {
2378 if (rdev->sb_events == mddev->events ||
2379 (nospares &&
2380 rdev->raid_disk < 0 &&
2381 rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
			rdev->sb_loaded = 2;
2384 } else {
2385 sync_super(mddev, rdev);
2386 rdev->sb_loaded = 1;
2387 }
2388 }
2389}
2390
2391static bool does_sb_need_changing(struct mddev *mddev)
2392{
2393 struct md_rdev *rdev;
2394 struct mdp_superblock_1 *sb;
2395 int role;

	/* Find a good rdev */
2398 rdev_for_each(rdev, mddev)
2399 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2400 break;
2401
	/* No good device found. */
2403 if (!rdev)
2404 return false;
2405
2406 sb = page_address(rdev->sb_page);
2407
2408 rdev_for_each(rdev, mddev) {
2409 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
		if (role == 0xffff && rdev->raid_disk >= 0 &&
2412 !test_bit(Faulty, &rdev->flags))
2413 return true;
		/* Device turned faulty? */
2415 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2416 return true;
2417 }
2418
	/* Check if any mddev parameters have changed */
2420 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2421 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2422 (mddev->layout != le32_to_cpu(sb->layout)) ||
2423 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2424 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2425 return true;
2426
2427 return false;
2428}
2429
2430void md_update_sb(struct mddev *mddev, int force_change)
2431{
2432 struct md_rdev *rdev;
2433 int sync_req;
2434 int nospares = 0;
2435 int any_badblocks_changed = 0;
2436 int ret = -1;
2437
2438 if (mddev->ro) {
2439 if (force_change)
2440 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2441 return;
2442 }
2443
2444repeat:
2445 if (mddev_is_clustered(mddev)) {
2446 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2447 force_change = 1;
2448 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2449 nospares = 1;
2450 ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has anybody else updated the superblock meanwhile? */
2452 if (!does_sb_need_changing(mddev)) {
2453 if (ret == 0)
2454 md_cluster_ops->metadata_update_cancel(mddev);
2455 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2456 BIT(MD_SB_CHANGE_DEVS) |
2457 BIT(MD_SB_CHANGE_CLEAN));
2458 return;
2459 }
2460 }
2461
	/*
	 * First make sure individual recovery_offsets are correct.
	 * curr_resync_completed can only be used during recovery, as
	 * during reshape/resync it might hold array addresses rather
	 * than device addresses.
	 */
2468 rdev_for_each(rdev, mddev) {
2469 if (rdev->raid_disk >= 0 &&
2470 mddev->delta_disks >= 0 &&
2471 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2472 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2473 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2474 !test_bit(Journal, &rdev->flags) &&
2475 !test_bit(In_sync, &rdev->flags) &&
2476 mddev->curr_resync_completed > rdev->recovery_offset)
2477 rdev->recovery_offset = mddev->curr_resync_completed;
2478
2479 }
2480 if (!mddev->persistent) {
2481 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2482 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2483 if (!mddev->external) {
2484 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2485 rdev_for_each(rdev, mddev) {
2486 if (rdev->badblocks.changed) {
2487 rdev->badblocks.changed = 0;
2488 ack_all_badblocks(&rdev->badblocks);
2489 md_error(mddev, rdev);
2490 }
2491 clear_bit(Blocked, &rdev->flags);
2492 clear_bit(BlockedBadBlocks, &rdev->flags);
2493 wake_up(&rdev->blocked_wait);
2494 }
2495 }
2496 wake_up(&mddev->sb_wait);
2497 return;
2498 }
2499
2500 spin_lock(&mddev->lock);
2501
2502 mddev->utime = ktime_get_real_seconds();
2503
2504 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2505 force_change = 1;
2506 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
		nospares = 1;
2512 if (force_change)
2513 nospares = 0;
2514 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly common.
		 * If a spare's event count is allowed to lag while the
		 * array changes, a later assembly could wrongly treat
		 * that spare as up to date, so never skip superblock
		 * updates while degraded.
		 */
		nospares = 0;
2525
2526 sync_req = mddev->in_sync;
2527
	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
2530 if (nospares
2531 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2532 && mddev->can_decrease_events
2533 && mddev->events != 1) {
2534 mddev->events--;
2535 mddev->can_decrease_events = 0;
2536 } else {
		/* otherwise we have to go forward */
		mddev->events++;
2539 mddev->can_decrease_events = nospares;
2540 }
2541
	/*
	 * This 64-bit counter should never wrap.
	 * Either we are in around ~1 trillion A.C., assuming
	 * 1 reboot per second, or we have a bug...
	 */
	WARN_ON(mddev->events == 0);
2548
2549 rdev_for_each(rdev, mddev) {
2550 if (rdev->badblocks.changed)
2551 any_badblocks_changed++;
2552 if (test_bit(Faulty, &rdev->flags))
2553 set_bit(FaultRecorded, &rdev->flags);
2554 }
2555
2556 sync_sbs(mddev, nospares);
2557 spin_unlock(&mddev->lock);
2558
2559 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2560 mdname(mddev), mddev->in_sync);
2561
2562 if (mddev->queue)
2563 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2564rewrite:
2565 md_bitmap_update_sb(mddev->bitmap);
2566 rdev_for_each(rdev, mddev) {
2567 char b[BDEVNAME_SIZE];
2568
2569 if (rdev->sb_loaded != 1)
2570 continue;
2571
2572 if (!test_bit(Faulty, &rdev->flags)) {
2573 md_super_write(mddev,rdev,
2574 rdev->sb_start, rdev->sb_size,
2575 rdev->sb_page);
2576 pr_debug("md: (write) %s's sb offset: %llu\n",
2577 bdevname(rdev->bdev, b),
2578 (unsigned long long)rdev->sb_start);
2579 rdev->sb_events = mddev->events;
2580 if (rdev->badblocks.size) {
2581 md_super_write(mddev, rdev,
2582 rdev->badblocks.sector,
2583 rdev->badblocks.size << 9,
2584 rdev->bb_page);
2585 rdev->badblocks.size = 0;
2586 }
2587
2588 } else
2589 pr_debug("md: %s (skipping faulty)\n",
2590 bdevname(rdev->bdev, b));
2591
2592 if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
			break;
2595 }
2596 if (md_super_wait(mddev) < 0)
2597 goto rewrite;
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */

2600 if (mddev_is_clustered(mddev) && ret == 0)
2601 md_cluster_ops->metadata_update_finish(mddev);
2602
2603 if (mddev->in_sync != sync_req ||
2604 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2605 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* have to write it out again */
		goto repeat;
2608 wake_up(&mddev->sb_wait);
2609 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2610 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2611
2612 rdev_for_each(rdev, mddev) {
2613 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2614 clear_bit(Blocked, &rdev->flags);
2615
2616 if (any_badblocks_changed)
2617 ack_all_badblocks(&rdev->badblocks);
2618 clear_bit(BlockedBadBlocks, &rdev->flags);
2619 wake_up(&rdev->blocked_wait);
2620 }
2621}
2622EXPORT_SYMBOL(md_update_sb);
2623
2624static int add_bound_rdev(struct md_rdev *rdev)
2625{
2626 struct mddev *mddev = rdev->mddev;
2627 int err = 0;
2628 bool add_journal = test_bit(Journal, &rdev->flags);
2629
2630 if (!mddev->pers->hot_remove_disk || add_journal) {
		/* If there is hot_add_disk but no hot_remove_disk
		 * then added disks for geometry changes,
		 * and should be added immediately.
		 */
2635 super_types[mddev->major_version].
2636 validate_super(mddev, rdev);
2637 if (add_journal)
2638 mddev_suspend(mddev);
2639 err = mddev->pers->hot_add_disk(mddev, rdev);
2640 if (add_journal)
2641 mddev_resume(mddev);
2642 if (err) {
2643 md_kick_rdev_from_array(rdev);
2644 return err;
2645 }
2646 }
2647 sysfs_notify_dirent_safe(rdev->sysfs_state);
2648
2649 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2650 if (mddev->degraded)
2651 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2652 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2653 md_new_event(mddev);
2654 md_wakeup_thread(mddev->thread);
2655 return 0;
2656}
2657
/* words written to sysfs files may, or may not, be \n terminated.
 * We want to accept with case. For this we use cmd_match.
 */
2661static int cmd_match(const char *cmd, const char *str)
2662{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline
	 */
2667 while (*cmd && *str && *cmd == *str) {
2668 cmd++;
2669 str++;
2670 }
2671 if (*cmd == '\n')
2672 cmd++;
2673 if (*str || *cmd)
2674 return 0;
2675 return 1;
2676}
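
/*
 * Worked example (added for illustration): by the rules above, a sysfs
 * write of "frozen\n" matches the literal "frozen", as does plain
 * "frozen", while "frozen2" and "froze" both fail to match.
 */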
2677
2678struct rdev_sysfs_entry {
2679 struct attribute attr;
2680 ssize_t (*show)(struct md_rdev *, char *);
2681 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2682};
2683
2684static ssize_t
2685state_show(struct md_rdev *rdev, char *page)
2686{
2687 char *sep = ",";
2688 size_t len = 0;
2689 unsigned long flags = READ_ONCE(rdev->flags);
2690
2691 if (test_bit(Faulty, &flags) ||
2692 (!test_bit(ExternalBbl, &flags) &&
2693 rdev->badblocks.unacked_exist))
2694 len += sprintf(page+len, "faulty%s", sep);
2695 if (test_bit(In_sync, &flags))
2696 len += sprintf(page+len, "in_sync%s", sep);
2697 if (test_bit(Journal, &flags))
2698 len += sprintf(page+len, "journal%s", sep);
2699 if (test_bit(WriteMostly, &flags))
2700 len += sprintf(page+len, "write_mostly%s", sep);
2701 if (test_bit(Blocked, &flags) ||
2702 (rdev->badblocks.unacked_exist
2703 && !test_bit(Faulty, &flags)))
2704 len += sprintf(page+len, "blocked%s", sep);
2705 if (!test_bit(Faulty, &flags) &&
2706 !test_bit(Journal, &flags) &&
2707 !test_bit(In_sync, &flags))
2708 len += sprintf(page+len, "spare%s", sep);
2709 if (test_bit(WriteErrorSeen, &flags))
2710 len += sprintf(page+len, "write_error%s", sep);
2711 if (test_bit(WantReplacement, &flags))
2712 len += sprintf(page+len, "want_replacement%s", sep);
2713 if (test_bit(Replacement, &flags))
2714 len += sprintf(page+len, "replacement%s", sep);
2715 if (test_bit(ExternalBbl, &flags))
2716 len += sprintf(page+len, "external_bbl%s", sep);
2717 if (test_bit(FailFast, &flags))
2718 len += sprintf(page+len, "failfast%s", sep);
2719
2720 if (len)
2721 len -= strlen(sep);
2722
2723 return len+sprintf(page+len, "\n");
2724}
2725
2726static ssize_t
2727state_store(struct md_rdev *rdev, const char *buf, size_t len)
2728{
	/* can write
	 *  faulty  - simulates an error
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flags
	 *  -blocked - clears the Blocked and possibly simulates an error
	 *  insync - sets Insync providing device isn't active
	 *  -insync - clear Insync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 *  {,-}failfast - set/clear FailFast
	 */
2743 int err = -EINVAL;
2744 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2745 md_error(rdev->mddev, rdev);
2746 if (test_bit(Faulty, &rdev->flags))
2747 err = 0;
2748 else
2749 err = -EBUSY;
2750 } else if (cmd_match(buf, "remove")) {
2751 if (rdev->mddev->pers) {
2752 clear_bit(Blocked, &rdev->flags);
2753 remove_and_add_spares(rdev->mddev, rdev);
2754 }
2755 if (rdev->raid_disk >= 0)
2756 err = -EBUSY;
2757 else {
2758 struct mddev *mddev = rdev->mddev;
2759 err = 0;
2760 if (mddev_is_clustered(mddev))
2761 err = md_cluster_ops->remove_disk(mddev, rdev);
2762
2763 if (err == 0) {
2764 md_kick_rdev_from_array(rdev);
2765 if (mddev->pers) {
2766 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2767 md_wakeup_thread(mddev->thread);
2768 }
2769 md_new_event(mddev);
2770 }
2771 }
2772 } else if (cmd_match(buf, "writemostly")) {
2773 set_bit(WriteMostly, &rdev->flags);
2774 err = 0;
2775 } else if (cmd_match(buf, "-writemostly")) {
2776 clear_bit(WriteMostly, &rdev->flags);
2777 err = 0;
2778 } else if (cmd_match(buf, "blocked")) {
2779 set_bit(Blocked, &rdev->flags);
2780 err = 0;
2781 } else if (cmd_match(buf, "-blocked")) {
2782 if (!test_bit(Faulty, &rdev->flags) &&
2783 !test_bit(ExternalBbl, &rdev->flags) &&
2784 rdev->badblocks.unacked_exist) {
			/* metadata handler doesn't understand badblocks,
			 * so we need to fail the device
			 */
			md_error(rdev->mddev, rdev);
2789 }
2790 clear_bit(Blocked, &rdev->flags);
2791 clear_bit(BlockedBadBlocks, &rdev->flags);
2792 wake_up(&rdev->blocked_wait);
2793 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2794 md_wakeup_thread(rdev->mddev->thread);
2795
2796 err = 0;
2797 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2798 set_bit(In_sync, &rdev->flags);
2799 err = 0;
2800 } else if (cmd_match(buf, "failfast")) {
2801 set_bit(FailFast, &rdev->flags);
2802 err = 0;
2803 } else if (cmd_match(buf, "-failfast")) {
2804 clear_bit(FailFast, &rdev->flags);
2805 err = 0;
2806 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2807 !test_bit(Journal, &rdev->flags)) {
2808 if (rdev->mddev->pers == NULL) {
2809 clear_bit(In_sync, &rdev->flags);
2810 rdev->saved_raid_disk = rdev->raid_disk;
2811 rdev->raid_disk = -1;
2812 err = 0;
2813 }
2814 } else if (cmd_match(buf, "write_error")) {
2815 set_bit(WriteErrorSeen, &rdev->flags);
2816 err = 0;
2817 } else if (cmd_match(buf, "-write_error")) {
2818 clear_bit(WriteErrorSeen, &rdev->flags);
2819 err = 0;
2820 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
2825 if (rdev->raid_disk >= 0 &&
2826 !test_bit(Journal, &rdev->flags) &&
2827 !test_bit(Replacement, &rdev->flags))
2828 set_bit(WantReplacement, &rdev->flags);
2829 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2830 md_wakeup_thread(rdev->mddev->thread);
2831 err = 0;
2832 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacements starts it is too late though.
		 */
2836 err = 0;
2837 clear_bit(WantReplacement, &rdev->flags);
2838 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, and easy to confuse by setting at random.
		 */
2843 if (rdev->mddev->pers)
2844 err = -EBUSY;
2845 else {
2846 set_bit(Replacement, &rdev->flags);
2847 err = 0;
2848 }
2849 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
2851 if (rdev->mddev->pers)
2852 err = -EBUSY;
2853 else {
2854 clear_bit(Replacement, &rdev->flags);
2855 err = 0;
2856 }
2857 } else if (cmd_match(buf, "re-add")) {
2858 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
2859 rdev->saved_raid_disk >= 0) {
			/* clear_bit is performed _after_ all the devices
			 * have their local Faulty bit cleared. If any writes
			 * happen in the meantime in the local node, they
			 * will land in the local bitmap, which will be synced
			 * by this node eventually
			 */
2866 if (!mddev_is_clustered(rdev->mddev) ||
2867 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2868 clear_bit(Faulty, &rdev->flags);
2869 err = add_bound_rdev(rdev);
2870 }
2871 } else
2872 err = -EBUSY;
2873 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2874 set_bit(ExternalBbl, &rdev->flags);
2875 rdev->badblocks.shift = 0;
2876 err = 0;
2877 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2878 clear_bit(ExternalBbl, &rdev->flags);
2879 err = 0;
2880 }
2881 if (!err)
2882 sysfs_notify_dirent_safe(rdev->sysfs_state);
2883 return err ? err : len;
2884}
2885static struct rdev_sysfs_entry rdev_state =
2886__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
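
/*
 * Usage sketch (illustrative only; the array and device names are
 * examples, not taken from this file):
 *
 *   echo faulty > /sys/block/md0/md/dev-sda1/state
 *   echo remove > /sys/block/md0/md/dev-sda1/state
 *
 * fails a member of md0 and then detaches it, while reading the same
 * file reports comma-separated flags such as "faulty,blocked" as
 * produced by state_show().
 */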
2887
2888static ssize_t
2889errors_show(struct md_rdev *rdev, char *page)
2890{
2891 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2892}
2893
2894static ssize_t
2895errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2896{
2897 unsigned int n;
2898 int rv;
2899
2900 rv = kstrtouint(buf, 10, &n);
2901 if (rv < 0)
2902 return rv;
2903 atomic_set(&rdev->corrected_errors, n);
2904 return len;
2905}
2906static struct rdev_sysfs_entry rdev_errors =
2907__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2908
2909static ssize_t
2910slot_show(struct md_rdev *rdev, char *page)
2911{
2912 if (test_bit(Journal, &rdev->flags))
2913 return sprintf(page, "journal\n");
2914 else if (rdev->raid_disk < 0)
2915 return sprintf(page, "none\n");
2916 else
2917 return sprintf(page, "%d\n", rdev->raid_disk);
2918}
2919
2920static ssize_t
2921slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2922{
2923 int slot;
2924 int err;
2925
2926 if (test_bit(Journal, &rdev->flags))
2927 return -EBUSY;
2928 if (strncmp(buf, "none", 4)==0)
2929 slot = -1;
2930 else {
2931 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2932 if (err < 0)
2933 return err;
2934 }
2935 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
2943 if (rdev->raid_disk == -1)
2944 return -EEXIST;
		/* personality does all needed checks */
2946 if (rdev->mddev->pers->hot_remove_disk == NULL)
2947 return -EINVAL;
2948 clear_bit(Blocked, &rdev->flags);
2949 remove_and_add_spares(rdev->mddev, rdev);
2950 if (rdev->raid_disk >= 0)
2951 return -EBUSY;
2952 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2953 md_wakeup_thread(rdev->mddev->thread);
2954 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
2958 int err;
2959
2960 if (rdev->raid_disk != -1)
2961 return -EBUSY;
2962
2963 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2964 return -EBUSY;
2965
2966 if (rdev->mddev->pers->hot_add_disk == NULL)
2967 return -EINVAL;
2968
2969 if (slot >= rdev->mddev->raid_disks &&
2970 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2971 return -ENOSPC;
2972
2973 rdev->raid_disk = slot;
2974 if (test_bit(In_sync, &rdev->flags))
2975 rdev->saved_raid_disk = slot;
2976 else
2977 rdev->saved_raid_disk = -1;
2978 clear_bit(In_sync, &rdev->flags);
2979 clear_bit(Bitmap_sync, &rdev->flags);
2980 err = rdev->mddev->pers->
2981 hot_add_disk(rdev->mddev, rdev);
2982 if (err) {
2983 rdev->raid_disk = -1;
2984 return err;
2985 } else
2986 sysfs_notify_dirent_safe(rdev->sysfs_state);
		if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
2990 } else {
2991 if (slot >= rdev->mddev->raid_disks &&
2992 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2993 return -ENOSPC;
2994 rdev->raid_disk = slot;
		/* assume it is working */
2996 clear_bit(Faulty, &rdev->flags);
2997 clear_bit(WriteMostly, &rdev->flags);
2998 set_bit(In_sync, &rdev->flags);
2999 sysfs_notify_dirent_safe(rdev->sysfs_state);
3000 }
3001 return len;
3002}
3003
3004static struct rdev_sysfs_entry rdev_slot =
3005__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3006
3007static ssize_t
3008offset_show(struct md_rdev *rdev, char *page)
3009{
3010 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3011}
3012
3013static ssize_t
3014offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3015{
3016 unsigned long long offset;
3017 if (kstrtoull(buf, 10, &offset) < 0)
3018 return -EINVAL;
3019 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3020 return -EBUSY;
3021 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
		return -EBUSY;
3025 rdev->data_offset = offset;
3026 rdev->new_data_offset = offset;
3027 return len;
3028}
3029
3030static struct rdev_sysfs_entry rdev_offset =
3031__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3032
3033static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3034{
3035 return sprintf(page, "%llu\n",
3036 (unsigned long long)rdev->new_data_offset);
3037}
3038
3039static ssize_t new_offset_store(struct md_rdev *rdev,
3040 const char *buf, size_t len)
3041{
3042 unsigned long long new_offset;
3043 struct mddev *mddev = rdev->mddev;
3044
3045 if (kstrtoull(buf, 10, &new_offset) < 0)
3046 return -EINVAL;
3047
3048 if (mddev->sync_thread ||
3049 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3050 return -EBUSY;
3051 if (new_offset == rdev->data_offset)
		/* reset is always permitted */
		;
3054 else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev_sectors */
3056 if (new_offset - rdev->data_offset
3057 + mddev->dev_sectors > rdev->sectors)
3058 return -E2BIG;
3059 }
	/* Metadata worries about other space details. */

	/* A smaller data_offset cannot be combined with a
	 * backwards reshape.
	 */
3065 if (new_offset < rdev->data_offset &&
3066 mddev->reshape_backwards)
3067 return -EINVAL;
3068
	/* A larger data_offset cannot be combined with a
	 * forwards reshape.
	 */
3072 if (new_offset > rdev->data_offset &&
3073 !mddev->reshape_backwards)
3074 return -EINVAL;
3075
3076 if (mddev->pers && mddev->persistent &&
3077 !super_types[mddev->major_version]
3078 .allow_new_offset(rdev, new_offset))
3079 return -E2BIG;
3080 rdev->new_data_offset = new_offset;
3081 if (new_offset > rdev->data_offset)
3082 mddev->reshape_backwards = 1;
3083 else if (new_offset < rdev->data_offset)
3084 mddev->reshape_backwards = 0;
3085
3086 return len;
3087}
3088static struct rdev_sysfs_entry rdev_new_offset =
3089__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3090
3091static ssize_t
3092rdev_size_show(struct md_rdev *rdev, char *page)
3093{
3094 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3095}
3096
3097static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3098{
	/* check if two start/length pairs overlap */
3100 if (s1+l1 <= s2)
3101 return 0;
3102 if (s2+l2 <= s1)
3103 return 0;
3104 return 1;
3105}
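
/*
 * Worked example (added for clarity): overlaps(0, 100, 100, 50) returns 0
 * because the first range ends exactly where the second starts, while
 * overlaps(0, 101, 100, 50) returns 1 since sector 100 lies in both.
 */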
3106
3107static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3108{
3109 unsigned long long blocks;
3110 sector_t new;
3111
3112 if (kstrtoull(buf, 10, &blocks) < 0)
3113 return -EINVAL;
3114
	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL; /* sector conversion overflow */

	new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL; /* unsigned long long to sector_t overflow */
3121
3122 *sectors = new;
3123 return 0;
3124}
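
/*
 * Worked example (added for clarity): the "blocks" unit here is 1KiB, so
 * writing "4" yields *sectors == 8 in 512-byte sectors; values with the
 * top bit set, or whose doubled value no longer fits, fail with -EINVAL.
 */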
3125
3126static ssize_t
3127rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3128{
3129 struct mddev *my_mddev = rdev->mddev;
3130 sector_t oldsectors = rdev->sectors;
3131 sector_t sectors;
3132
3133 if (test_bit(Journal, &rdev->flags))
3134 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
3136 return -EINVAL;
3137 if (rdev->data_offset != rdev->new_data_offset)
3138 return -EINVAL;
3139 if (my_mddev->pers && rdev->raid_disk >= 0) {
3140 if (my_mddev->persistent) {
3141 sectors = super_types[my_mddev->major_version].
3142 rdev_size_change(rdev, sectors);
3143 if (!sectors)
3144 return -EBUSY;
3145 } else if (!sectors)
3146 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3147 rdev->data_offset;
3148 if (!my_mddev->pers->resize)
			/* Cannot change size for RAID0 or Linear etc */
			return -EINVAL;
3151 }
3152 if (sectors < my_mddev->dev_sectors)
3153 return -EINVAL;
3154
3155 rdev->sectors = sectors;
3156 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same
		 * ->bdev do not overlap.  'rcu' is sufficient to walk
		 * the rdev lists safely.
		 * This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
3163 struct mddev *mddev;
3164 int overlap = 0;
3165 struct list_head *tmp;
3166
3167 rcu_read_lock();
3168 for_each_mddev(mddev, tmp) {
3169 struct md_rdev *rdev2;
3170
3171 rdev_for_each(rdev2, mddev)
3172 if (rdev->bdev == rdev2->bdev &&
3173 rdev != rdev2 &&
3174 overlaps(rdev->data_offset, rdev->sectors,
3175 rdev2->data_offset,
3176 rdev2->sectors)) {
3177 overlap = 1;
3178 break;
3179 }
3180 if (overlap) {
3181 mddev_put(mddev);
3182 break;
3183 }
3184 }
3185 rcu_read_unlock();
3186 if (overlap) {
			/* Someone else could have slipped in a size
			 * change here, but doing so is just silly.
			 * We put oldsectors back because we *know* it is
			 * safe, and trust user-space not to race with
			 * itself
			 */
			rdev->sectors = oldsectors;
3194 return -EBUSY;
3195 }
3196 }
3197 return len;
3198}
3199
3200static struct rdev_sysfs_entry rdev_size =
3201__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3202
3203static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3204{
3205 unsigned long long recovery_start = rdev->recovery_offset;
3206
3207 if (test_bit(In_sync, &rdev->flags) ||
3208 recovery_start == MaxSector)
3209 return sprintf(page, "none\n");
3210
3211 return sprintf(page, "%llu\n", recovery_start);
3212}
3213
3214static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3215{
3216 unsigned long long recovery_start;
3217
3218 if (cmd_match(buf, "none"))
3219 recovery_start = MaxSector;
3220 else if (kstrtoull(buf, 10, &recovery_start))
3221 return -EINVAL;
3222
3223 if (rdev->mddev->pers &&
3224 rdev->raid_disk >= 0)
3225 return -EBUSY;
3226
3227 rdev->recovery_offset = recovery_start;
3228 if (recovery_start == MaxSector)
3229 set_bit(In_sync, &rdev->flags);
3230 else
3231 clear_bit(In_sync, &rdev->flags);
3232 return len;
3233}
3234
3235static struct rdev_sysfs_entry rdev_recovery_start =
3236__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3237
/* sysfs access to the bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad.  The list is truncated to fit within
 *    the one-page limit of sysfs.
 *    Writing "sector length" to this file adds an acknowledged
 *    bad block range.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged.  Writing to this file adds bad blocks
 *    without acknowledging them.  This is largely for testing.
 */
3249static ssize_t bb_show(struct md_rdev *rdev, char *page)
3250{
3251 return badblocks_show(&rdev->badblocks, page, 0);
3252}
3253static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3254{
3255 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3256
3257 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3258 wake_up(&rdev->blocked_wait);
3259 return rv;
3260}
3261static struct rdev_sysfs_entry rdev_bad_blocks =
3262__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3263
3264static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3265{
3266 return badblocks_show(&rdev->badblocks, page, 1);
3267}
3268static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3269{
3270 return badblocks_store(&rdev->badblocks, page, len, 1);
3271}
3272static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3273__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3274
3275static ssize_t
3276ppl_sector_show(struct md_rdev *rdev, char *page)
3277{
3278 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3279}
3280
3281static ssize_t
3282ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3283{
3284 unsigned long long sector;
3285
	if (kstrtoull(buf, 10, &sector) < 0)
3287 return -EINVAL;
3288 if (sector != (sector_t)sector)
3289 return -EINVAL;
3290
3291 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3292 rdev->raid_disk >= 0)
3293 return -EBUSY;
3294
3295 if (rdev->mddev->persistent) {
3296 if (rdev->mddev->major_version == 0)
3297 return -EINVAL;
3298 if ((sector > rdev->sb_start &&
3299 sector - rdev->sb_start > S16_MAX) ||
3300 (sector < rdev->sb_start &&
3301 rdev->sb_start - sector > -S16_MIN))
3302 return -EINVAL;
3303 rdev->ppl.offset = sector - rdev->sb_start;
3304 } else if (!rdev->mddev->external) {
3305 return -EBUSY;
3306 }
3307 rdev->ppl.sector = sector;
3308 return len;
3309}
3310
3311static struct rdev_sysfs_entry rdev_ppl_sector =
3312__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3313
3314static ssize_t
3315ppl_size_show(struct md_rdev *rdev, char *page)
3316{
3317 return sprintf(page, "%u\n", rdev->ppl.size);
3318}
3319
3320static ssize_t
3321ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3322{
3323 unsigned int size;
3324
3325 if (kstrtouint(buf, 10, &size) < 0)
3326 return -EINVAL;
3327
3328 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3329 rdev->raid_disk >= 0)
3330 return -EBUSY;
3331
3332 if (rdev->mddev->persistent) {
3333 if (rdev->mddev->major_version == 0)
3334 return -EINVAL;
3335 if (size > U16_MAX)
3336 return -EINVAL;
3337 } else if (!rdev->mddev->external) {
3338 return -EBUSY;
3339 }
3340 rdev->ppl.size = size;
3341 return len;
3342}
3343
3344static struct rdev_sysfs_entry rdev_ppl_size =
3345__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3346
3347static struct attribute *rdev_default_attrs[] = {
3348 &rdev_state.attr,
3349 &rdev_errors.attr,
3350 &rdev_slot.attr,
3351 &rdev_offset.attr,
3352 &rdev_new_offset.attr,
3353 &rdev_size.attr,
3354 &rdev_recovery_start.attr,
3355 &rdev_bad_blocks.attr,
3356 &rdev_unack_bad_blocks.attr,
3357 &rdev_ppl_sector.attr,
3358 &rdev_ppl_size.attr,
3359 NULL,
3360};
3361static ssize_t
3362rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3363{
3364 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3365 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3366
3367 if (!entry->show)
3368 return -EIO;
3369 if (!rdev->mddev)
3370 return -EBUSY;
3371 return entry->show(rdev, page);
3372}
3373
3374static ssize_t
3375rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3376 const char *page, size_t length)
3377{
3378 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3379 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3380 ssize_t rv;
3381 struct mddev *mddev = rdev->mddev;
3382
3383 if (!entry->store)
3384 return -EIO;
3385 if (!capable(CAP_SYS_ADMIN))
3386 return -EACCES;
	rv = mddev ? mddev_lock(mddev) : -EBUSY;
3388 if (!rv) {
3389 if (rdev->mddev == NULL)
3390 rv = -EBUSY;
3391 else
3392 rv = entry->store(rdev, page, length);
3393 mddev_unlock(mddev);
3394 }
3395 return rv;
3396}
3397
3398static void rdev_free(struct kobject *ko)
3399{
3400 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3401 kfree(rdev);
3402}
3403static const struct sysfs_ops rdev_sysfs_ops = {
3404 .show = rdev_attr_show,
3405 .store = rdev_attr_store,
3406};
3407static struct kobj_type rdev_ktype = {
3408 .release = rdev_free,
3409 .sysfs_ops = &rdev_sysfs_ops,
3410 .default_attrs = rdev_default_attrs,
3411};
3412
3413int md_rdev_init(struct md_rdev *rdev)
3414{
3415 rdev->desc_nr = -1;
3416 rdev->saved_raid_disk = -1;
3417 rdev->raid_disk = -1;
3418 rdev->flags = 0;
3419 rdev->data_offset = 0;
3420 rdev->new_data_offset = 0;
3421 rdev->sb_events = 0;
3422 rdev->last_read_error = 0;
3423 rdev->sb_loaded = 0;
3424 rdev->bb_page = NULL;
3425 atomic_set(&rdev->nr_pending, 0);
3426 atomic_set(&rdev->read_errors, 0);
3427 atomic_set(&rdev->corrected_errors, 0);
3428
3429 INIT_LIST_HEAD(&rdev->same_set);
3430 init_waitqueue_head(&rdev->blocked_wait);
3431
	/* Add space to store bad block list.
	 * This reserves the space even on arrays where it cannot
	 * be used - I wonder if that matters
	 */
	return badblocks_init(&rdev->badblocks, 0);
3437}
3438EXPORT_SYMBOL_GPL(md_rdev_init);
3439
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock
 *
 * mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
3449static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3450{
3451 char b[BDEVNAME_SIZE];
3452 int err;
3453 struct md_rdev *rdev;
3454 sector_t size;
3455
3456 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3457 if (!rdev)
3458 return ERR_PTR(-ENOMEM);
3459
3460 err = md_rdev_init(rdev);
3461 if (err)
3462 goto abort_free;
3463 err = alloc_disk_sb(rdev);
3464 if (err)
3465 goto abort_free;
3466
3467 err = lock_rdev(rdev, newdev, super_format == -2);
3468 if (err)
3469 goto abort_free;
3470
3471 kobject_init(&rdev->kobj, &rdev_ktype);
3472
3473 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3474 if (!size) {
3475 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3476 bdevname(rdev->bdev,b));
3477 err = -EINVAL;
3478 goto abort_free;
3479 }
3480
3481 if (super_format >= 0) {
3482 err = super_types[super_format].
3483 load_super(rdev, NULL, super_minor);
3484 if (err == -EINVAL) {
3485 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3486 bdevname(rdev->bdev,b),
3487 super_format, super_minor);
3488 goto abort_free;
3489 }
3490 if (err < 0) {
3491 pr_warn("md: could not read %s's sb, not importing!\n",
3492 bdevname(rdev->bdev,b));
3493 goto abort_free;
3494 }
3495 }
3496
3497 return rdev;
3498
3499abort_free:
3500 if (rdev->bdev)
3501 unlock_rdev(rdev);
3502 md_rdev_clear(rdev);
3503 kfree(rdev);
3504 return ERR_PTR(err);
3505}
3506
/*
 * Check a full RAID array for plausibility
 */
3511static void analyze_sbs(struct mddev *mddev)
3512{
3513 int i;
3514 struct md_rdev *rdev, *freshest, *tmp;
3515 char b[BDEVNAME_SIZE];
3516
3517 freshest = NULL;
3518 rdev_for_each_safe(rdev, tmp, mddev)
3519 switch (super_types[mddev->major_version].
3520 load_super(rdev, freshest, mddev->minor_version)) {
3521 case 1:
3522 freshest = rdev;
3523 break;
3524 case 0:
3525 break;
3526 default:
3527 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3528 bdevname(rdev->bdev,b));
3529 md_kick_rdev_from_array(rdev);
3530 }
3531
3532 super_types[mddev->major_version].
3533 validate_super(mddev, freshest);
3534
3535 i = 0;
3536 rdev_for_each_safe(rdev, tmp, mddev) {
3537 if (mddev->max_disks &&
3538 (rdev->desc_nr >= mddev->max_disks ||
3539 i > mddev->max_disks)) {
3540 pr_warn("md: %s: %s: only %d devices permitted\n",
3541 mdname(mddev), bdevname(rdev->bdev, b),
3542 mddev->max_disks);
3543 md_kick_rdev_from_array(rdev);
3544 continue;
3545 }
3546 if (rdev != freshest) {
3547 if (super_types[mddev->major_version].
3548 validate_super(mddev, rdev)) {
3549 pr_warn("md: kicking non-fresh %s from array!\n",
3550 bdevname(rdev->bdev,b));
3551 md_kick_rdev_from_array(rdev);
3552 continue;
3553 }
3554 }
3555 if (mddev->level == LEVEL_MULTIPATH) {
3556 rdev->desc_nr = i++;
3557 rdev->raid_disk = rdev->desc_nr;
3558 set_bit(In_sync, &rdev->flags);
3559 } else if (rdev->raid_disk >=
3560 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3561 !test_bit(Journal, &rdev->flags)) {
3562 rdev->raid_disk = -1;
3563 clear_bit(In_sync, &rdev->flags);
3564 }
3565 }
3566}
3567
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * all without any floating-point arithmetic.
 */
3578int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3579{
3580 unsigned long result = 0;
3581 long decimals = -1;
3582 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3583 if (*cp == '.')
3584 decimals = 0;
3585 else if (decimals < scale) {
3586 unsigned int value;
3587 value = *cp - '0';
3588 result = result * 10 + value;
3589 if (decimals >= 0)
3590 decimals++;
3591 }
3592 cp++;
3593 }
3594 if (*cp == '\n')
3595 cp++;
3596 if (*cp)
3597 return -EINVAL;
3598 if (decimals < 0)
3599 decimals = 0;
3600 while (decimals < scale) {
3601 result *= 10;
		decimals++;
3603 }
3604 *res = result;
3605 return 0;
3606}
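
/*
 * Worked examples (added for clarity): with scale == 3, "5.73" parses to
 * 5730 and "12" to 12000, while "5.7.3" is rejected because only a single
 * decimal point is accepted.
 */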
3607
3608static ssize_t
3609safe_delay_show(struct mddev *mddev, char *page)
3610{
3611 int msec = (mddev->safemode_delay*1000)/HZ;
3612 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3613}
3614static ssize_t
3615safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3616{
3617 unsigned long msec;
3618
3619 if (mddev_is_clustered(mddev)) {
3620 pr_warn("md: Safemode is disabled for clustered mode\n");
3621 return -EINVAL;
3622 }
3623
3624 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3625 return -EINVAL;
3626 if (msec == 0)
3627 mddev->safemode_delay = 0;
3628 else {
3629 unsigned long old_delay = mddev->safemode_delay;
3630 unsigned long new_delay = (msec*HZ)/1000;
3631
3632 if (new_delay == 0)
3633 new_delay = 1;
3634 mddev->safemode_delay = new_delay;
3635 if (new_delay < old_delay || old_delay == 0)
3636 mod_timer(&mddev->safemode_timer, jiffies+1);
3637 }
3638 return len;
3639}
3640static struct md_sysfs_entry md_safe_delay =
3641__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
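
/*
 * Usage sketch (illustrative; md0 is an example name): the delay is given
 * in seconds with millisecond resolution, so
 *
 *   echo 0.200 > /sys/block/md0/md/safe_mode_delay
 *
 * asks for a 200ms quiet period before the array is marked clean.
 */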
3642
3643static ssize_t
3644level_show(struct mddev *mddev, char *page)
3645{
3646 struct md_personality *p;
3647 int ret;
3648 spin_lock(&mddev->lock);
3649 p = mddev->pers;
3650 if (p)
3651 ret = sprintf(page, "%s\n", p->name);
3652 else if (mddev->clevel[0])
3653 ret = sprintf(page, "%s\n", mddev->clevel);
3654 else if (mddev->level != LEVEL_NONE)
3655 ret = sprintf(page, "%d\n", mddev->level);
3656 else
3657 ret = 0;
3658 spin_unlock(&mddev->lock);
3659 return ret;
3660}
3661
3662static ssize_t
3663level_store(struct mddev *mddev, const char *buf, size_t len)
3664{
3665 char clevel[16];
3666 ssize_t rv;
3667 size_t slen = len;
3668 struct md_personality *pers, *oldpers;
3669 long level;
3670 void *priv, *oldpriv;
3671 struct md_rdev *rdev;
3672
3673 if (slen == 0 || slen >= sizeof(clevel))
3674 return -EINVAL;
3675
3676 rv = mddev_lock(mddev);
3677 if (rv)
3678 return rv;
3679
3680 if (mddev->pers == NULL) {
3681 strncpy(mddev->clevel, buf, slen);
3682 if (mddev->clevel[slen-1] == '\n')
3683 slen--;
3684 mddev->clevel[slen] = 0;
3685 mddev->level = LEVEL_NONE;
3686 rv = len;
3687 goto out_unlock;
3688 }
3689 rv = -EROFS;
3690 if (mddev->ro)
3691 goto out_unlock;
3692
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
3699 rv = -EBUSY;
3700 if (mddev->sync_thread ||
3701 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3702 mddev->reshape_position != MaxSector ||
3703 mddev->sysfs_active)
3704 goto out_unlock;
3705
3706 rv = -EINVAL;
3707 if (!mddev->pers->quiesce) {
3708 pr_warn("md: %s: %s does not support online personality change\n",
3709 mdname(mddev), mddev->pers->name);
3710 goto out_unlock;
3711 }
3712
	/* Now find the new personality */
	strncpy(clevel, buf, slen);
3715 if (clevel[slen-1] == '\n')
3716 slen--;
3717 clevel[slen] = 0;
3718 if (kstrtol(clevel, 10, &level))
3719 level = LEVEL_NONE;
3720
3721 if (request_module("md-%s", clevel) != 0)
3722 request_module("md-level-%s", clevel);
3723 spin_lock(&pers_lock);
3724 pers = find_pers(level, clevel);
3725 if (!pers || !try_module_get(pers->owner)) {
3726 spin_unlock(&pers_lock);
3727 pr_warn("md: personality %s not loaded\n", clevel);
3728 rv = -EINVAL;
3729 goto out_unlock;
3730 }
3731 spin_unlock(&pers_lock);
3732
3733 if (pers == mddev->pers) {
		/* Nothing to do! */
3735 module_put(pers->owner);
3736 rv = len;
3737 goto out_unlock;
3738 }
3739 if (!pers->takeover) {
3740 module_put(pers->owner);
3741 pr_warn("md: %s: %s does not support personality takeover\n",
3742 mdname(mddev), clevel);
3743 rv = -EINVAL;
3744 goto out_unlock;
3745 }
3746
3747 rdev_for_each(rdev, mddev)
3748 rdev->new_raid_disk = rdev->raid_disk;
3749
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
	priv = pers->takeover(mddev);
3754 if (IS_ERR(priv)) {
3755 mddev->new_level = mddev->level;
3756 mddev->new_layout = mddev->layout;
3757 mddev->new_chunk_sectors = mddev->chunk_sectors;
3758 mddev->raid_disks -= mddev->delta_disks;
3759 mddev->delta_disks = 0;
3760 mddev->reshape_backwards = 0;
3761 module_put(pers->owner);
3762 pr_warn("md: %s: %s would not accept array\n",
3763 mdname(mddev), clevel);
3764 rv = PTR_ERR(priv);
3765 goto out_unlock;
3766 }
3767
	/* Looks like we have a winner */
	mddev_suspend(mddev);
3770 mddev_detach(mddev);
3771
3772 spin_lock(&mddev->lock);
3773 oldpers = mddev->pers;
3774 oldpriv = mddev->private;
3775 mddev->pers = pers;
3776 mddev->private = priv;
3777 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3778 mddev->level = mddev->new_level;
3779 mddev->layout = mddev->new_layout;
3780 mddev->chunk_sectors = mddev->new_chunk_sectors;
3781 mddev->delta_disks = 0;
3782 mddev->reshape_backwards = 0;
3783 mddev->degraded = 0;
3784 spin_unlock(&mddev->lock);
3785
3786 if (oldpers->sync_request == NULL &&
3787 mddev->external) {
		/* We are converting from a no-redundancy array
		 * to a redundancy array and metadata is managed
		 * externally so we need to be sure that writes
		 * won't block due to a need to update the metadata
		 * outside the kernel.
		 */
		mddev->in_sync = 0;
3796 mddev->safemode_delay = 0;
3797 mddev->safemode = 0;
3798 }
3799
3800 oldpers->free(mddev, oldpriv);
3801
3802 if (oldpers->sync_request == NULL &&
3803 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
3805 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3806 pr_warn("md: cannot register extra attributes for %s\n",
3807 mdname(mddev));
3808 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3809 }
3810 if (oldpers->sync_request != NULL &&
3811 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
3813 if (mddev->to_remove == NULL)
3814 mddev->to_remove = &md_redundancy_group;
3815 }
3816
3817 module_put(oldpers->owner);
3818
3819 rdev_for_each(rdev, mddev) {
3820 if (rdev->raid_disk < 0)
3821 continue;
3822 if (rdev->new_raid_disk >= mddev->raid_disks)
3823 rdev->new_raid_disk = -1;
3824 if (rdev->new_raid_disk == rdev->raid_disk)
3825 continue;
3826 sysfs_unlink_rdev(mddev, rdev);
3827 }
3828 rdev_for_each(rdev, mddev) {
3829 if (rdev->raid_disk < 0)
3830 continue;
3831 if (rdev->new_raid_disk == rdev->raid_disk)
3832 continue;
3833 rdev->raid_disk = rdev->new_raid_disk;
3834 if (rdev->raid_disk < 0)
3835 clear_bit(In_sync, &rdev->flags);
3836 else {
3837 if (sysfs_link_rdev(mddev, rdev))
3838 pr_warn("md: cannot register rd%d for %s after level change\n",
3839 rdev->raid_disk, mdname(mddev));
3840 }
3841 }
3842
3843 if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
		mddev->in_sync = 1;
3848 del_timer_sync(&mddev->safemode_timer);
3849 }
3850 blk_set_stacking_limits(&mddev->queue->limits);
3851 pers->run(mddev);
3852 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3853 mddev_resume(mddev);
3854 if (!mddev->thread)
3855 md_update_sb(mddev, 1);
3856 sysfs_notify(&mddev->kobj, NULL, "level");
3857 md_new_event(mddev);
3858 rv = len;
3859out_unlock:
3860 mddev_unlock(mddev);
3861 return rv;
3862}
3863
3864static struct md_sysfs_entry md_level =
3865__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3866
3867static ssize_t
3868layout_show(struct mddev *mddev, char *page)
3869{
	/* just a number, not meaningful for all levels */
3871 if (mddev->reshape_position != MaxSector &&
3872 mddev->layout != mddev->new_layout)
3873 return sprintf(page, "%d (%d)\n",
3874 mddev->new_layout, mddev->layout);
3875 return sprintf(page, "%d\n", mddev->layout);
3876}
3877
3878static ssize_t
3879layout_store(struct mddev *mddev, const char *buf, size_t len)
3880{
3881 unsigned int n;
3882 int err;
3883
3884 err = kstrtouint(buf, 10, &n);
3885 if (err < 0)
3886 return err;
3887 err = mddev_lock(mddev);
3888 if (err)
3889 return err;
3890
3891 if (mddev->pers) {
3892 if (mddev->pers->check_reshape == NULL)
3893 err = -EBUSY;
3894 else if (mddev->ro)
3895 err = -EROFS;
3896 else {
3897 mddev->new_layout = n;
3898 err = mddev->pers->check_reshape(mddev);
3899 if (err)
3900 mddev->new_layout = mddev->layout;
3901 }
3902 } else {
3903 mddev->new_layout = n;
3904 if (mddev->reshape_position == MaxSector)
3905 mddev->layout = n;
3906 }
3907 mddev_unlock(mddev);
3908 return err ?: len;
3909}
3910static struct md_sysfs_entry md_layout =
3911__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3912
3913static ssize_t
3914raid_disks_show(struct mddev *mddev, char *page)
3915{
3916 if (mddev->raid_disks == 0)
3917 return 0;
3918 if (mddev->reshape_position != MaxSector &&
3919 mddev->delta_disks != 0)
3920 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3921 mddev->raid_disks - mddev->delta_disks);
3922 return sprintf(page, "%d\n", mddev->raid_disks);
3923}
3924
3925static int update_raid_disks(struct mddev *mddev, int raid_disks);
3926
3927static ssize_t
3928raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3929{
3930 unsigned int n;
3931 int err;
3932
3933 err = kstrtouint(buf, 10, &n);
3934 if (err < 0)
3935 return err;
3936
3937 err = mddev_lock(mddev);
3938 if (err)
3939 return err;
3940 if (mddev->pers)
3941 err = update_raid_disks(mddev, n);
3942 else if (mddev->reshape_position != MaxSector) {
3943 struct md_rdev *rdev;
3944 int olddisks = mddev->raid_disks - mddev->delta_disks;
3945
3946 err = -EINVAL;
3947 rdev_for_each(rdev, mddev) {
3948 if (olddisks < n &&
3949 rdev->data_offset < rdev->new_data_offset)
3950 goto out_unlock;
3951 if (olddisks > n &&
3952 rdev->data_offset > rdev->new_data_offset)
3953 goto out_unlock;
3954 }
3955 err = 0;
3956 mddev->delta_disks = n - olddisks;
3957 mddev->raid_disks = n;
3958 mddev->reshape_backwards = (mddev->delta_disks < 0);
3959 } else
3960 mddev->raid_disks = n;
3961out_unlock:
3962 mddev_unlock(mddev);
3963 return err ? err : len;
3964}
3965static struct md_sysfs_entry md_raid_disks =
3966__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3967
3968static ssize_t
3969chunk_size_show(struct mddev *mddev, char *page)
3970{
3971 if (mddev->reshape_position != MaxSector &&
3972 mddev->chunk_sectors != mddev->new_chunk_sectors)
3973 return sprintf(page, "%d (%d)\n",
3974 mddev->new_chunk_sectors << 9,
3975 mddev->chunk_sectors << 9);
3976 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3977}
3978
3979static ssize_t
3980chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3981{
3982 unsigned long n;
3983 int err;
3984
3985 err = kstrtoul(buf, 10, &n);
3986 if (err < 0)
3987 return err;
3988
3989 err = mddev_lock(mddev);
3990 if (err)
3991 return err;
3992 if (mddev->pers) {
3993 if (mddev->pers->check_reshape == NULL)
3994 err = -EBUSY;
3995 else if (mddev->ro)
3996 err = -EROFS;
3997 else {
3998 mddev->new_chunk_sectors = n >> 9;
3999 err = mddev->pers->check_reshape(mddev);
4000 if (err)
4001 mddev->new_chunk_sectors = mddev->chunk_sectors;
4002 }
4003 } else {
4004 mddev->new_chunk_sectors = n >> 9;
4005 if (mddev->reshape_position == MaxSector)
4006 mddev->chunk_sectors = n >> 9;
4007 }
4008 mddev_unlock(mddev);
4009 return err ?: len;
4010}
4011static struct md_sysfs_entry md_chunk_size =
4012__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
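
/*
 * Worked example (added for clarity): chunk_size is exposed in bytes but
 * kept internally in 512-byte sectors, so writing 65536 above stores
 * new_chunk_sectors = 65536 >> 9 = 128.
 */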
4013
4014static ssize_t
4015resync_start_show(struct mddev *mddev, char *page)
4016{
4017 if (mddev->recovery_cp == MaxSector)
4018 return sprintf(page, "none\n");
4019 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4020}
4021
4022static ssize_t
4023resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4024{
4025 unsigned long long n;
4026 int err;
4027
4028 if (cmd_match(buf, "none"))
4029 n = MaxSector;
4030 else {
4031 err = kstrtoull(buf, 10, &n);
4032 if (err < 0)
4033 return err;
4034 if (n != (sector_t)n)
4035 return -EINVAL;
4036 }
4037
4038 err = mddev_lock(mddev);
4039 if (err)
4040 return err;
4041 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4042 err = -EBUSY;
4043
4044 if (!err) {
4045 mddev->recovery_cp = n;
4046 if (mddev->pers)
4047 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4048 }
4049 mddev_unlock(mddev);
4050 return err ?: len;
4051}
4052static struct md_sysfs_entry md_resync_start =
4053__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4054 resync_start_show, resync_start_store);
4055
/*
 * The array state can be:
 *
 *  clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 *  inactive
 *     May have some settings, like chunk size, but array is not active
 *     all IO results in error
 *     When written, doesn't tear down array, but just stops it
 *  suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 *  readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 *  read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 *  clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 *  active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 *  write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 *  active-idle
 *     like active, but no writes have been seen for a while (100msec).
 */
4092enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4093 write_pending, active_idle, bad_word};
4094static char *array_states[] = {
4095 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4096 "write-pending", "active-idle", NULL };
4097
4098static int match_word(const char *word, char **list)
4099{
4100 int n;
4101 for (n=0; list[n]; n++)
4102 if (cmd_match(word, list[n]))
4103 break;
4104 return n;
4105}
4106
4107static ssize_t
4108array_state_show(struct mddev *mddev, char *page)
4109{
4110 enum array_state st = inactive;
4111
4112 if (mddev->pers)
4113 switch(mddev->ro) {
4114 case 1:
4115 st = readonly;
4116 break;
4117 case 2:
4118 st = read_auto;
4119 break;
4120 case 0:
4121 spin_lock(&mddev->lock);
4122 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4123 st = write_pending;
4124 else if (mddev->in_sync)
4125 st = clean;
4126 else if (mddev->safemode)
4127 st = active_idle;
4128 else
4129 st = active;
4130 spin_unlock(&mddev->lock);
4131 }
4132 else {
4133 if (list_empty(&mddev->disks) &&
4134 mddev->raid_disks == 0 &&
4135 mddev->dev_sectors == 0)
4136 st = clear;
4137 else
4138 st = inactive;
4139 }
4140 return sprintf(page, "%s\n", array_states[st]);
4141}
4142
4143static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4144static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4145static int do_md_run(struct mddev *mddev);
4146static int restart_array(struct mddev *mddev);
4147
4148static ssize_t
4149array_state_store(struct mddev *mddev, const char *buf, size_t len)
4150{
4151 int err = 0;
4152 enum array_state st = match_word(buf, array_states);
4153
4154 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
4158 spin_lock(&mddev->lock);
4159 if (st == active) {
4160 restart_array(mddev);
4161 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4162 md_wakeup_thread(mddev->thread);
4163 wake_up(&mddev->sb_wait);
4164 } else {
4165 restart_array(mddev);
4166 if (!set_in_sync(mddev))
4167 err = -EBUSY;
4168 }
4169 if (!err)
4170 sysfs_notify_dirent_safe(mddev->sysfs_state);
4171 spin_unlock(&mddev->lock);
4172 return err ?: len;
4173 }
4174 err = mddev_lock(mddev);
4175 if (err)
4176 return err;
4177 err = -EINVAL;
4178 switch(st) {
4179 case bad_word:
4180 break;
4181 case clear:
		/* stopping an active array */
4183 err = do_md_stop(mddev, 0, NULL);
4184 break;
4185 case inactive:
		/* stopping an active array */
4187 if (mddev->pers)
4188 err = do_md_stop(mddev, 2, NULL);
4189 else
4190 err = 0;
4191 break;
4192 case suspended:
4193 break;
4194 case readonly:
4195 if (mddev->pers)
4196 err = md_set_readonly(mddev, NULL);
4197 else {
4198 mddev->ro = 1;
4199 set_disk_ro(mddev->gendisk, 1);
4200 err = do_md_run(mddev);
4201 }
4202 break;
4203 case read_auto:
4204 if (mddev->pers) {
4205 if (mddev->ro == 0)
4206 err = md_set_readonly(mddev, NULL);
4207 else if (mddev->ro == 1)
4208 err = restart_array(mddev);
4209 if (err == 0) {
4210 mddev->ro = 2;
4211 set_disk_ro(mddev->gendisk, 0);
4212 }
4213 } else {
4214 mddev->ro = 2;
4215 err = do_md_run(mddev);
4216 }
4217 break;
4218 case clean:
4219 if (mddev->pers) {
4220 err = restart_array(mddev);
4221 if (err)
4222 break;
4223 spin_lock(&mddev->lock);
4224 if (!set_in_sync(mddev))
4225 err = -EBUSY;
4226 spin_unlock(&mddev->lock);
4227 } else
4228 err = -EINVAL;
4229 break;
4230 case active:
4231 if (mddev->pers) {
4232 err = restart_array(mddev);
4233 if (err)
4234 break;
4235 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4236 wake_up(&mddev->sb_wait);
4237 err = 0;
4238 } else {
4239 mddev->ro = 0;
4240 set_disk_ro(mddev->gendisk, 0);
4241 err = do_md_run(mddev);
4242 }
4243 break;
4244 case write_pending:
4245 case active_idle:
		/* these cannot be set */
4247 break;
4248 }
4249
4250 if (!err) {
4251 if (mddev->hold_active == UNTIL_IOCTL)
4252 mddev->hold_active = 0;
4253 sysfs_notify_dirent_safe(mddev->sysfs_state);
4254 }
4255 mddev_unlock(mddev);
4256 return err ?: len;
4257}
4258static struct md_sysfs_entry md_array_state =
4259__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
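
/*
 * Usage sketch (illustrative; md0 is an example name):
 *
 *   cat /sys/block/md0/md/array_state        # e.g. "clean"
 *   echo readonly > /sys/block/md0/md/array_state
 *
 * follows the state table in the comment above enum array_state.
 */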
4260
4261static ssize_t
max_corrected_read_errors_show(struct mddev *mddev, char *page)
{
4263 return sprintf(page, "%d\n",
4264 atomic_read(&mddev->max_corr_read_errors));
4265}
4266
4267static ssize_t
4268max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4269{
4270 unsigned int n;
4271 int rv;
4272
4273 rv = kstrtouint(buf, 10, &n);
4274 if (rv < 0)
4275 return rv;
4276 atomic_set(&mddev->max_corr_read_errors, n);
4277 return len;
4278}
4279
4280static struct md_sysfs_entry max_corr_read_errors =
4281__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4282 max_corrected_read_errors_store);
4283
4284static ssize_t
4285null_show(struct mddev *mddev, char *page)
4286{
4287 return -EINVAL;
4288}
4289
4290static ssize_t
4291new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4292{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
4300 char *e;
4301 int major = simple_strtoul(buf, &e, 10);
4302 int minor;
4303 dev_t dev;
4304 struct md_rdev *rdev;
4305 int err;
4306
4307 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4308 return -EINVAL;
4309 minor = simple_strtoul(e+1, &e, 10);
4310 if (*e && *e != '\n')
4311 return -EINVAL;
4312 dev = MKDEV(major, minor);
4313 if (major != MAJOR(dev) ||
4314 minor != MINOR(dev))
4315 return -EOVERFLOW;
4316
4317 flush_workqueue(md_misc_wq);
4318
4319 err = mddev_lock(mddev);
4320 if (err)
4321 return err;
4322 if (mddev->persistent) {
4323 rdev = md_import_device(dev, mddev->major_version,
4324 mddev->minor_version);
4325 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4326 struct md_rdev *rdev0
4327 = list_entry(mddev->disks.next,
4328 struct md_rdev, same_set);
4329 err = super_types[mddev->major_version]
4330 .load_super(rdev, rdev0, mddev->minor_version);
4331 if (err < 0)
4332 goto out;
4333 }
4334 } else if (mddev->external)
4335 rdev = md_import_device(dev, -2, -1);
4336 else
4337 rdev = md_import_device(dev, -1, -1);
4338
4339 if (IS_ERR(rdev)) {
4340 mddev_unlock(mddev);
4341 return PTR_ERR(rdev);
4342 }
4343 err = bind_rdev_to_array(rdev, mddev);
4344 out:
4345 if (err)
4346 export_rdev(rdev);
4347 mddev_unlock(mddev);
4348 if (!err)
4349 md_new_event(mddev);
4350 return err ? err : len;
4351}
4352
4353static struct md_sysfs_entry md_new_device =
4354__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
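
/*
 * Usage sketch (illustrative): new_dev takes the "major:minor" numbers of
 * an existing block device, e.g.
 *
 *   echo 8:17 > /sys/block/md0/md/new_dev
 *
 * binds the device numbered 8:17 (typically /dev/sdb1) to the array.
 */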
4355
4356static ssize_t
4357bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4358{
4359 char *end;
4360 unsigned long chunk, end_chunk;
4361 int err;
4362
4363 err = mddev_lock(mddev);
4364 if (err)
4365 return err;
4366 if (!mddev->bitmap)
4367 goto out;
4368
4369 while (*buf) {
4370 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4371 if (buf == end) break;
4372 if (*end == '-') {
4373 buf = end + 1;
4374 end_chunk = simple_strtoul(buf, &end, 0);
4375 if (buf == end) break;
4376 }
4377 if (*end && !isspace(*end)) break;
4378 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4379 buf = skip_spaces(end);
4380 }
4381 md_bitmap_unplug(mddev->bitmap);
4382out:
4383 mddev_unlock(mddev);
4384 return len;
4385}
4386
4387static struct md_sysfs_entry md_bitmap =
4388__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4389
4390static ssize_t
4391size_show(struct mddev *mddev, char *page)
4392{
4393 return sprintf(page, "%llu\n",
4394 (unsigned long long)mddev->dev_sectors / 2);
4395}
4396
4397static int update_size(struct mddev *mddev, sector_t num_sectors);
4398
4399static ssize_t
4400size_store(struct mddev *mddev, const char *buf, size_t len)
4401{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
4406 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4408
4409 if (err < 0)
4410 return err;
4411 err = mddev_lock(mddev);
4412 if (err)
4413 return err;
4414 if (mddev->pers) {
4415 err = update_size(mddev, sectors);
4416 if (err == 0)
4417 md_update_sb(mddev, 1);
4418 } else {
4419 if (mddev->dev_sectors == 0 ||
4420 mddev->dev_sectors > sectors)
4421 mddev->dev_sectors = sectors;
4422 else
4423 err = -ENOSPC;
4424 }
4425 mddev_unlock(mddev);
4426 return err ? err : len;
4427}
4428
4429static struct md_sysfs_entry md_size =
4430__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4431
/* Metadata version:
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4438static ssize_t
4439metadata_show(struct mddev *mddev, char *page)
4440{
4441 if (mddev->persistent)
4442 return sprintf(page, "%d.%d\n",
4443 mddev->major_version, mddev->minor_version);
4444 else if (mddev->external)
4445 return sprintf(page, "external:%s\n", mddev->metadata_type);
4446 else
4447 return sprintf(page, "none\n");
4448}
4449
4450static ssize_t
4451metadata_store(struct mddev *mddev, const char *buf, size_t len)
4452{
4453 int major, minor;
4454 char *e;
4455 int err;
4456
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4461 err = mddev_lock(mddev);
4462 if (err)
4463 return err;
4464 err = -EBUSY;
4465 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4466 ;
4467 else if (!list_empty(&mddev->disks))
4468 goto out_unlock;
4469
4470 err = 0;
4471 if (cmd_match(buf, "none")) {
4472 mddev->persistent = 0;
4473 mddev->external = 0;
4474 mddev->major_version = 0;
4475 mddev->minor_version = 90;
4476 goto out_unlock;
4477 }
4478 if (strncmp(buf, "external:", 9) == 0) {
4479 size_t namelen = len-9;
4480 if (namelen >= sizeof(mddev->metadata_type))
4481 namelen = sizeof(mddev->metadata_type)-1;
4482 strncpy(mddev->metadata_type, buf+9, namelen);
4483 mddev->metadata_type[namelen] = 0;
4484 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4485 mddev->metadata_type[--namelen] = 0;
4486 mddev->persistent = 0;
4487 mddev->external = 1;
4488 mddev->major_version = 0;
4489 mddev->minor_version = 90;
4490 goto out_unlock;
4491 }
4492 major = simple_strtoul(buf, &e, 10);
4493 err = -EINVAL;
4494 if (e==buf || *e != '.')
4495 goto out_unlock;
4496 buf = e+1;
4497 minor = simple_strtoul(buf, &e, 10);
4498 if (e==buf || (*e && *e != '\n') )
4499 goto out_unlock;
4500 err = -ENOENT;
4501 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4502 goto out_unlock;
4503 mddev->major_version = major;
4504 mddev->minor_version = minor;
4505 mddev->persistent = 1;
4506 mddev->external = 0;
4507 err = 0;
4508out_unlock:
4509 mddev_unlock(mddev);
4510 return err ?: len;
4511}
4512
4513static struct md_sysfs_entry md_metadata =
4514__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4515
4516static ssize_t
4517action_show(struct mddev *mddev, char *page)
4518{
4519 char *type = "idle";
4520 unsigned long recovery = mddev->recovery;
4521 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4522 type = "frozen";
4523 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4524 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4525 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4526 type = "reshape";
4527 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4528 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4529 type = "resync";
4530 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4531 type = "check";
4532 else
4533 type = "repair";
4534 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4535 type = "recover";
4536 else if (mddev->reshape_position != MaxSector)
4537 type = "reshape";
4538 }
4539 return sprintf(page, "%s\n", type);
4540}
4541
4542static ssize_t
4543action_store(struct mddev *mddev, const char *page, size_t len)
4544{
4545 if (!mddev->pers || !mddev->pers->sync_request)
4546 return -EINVAL;
4547
4548
4549 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4550 if (cmd_match(page, "frozen"))
4551 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4552 else
4553 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4554 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4555 mddev_lock(mddev) == 0) {
4556 flush_workqueue(md_misc_wq);
4557 if (mddev->sync_thread) {
4558 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4559 md_reap_sync_thread(mddev);
4560 }
4561 mddev_unlock(mddev);
4562 }
4563 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4564 return -EBUSY;
4565 else if (cmd_match(page, "resync"))
4566 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4567 else if (cmd_match(page, "recover")) {
4568 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4569 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4570 } else if (cmd_match(page, "reshape")) {
4571 int err;
4572 if (mddev->pers->start_reshape == NULL)
4573 return -EINVAL;
4574 err = mddev_lock(mddev);
4575 if (!err) {
4576 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4577 err = -EBUSY;
4578 else {
4579 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4580 err = mddev->pers->start_reshape(mddev);
4581 }
4582 mddev_unlock(mddev);
4583 }
4584 if (err)
4585 return err;
4586 sysfs_notify(&mddev->kobj, NULL, "degraded");
4587 } else {
4588 if (cmd_match(page, "check"))
4589 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4590 else if (!cmd_match(page, "repair"))
4591 return -EINVAL;
4592 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4593 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4594 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4595 }
4596 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4600 mddev->ro = 0;
4601 md_wakeup_thread(mddev->sync_thread);
4602 }
4603 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4604 md_wakeup_thread(mddev->thread);
4605 sysfs_notify_dirent_safe(mddev->sysfs_action);
4606 return len;
4607}
4608
4609static struct md_sysfs_entry md_scan_mode =
4610__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
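
/*
 * Typical use from userspace (hypothetical array name):
 *   echo check > /sys/block/md0/md/sync_action
 * starts a read-only scrub; "repair" additionally rewrites any
 * inconsistent stripes, and "idle" interrupts a running action.
 */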
4611
4612static ssize_t
4613last_sync_action_show(struct mddev *mddev, char *page)
4614{
4615 return sprintf(page, "%s\n", mddev->last_sync_action);
4616}
4617
4618static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4619
4620static ssize_t
4621mismatch_cnt_show(struct mddev *mddev, char *page)
4622{
4623 return sprintf(page, "%llu\n",
4624 (unsigned long long)
4625 atomic64_read(&mddev->resync_mismatches));
4626}
4627
4628static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4629
4630static ssize_t
4631sync_min_show(struct mddev *mddev, char *page)
4632{
4633 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4634 mddev->sync_speed_min ? "local": "system");
4635}
4636
4637static ssize_t
4638sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4639{
4640 unsigned int min;
4641 int rv;
4642
4643 if (strncmp(buf, "system", 6)==0) {
4644 min = 0;
4645 } else {
4646 rv = kstrtouint(buf, 10, &min);
4647 if (rv < 0)
4648 return rv;
4649 if (min == 0)
4650 return -EINVAL;
4651 }
4652 mddev->sync_speed_min = min;
4653 return len;
4654}
4655
4656static struct md_sysfs_entry md_sync_min =
4657__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4658
4659static ssize_t
4660sync_max_show(struct mddev *mddev, char *page)
4661{
4662 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4663 mddev->sync_speed_max ? "local": "system");
4664}
4665
4666static ssize_t
4667sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4668{
4669 unsigned int max;
4670 int rv;
4671
4672 if (strncmp(buf, "system", 6)==0) {
4673 max = 0;
4674 } else {
4675 rv = kstrtouint(buf, 10, &max);
4676 if (rv < 0)
4677 return rv;
4678 if (max == 0)
4679 return -EINVAL;
4680 }
4681 mddev->sync_speed_max = max;
4682 return len;
4683}
4684
4685static struct md_sysfs_entry md_sync_max =
4686__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
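
/*
 * sync_speed_min/sync_speed_max are per-array overrides (in KiB/sec) of
 * the global dev.raid.speed_limit_min/speed_limit_max sysctls; writing
 * "system" reverts to the global value, as handled above.
 */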
4687
4688static ssize_t
4689degraded_show(struct mddev *mddev, char *page)
4690{
4691 return sprintf(page, "%d\n", mddev->degraded);
4692}
4693static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4694
4695static ssize_t
4696sync_force_parallel_show(struct mddev *mddev, char *page)
4697{
4698 return sprintf(page, "%d\n", mddev->parallel_resync);
4699}
4700
4701static ssize_t
4702sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4703{
4704 long n;
4705
4706 if (kstrtol(buf, 10, &n))
4707 return -EINVAL;
4708
4709 if (n != 0 && n != 1)
4710 return -EINVAL;
4711
4712 mddev->parallel_resync = n;
4713
4714 if (mddev->sync_thread)
4715 wake_up(&resync_wait);
4716
4717 return len;
4718}
4719
/* force parallel resync, even with shared block devices */
4721static struct md_sysfs_entry md_sync_force_parallel =
4722__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4723 sync_force_parallel_show, sync_force_parallel_store);
4724
4725static ssize_t
4726sync_speed_show(struct mddev *mddev, char *page)
4727{
4728 unsigned long resync, dt, db;
4729 if (mddev->curr_resync == 0)
4730 return sprintf(page, "none\n");
4731 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4732 dt = (jiffies - mddev->resync_mark) / HZ;
4733 if (!dt) dt++;
4734 db = resync - mddev->resync_mark_cnt;
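	/* db is in 512-byte sectors, so db/dt/2 reports KiB per second */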
4735 return sprintf(page, "%lu\n", db/dt/2);
4736}
4737
4738static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4739
4740static ssize_t
4741sync_completed_show(struct mddev *mddev, char *page)
4742{
4743 unsigned long long max_sectors, resync;
4744
4745 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4746 return sprintf(page, "none\n");
4747
4748 if (mddev->curr_resync == 1 ||
4749 mddev->curr_resync == 2)
4750 return sprintf(page, "delayed\n");
4751
4752 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4753 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4754 max_sectors = mddev->resync_max_sectors;
4755 else
4756 max_sectors = mddev->dev_sectors;
4757
4758 resync = mddev->curr_resync_completed;
4759 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4760}
4761
4762static struct md_sysfs_entry md_sync_completed =
4763 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4764
4765static ssize_t
4766min_sync_show(struct mddev *mddev, char *page)
4767{
4768 return sprintf(page, "%llu\n",
4769 (unsigned long long)mddev->resync_min);
4770}
4771static ssize_t
4772min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4773{
4774 unsigned long long min;
4775 int err;
4776
4777 if (kstrtoull(buf, 10, &min))
4778 return -EINVAL;
4779
4780 spin_lock(&mddev->lock);
4781 err = -EINVAL;
4782 if (min > mddev->resync_max)
4783 goto out_unlock;
4784
4785 err = -EBUSY;
4786 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4787 goto out_unlock;
4788
	/* Round down to multiple of 4K for safety */
4790 mddev->resync_min = round_down(min, 8);
4791 err = 0;
4792
4793out_unlock:
4794 spin_unlock(&mddev->lock);
4795 return err ?: len;
4796}
4797
4798static struct md_sysfs_entry md_min_sync =
4799__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4800
4801static ssize_t
4802max_sync_show(struct mddev *mddev, char *page)
4803{
4804 if (mddev->resync_max == MaxSector)
4805 return sprintf(page, "max\n");
4806 else
4807 return sprintf(page, "%llu\n",
4808 (unsigned long long)mddev->resync_max);
4809}
4810static ssize_t
4811max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4812{
4813 int err;
4814 spin_lock(&mddev->lock);
4815 if (strncmp(buf, "max", 3) == 0)
4816 mddev->resync_max = MaxSector;
4817 else {
4818 unsigned long long max;
4819 int chunk;
4820
4821 err = -EINVAL;
4822 if (kstrtoull(buf, 10, &max))
4823 goto out_unlock;
4824 if (max < mddev->resync_min)
4825 goto out_unlock;
4826
4827 err = -EBUSY;
4828 if (max < mddev->resync_max &&
4829 mddev->ro == 0 &&
4830 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4831 goto out_unlock;
4832
		/* Must be a multiple of chunk_size */
4834 chunk = mddev->chunk_sectors;
4835 if (chunk) {
4836 sector_t temp = max;
4837
4838 err = -EINVAL;
4839 if (sector_div(temp, chunk))
4840 goto out_unlock;
4841 }
4842 mddev->resync_max = max;
4843 }
4844 wake_up(&mddev->recovery_wait);
4845 err = 0;
4846out_unlock:
4847 spin_unlock(&mddev->lock);
4848 return err ?: len;
4849}
4850
4851static struct md_sysfs_entry md_max_sync =
4852__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
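
/*
 * Illustrative use of the window: sync_min/sync_max are sector offsets
 * (sync_max must be a multiple of the chunk size), so
 *   echo 0 > sync_min; echo 4194304 > sync_max
 * would limit a check to the first 2 GiB of each device.
 */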
4853
4854static ssize_t
4855suspend_lo_show(struct mddev *mddev, char *page)
4856{
4857 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4858}
4859
4860static ssize_t
4861suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4862{
4863 unsigned long long new;
4864 int err;
4865
4866 err = kstrtoull(buf, 10, &new);
4867 if (err < 0)
4868 return err;
4869 if (new != (sector_t)new)
4870 return -EINVAL;
4871
4872 err = mddev_lock(mddev);
4873 if (err)
4874 return err;
4875 err = -EINVAL;
4876 if (mddev->pers == NULL ||
4877 mddev->pers->quiesce == NULL)
4878 goto unlock;
4879 mddev_suspend(mddev);
4880 mddev->suspend_lo = new;
4881 mddev_resume(mddev);
4882
4883 err = 0;
4884unlock:
4885 mddev_unlock(mddev);
4886 return err ?: len;
4887}
4888static struct md_sysfs_entry md_suspend_lo =
4889__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4890
4891static ssize_t
4892suspend_hi_show(struct mddev *mddev, char *page)
4893{
4894 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4895}
4896
4897static ssize_t
4898suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4899{
4900 unsigned long long new;
4901 int err;
4902
4903 err = kstrtoull(buf, 10, &new);
4904 if (err < 0)
4905 return err;
4906 if (new != (sector_t)new)
4907 return -EINVAL;
4908
4909 err = mddev_lock(mddev);
4910 if (err)
4911 return err;
4912 err = -EINVAL;
4913 if (mddev->pers == NULL)
4914 goto unlock;
4915
4916 mddev_suspend(mddev);
4917 mddev->suspend_hi = new;
4918 mddev_resume(mddev);
4919
4920 err = 0;
4921unlock:
4922 mddev_unlock(mddev);
4923 return err ?: len;
4924}
4925static struct md_sysfs_entry md_suspend_hi =
4926__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
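
/*
 * suspend_lo and suspend_hi delimit a sector range of the array to keep
 * quiesced; each store suspends and resumes the whole array so the new
 * range takes effect only after in-flight I/O has drained.
 */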
4927
4928static ssize_t
4929reshape_position_show(struct mddev *mddev, char *page)
4930{
4931 if (mddev->reshape_position != MaxSector)
4932 return sprintf(page, "%llu\n",
4933 (unsigned long long)mddev->reshape_position);
4934 strcpy(page, "none\n");
4935 return 5;
4936}
4937
4938static ssize_t
4939reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4940{
4941 struct md_rdev *rdev;
4942 unsigned long long new;
4943 int err;
4944
4945 err = kstrtoull(buf, 10, &new);
4946 if (err < 0)
4947 return err;
4948 if (new != (sector_t)new)
4949 return -EINVAL;
4950 err = mddev_lock(mddev);
4951 if (err)
4952 return err;
4953 err = -EBUSY;
4954 if (mddev->pers)
4955 goto unlock;
4956 mddev->reshape_position = new;
4957 mddev->delta_disks = 0;
4958 mddev->reshape_backwards = 0;
4959 mddev->new_level = mddev->level;
4960 mddev->new_layout = mddev->layout;
4961 mddev->new_chunk_sectors = mddev->chunk_sectors;
4962 rdev_for_each(rdev, mddev)
4963 rdev->new_data_offset = rdev->data_offset;
4964 err = 0;
4965unlock:
4966 mddev_unlock(mddev);
4967 return err ?: len;
4968}
4969
4970static struct md_sysfs_entry md_reshape_position =
4971__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4972 reshape_position_store);
4973
4974static ssize_t
4975reshape_direction_show(struct mddev *mddev, char *page)
4976{
4977 return sprintf(page, "%s\n",
4978 mddev->reshape_backwards ? "backwards" : "forwards");
4979}
4980
4981static ssize_t
4982reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4983{
4984 int backwards = 0;
4985 int err;
4986
4987 if (cmd_match(buf, "forwards"))
4988 backwards = 0;
4989 else if (cmd_match(buf, "backwards"))
4990 backwards = 1;
4991 else
4992 return -EINVAL;
4993 if (mddev->reshape_backwards == backwards)
4994 return len;
4995
4996 err = mddev_lock(mddev);
4997 if (err)
4998 return err;
4999
5000 if (mddev->delta_disks)
5001 err = -EBUSY;
5002 else if (mddev->persistent &&
5003 mddev->major_version == 0)
5004 err = -EINVAL;
5005 else
5006 mddev->reshape_backwards = backwards;
5007 mddev_unlock(mddev);
5008 return err ?: len;
5009}
5010
5011static struct md_sysfs_entry md_reshape_direction =
5012__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5013 reshape_direction_store);
5014
5015static ssize_t
5016array_size_show(struct mddev *mddev, char *page)
5017{
5018 if (mddev->external_size)
5019 return sprintf(page, "%llu\n",
5020 (unsigned long long)mddev->array_sectors/2);
5021 else
5022 return sprintf(page, "default\n");
5023}
5024
5025static ssize_t
5026array_size_store(struct mddev *mddev, const char *buf, size_t len)
5027{
5028 sector_t sectors;
5029 int err;
5030
5031 err = mddev_lock(mddev);
5032 if (err)
5033 return err;
5034
	/* cluster raid doesn't support change array_sectors */
5036 if (mddev_is_clustered(mddev)) {
5037 mddev_unlock(mddev);
5038 return -EINVAL;
5039 }
5040
5041 if (strncmp(buf, "default", 7) == 0) {
5042 if (mddev->pers)
5043 sectors = mddev->pers->size(mddev, 0, 0);
5044 else
5045 sectors = mddev->array_sectors;
5046
5047 mddev->external_size = 0;
5048 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5050 err = -EINVAL;
5051 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5052 err = -E2BIG;
5053 else
5054 mddev->external_size = 1;
5055 }
5056
5057 if (!err) {
5058 mddev->array_sectors = sectors;
5059 if (mddev->pers) {
5060 set_capacity(mddev->gendisk, mddev->array_sectors);
5061 revalidate_disk(mddev->gendisk);
5062 }
5063 }
5064 mddev_unlock(mddev);
5065 return err ?: len;
5066}
5067
5068static struct md_sysfs_entry md_array_size =
5069__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5070 array_size_store);
5071
5072static ssize_t
5073consistency_policy_show(struct mddev *mddev, char *page)
5074{
5075 int ret;
5076
5077 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5078 ret = sprintf(page, "journal\n");
5079 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5080 ret = sprintf(page, "ppl\n");
5081 } else if (mddev->bitmap) {
5082 ret = sprintf(page, "bitmap\n");
5083 } else if (mddev->pers) {
5084 if (mddev->pers->sync_request)
5085 ret = sprintf(page, "resync\n");
5086 else
5087 ret = sprintf(page, "none\n");
5088 } else {
5089 ret = sprintf(page, "unknown\n");
5090 }
5091
5092 return ret;
5093}
5094
5095static ssize_t
5096consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5097{
5098 int err = 0;
5099
5100 if (mddev->pers) {
5101 if (mddev->pers->change_consistency_policy)
5102 err = mddev->pers->change_consistency_policy(mddev, buf);
5103 else
5104 err = -EBUSY;
5105 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5106 set_bit(MD_HAS_PPL, &mddev->flags);
5107 } else {
5108 err = -EINVAL;
5109 }
5110
5111 return err ? err : len;
5112}
5113
5114static struct md_sysfs_entry md_consistency_policy =
5115__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5116 consistency_policy_store);
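
/*
 * Example (illustrative): writing "ppl" to an inactive, externally
 * managed array sets MD_HAS_PPL; on a running array the request is
 * passed to the personality's change_consistency_policy, if any.
 */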
5117
5118static struct attribute *md_default_attrs[] = {
5119 &md_level.attr,
5120 &md_layout.attr,
5121 &md_raid_disks.attr,
5122 &md_chunk_size.attr,
5123 &md_size.attr,
5124 &md_resync_start.attr,
5125 &md_metadata.attr,
5126 &md_new_device.attr,
5127 &md_safe_delay.attr,
5128 &md_array_state.attr,
5129 &md_reshape_position.attr,
5130 &md_reshape_direction.attr,
5131 &md_array_size.attr,
5132 &max_corr_read_errors.attr,
5133 &md_consistency_policy.attr,
5134 NULL,
5135};
5136
5137static struct attribute *md_redundancy_attrs[] = {
5138 &md_scan_mode.attr,
5139 &md_last_scan_mode.attr,
5140 &md_mismatches.attr,
5141 &md_sync_min.attr,
5142 &md_sync_max.attr,
5143 &md_sync_speed.attr,
5144 &md_sync_force_parallel.attr,
5145 &md_sync_completed.attr,
5146 &md_min_sync.attr,
5147 &md_max_sync.attr,
5148 &md_suspend_lo.attr,
5149 &md_suspend_hi.attr,
5150 &md_bitmap.attr,
5151 &md_degraded.attr,
5152 NULL,
5153};
5154static struct attribute_group md_redundancy_group = {
5155 .name = NULL,
5156 .attrs = md_redundancy_attrs,
5157};
5158
5159static ssize_t
5160md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5161{
5162 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5163 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5164 ssize_t rv;
5165
5166 if (!entry->show)
5167 return -EIO;
5168 spin_lock(&all_mddevs_lock);
5169 if (list_empty(&mddev->all_mddevs)) {
5170 spin_unlock(&all_mddevs_lock);
5171 return -EBUSY;
5172 }
5173 mddev_get(mddev);
5174 spin_unlock(&all_mddevs_lock);
5175
5176 rv = entry->show(mddev, page);
5177 mddev_put(mddev);
5178 return rv;
5179}
5180
5181static ssize_t
5182md_attr_store(struct kobject *kobj, struct attribute *attr,
5183 const char *page, size_t length)
5184{
5185 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5186 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5187 ssize_t rv;
5188
5189 if (!entry->store)
5190 return -EIO;
5191 if (!capable(CAP_SYS_ADMIN))
5192 return -EACCES;
5193 spin_lock(&all_mddevs_lock);
5194 if (list_empty(&mddev->all_mddevs)) {
5195 spin_unlock(&all_mddevs_lock);
5196 return -EBUSY;
5197 }
5198 mddev_get(mddev);
5199 spin_unlock(&all_mddevs_lock);
5200 rv = entry->store(mddev, page, length);
5201 mddev_put(mddev);
5202 return rv;
5203}
5204
5205static void md_free(struct kobject *ko)
5206{
5207 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5208
5209 if (mddev->sysfs_state)
5210 sysfs_put(mddev->sysfs_state);
5211
5212 if (mddev->gendisk)
5213 del_gendisk(mddev->gendisk);
5214 if (mddev->queue)
5215 blk_cleanup_queue(mddev->queue);
5216 if (mddev->gendisk)
5217 put_disk(mddev->gendisk);
5218 percpu_ref_exit(&mddev->writes_pending);
5219
5220 bioset_exit(&mddev->bio_set);
5221 bioset_exit(&mddev->sync_set);
5222 kfree(mddev);
5223}
5224
5225static const struct sysfs_ops md_sysfs_ops = {
5226 .show = md_attr_show,
5227 .store = md_attr_store,
5228};
5229static struct kobj_type md_ktype = {
5230 .release = md_free,
5231 .sysfs_ops = &md_sysfs_ops,
5232 .default_attrs = md_default_attrs,
5233};
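
/*
 * md_free() above is the kobject release hook: the mddev, its gendisk
 * and its request queue are only torn down once the final sysfs
 * reference to the "md" kobject is dropped.
 */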
5234
5235int mdp_major = 0;
5236
5237static void mddev_delayed_delete(struct work_struct *ws)
5238{
5239 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5240
5241 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5242 kobject_del(&mddev->kobj);
5243 kobject_put(&mddev->kobj);
5244}
5245
5246static void no_op(struct percpu_ref *r) {}
5247
5248int mddev_init_writes_pending(struct mddev *mddev)
5249{
5250 if (mddev->writes_pending.percpu_count_ptr)
5251 return 0;
5252 if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
5253 return -ENOMEM;
5254
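	/* drop the initial reference so writes_pending starts at zero */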
5255 percpu_ref_put(&mddev->writes_pending);
5256 return 0;
5257}
5258EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5259
5260static int md_alloc(dev_t dev, char *name)
5261{
	/*
	 * If dev is zero, name is the name of a device to allocate with
	 * an arbitrary minor number.  It will be "md_???"
	 * If dev is non-zero it must be a device number with a MAJOR of
	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
	 * the device is being created by opening a node in /dev.
	 * If "name" is not NULL, the device is being created by
	 * writing to /sys/module/md_mod/parameters/new_array.
	 */
5271 static DEFINE_MUTEX(disks_mutex);
5272 struct mddev *mddev = mddev_find(dev);
5273 struct gendisk *disk;
5274 int partitioned;
5275 int shift;
5276 int unit;
5277 int error;
5278
5279 if (!mddev)
5280 return -ENODEV;
5281
5282 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5283 shift = partitioned ? MdpMinorShift : 0;
5284 unit = MINOR(mddev->unit) >> shift;
5285
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
5289 flush_workqueue(md_misc_wq);
5290
5291 mutex_lock(&disks_mutex);
5292 error = -EEXIST;
5293 if (mddev->gendisk)
5294 goto abort;
5295
5296 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5299 struct mddev *mddev2;
5300 spin_lock(&all_mddevs_lock);
5301
5302 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5303 if (mddev2->gendisk &&
5304 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5305 spin_unlock(&all_mddevs_lock);
5306 goto abort;
5307 }
5308 spin_unlock(&all_mddevs_lock);
5309 }
5310 if (name && dev)
		/*
		 * Creating /dev/mdNNN via "newarray", so adjust hold_active.
		 */
5314 mddev->hold_active = UNTIL_STOP;
5315
5316 error = -ENOMEM;
5317 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5318 if (!mddev->queue)
5319 goto abort;
5320 mddev->queue->queuedata = mddev;
5321
5322 blk_queue_make_request(mddev->queue, md_make_request);
5323 blk_set_stacking_limits(&mddev->queue->limits);
5324
5325 disk = alloc_disk(1 << shift);
5326 if (!disk) {
5327 blk_cleanup_queue(mddev->queue);
5328 mddev->queue = NULL;
5329 goto abort;
5330 }
5331 disk->major = MAJOR(mddev->unit);
5332 disk->first_minor = unit << shift;
5333 if (name)
5334 strcpy(disk->disk_name, name);
5335 else if (partitioned)
5336 sprintf(disk->disk_name, "md_d%d", unit);
5337 else
5338 sprintf(disk->disk_name, "md%d", unit);
5339 disk->fops = &md_fops;
5340 disk->private_data = mddev;
5341 disk->queue = mddev->queue;
5342 blk_queue_write_cache(mddev->queue, true, true);
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
5347 disk->flags |= GENHD_FL_EXT_DEVT;
5348 mddev->gendisk = disk;
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
5352 mutex_lock(&mddev->open_mutex);
5353 add_disk(disk);
5354
5355 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5356 if (error) {
		/* This isn't possible, but as kobject_init_and_add is marked
		 * __must_check, we must do something with the result
		 */
5360 pr_debug("md: cannot register %s/md - name in use\n",
5361 disk->disk_name);
5362 error = 0;
5363 }
5364 if (mddev->kobj.sd &&
5365 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5366 pr_debug("pointless warning\n");
5367 mutex_unlock(&mddev->open_mutex);
5368 abort:
5369 mutex_unlock(&disks_mutex);
5370 if (!error && mddev->kobj.sd) {
5371 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5372 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5373 }
5374 mddev_put(mddev);
5375 return error;
5376}
5377
5378static struct kobject *md_probe(dev_t dev, int *part, void *data)
5379{
5380 if (create_on_open)
5381 md_alloc(dev, NULL);
5382 return NULL;
5383}
5384
5385static int add_named_array(const char *val, const struct kernel_param *kp)
5386{
	/*
	 * val must be "md_*" or "mdNNN".
	 * For "md_*" we allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 * For "mdNNN" we allocate an array with the minor number NNN
	 * which must not already be in use.
	 */
5394 int len = strlen(val);
5395 char buf[DISK_NAME_LEN];
5396 unsigned long devnum;
5397
5398 while (len && val[len-1] == '\n')
5399 len--;
5400 if (len >= DISK_NAME_LEN)
5401 return -E2BIG;
5402 strlcpy(buf, val, len+1);
5403 if (strncmp(buf, "md_", 3) == 0)
5404 return md_alloc(0, buf);
5405 if (strncmp(buf, "md", 2) == 0 &&
5406 isdigit(buf[2]) &&
5407 kstrtoul(buf+2, 10, &devnum) == 0 &&
5408 devnum <= MINORMASK)
5409 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5410
5411 return -EINVAL;
5412}
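
/*
 * Usage sketch (illustrative): either of
 *   echo md_test > /sys/module/md_mod/parameters/new_array
 *   echo md127 > /sys/module/md_mod/parameters/new_array
 * pre-creates the named array device before assembly.
 */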
5413
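/*
 * Fires when no write has been seen for safemode_delay: flag the array
 * so the md thread can mark it clean (or, for externally managed
 * metadata, notify userspace through the array_state sysfs file).
 */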
5414static void md_safemode_timeout(struct timer_list *t)
5415{
5416 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5417
5418 mddev->safemode = 1;
5419 if (mddev->external)
5420 sysfs_notify_dirent_safe(mddev->sysfs_state);
5421
5422 md_wakeup_thread(mddev->thread);
5423}
5424
5425static int start_dirty_degraded;
5426
5427int md_run(struct mddev *mddev)
5428{
5429 int err;
5430 struct md_rdev *rdev;
5431 struct md_personality *pers;
5432
5433 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
5435 return -EINVAL;
5436
5437 if (mddev->pers)
5438 return -EBUSY;
5439
5440 if (mddev->sysfs_active)
5441 return -EBUSY;
5442
	/*
	 * Analyze all RAID superblock(s)
	 */
5446 if (!mddev->raid_disks) {
5447 if (!mddev->persistent)
5448 return -EINVAL;
5449 analyze_sbs(mddev);
5450 }
5451
5452 if (mddev->level != LEVEL_NONE)
5453 request_module("md-level-%d", mddev->level);
5454 else if (mddev->clevel[0])
5455 request_module("md-%s", mddev->clevel);
5456
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5462 mddev->has_superblocks = false;
5463 rdev_for_each(rdev, mddev) {
5464 if (test_bit(Faulty, &rdev->flags))
5465 continue;
5466 sync_blockdev(rdev->bdev);
5467 invalidate_bdev(rdev->bdev);
5468 if (mddev->ro != 1 &&
5469 (bdev_read_only(rdev->bdev) ||
5470 bdev_read_only(rdev->meta_bdev))) {
5471 mddev->ro = 1;
5472 if (mddev->gendisk)
5473 set_disk_ro(mddev->gendisk, 1);
5474 }
5475
5476 if (rdev->sb_page)
5477 mddev->has_superblocks = true;
5478
		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
5483 if (rdev->meta_bdev) {
5484 ;
5485 } else if (rdev->data_offset < rdev->sb_start) {
5486 if (mddev->dev_sectors &&
5487 rdev->data_offset + mddev->dev_sectors
5488 > rdev->sb_start) {
5489 pr_warn("md: %s: data overlaps metadata\n",
5490 mdname(mddev));
5491 return -EINVAL;
5492 }
5493 } else {
5494 if (rdev->sb_start + rdev->sb_size/512
5495 > rdev->data_offset) {
5496 pr_warn("md: %s: metadata overlaps data\n",
5497 mdname(mddev));
5498 return -EINVAL;
5499 }
5500 }
5501 sysfs_notify_dirent_safe(rdev->sysfs_state);
5502 }
5503
5504 if (!bioset_initialized(&mddev->bio_set)) {
5505 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5506 if (err)
5507 return err;
5508 }
5509 if (!bioset_initialized(&mddev->sync_set)) {
5510 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5511 if (err)
5512 return err;
5513 }
5514 if (mddev->flush_pool == NULL) {
5515 mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
5516 flush_info_free, mddev);
5517 if (!mddev->flush_pool) {
5518 err = -ENOMEM;
5519 goto abort;
5520 }
5521 }
5522 if (mddev->flush_bio_pool == NULL) {
5523 mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
5524 flush_bio_free, mddev);
5525 if (!mddev->flush_bio_pool) {
5526 err = -ENOMEM;
5527 goto abort;
5528 }
5529 }
5530
5531 spin_lock(&pers_lock);
5532 pers = find_pers(mddev->level, mddev->clevel);
5533 if (!pers || !try_module_get(pers->owner)) {
5534 spin_unlock(&pers_lock);
5535 if (mddev->level != LEVEL_NONE)
5536 pr_warn("md: personality for level %d is not loaded!\n",
5537 mddev->level);
5538 else
5539 pr_warn("md: personality for level %s is not loaded!\n",
5540 mddev->clevel);
5541 err = -EINVAL;
5542 goto abort;
5543 }
5544 spin_unlock(&pers_lock);
5545 if (mddev->level != pers->level) {
5546 mddev->level = pers->level;
5547 mddev->new_level = pers->level;
5548 }
5549 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5550
5551 if (mddev->reshape_position != MaxSector &&
5552 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5554 module_put(pers->owner);
5555 err = -EINVAL;
5556 goto abort;
5557 }
5558
5559 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
5563 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5564 struct md_rdev *rdev2;
5565 int warned = 0;
5566
5567 rdev_for_each(rdev, mddev)
5568 rdev_for_each(rdev2, mddev) {
5569 if (rdev < rdev2 &&
5570 rdev->bdev->bd_contains ==
5571 rdev2->bdev->bd_contains) {
5572 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5573 mdname(mddev),
5574 bdevname(rdev->bdev,b),
5575 bdevname(rdev2->bdev,b2));
5576 warned = 1;
5577 }
5578 }
5579
5580 if (warned)
5581 pr_warn("True protection against single-disk failure might be compromised.\n");
5582 }
5583
5584 mddev->recovery = 0;
	/* may be over-ridden by personality */
5586 mddev->resync_max_sectors = mddev->dev_sectors;
5587
5588 mddev->ok_start_degraded = start_dirty_degraded;
5589
5590 if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */
5592
5593 err = pers->run(mddev);
5594 if (err)
5595 pr_warn("md: pers->run() failed ...\n");
5596 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5597 WARN_ONCE(!mddev->external_size,
5598 "%s: default size too small, but 'external_size' not in effect?\n",
5599 __func__);
5600 pr_warn("md: invalid array_size %llu > default size %llu\n",
5601 (unsigned long long)mddev->array_sectors / 2,
5602 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5603 err = -EINVAL;
5604 }
5605 if (err == 0 && pers->sync_request &&
5606 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5607 struct bitmap *bitmap;
5608
5609 bitmap = md_bitmap_create(mddev, -1);
5610 if (IS_ERR(bitmap)) {
5611 err = PTR_ERR(bitmap);
5612 pr_warn("%s: failed to create bitmap (%d)\n",
5613 mdname(mddev), err);
5614 } else
5615 mddev->bitmap = bitmap;
5616
5617 }
5618 if (err) {
5619 mddev_detach(mddev);
5620 if (mddev->private)
5621 pers->free(mddev, mddev->private);
5622 mddev->private = NULL;
5623 module_put(pers->owner);
5624 md_bitmap_destroy(mddev);
5625 goto abort;
5626 }
5627 if (mddev->queue) {
5628 bool nonrot = true;
5629
5630 rdev_for_each(rdev, mddev) {
5631 if (rdev->raid_disk >= 0 &&
5632 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5633 nonrot = false;
5634 break;
5635 }
5636 }
5637 if (mddev->degraded)
5638 nonrot = false;
5639 if (nonrot)
5640 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5641 else
5642 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5643 mddev->queue->backing_dev_info->congested_data = mddev;
5644 mddev->queue->backing_dev_info->congested_fn = md_congested;
5645 }
5646 if (pers->sync_request) {
5647 if (mddev->kobj.sd &&
5648 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5649 pr_warn("md: cannot register extra attributes for %s\n",
5650 mdname(mddev));
5651 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5652 } else if (mddev->ro == 2)
5653 mddev->ro = 0;
5654
5655 atomic_set(&mddev->max_corr_read_errors,
5656 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5657 mddev->safemode = 0;
5658 if (mddev_is_clustered(mddev))
5659 mddev->safemode_delay = 0;
5660 else
		mddev->safemode_delay = (200 * HZ)/1000 + 1; /* 200 msec delay */
5662 mddev->in_sync = 1;
5663 smp_wmb();
5664 spin_lock(&mddev->lock);
5665 mddev->pers = pers;
5666 spin_unlock(&mddev->lock);
5667 rdev_for_each(rdev, mddev)
5668 if (rdev->raid_disk >= 0)
5669 if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
5671
5672 if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
5676 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5677 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5678
5679 if (mddev->sb_flags)
5680 md_update_sb(mddev, 0);
5681
5682 md_new_event(mddev);
5683 sysfs_notify_dirent_safe(mddev->sysfs_state);
5684 sysfs_notify_dirent_safe(mddev->sysfs_action);
5685 sysfs_notify(&mddev->kobj, NULL, "degraded");
5686 return 0;
5687
5688abort:
5689 mempool_destroy(mddev->flush_bio_pool);
5690 mddev->flush_bio_pool = NULL;
5691 mempool_destroy(mddev->flush_pool);
5692 mddev->flush_pool = NULL;
5693
5694 return err;
5695}
5696EXPORT_SYMBOL_GPL(md_run);
5697
5698static int do_md_run(struct mddev *mddev)
5699{
5700 int err;
5701
5702 err = md_run(mddev);
5703 if (err)
5704 goto out;
5705 err = md_bitmap_load(mddev);
5706 if (err) {
5707 md_bitmap_destroy(mddev);
5708 goto out;
5709 }
5710
5711 if (mddev_is_clustered(mddev))
5712 md_allow_write(mddev);
5713
	/* run start up tasks that require md_thread */
5715 md_start(mddev);
5716
5717 md_wakeup_thread(mddev->thread);
5718 md_wakeup_thread(mddev->sync_thread);
5719
5720 set_capacity(mddev->gendisk, mddev->array_sectors);
5721 revalidate_disk(mddev->gendisk);
5722 mddev->changed = 1;
5723 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5724out:
5725 return err;
5726}
5727
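/*
 * md_start() runs the personality's deferred ->start step (journal
 * replay, for example) while MD_RECOVERY_WAIT keeps resync from
 * starting too early.
 */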
5728int md_start(struct mddev *mddev)
5729{
5730 int ret = 0;
5731
5732 if (mddev->pers->start) {
5733 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5734 md_wakeup_thread(mddev->thread);
5735 ret = mddev->pers->start(mddev);
5736 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5737 md_wakeup_thread(mddev->sync_thread);
5738 }
5739 return ret;
5740}
5741EXPORT_SYMBOL_GPL(md_start);
5742
5743static int restart_array(struct mddev *mddev)
5744{
5745 struct gendisk *disk = mddev->gendisk;
5746 struct md_rdev *rdev;
5747 bool has_journal = false;
5748 bool has_readonly = false;
5749
	/* Complain if it has no devices */
5751 if (list_empty(&mddev->disks))
5752 return -ENXIO;
5753 if (!mddev->pers)
5754 return -EINVAL;
5755 if (!mddev->ro)
5756 return -EBUSY;
5757
5758 rcu_read_lock();
5759 rdev_for_each_rcu(rdev, mddev) {
5760 if (test_bit(Journal, &rdev->flags) &&
5761 !test_bit(Faulty, &rdev->flags))
5762 has_journal = true;
5763 if (bdev_read_only(rdev->bdev))
5764 has_readonly = true;
5765 }
5766 rcu_read_unlock();
5767 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart rw with journal missing/faulty */
5769 return -EINVAL;
5770 if (has_readonly)
5771 return -EROFS;
5772
5773 mddev->safemode = 0;
5774 mddev->ro = 0;
5775 set_disk_ro(disk, 0);
5776 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5777
5778 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5779 md_wakeup_thread(mddev->thread);
5780 md_wakeup_thread(mddev->sync_thread);
5781 sysfs_notify_dirent_safe(mddev->sysfs_state);
5782 return 0;
5783}
5784
5785static void md_clean(struct mddev *mddev)
5786{
5787 mddev->array_sectors = 0;
5788 mddev->external_size = 0;
5789 mddev->dev_sectors = 0;
5790 mddev->raid_disks = 0;
5791 mddev->recovery_cp = 0;
5792 mddev->resync_min = 0;
5793 mddev->resync_max = MaxSector;
5794 mddev->reshape_position = MaxSector;
5795 mddev->external = 0;
5796 mddev->persistent = 0;
5797 mddev->level = LEVEL_NONE;
5798 mddev->clevel[0] = 0;
5799 mddev->flags = 0;
5800 mddev->sb_flags = 0;
5801 mddev->ro = 0;
5802 mddev->metadata_type[0] = 0;
5803 mddev->chunk_sectors = 0;
5804 mddev->ctime = mddev->utime = 0;
5805 mddev->layout = 0;
5806 mddev->max_disks = 0;
5807 mddev->events = 0;
5808 mddev->can_decrease_events = 0;
5809 mddev->delta_disks = 0;
5810 mddev->reshape_backwards = 0;
5811 mddev->new_level = LEVEL_NONE;
5812 mddev->new_layout = 0;
5813 mddev->new_chunk_sectors = 0;
5814 mddev->curr_resync = 0;
5815 atomic64_set(&mddev->resync_mismatches, 0);
5816 mddev->suspend_lo = mddev->suspend_hi = 0;
5817 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5818 mddev->recovery = 0;
5819 mddev->in_sync = 0;
5820 mddev->changed = 0;
5821 mddev->degraded = 0;
5822 mddev->safemode = 0;
5823 mddev->private = NULL;
5824 mddev->cluster_info = NULL;
5825 mddev->bitmap_info.offset = 0;
5826 mddev->bitmap_info.default_offset = 0;
5827 mddev->bitmap_info.default_space = 0;
5828 mddev->bitmap_info.chunksize = 0;
5829 mddev->bitmap_info.daemon_sleep = 0;
5830 mddev->bitmap_info.max_write_behind = 0;
5831 mddev->bitmap_info.nodes = 0;
5832}
5833
5834static void __md_stop_writes(struct mddev *mddev)
5835{
5836 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5837 flush_workqueue(md_misc_wq);
5838 if (mddev->sync_thread) {
5839 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5840 md_reap_sync_thread(mddev);
5841 }
5842
5843 del_timer_sync(&mddev->safemode_timer);
5844
5845 if (mddev->pers && mddev->pers->quiesce) {
5846 mddev->pers->quiesce(mddev, 1);
5847 mddev->pers->quiesce(mddev, 0);
5848 }
5849 md_bitmap_flush(mddev);
5850
5851 if (mddev->ro == 0 &&
5852 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5853 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
5855 if (!mddev_is_clustered(mddev))
5856 mddev->in_sync = 1;
5857 md_update_sb(mddev, 1);
5858 }
5859}
5860
5861void md_stop_writes(struct mddev *mddev)
5862{
5863 mddev_lock_nointr(mddev);
5864 __md_stop_writes(mddev);
5865 mddev_unlock(mddev);
5866}
5867EXPORT_SYMBOL_GPL(md_stop_writes);
5868
5869static void mddev_detach(struct mddev *mddev)
5870{
5871 md_bitmap_wait_behind_writes(mddev);
5872 if (mddev->pers && mddev->pers->quiesce) {
5873 mddev->pers->quiesce(mddev, 1);
5874 mddev->pers->quiesce(mddev, 0);
5875 }
5876 md_unregister_thread(&mddev->thread);
5877 if (mddev->queue)
5878 blk_sync_queue(mddev->queue);
5879}
5880
5881static void __md_stop(struct mddev *mddev)
5882{
5883 struct md_personality *pers = mddev->pers;
5884 md_bitmap_destroy(mddev);
5885 mddev_detach(mddev);
	/* Ensure ->event_work is done */
5887 flush_workqueue(md_misc_wq);
5888 spin_lock(&mddev->lock);
5889 mddev->pers = NULL;
5890 spin_unlock(&mddev->lock);
5891 pers->free(mddev, mddev->private);
5892 mddev->private = NULL;
5893 if (pers->sync_request && mddev->to_remove == NULL)
5894 mddev->to_remove = &md_redundancy_group;
5895 module_put(pers->owner);
5896 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5897 if (mddev->flush_bio_pool) {
5898 mempool_destroy(mddev->flush_bio_pool);
5899 mddev->flush_bio_pool = NULL;
5900 }
5901 if (mddev->flush_pool) {
5902 mempool_destroy(mddev->flush_pool);
5903 mddev->flush_pool = NULL;
5904 }
5905}
5906
5907void md_stop(struct mddev *mddev)
5908{
	/* stop the array and free any attached data structures.
	 * This is called from dm-raid
	 */
5912 __md_stop(mddev);
5913 bioset_exit(&mddev->bio_set);
5914 bioset_exit(&mddev->sync_set);
5915}
5916
5917EXPORT_SYMBOL_GPL(md_stop);
5918
5919static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5920{
5921 int err = 0;
5922 int did_freeze = 0;
5923
5924 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5925 did_freeze = 1;
5926 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5927 md_wakeup_thread(mddev->thread);
5928 }
5929 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5930 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5931 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5934 wake_up_process(mddev->sync_thread->tsk);
5935
5936 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5937 return -EBUSY;
5938 mddev_unlock(mddev);
5939 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5940 &mddev->recovery));
5941 wait_event(mddev->sb_wait,
5942 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
5943 mddev_lock_nointr(mddev);
5944
5945 mutex_lock(&mddev->open_mutex);
5946 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5947 mddev->sync_thread ||
5948 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5949 pr_warn("md: %s still in use.\n",mdname(mddev));
5950 if (did_freeze) {
5951 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5952 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5953 md_wakeup_thread(mddev->thread);
5954 }
5955 err = -EBUSY;
5956 goto out;
5957 }
5958 if (mddev->pers) {
5959 __md_stop_writes(mddev);
5960
5961 err = -ENXIO;
5962 if (mddev->ro==1)
5963 goto out;
5964 mddev->ro = 1;
5965 set_disk_ro(mddev->gendisk, 1);
5966 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5967 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5968 md_wakeup_thread(mddev->thread);
5969 sysfs_notify_dirent_safe(mddev->sysfs_state);
5970 err = 0;
5971 }
5972out:
5973 mutex_unlock(&mddev->open_mutex);
5974 return err;
5975}
5976
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
5981static int do_md_stop(struct mddev *mddev, int mode,
5982 struct block_device *bdev)
5983{
5984 struct gendisk *disk = mddev->gendisk;
5985 struct md_rdev *rdev;
5986 int did_freeze = 0;
5987
5988 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5989 did_freeze = 1;
5990 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5991 md_wakeup_thread(mddev->thread);
5992 }
5993 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5994 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5995 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5998 wake_up_process(mddev->sync_thread->tsk);
5999
6000 mddev_unlock(mddev);
6001 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6002 !test_bit(MD_RECOVERY_RUNNING,
6003 &mddev->recovery)));
6004 mddev_lock_nointr(mddev);
6005
6006 mutex_lock(&mddev->open_mutex);
6007 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6008 mddev->sysfs_active ||
6009 mddev->sync_thread ||
6010 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6011 pr_warn("md: %s still in use.\n",mdname(mddev));
6012 mutex_unlock(&mddev->open_mutex);
6013 if (did_freeze) {
6014 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6015 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6016 md_wakeup_thread(mddev->thread);
6017 }
6018 return -EBUSY;
6019 }
6020 if (mddev->pers) {
6021 if (mddev->ro)
6022 set_disk_ro(disk, 0);
6023
6024 __md_stop_writes(mddev);
6025 __md_stop(mddev);
6026 mddev->queue->backing_dev_info->congested_fn = NULL;
6027
		/* tell userspace to handle 'inactive' */
6029 sysfs_notify_dirent_safe(mddev->sysfs_state);
6030
6031 rdev_for_each(rdev, mddev)
6032 if (rdev->raid_disk >= 0)
6033 sysfs_unlink_rdev(mddev, rdev);
6034
6035 set_capacity(disk, 0);
6036 mutex_unlock(&mddev->open_mutex);
6037 mddev->changed = 1;
6038 revalidate_disk(disk);
6039
6040 if (mddev->ro)
6041 mddev->ro = 0;
6042 } else
6043 mutex_unlock(&mddev->open_mutex);
6044
	/*
	 * Free resources if final stop
	 */
6047 if (mode == 0) {
6048 pr_info("md: %s stopped.\n", mdname(mddev));
6049
6050 if (mddev->bitmap_info.file) {
6051 struct file *f = mddev->bitmap_info.file;
6052 spin_lock(&mddev->lock);
6053 mddev->bitmap_info.file = NULL;
6054 spin_unlock(&mddev->lock);
6055 fput(f);
6056 }
6057 mddev->bitmap_info.offset = 0;
6058
6059 export_array(mddev);
6060
6061 md_clean(mddev);
6062 if (mddev->hold_active == UNTIL_STOP)
6063 mddev->hold_active = 0;
6064 }
6065 md_new_event(mddev);
6066 sysfs_notify_dirent_safe(mddev->sysfs_state);
6067 return 0;
6068}
6069
6070#ifndef MODULE
6071static void autorun_array(struct mddev *mddev)
6072{
6073 struct md_rdev *rdev;
6074 int err;
6075
6076 if (list_empty(&mddev->disks))
6077 return;
6078
6079 pr_info("md: running: ");
6080
6081 rdev_for_each(rdev, mddev) {
6082 char b[BDEVNAME_SIZE];
6083 pr_cont("<%s>", bdevname(rdev->bdev,b));
6084 }
6085 pr_cont("\n");
6086
6087 err = do_md_run(mddev);
6088 if (err) {
6089 pr_warn("md: do_md_run() returned %d\n", err);
6090 do_md_stop(mddev, 0, NULL);
6091 }
6092}
6093
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
6106static void autorun_devices(int part)
6107{
6108 struct md_rdev *rdev0, *rdev, *tmp;
6109 struct mddev *mddev;
6110 char b[BDEVNAME_SIZE];
6111
6112 pr_info("md: autorun ...\n");
6113 while (!list_empty(&pending_raid_disks)) {
6114 int unit;
6115 dev_t dev;
6116 LIST_HEAD(candidates);
6117 rdev0 = list_entry(pending_raid_disks.next,
6118 struct md_rdev, same_set);
6119
6120 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6121 INIT_LIST_HEAD(&candidates);
6122 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6123 if (super_90_load(rdev, rdev0, 0) >= 0) {
6124 pr_debug("md: adding %s ...\n",
6125 bdevname(rdev->bdev,b));
6126 list_move(&rdev->same_set, &candidates);
6127 }
6128
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
6133 if (part) {
6134 dev = MKDEV(mdp_major,
6135 rdev0->preferred_minor << MdpMinorShift);
6136 unit = MINOR(dev) >> MdpMinorShift;
6137 } else {
6138 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6139 unit = MINOR(dev);
6140 }
6141 if (rdev0->preferred_minor != unit) {
6142 pr_warn("md: unit number in %s is bad: %d\n",
6143 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6144 break;
6145 }
6146
6147 md_probe(dev, NULL, NULL);
6148 mddev = mddev_find(dev);
6149 if (!mddev || !mddev->gendisk) {
6150 if (mddev)
6151 mddev_put(mddev);
6152 break;
6153 }
6154 if (mddev_lock(mddev))
6155 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6156 else if (mddev->raid_disks || mddev->major_version
6157 || !list_empty(&mddev->disks)) {
6158 pr_warn("md: %s already running, cannot run %s\n",
6159 mdname(mddev), bdevname(rdev0->bdev,b));
6160 mddev_unlock(mddev);
6161 } else {
6162 pr_debug("md: created %s\n", mdname(mddev));
6163 mddev->persistent = 1;
6164 rdev_for_each_list(rdev, tmp, &candidates) {
6165 list_del_init(&rdev->same_set);
6166 if (bind_rdev_to_array(rdev, mddev))
6167 export_rdev(rdev);
6168 }
6169 autorun_array(mddev);
6170 mddev_unlock(mddev);
6171 }
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
6175 rdev_for_each_list(rdev, tmp, &candidates) {
6176 list_del_init(&rdev->same_set);
6177 export_rdev(rdev);
6178 }
6179 mddev_put(mddev);
6180 }
6181 pr_info("md: ... autorun DONE.\n");
6182}
6183#endif
6184
6185static int get_version(void __user *arg)
6186{
6187 mdu_version_t ver;
6188
6189 ver.major = MD_MAJOR_VERSION;
6190 ver.minor = MD_MINOR_VERSION;
6191 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6192
6193 if (copy_to_user(arg, &ver, sizeof(ver)))
6194 return -EFAULT;
6195
6196 return 0;
6197}
6198
6199static int get_array_info(struct mddev *mddev, void __user *arg)
6200{
6201 mdu_array_info_t info;
6202 int nr,working,insync,failed,spare;
6203 struct md_rdev *rdev;
6204
6205 nr = working = insync = failed = spare = 0;
6206 rcu_read_lock();
6207 rdev_for_each_rcu(rdev, mddev) {
6208 nr++;
6209 if (test_bit(Faulty, &rdev->flags))
6210 failed++;
6211 else {
6212 working++;
6213 if (test_bit(In_sync, &rdev->flags))
6214 insync++;
6215 else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
6217 ;
6218 else
6219 spare++;
6220 }
6221 }
6222 rcu_read_unlock();
6223
6224 info.major_version = mddev->major_version;
6225 info.minor_version = mddev->minor_version;
6226 info.patch_version = MD_PATCHLEVEL_VERSION;
6227 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6228 info.level = mddev->level;
6229 info.size = mddev->dev_sectors / 2;
	if (info.size != mddev->dev_sectors / 2) /* overflow */
6231 info.size = -1;
6232 info.nr_disks = nr;
6233 info.raid_disks = mddev->raid_disks;
6234 info.md_minor = mddev->md_minor;
6235 info.not_persistent= !mddev->persistent;
6236
6237 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6238 info.state = 0;
6239 if (mddev->in_sync)
6240 info.state = (1<<MD_SB_CLEAN);
6241 if (mddev->bitmap && mddev->bitmap_info.offset)
6242 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6243 if (mddev_is_clustered(mddev))
6244 info.state |= (1<<MD_SB_CLUSTERED);
6245 info.active_disks = insync;
6246 info.working_disks = working;
6247 info.failed_disks = failed;
6248 info.spare_disks = spare;
6249
6250 info.layout = mddev->layout;
6251 info.chunk_size = mddev->chunk_sectors << 9;
6252
6253 if (copy_to_user(arg, &info, sizeof(info)))
6254 return -EFAULT;
6255
6256 return 0;
6257}
6258
6259static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6260{
6261 mdu_bitmap_file_t *file = NULL;
6262 char *ptr;
6263 int err;
6264
6265 file = kzalloc(sizeof(*file), GFP_NOIO);
6266 if (!file)
6267 return -ENOMEM;
6268
6269 err = 0;
6270 spin_lock(&mddev->lock);
6271
6272 if (mddev->bitmap_info.file) {
6273 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6274 sizeof(file->pathname));
6275 if (IS_ERR(ptr))
6276 err = PTR_ERR(ptr);
6277 else
6278 memmove(file->pathname, ptr,
6279 sizeof(file->pathname)-(ptr-file->pathname));
6280 }
6281 spin_unlock(&mddev->lock);
6282
6283 if (err == 0 &&
6284 copy_to_user(arg, file, sizeof(*file)))
6285 err = -EFAULT;
6286
6287 kfree(file);
6288 return err;
6289}
6290
6291static int get_disk_info(struct mddev *mddev, void __user * arg)
6292{
6293 mdu_disk_info_t info;
6294 struct md_rdev *rdev;
6295
6296 if (copy_from_user(&info, arg, sizeof(info)))
6297 return -EFAULT;
6298
6299 rcu_read_lock();
6300 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6301 if (rdev) {
6302 info.major = MAJOR(rdev->bdev->bd_dev);
6303 info.minor = MINOR(rdev->bdev->bd_dev);
6304 info.raid_disk = rdev->raid_disk;
6305 info.state = 0;
6306 if (test_bit(Faulty, &rdev->flags))
6307 info.state |= (1<<MD_DISK_FAULTY);
6308 else if (test_bit(In_sync, &rdev->flags)) {
6309 info.state |= (1<<MD_DISK_ACTIVE);
6310 info.state |= (1<<MD_DISK_SYNC);
6311 }
6312 if (test_bit(Journal, &rdev->flags))
6313 info.state |= (1<<MD_DISK_JOURNAL);
6314 if (test_bit(WriteMostly, &rdev->flags))
6315 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6316 if (test_bit(FailFast, &rdev->flags))
6317 info.state |= (1<<MD_DISK_FAILFAST);
6318 } else {
6319 info.major = info.minor = 0;
6320 info.raid_disk = -1;
6321 info.state = (1<<MD_DISK_REMOVED);
6322 }
6323 rcu_read_unlock();
6324
6325 if (copy_to_user(arg, &info, sizeof(info)))
6326 return -EFAULT;
6327
6328 return 0;
6329}
6330
6331static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6332{
6333 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6334 struct md_rdev *rdev;
6335 dev_t dev = MKDEV(info->major,info->minor);
6336
6337 if (mddev_is_clustered(mddev) &&
6338 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6339 pr_warn("%s: Cannot add to clustered mddev.\n",
6340 mdname(mddev));
6341 return -EINVAL;
6342 }
6343
6344 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6345 return -EOVERFLOW;
6346
6347 if (!mddev->raid_disks) {
6348 int err;
6349
6350 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6351 if (IS_ERR(rdev)) {
6352 pr_warn("md: md_import_device returned %ld\n",
6353 PTR_ERR(rdev));
6354 return PTR_ERR(rdev);
6355 }
6356 if (!list_empty(&mddev->disks)) {
6357 struct md_rdev *rdev0
6358 = list_entry(mddev->disks.next,
6359 struct md_rdev, same_set);
6360 err = super_types[mddev->major_version]
6361 .load_super(rdev, rdev0, mddev->minor_version);
6362 if (err < 0) {
6363 pr_warn("md: %s has different UUID to %s\n",
6364 bdevname(rdev->bdev,b),
6365 bdevname(rdev0->bdev,b2));
6366 export_rdev(rdev);
6367 return -EINVAL;
6368 }
6369 }
6370 err = bind_rdev_to_array(rdev, mddev);
6371 if (err)
6372 export_rdev(rdev);
6373 return err;
6374 }
6375
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
6381 if (mddev->pers) {
6382 int err;
6383 if (!mddev->pers->hot_add_disk) {
6384 pr_warn("%s: personality does not support diskops!\n",
6385 mdname(mddev));
6386 return -EINVAL;
6387 }
6388 if (mddev->persistent)
6389 rdev = md_import_device(dev, mddev->major_version,
6390 mddev->minor_version);
6391 else
6392 rdev = md_import_device(dev, -1, -1);
6393 if (IS_ERR(rdev)) {
6394 pr_warn("md: md_import_device returned %ld\n",
6395 PTR_ERR(rdev));
6396 return PTR_ERR(rdev);
6397 }
6398
6399 if (!mddev->persistent) {
6400 if (info->state & (1<<MD_DISK_SYNC) &&
6401 info->raid_disk < mddev->raid_disks) {
6402 rdev->raid_disk = info->raid_disk;
6403 set_bit(In_sync, &rdev->flags);
6404 clear_bit(Bitmap_sync, &rdev->flags);
6405 } else
6406 rdev->raid_disk = -1;
6407 rdev->saved_raid_disk = rdev->raid_disk;
6408 } else
6409 super_types[mddev->major_version].
6410 validate_super(mddev, rdev);
6411 if ((info->state & (1<<MD_DISK_SYNC)) &&
6412 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
6416 export_rdev(rdev);
6417 return -EINVAL;
6418 }
6419
		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6421 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6422 set_bit(WriteMostly, &rdev->flags);
6423 else
6424 clear_bit(WriteMostly, &rdev->flags);
6425 if (info->state & (1<<MD_DISK_FAILFAST))
6426 set_bit(FailFast, &rdev->flags);
6427 else
6428 clear_bit(FailFast, &rdev->flags);
6429
6430 if (info->state & (1<<MD_DISK_JOURNAL)) {
6431 struct md_rdev *rdev2;
6432 bool has_journal = false;
6433
			/* make sure no existing journal disk */
6435 rdev_for_each(rdev2, mddev) {
6436 if (test_bit(Journal, &rdev2->flags)) {
6437 has_journal = true;
6438 break;
6439 }
6440 }
6441 if (has_journal || mddev->bitmap) {
6442 export_rdev(rdev);
6443 return -EBUSY;
6444 }
6445 set_bit(Journal, &rdev->flags);
6446 }
6447
		/*
		 * check whether the device shows up in other nodes
		 */
6450 if (mddev_is_clustered(mddev)) {
6451 if (info->state & (1 << MD_DISK_CANDIDATE))
6452 set_bit(Candidate, &rdev->flags);
6453 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6455 err = md_cluster_ops->add_new_disk(mddev, rdev);
6456 if (err) {
6457 export_rdev(rdev);
6458 return err;
6459 }
6460 }
6461 }
6462
6463 rdev->raid_disk = -1;
6464 err = bind_rdev_to_array(rdev, mddev);
6465
6466 if (err)
6467 export_rdev(rdev);
6468
6469 if (mddev_is_clustered(mddev)) {
6470 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6471 if (!err) {
6472 err = md_cluster_ops->new_disk_ack(mddev,
6473 err == 0);
6474 if (err)
6475 md_kick_rdev_from_array(rdev);
6476 }
6477 } else {
6478 if (err)
6479 md_cluster_ops->add_new_disk_cancel(mddev);
6480 else
6481 err = add_bound_rdev(rdev);
6482 }
6483
6484 } else if (!err)
6485 err = add_bound_rdev(rdev);
6486
6487 return err;
6488 }
6489
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6493 if (mddev->major_version != 0) {
6494 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6495 return -EINVAL;
6496 }
6497
6498 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6499 int err;
6500 rdev = md_import_device(dev, -1, 0);
6501 if (IS_ERR(rdev)) {
6502 pr_warn("md: error, md_import_device() returned %ld\n",
6503 PTR_ERR(rdev));
6504 return PTR_ERR(rdev);
6505 }
6506 rdev->desc_nr = info->number;
6507 if (info->raid_disk < mddev->raid_disks)
6508 rdev->raid_disk = info->raid_disk;
6509 else
6510 rdev->raid_disk = -1;
6511
6512 if (rdev->raid_disk < mddev->raid_disks)
6513 if (info->state & (1<<MD_DISK_SYNC))
6514 set_bit(In_sync, &rdev->flags);
6515
6516 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6517 set_bit(WriteMostly, &rdev->flags);
6518 if (info->state & (1<<MD_DISK_FAILFAST))
6519 set_bit(FailFast, &rdev->flags);
6520
6521 if (!mddev->persistent) {
6522 pr_debug("md: nonpersistent superblock ...\n");
6523 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6524 } else
6525 rdev->sb_start = calc_dev_sboffset(rdev);
6526 rdev->sectors = rdev->sb_start;
6527
6528 err = bind_rdev_to_array(rdev, mddev);
6529 if (err) {
6530 export_rdev(rdev);
6531 return err;
6532 }
6533 }
6534
6535 return 0;
6536}
6537
6538static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6539{
6540 char b[BDEVNAME_SIZE];
6541 struct md_rdev *rdev;
6542
6543 if (!mddev->pers)
6544 return -ENODEV;
6545
6546 rdev = find_rdev(mddev, dev);
6547 if (!rdev)
6548 return -ENXIO;
6549
6550 if (rdev->raid_disk < 0)
6551 goto kick_rdev;
6552
6553 clear_bit(Blocked, &rdev->flags);
6554 remove_and_add_spares(mddev, rdev);
6555
6556 if (rdev->raid_disk >= 0)
6557 goto busy;
6558
6559kick_rdev:
6560 if (mddev_is_clustered(mddev))
6561 md_cluster_ops->remove_disk(mddev, rdev);
6562
6563 md_kick_rdev_from_array(rdev);
6564 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6565 if (mddev->thread)
6566 md_wakeup_thread(mddev->thread);
6567 else
6568 md_update_sb(mddev, 1);
6569 md_new_event(mddev);
6570
6571 return 0;
6572busy:
6573 pr_debug("md: cannot remove active disk %s from %s ...\n",
6574 bdevname(rdev->bdev,b), mdname(mddev));
6575 return -EBUSY;
6576}
6577
6578static int hot_add_disk(struct mddev *mddev, dev_t dev)
6579{
6580 char b[BDEVNAME_SIZE];
6581 int err;
6582 struct md_rdev *rdev;
6583
6584 if (!mddev->pers)
6585 return -ENODEV;
6586
6587 if (mddev->major_version != 0) {
6588 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6589 mdname(mddev));
6590 return -EINVAL;
6591 }
6592 if (!mddev->pers->hot_add_disk) {
6593 pr_warn("%s: personality does not support diskops!\n",
6594 mdname(mddev));
6595 return -EINVAL;
6596 }
6597
6598 rdev = md_import_device(dev, -1, 0);
6599 if (IS_ERR(rdev)) {
6600 pr_warn("md: error, md_import_device() returned %ld\n",
6601 PTR_ERR(rdev));
6602 return -EINVAL;
6603 }
6604
6605 if (mddev->persistent)
6606 rdev->sb_start = calc_dev_sboffset(rdev);
6607 else
6608 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6609
6610 rdev->sectors = rdev->sb_start;
6611
6612 if (test_bit(Faulty, &rdev->flags)) {
6613 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6614 bdevname(rdev->bdev,b), mdname(mddev));
6615 err = -EINVAL;
6616 goto abort_export;
6617 }
6618
6619 clear_bit(In_sync, &rdev->flags);
6620 rdev->desc_nr = -1;
6621 rdev->saved_raid_disk = -1;
6622 err = bind_rdev_to_array(rdev, mddev);
6623 if (err)
6624 goto abort_export;
6625
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6631 rdev->raid_disk = -1;
6632
6633 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6634 if (!mddev->thread)
6635 md_update_sb(mddev, 1);
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
6640 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6641 md_wakeup_thread(mddev->thread);
6642 md_new_event(mddev);
6643 return 0;
6644
6645abort_export:
6646 export_rdev(rdev);
6647 return err;
6648}
6649
6650static int set_bitmap_file(struct mddev *mddev, int fd)
6651{
6652 int err = 0;
6653
6654 if (mddev->pers) {
6655 if (!mddev->pers->quiesce || !mddev->thread)
6656 return -EBUSY;
6657 if (mddev->recovery || mddev->sync_thread)
6658 return -EBUSY;
		/* we should be able to change the bitmap.. */
6660 }
6661
6662 if (fd >= 0) {
6663 struct inode *inode;
6664 struct file *f;
6665
6666 if (mddev->bitmap || mddev->bitmap_info.file)
6667 return -EEXIST;
6668 f = fget(fd);
6669
6670 if (f == NULL) {
6671 pr_warn("%s: error: failed to get bitmap file\n",
6672 mdname(mddev));
6673 return -EBADF;
6674 }
6675
6676 inode = f->f_mapping->host;
6677 if (!S_ISREG(inode->i_mode)) {
6678 pr_warn("%s: error: bitmap file must be a regular file\n",
6679 mdname(mddev));
6680 err = -EBADF;
6681 } else if (!(f->f_mode & FMODE_WRITE)) {
6682 pr_warn("%s: error: bitmap file must open for write\n",
6683 mdname(mddev));
6684 err = -EBADF;
6685 } else if (atomic_read(&inode->i_writecount) != 1) {
6686 pr_warn("%s: error: bitmap file is already in use\n",
6687 mdname(mddev));
6688 err = -EBUSY;
6689 }
6690 if (err) {
6691 fput(f);
6692 return err;
6693 }
6694 mddev->bitmap_info.file = f;
6695 mddev->bitmap_info.offset = 0;
6696 } else if (mddev->bitmap == NULL)
6697 return -ENOENT;
6698 err = 0;
6699 if (mddev->pers) {
6700 if (fd >= 0) {
6701 struct bitmap *bitmap;
6702
6703 bitmap = md_bitmap_create(mddev, -1);
6704 mddev_suspend(mddev);
6705 if (!IS_ERR(bitmap)) {
6706 mddev->bitmap = bitmap;
6707 err = md_bitmap_load(mddev);
6708 } else
6709 err = PTR_ERR(bitmap);
6710 if (err) {
6711 md_bitmap_destroy(mddev);
6712 fd = -1;
6713 }
6714 mddev_resume(mddev);
6715 } else if (fd < 0) {
6716 mddev_suspend(mddev);
6717 md_bitmap_destroy(mddev);
6718 mddev_resume(mddev);
6719 }
6720 }
6721 if (fd < 0) {
6722 struct file *f = mddev->bitmap_info.file;
6723 if (f) {
6724 spin_lock(&mddev->lock);
6725 mddev->bitmap_info.file = NULL;
6726 spin_unlock(&mddev->lock);
6727 fput(f);
6728 }
6729 }
6730
6731 return err;
6732}
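
/*
 * Illustrative sketch (not part of this driver): from userspace the
 * bitmap file is attached with the SET_BITMAP_FILE ioctl, passing the
 * open file descriptor as the integer argument.  The paths and error
 * handling below are hypothetical.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/raid/md_u.h>
 *	#include <fcntl.h>
 *
 *	int md = open("/dev/md0", O_RDWR);
 *	int fd = open("/var/lib/md0-bitmap", O_RDWR);
 *	if (ioctl(md, SET_BITMAP_FILE, fd) < 0)
 *		perror("SET_BITMAP_FILE");
 *	// passing fd == -1 detaches the bitmap file instead
 */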
6733
/*
 * set_array_info is used two different ways.
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the devices.
 *  The minor and patch _version numbers are also kept in case the
 *  super_block handler wishes to interpret them.
 */
6747static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6748{
	if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
6752 if (info->major_version < 0 ||
6753 info->major_version >= ARRAY_SIZE(super_types) ||
6754 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
6756 pr_warn("md: superblock version %d not known\n",
6757 info->major_version);
6758 return -EINVAL;
6759 }
6760 mddev->major_version = info->major_version;
6761 mddev->minor_version = info->minor_version;
6762 mddev->patch_version = info->patch_version;
		mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
		mddev->ctime = ktime_get_real_seconds();
		return 0;
6769 }
6770 mddev->major_version = MD_MAJOR_VERSION;
6771 mddev->minor_version = MD_MINOR_VERSION;
6772 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6773 mddev->ctime = ktime_get_real_seconds();
6774
6775 mddev->level = info->level;
6776 mddev->clevel[0] = 0;
6777 mddev->dev_sectors = 2 * (sector_t)info->size;
	mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
	if (info->state & (1<<MD_SB_CLEAN))
6783 mddev->recovery_cp = MaxSector;
6784 else
6785 mddev->recovery_cp = 0;
	mddev->persistent = !info->not_persistent;
6787 mddev->external = 0;
6788
6789 mddev->layout = info->layout;
6790 mddev->chunk_sectors = info->chunk_size >> 9;
6791
6792 if (mddev->persistent) {
6793 mddev->max_disks = MD_SB_DISKS;
6794 mddev->flags = 0;
6795 mddev->sb_flags = 0;
6796 }
6797 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6798
6799 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6800 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6801 mddev->bitmap_info.offset = 0;
6802
6803 mddev->reshape_position = MaxSector;

	/*
	 * Generate a 128 bit UUID
	 */
	get_random_bytes(mddev->uuid, 16);
6809
6810 mddev->new_level = mddev->level;
6811 mddev->new_chunk_sectors = mddev->chunk_sectors;
6812 mddev->new_layout = mddev->layout;
6813 mddev->delta_disks = 0;
6814 mddev->reshape_backwards = 0;
6815
6816 return 0;
6817}
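
/*
 * Illustrative sketch (not part of this driver): creating a new
 * version-0.90 array from userspace goes through SET_ARRAY_INFO with
 * raid_disks > 0; the device name and values below are hypothetical.
 *
 *	mdu_array_info_t info = {
 *		.level		= 1,
 *		.raid_disks	= 2,
 *		.size		= 1024 * 1024,	// KiB used per device
 *		.chunk_size	= 64 * 1024,	// bytes
 *	};
 *	int md = open("/dev/md0", O_RDWR);
 *	if (ioctl(md, SET_ARRAY_INFO, &info) < 0)
 *		perror("SET_ARRAY_INFO");
 *	// with raid_disks == 0 only the superblock version fields are used
 */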
6818
6819void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6820{
6821 lockdep_assert_held(&mddev->reconfig_mutex);
6822
6823 if (mddev->external_size)
6824 return;
6825
6826 mddev->array_sectors = array_sectors;
6827}
6828EXPORT_SYMBOL(md_set_array_sectors);
6829
6830static int update_size(struct mddev *mddev, sector_t num_sectors)
6831{
6832 struct md_rdev *rdev;
6833 int rv;
6834 int fit = (num_sectors == 0);
6835 sector_t old_dev_sectors = mddev->dev_sectors;
6836
6837 if (mddev->pers->resize == NULL)
6838 return -EINVAL;
6839
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is < data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6849 mddev->sync_thread)
6850 return -EBUSY;
6851 if (mddev->ro)
6852 return -EROFS;
6853
6854 rdev_for_each(rdev, mddev) {
6855 sector_t avail = rdev->sectors;
6856
6857 if (fit && (num_sectors == 0 || num_sectors > avail))
6858 num_sectors = avail;
6859 if (avail < num_sectors)
6860 return -ENOSPC;
6861 }
6862 rv = mddev->pers->resize(mddev, num_sectors);
6863 if (!rv) {
6864 if (mddev_is_clustered(mddev))
6865 md_cluster_ops->update_size(mddev, old_dev_sectors);
6866 else if (mddev->queue) {
6867 set_capacity(mddev->gendisk, mddev->array_sectors);
6868 revalidate_disk(mddev->gendisk);
6869 }
6870 }
6871 return rv;
6872}
6873
6874static int update_raid_disks(struct mddev *mddev, int raid_disks)
6875{
6876 int rv;
6877 struct md_rdev *rdev;
6878
6879 if (mddev->pers->check_reshape == NULL)
6880 return -EINVAL;
6881 if (mddev->ro)
6882 return -EROFS;
6883 if (raid_disks <= 0 ||
6884 (mddev->max_disks && raid_disks >= mddev->max_disks))
6885 return -EINVAL;
6886 if (mddev->sync_thread ||
6887 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6888 mddev->reshape_position != MaxSector)
6889 return -EBUSY;
6890
6891 rdev_for_each(rdev, mddev) {
6892 if (mddev->raid_disks < raid_disks &&
6893 rdev->data_offset < rdev->new_data_offset)
6894 return -EINVAL;
6895 if (mddev->raid_disks > raid_disks &&
6896 rdev->data_offset > rdev->new_data_offset)
6897 return -EINVAL;
6898 }
6899
6900 mddev->delta_disks = raid_disks - mddev->raid_disks;
6901 if (mddev->delta_disks < 0)
6902 mddev->reshape_backwards = 1;
6903 else if (mddev->delta_disks > 0)
6904 mddev->reshape_backwards = 0;
6905
6906 rv = mddev->pers->check_reshape(mddev);
6907 if (rv < 0) {
6908 mddev->delta_disks = 0;
6909 mddev->reshape_backwards = 0;
6910 }
6911 return rv;
6912}
6913
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, size, raid_disks, layout, chunk_size
 * fields in the info are checked against the array.
 * Any differences that cannot be handled will cause an error.
 * Normally, only one change can be managed at a time.
 */
6922static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6923{
6924 int rv = 0;
6925 int cnt = 0;
6926 int state = 0;
6927
	/* calculate expected state, ignoring low bits */
6929 if (mddev->bitmap && mddev->bitmap_info.offset)
6930 state |= (1 << MD_SB_BITMAP_PRESENT);
6931
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
	    mddev->ctime         != info->ctime         ||
	    mddev->level         != info->level         ||
/*	    mddev->layout        != info->layout        || */
	    mddev->persistent    != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;
	/* Check there is only one change */
6945 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6946 cnt++;
6947 if (mddev->raid_disks != info->raid_disks)
6948 cnt++;
6949 if (mddev->layout != info->layout)
6950 cnt++;
6951 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6952 cnt++;
6953 if (cnt == 0)
6954 return 0;
6955 if (cnt > 1)
6956 return -EINVAL;
6957
	if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
6963 if (mddev->pers->check_reshape == NULL)
6964 return -EINVAL;
6965 else {
6966 mddev->new_layout = info->layout;
6967 rv = mddev->pers->check_reshape(mddev);
6968 if (rv)
6969 mddev->new_layout = mddev->layout;
6970 return rv;
6971 }
6972 }
6973 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6974 rv = update_size(mddev, (sector_t)info->size * 2);
6975
6976 if (mddev->raid_disks != info->raid_disks)
6977 rv = update_raid_disks(mddev, info->raid_disks);
6978
6979 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6980 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6981 rv = -EINVAL;
6982 goto err;
6983 }
6984 if (mddev->recovery || mddev->sync_thread) {
6985 rv = -EBUSY;
6986 goto err;
6987 }
6988 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6989 struct bitmap *bitmap;
6990
6991 if (mddev->bitmap) {
6992 rv = -EEXIST;
6993 goto err;
6994 }
6995 if (mddev->bitmap_info.default_offset == 0) {
6996 rv = -EINVAL;
6997 goto err;
6998 }
6999 mddev->bitmap_info.offset =
7000 mddev->bitmap_info.default_offset;
7001 mddev->bitmap_info.space =
7002 mddev->bitmap_info.default_space;
7003 bitmap = md_bitmap_create(mddev, -1);
7004 mddev_suspend(mddev);
7005 if (!IS_ERR(bitmap)) {
7006 mddev->bitmap = bitmap;
7007 rv = md_bitmap_load(mddev);
7008 } else
7009 rv = PTR_ERR(bitmap);
7010 if (rv)
7011 md_bitmap_destroy(mddev);
7012 mddev_resume(mddev);
		} else {
			/* remove the bitmap */
7015 if (!mddev->bitmap) {
7016 rv = -ENOENT;
7017 goto err;
7018 }
7019 if (mddev->bitmap->storage.file) {
7020 rv = -EINVAL;
7021 goto err;
7022 }
			if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
7025 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7026 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7027 rv = -EPERM;
7028 md_cluster_ops->unlock_all_bitmaps(mddev);
7029 goto err;
7030 }
7031
7032 mddev->bitmap_info.nodes = 0;
7033 md_cluster_ops->leave(mddev);
7034 }
7035 mddev_suspend(mddev);
7036 md_bitmap_destroy(mddev);
7037 mddev_resume(mddev);
7038 mddev->bitmap_info.offset = 0;
7039 }
7040 }
7041 md_update_sb(mddev, 1);
7042 return rv;
7043err:
7044 return rv;
7045}
7046
7047static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7048{
7049 struct md_rdev *rdev;
7050 int err = 0;
7051
7052 if (mddev->pers == NULL)
7053 return -ENODEV;
7054
7055 rcu_read_lock();
7056 rdev = md_find_rdev_rcu(mddev, dev);
7057 if (!rdev)
7058 err = -ENODEV;
7059 else {
7060 md_error(mddev, rdev);
7061 if (!test_bit(Faulty, &rdev->flags))
7062 err = -EBUSY;
7063 }
7064 rcu_read_unlock();
7065 return err;
7066}
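
/*
 * Illustrative sketch (not part of this driver): this is the path
 * taken by e.g. "mdadm --fail".  The major/minor pair below
 * (8, 17 == /dev/sdb1) is hypothetical.
 *
 *	#include <sys/sysmacros.h>
 *
 *	// the ioctl argument encodes the component device's dev_t
 *	if (ioctl(md, SET_DISK_FAULTY, makedev(8, 17)) < 0)
 *		perror("SET_DISK_FAULTY");
 */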
7067
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dosfs just mad... ;-)
 */
7074static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7075{
7076 struct mddev *mddev = bdev->bd_disk->private_data;
7077
7078 geo->heads = 2;
7079 geo->sectors = 4;
7080 geo->cylinders = mddev->array_sectors / 8;
7081 return 0;
7082}
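
/*
 * Worked example (illustrative numbers): with 2 heads * 4 sectors = 8
 * sectors per cylinder, a 1 TiB array (2147483648 sectors of 512 bytes)
 * reports 2147483648 / 8 = 268435456 cylinders -- far beyond anything
 * CHS-based tools can address, which is why the geometry is only a
 * token value.
 */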
7083
7084static inline bool md_ioctl_valid(unsigned int cmd)
7085{
7086 switch (cmd) {
7087 case ADD_NEW_DISK:
7088 case BLKROSET:
7089 case GET_ARRAY_INFO:
7090 case GET_BITMAP_FILE:
7091 case GET_DISK_INFO:
7092 case HOT_ADD_DISK:
7093 case HOT_REMOVE_DISK:
7094 case RAID_AUTORUN:
7095 case RAID_VERSION:
7096 case RESTART_ARRAY_RW:
7097 case RUN_ARRAY:
7098 case SET_ARRAY_INFO:
7099 case SET_BITMAP_FILE:
7100 case SET_DISK_FAULTY:
7101 case STOP_ARRAY:
7102 case STOP_ARRAY_RO:
7103 case CLUSTERED_DISK_NACK:
7104 return true;
7105 default:
7106 return false;
7107 }
7108}
7109
7110static int md_ioctl(struct block_device *bdev, fmode_t mode,
7111 unsigned int cmd, unsigned long arg)
7112{
7113 int err = 0;
7114 void __user *argp = (void __user *)arg;
7115 struct mddev *mddev = NULL;
7116 int ro;
7117 bool did_set_md_closing = false;
7118
7119 if (!md_ioctl_valid(cmd))
7120 return -ENOTTY;
7121
7122 switch (cmd) {
7123 case RAID_VERSION:
7124 case GET_ARRAY_INFO:
7125 case GET_DISK_INFO:
7126 break;
7127 default:
7128 if (!capable(CAP_SYS_ADMIN))
7129 return -EACCES;
7130 }
7131
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
	switch (cmd) {
7137 case RAID_VERSION:
7138 err = get_version(argp);
7139 goto out;
7140
7141#ifndef MODULE
7142 case RAID_AUTORUN:
7143 err = 0;
7144 autostart_arrays(arg);
7145 goto out;
7146#endif
7147 default:;
7148 }
7149
	/*
	 * Commands creating/starting a new array:
	 */

	mddev = bdev->bd_disk->private_data;
7155
7156 if (!mddev) {
7157 BUG();
7158 goto out;
7159 }
	/* Some actions do not require the mutex */
	switch (cmd) {
7163 case GET_ARRAY_INFO:
7164 if (!mddev->raid_disks && !mddev->external)
7165 err = -ENODEV;
7166 else
7167 err = get_array_info(mddev, argp);
7168 goto out;
7169
7170 case GET_DISK_INFO:
7171 if (!mddev->raid_disks && !mddev->external)
7172 err = -ENODEV;
7173 else
7174 err = get_disk_info(mddev, argp);
7175 goto out;
7176
7177 case SET_DISK_FAULTY:
7178 err = set_disk_faulty(mddev, new_decode_dev(arg));
7179 goto out;
7180
7181 case GET_BITMAP_FILE:
7182 err = get_bitmap_file(mddev, argp);
7183 goto out;
7184
7185 }
7186
	if (cmd == ADD_NEW_DISK)
		/* need to ensure md_delayed_delete() has completed */
		flush_workqueue(md_misc_wq);
7190
	if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
		wait_event_interruptible_timeout(mddev->sb_wait,
7194 !test_bit(MD_RECOVERY_NEEDED,
7195 &mddev->recovery),
7196 msecs_to_jiffies(5000));
	if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
7201 mutex_lock(&mddev->open_mutex);
7202 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7203 mutex_unlock(&mddev->open_mutex);
7204 err = -EBUSY;
7205 goto out;
7206 }
7207 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7208 set_bit(MD_CLOSING, &mddev->flags);
7209 did_set_md_closing = true;
7210 mutex_unlock(&mddev->open_mutex);
7211 sync_blockdev(bdev);
7212 }
7213 err = mddev_lock(mddev);
7214 if (err) {
7215 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7216 err, cmd);
7217 goto out;
7218 }
7219
7220 if (cmd == SET_ARRAY_INFO) {
7221 mdu_array_info_t info;
7222 if (!arg)
7223 memset(&info, 0, sizeof(info));
7224 else if (copy_from_user(&info, argp, sizeof(info))) {
7225 err = -EFAULT;
7226 goto unlock;
7227 }
7228 if (mddev->pers) {
7229 err = update_array_info(mddev, &info);
7230 if (err) {
7231 pr_warn("md: couldn't update array info. %d\n", err);
7232 goto unlock;
7233 }
7234 goto unlock;
7235 }
7236 if (!list_empty(&mddev->disks)) {
7237 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7238 err = -EBUSY;
7239 goto unlock;
7240 }
7241 if (mddev->raid_disks) {
7242 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7243 err = -EBUSY;
7244 goto unlock;
7245 }
7246 err = set_array_info(mddev, &info);
7247 if (err) {
7248 pr_warn("md: couldn't set array info. %d\n", err);
7249 goto unlock;
7250 }
7251 goto unlock;
7252 }
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
	if ((!mddev->raid_disks && !mddev->external)
7260 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7261 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7262 && cmd != GET_BITMAP_FILE) {
7263 err = -ENODEV;
7264 goto unlock;
7265 }

	/*
	 * Commands even a read-only array can execute:
	 */
	switch (cmd) {
7271 case RESTART_ARRAY_RW:
7272 err = restart_array(mddev);
7273 goto unlock;
7274
7275 case STOP_ARRAY:
7276 err = do_md_stop(mddev, 0, bdev);
7277 goto unlock;
7278
7279 case STOP_ARRAY_RO:
7280 err = md_set_readonly(mddev, bdev);
7281 goto unlock;
7282
7283 case HOT_REMOVE_DISK:
7284 err = hot_remove_disk(mddev, new_decode_dev(arg));
7285 goto unlock;
7286
	case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
		if (mddev->pers) {
7293 mdu_disk_info_t info;
7294 if (copy_from_user(&info, argp, sizeof(info)))
7295 err = -EFAULT;
			else if (!(info.state & (1<<MD_DISK_SYNC)))
				/* Need to clear read-only for this */
				break;
7299 else
7300 err = add_new_disk(mddev, &info);
7301 goto unlock;
7302 }
7303 break;
7304
7305 case BLKROSET:
7306 if (get_user(ro, (int __user *)(arg))) {
7307 err = -EFAULT;
7308 goto unlock;
7309 }
		err = -EINVAL;

		/* if the bdev is going readonly the value of mddev->ro
		 * does not matter, no writes are coming
		 */
		if (ro)
			goto unlock;

		/* are we already prepared for writes? */
		if (mddev->ro != 1)
			goto unlock;
7321
		/* transitioning to read-auto need only happen for
		 * arrays that call md_write_start
		 */
		if (mddev->pers) {
7326 err = restart_array(mddev);
7327 if (err == 0) {
7328 mddev->ro = 2;
7329 set_disk_ro(mddev->gendisk, 0);
7330 }
7331 }
7332 goto unlock;
7333 }

	/*
	 * The remaining ioctls are changing the state of the
	 * superblock, so we do not allow them on read-only arrays.
	 */
	if (mddev->ro && mddev->pers) {
7340 if (mddev->ro == 2) {
7341 mddev->ro = 0;
7342 sysfs_notify_dirent_safe(mddev->sysfs_state);
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* mddev_unlock will wake thread */
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
			if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7349 mddev_unlock(mddev);
7350 wait_event(mddev->sb_wait,
7351 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7352 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7353 mddev_lock_nointr(mddev);
7354 }
7355 } else {
7356 err = -EROFS;
7357 goto unlock;
7358 }
7359 }
7360
7361 switch (cmd) {
7362 case ADD_NEW_DISK:
7363 {
7364 mdu_disk_info_t info;
7365 if (copy_from_user(&info, argp, sizeof(info)))
7366 err = -EFAULT;
7367 else
7368 err = add_new_disk(mddev, &info);
7369 goto unlock;
7370 }
7371
7372 case CLUSTERED_DISK_NACK:
7373 if (mddev_is_clustered(mddev))
7374 md_cluster_ops->new_disk_ack(mddev, false);
7375 else
7376 err = -EINVAL;
7377 goto unlock;
7378
7379 case HOT_ADD_DISK:
7380 err = hot_add_disk(mddev, new_decode_dev(arg));
7381 goto unlock;
7382
7383 case RUN_ARRAY:
7384 err = do_md_run(mddev);
7385 goto unlock;
7386
7387 case SET_BITMAP_FILE:
7388 err = set_bitmap_file(mddev, (int)arg);
7389 goto unlock;
7390
7391 default:
7392 err = -EINVAL;
7393 goto unlock;
7394 }
7395
7396unlock:
7397 if (mddev->hold_active == UNTIL_IOCTL &&
7398 err != -EINVAL)
7399 mddev->hold_active = 0;
7400 mddev_unlock(mddev);
7401out:
	if (did_set_md_closing)
7403 clear_bit(MD_CLOSING, &mddev->flags);
7404 return err;
7405}
7406#ifdef CONFIG_COMPAT
7407static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7408 unsigned int cmd, unsigned long arg)
7409{
7410 switch (cmd) {
7411 case HOT_REMOVE_DISK:
7412 case HOT_ADD_DISK:
7413 case SET_DISK_FAULTY:
	case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
		break;
7417 default:
7418 arg = (unsigned long)compat_ptr(arg);
7419 break;
7420 }
7421
7422 return md_ioctl(bdev, mode, cmd, arg);
7423}
7424#endif
7425
7426static int md_open(struct block_device *bdev, fmode_t mode)
7427{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
	struct mddev *mddev = mddev_find(bdev->bd_dev);
7433 int err;
7434
7435 if (!mddev)
7436 return -ENODEV;
7437
	if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
		mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
		flush_workqueue(md_misc_wq);
		/* Then retry the open from the top */
		return -ERESTARTSYS;
7447 }
7448 BUG_ON(mddev != bdev->bd_disk->private_data);
7449
7450 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7451 goto out;
7452
7453 if (test_bit(MD_CLOSING, &mddev->flags)) {
7454 mutex_unlock(&mddev->open_mutex);
7455 err = -ENODEV;
7456 goto out;
7457 }
7458
7459 err = 0;
7460 atomic_inc(&mddev->openers);
7461 mutex_unlock(&mddev->open_mutex);
7462
7463 check_disk_change(bdev);
7464 out:
7465 if (err)
7466 mddev_put(mddev);
7467 return err;
7468}
7469
7470static void md_release(struct gendisk *disk, fmode_t mode)
7471{
7472 struct mddev *mddev = disk->private_data;
7473
7474 BUG_ON(!mddev);
7475 atomic_dec(&mddev->openers);
7476 mddev_put(mddev);
7477}
7478
7479static int md_media_changed(struct gendisk *disk)
7480{
7481 struct mddev *mddev = disk->private_data;
7482
7483 return mddev->changed;
7484}
7485
7486static int md_revalidate(struct gendisk *disk)
7487{
7488 struct mddev *mddev = disk->private_data;
7489
7490 mddev->changed = 0;
7491 return 0;
7492}
7493static const struct block_device_operations md_fops =
7494{
7495 .owner = THIS_MODULE,
7496 .open = md_open,
7497 .release = md_release,
7498 .ioctl = md_ioctl,
7499#ifdef CONFIG_COMPAT
7500 .compat_ioctl = md_compat_ioctl,
7501#endif
7502 .getgeo = md_getgeo,
7503 .media_changed = md_media_changed,
7504 .revalidate_disk= md_revalidate,
7505};
7506
7507static int md_thread(void *arg)
7508{
7509 struct md_thread *thread = arg;
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high. We avoid resource deadlocks individually in each
	 * raid personality. (RAID5 does preallocation) We also use RR and
	 * the very same RT priority as kernel_thread.
	 * Wakeup is done asynchronously, as the thread is most likely sleeping.
	 */
7523 allow_signal(SIGKILL);
7524 while (!kthread_should_stop()) {

		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
		if (signal_pending(current))
7532 flush_signals(current);
7533
7534 wait_event_interruptible_timeout
7535 (thread->wqueue,
7536 test_bit(THREAD_WAKEUP, &thread->flags)
7537 || kthread_should_stop() || kthread_should_park(),
7538 thread->timeout);
7539
7540 clear_bit(THREAD_WAKEUP, &thread->flags);
7541 if (kthread_should_park())
7542 kthread_parkme();
7543 if (!kthread_should_stop())
7544 thread->run(thread);
7545 }
7546
7547 return 0;
7548}
7549
7550void md_wakeup_thread(struct md_thread *thread)
7551{
7552 if (thread) {
7553 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7554 set_bit(THREAD_WAKEUP, &thread->flags);
7555 wake_up(&thread->wqueue);
7556 }
7557}
7558EXPORT_SYMBOL(md_wakeup_thread);
7559
7560struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7561 struct mddev *mddev, const char *name)
7562{
7563 struct md_thread *thread;
7564
7565 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7566 if (!thread)
7567 return NULL;
7568
7569 init_waitqueue_head(&thread->wqueue);
7570
7571 thread->run = run;
7572 thread->mddev = mddev;
7573 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7574 thread->tsk = kthread_run(md_thread, thread,
7575 "%s_%s",
7576 mdname(thread->mddev),
7577 name);
7578 if (IS_ERR(thread->tsk)) {
7579 kfree(thread);
7580 return NULL;
7581 }
7582 return thread;
7583}
7584EXPORT_SYMBOL(md_register_thread);
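
/*
 * Illustrative sketch (not part of this driver): a personality
 * typically pairs md_register_thread() with md_unregister_thread()
 * in its setup and teardown paths.  "myraid_daemon" and "myraidd"
 * are hypothetical names.
 *
 *	static void myraid_daemon(struct md_thread *thread)
 *	{
 *		struct mddev *mddev = thread->mddev;
 *		// ... service queued work for this array ...
 *	}
 *
 *	conf->thread = md_register_thread(myraid_daemon, mddev, "myraidd");
 *	if (!conf->thread)
 *		return -ENOMEM;
 *	// and on teardown:
 *	md_unregister_thread(&conf->thread);
 */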
7585
7586void md_unregister_thread(struct md_thread **threadp)
7587{
7588 struct md_thread *thread = *threadp;
7589 if (!thread)
7590 return;
7591 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
	spin_lock(&pers_lock);
7596 *threadp = NULL;
7597 spin_unlock(&pers_lock);
7598
7599 kthread_stop(thread->tsk);
7600 kfree(thread);
7601}
7602EXPORT_SYMBOL(md_unregister_thread);
7603
7604void md_error(struct mddev *mddev, struct md_rdev *rdev)
7605{
7606 if (!rdev || test_bit(Faulty, &rdev->flags))
7607 return;
7608
7609 if (!mddev->pers || !mddev->pers->error_handler)
7610 return;
7611 mddev->pers->error_handler(mddev,rdev);
7612 if (mddev->degraded)
7613 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7614 sysfs_notify_dirent_safe(rdev->sysfs_state);
7615 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7616 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7617 md_wakeup_thread(mddev->thread);
7618 if (mddev->event_work.func)
7619 queue_work(md_misc_wq, &mddev->event_work);
7620 md_new_event(mddev);
7621}
7622EXPORT_SYMBOL(md_error);
7623
/* seq_file implementation /proc/mdstat */

static void status_unused(struct seq_file *seq)
7627{
7628 int i = 0;
7629 struct md_rdev *rdev;
7630
7631 seq_printf(seq, "unused devices: ");
7632
7633 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7634 char b[BDEVNAME_SIZE];
7635 i++;
7636 seq_printf(seq, "%s ",
7637 bdevname(rdev->bdev,b));
7638 }
7639 if (!i)
7640 seq_printf(seq, "<none>");
7641
7642 seq_printf(seq, "\n");
7643}
7644
7645static int status_resync(struct seq_file *seq, struct mddev *mddev)
7646{
7647 sector_t max_sectors, resync, res;
7648 unsigned long dt, db;
7649 sector_t rt;
7650 int scale;
7651 unsigned int per_milli;
7652
7653 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7654 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7655 max_sectors = mddev->resync_max_sectors;
7656 else
7657 max_sectors = mddev->dev_sectors;
7658
7659 resync = mddev->curr_resync;
7660 if (resync <= 3) {
		if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
			resync = max_sectors;
7664 } else if (resync > max_sectors)
7665 resync = max_sectors;
7666 else
7667 resync -= atomic_read(&mddev->recovery_active);
7668
7669 if (resync == 0) {
7670 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
7671 struct md_rdev *rdev;
7672
7673 rdev_for_each(rdev, mddev)
7674 if (rdev->raid_disk >= 0 &&
7675 !test_bit(Faulty, &rdev->flags) &&
7676 rdev->recovery_offset != MaxSector &&
7677 rdev->recovery_offset) {
7678 seq_printf(seq, "\trecover=REMOTE");
7679 return 1;
7680 }
7681 if (mddev->reshape_position != MaxSector)
7682 seq_printf(seq, "\treshape=REMOTE");
7683 else
7684 seq_printf(seq, "\tresync=REMOTE");
7685 return 1;
7686 }
7687 if (mddev->recovery_cp < MaxSector) {
7688 seq_printf(seq, "\tresync=PENDING");
7689 return 1;
7690 }
7691 return 0;
7692 }
7693 if (resync < 3) {
7694 seq_printf(seq, "\tresync=DELAYED");
7695 return 1;
7696 }
7697
	WARN_ON(max_sectors == 0);

	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and with a suitable multiplier for 'per_milli'.
	 * u32, as those are the requirement for sector_div.
	 * Thus 'scale' must be at least 10
	 */
	scale = 10;
7705 if (sizeof(sector_t) > sizeof(unsigned long)) {
7706 while ( max_sectors/2 > (1ULL<<(scale+32)))
7707 scale++;
7708 }
7709 res = (resync>>scale)*1000;
7710 sector_div(res, (u32)((max_sectors>>scale)+1));
7711
7712 per_milli = res;
7713 {
7714 int i, x = per_milli/50, y = 20-x;
7715 seq_printf(seq, "[");
7716 for (i = 0; i < x; i++)
7717 seq_printf(seq, "=");
7718 seq_printf(seq, ">");
7719 for (i = 0; i < y; i++)
7720 seq_printf(seq, ".");
7721 seq_printf(seq, "] ");
7722 }
7723 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7724 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7725 "reshape" :
7726 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7727 "check" :
7728 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7729 "resync" : "recovery"))),
7730 per_milli/10, per_milli % 10,
7731 (unsigned long long) resync/2,
7732 (unsigned long long) max_sectors/2);
7733
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, which is always 64bit now. We are keeping
	 * the original algorithm, but it is not really necessary.
	 *
	 * Original algorithm:
	 *   So we divide before multiply in case it is 32bit and close
	 *   to the limit.
	 *   We scale the divisor (db) by 32 to avoid losing precision
	 *   near the end of resync when the number of remaining sectors
	 *   is close to the 'window'.
	 *   We then divide rt by 32 after multiplying by db to ensure
	 *   the answer fits in a 32bit unsigned.
	 */
7748 dt = ((jiffies - mddev->resync_mark) / HZ);
7749 if (!dt) dt++;
7750 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7751 - mddev->resync_mark_cnt;
7752
7753 rt = max_sectors - resync;
7754 sector_div(rt, db/32+1);
7755 rt *= dt;
7756 rt >>= 5;
7757
7758 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7759 ((unsigned long)rt % 60)/6);
7760
7761 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7762 return 1;
7763}
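
/*
 * Worked example (illustrative numbers): with max_sectors = 1000000,
 * resync = 250000, and db = 20000 sectors written over dt = 10 seconds,
 * the remaining 750000 sectors are divided by db/32+1 = 626, multiplied
 * by dt and shifted right by 5, giving about 374 seconds and printing
 * "finish=6.2min"; the speed shown is db/2/dt = 1000K/sec.
 */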
7764
7765static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7766{
7767 struct list_head *tmp;
7768 loff_t l = *pos;
7769 struct mddev *mddev;
7770
7771 if (l >= 0x10000)
7772 return NULL;
	if (!l--)
		/* header */
		return (void*)1;
7776
7777 spin_lock(&all_mddevs_lock);
7778 list_for_each(tmp,&all_mddevs)
7779 if (!l--) {
7780 mddev = list_entry(tmp, struct mddev, all_mddevs);
7781 mddev_get(mddev);
7782 spin_unlock(&all_mddevs_lock);
7783 return mddev;
7784 }
7785 spin_unlock(&all_mddevs_lock);
7786 if (!l--)
7787 return (void*)2;
7788 return NULL;
7789}
7790
7791static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7792{
7793 struct list_head *tmp;
7794 struct mddev *next_mddev, *mddev = v;
7795
7796 ++*pos;
7797 if (v == (void*)2)
7798 return NULL;
7799
7800 spin_lock(&all_mddevs_lock);
7801 if (v == (void*)1)
7802 tmp = all_mddevs.next;
7803 else
7804 tmp = mddev->all_mddevs.next;
7805 if (tmp != &all_mddevs)
7806 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7807 else {
7808 next_mddev = (void*)2;
7809 *pos = 0x10000;
7810 }
7811 spin_unlock(&all_mddevs_lock);
7812
7813 if (v != (void*)1)
7814 mddev_put(mddev);
7815 return next_mddev;
7816
7817}
7818
7819static void md_seq_stop(struct seq_file *seq, void *v)
7820{
7821 struct mddev *mddev = v;
7822
7823 if (mddev && v != (void*)1 && v != (void*)2)
7824 mddev_put(mddev);
7825}
7826
7827static int md_seq_show(struct seq_file *seq, void *v)
7828{
7829 struct mddev *mddev = v;
7830 sector_t sectors;
7831 struct md_rdev *rdev;
7832
7833 if (v == (void*)1) {
7834 struct md_personality *pers;
7835 seq_printf(seq, "Personalities : ");
7836 spin_lock(&pers_lock);
7837 list_for_each_entry(pers, &pers_list, list)
7838 seq_printf(seq, "[%s] ", pers->name);
7839
7840 spin_unlock(&pers_lock);
7841 seq_printf(seq, "\n");
7842 seq->poll_event = atomic_read(&md_event_count);
7843 return 0;
7844 }
7845 if (v == (void*)2) {
7846 status_unused(seq);
7847 return 0;
7848 }
7849
7850 spin_lock(&mddev->lock);
7851 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7852 seq_printf(seq, "%s : %sactive", mdname(mddev),
7853 mddev->pers ? "" : "in");
7854 if (mddev->pers) {
7855 if (mddev->ro==1)
7856 seq_printf(seq, " (read-only)");
7857 if (mddev->ro==2)
7858 seq_printf(seq, " (auto-read-only)");
7859 seq_printf(seq, " %s", mddev->pers->name);
7860 }
7861
7862 sectors = 0;
7863 rcu_read_lock();
7864 rdev_for_each_rcu(rdev, mddev) {
7865 char b[BDEVNAME_SIZE];
7866 seq_printf(seq, " %s[%d]",
7867 bdevname(rdev->bdev,b), rdev->desc_nr);
7868 if (test_bit(WriteMostly, &rdev->flags))
7869 seq_printf(seq, "(W)");
7870 if (test_bit(Journal, &rdev->flags))
7871 seq_printf(seq, "(J)");
7872 if (test_bit(Faulty, &rdev->flags)) {
7873 seq_printf(seq, "(F)");
7874 continue;
7875 }
7876 if (rdev->raid_disk < 0)
7877 seq_printf(seq, "(S)");
7878 if (test_bit(Replacement, &rdev->flags))
7879 seq_printf(seq, "(R)");
7880 sectors += rdev->sectors;
7881 }
7882 rcu_read_unlock();
7883
7884 if (!list_empty(&mddev->disks)) {
7885 if (mddev->pers)
7886 seq_printf(seq, "\n %llu blocks",
7887 (unsigned long long)
7888 mddev->array_sectors / 2);
7889 else
7890 seq_printf(seq, "\n %llu blocks",
7891 (unsigned long long)sectors / 2);
7892 }
7893 if (mddev->persistent) {
7894 if (mddev->major_version != 0 ||
7895 mddev->minor_version != 90) {
7896 seq_printf(seq," super %d.%d",
7897 mddev->major_version,
7898 mddev->minor_version);
7899 }
7900 } else if (mddev->external)
7901 seq_printf(seq, " super external:%s",
7902 mddev->metadata_type);
7903 else
7904 seq_printf(seq, " super non-persistent");
7905
7906 if (mddev->pers) {
7907 mddev->pers->status(seq, mddev);
7908 seq_printf(seq, "\n ");
7909 if (mddev->pers->sync_request) {
7910 if (status_resync(seq, mddev))
7911 seq_printf(seq, "\n ");
7912 }
7913 } else
7914 seq_printf(seq, "\n ");
7915
7916 md_bitmap_status(seq, mddev->bitmap);
7917
7918 seq_printf(seq, "\n");
7919 }
7920 spin_unlock(&mddev->lock);
7921
7922 return 0;
7923}
7924
7925static const struct seq_operations md_seq_ops = {
7926 .start = md_seq_start,
7927 .next = md_seq_next,
7928 .stop = md_seq_stop,
7929 .show = md_seq_show,
7930};
7931
7932static int md_seq_open(struct inode *inode, struct file *file)
7933{
7934 struct seq_file *seq;
7935 int error;
7936
7937 error = seq_open(file, &md_seq_ops);
7938 if (error)
7939 return error;
7940
7941 seq = file->private_data;
7942 seq->poll_event = atomic_read(&md_event_count);
7943 return error;
7944}
7945
7946static int md_unloading;
7947static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
7948{
7949 struct seq_file *seq = filp->private_data;
7950 __poll_t mask;
7951
7952 if (md_unloading)
7953 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
7954 poll_wait(filp, &md_event_waiters, wait);
7955
7956
7957 mask = EPOLLIN | EPOLLRDNORM;
7958
7959 if (seq->poll_event != atomic_read(&md_event_count))
7960 mask |= EPOLLERR | EPOLLPRI;
7961 return mask;
7962}
7963
7964static const struct file_operations md_seq_fops = {
7965 .owner = THIS_MODULE,
7966 .open = md_seq_open,
7967 .read = seq_read,
7968 .llseek = seq_lseek,
7969 .release = seq_release,
7970 .poll = mdstat_poll,
7971};
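
/*
 * Illustrative sketch (not part of this driver): userspace can sleep
 * until the array state changes by polling /proc/mdstat for
 * POLLERR|POLLPRI and re-reading the file after each event.
 *
 *	#include <poll.h>
 *
 *	int fd = open("/proc/mdstat", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLERR | POLLPRI };
 *	while (poll(&pfd, 1, -1) > 0) {
 *		lseek(fd, 0, SEEK_SET);
 *		// read() and parse the updated contents here
 *	}
 */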
7972
7973int register_md_personality(struct md_personality *p)
7974{
7975 pr_debug("md: %s personality registered for level %d\n",
7976 p->name, p->level);
7977 spin_lock(&pers_lock);
7978 list_add_tail(&p->list, &pers_list);
7979 spin_unlock(&pers_lock);
7980 return 0;
7981}
7982EXPORT_SYMBOL(register_md_personality);
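
/*
 * Illustrative sketch (not part of this driver): a personality module
 * registers itself on load and unregisters on exit.  The structure
 * below is a hypothetical, heavily trimmed example.
 *
 *	static struct md_personality myraid_personality = {
 *		.name		= "myraid",
 *		.level		= 11,		// hypothetical level
 *		.owner		= THIS_MODULE,
 *		.make_request	= myraid_make_request,
 *		.run		= myraid_run,
 *		.free		= myraid_free,
 *		.status		= myraid_status,
 *	};
 *
 *	static int __init myraid_init(void)
 *	{
 *		return register_md_personality(&myraid_personality);
 *	}
 *
 *	static void __exit myraid_exit(void)
 *	{
 *		unregister_md_personality(&myraid_personality);
 *	}
 */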
7983
7984int unregister_md_personality(struct md_personality *p)
7985{
7986 pr_debug("md: %s personality unregistered\n", p->name);
7987 spin_lock(&pers_lock);
7988 list_del_init(&p->list);
7989 spin_unlock(&pers_lock);
7990 return 0;
7991}
7992EXPORT_SYMBOL(unregister_md_personality);
7993
7994int register_md_cluster_operations(struct md_cluster_operations *ops,
7995 struct module *module)
7996{
7997 int ret = 0;
7998 spin_lock(&pers_lock);
7999 if (md_cluster_ops != NULL)
8000 ret = -EALREADY;
8001 else {
8002 md_cluster_ops = ops;
8003 md_cluster_mod = module;
8004 }
8005 spin_unlock(&pers_lock);
8006 return ret;
8007}
8008EXPORT_SYMBOL(register_md_cluster_operations);
8009
8010int unregister_md_cluster_operations(void)
8011{
8012 spin_lock(&pers_lock);
8013 md_cluster_ops = NULL;
8014 spin_unlock(&pers_lock);
8015 return 0;
8016}
8017EXPORT_SYMBOL(unregister_md_cluster_operations);
8018
8019int md_setup_cluster(struct mddev *mddev, int nodes)
8020{
8021 if (!md_cluster_ops)
8022 request_module("md-cluster");
8023 spin_lock(&pers_lock);
	/* ensure module won't be unloaded */
	if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8027 spin_unlock(&pers_lock);
8028 return -ENOENT;
8029 }
8030 spin_unlock(&pers_lock);
8031
8032 return md_cluster_ops->join(mddev, nodes);
8033}
8034
8035void md_cluster_stop(struct mddev *mddev)
8036{
8037 if (!md_cluster_ops)
8038 return;
8039 md_cluster_ops->leave(mddev);
8040 module_put(md_cluster_mod);
8041}
8042
8043static int is_mddev_idle(struct mddev *mddev, int init)
8044{
8045 struct md_rdev *rdev;
8046 int idle;
8047 int curr_events;
8048
8049 idle = 1;
8050 rcu_read_lock();
8051 rdev_for_each_rcu(rdev, mddev) {
8052 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8053 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8054 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_event to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-resync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync requests, and further resync activity will
		 * always make curr_events less than last_events.
		 */
		if (init || curr_events - rdev->last_events > 64) {
8078 rdev->last_events = curr_events;
8079 idle = 0;
8080 }
8081 }
8082 rcu_read_unlock();
8083 return idle;
8084}
8085
8086void md_done_sync(struct mddev *mddev, int blocks, int ok)
8087{
	/* another "blocks" (512byte) blocks have been synced */
8089 atomic_sub(blocks, &mddev->recovery_active);
8090 wake_up(&mddev->recovery_wait);
8091 if (!ok) {
8092 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8093 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8094 md_wakeup_thread(mddev->thread);
		/* stop recovery, signal do_sync .... */
8096 }
8097}
8098EXPORT_SYMBOL(md_done_sync);
8099
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 * A return value of 'false' means that the write wasn't recorded
 * and cannot proceed as the array is being suspended.
 */
8107bool md_write_start(struct mddev *mddev, struct bio *bi)
8108{
8109 int did_change = 0;
8110
8111 if (bio_data_dir(bi) != WRITE)
8112 return true;
8113
8114 BUG_ON(mddev->ro == 1);
	if (mddev->ro == 2) {
		/* need to switch to read/write */
8117 mddev->ro = 0;
8118 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8119 md_wakeup_thread(mddev->thread);
8120 md_wakeup_thread(mddev->sync_thread);
8121 did_change = 1;
8122 }
8123 rcu_read_lock();
8124 percpu_ref_get(&mddev->writes_pending);
	smp_mb(); /* Match smp_mb in set_in_sync() */
8126 if (mddev->safemode == 1)
8127 mddev->safemode = 0;
	/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8129 if (mddev->in_sync || mddev->sync_checkers) {
8130 spin_lock(&mddev->lock);
8131 if (mddev->in_sync) {
8132 mddev->in_sync = 0;
8133 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8134 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8135 md_wakeup_thread(mddev->thread);
8136 did_change = 1;
8137 }
8138 spin_unlock(&mddev->lock);
8139 }
8140 rcu_read_unlock();
8141 if (did_change)
8142 sysfs_notify_dirent_safe(mddev->sysfs_state);
8143 if (!mddev->has_superblocks)
8144 return true;
8145 wait_event(mddev->sb_wait,
8146 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8147 mddev->suspended);
8148 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8149 percpu_ref_put(&mddev->writes_pending);
8150 return false;
8151 }
8152 return true;
8153}
8154EXPORT_SYMBOL(md_write_start);
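
/*
 * Illustrative sketch (not part of this driver): a personality brackets
 * every write with this accounting; the function name below is
 * hypothetical.
 *
 *	static bool myraid_make_request(struct mddev *mddev, struct bio *bio)
 *	{
 *		if (!md_write_start(mddev, bio))
 *			return false;	// array suspended, write not recorded
 *		// ... map and submit the write ...
 *		// later, when the last part of the bio completes:
 *		md_write_end(mddev);
 *		return true;
 *	}
 */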
8155
/* md_write_inc can only be called when md_write_start() has
 * already been called at least once for the current request.
 * It increments the counter and is useful when a single request
 * is split into several parts.  Each part causes an increment and
 * so needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call md_write_inc() inside
 * a spinlocked region.
 */
8164void md_write_inc(struct mddev *mddev, struct bio *bi)
8165{
8166 if (bio_data_dir(bi) != WRITE)
8167 return;
8168 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8169 percpu_ref_get(&mddev->writes_pending);
8170}
8171EXPORT_SYMBOL(md_write_inc);
8172
8173void md_write_end(struct mddev *mddev)
8174{
8175 percpu_ref_put(&mddev->writes_pending);
8176
8177 if (mddev->safemode == 2)
8178 md_wakeup_thread(mddev->thread);
	else if (mddev->safemode_delay)
		/* The roundup() ensures this only performs locking once
		 * every ->safemode_delay jiffies
		 */
		mod_timer(&mddev->safemode_timer,
8184 roundup(jiffies, mddev->safemode_delay) +
8185 mddev->safemode_delay);
8186}
8187
8188EXPORT_SYMBOL(md_write_end);
8189
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * to the array can proceed without blocking.  It is important to call
 * this before attempting a write to the array, as otherwise the write
 * may block while the 'clean' superblock is written out.
 */
8196void md_allow_write(struct mddev *mddev)
8197{
8198 if (!mddev->pers)
8199 return;
8200 if (mddev->ro)
8201 return;
8202 if (!mddev->pers->sync_request)
8203 return;
8204
8205 spin_lock(&mddev->lock);
8206 if (mddev->in_sync) {
8207 mddev->in_sync = 0;
8208 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8209 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8210 if (mddev->safemode_delay &&
8211 mddev->safemode == 0)
8212 mddev->safemode = 1;
8213 spin_unlock(&mddev->lock);
8214 md_update_sb(mddev, 0);
8215 sysfs_notify_dirent_safe(mddev->sysfs_state);
		/* wait for the dirty state to be recorded in the metadata */
8217 wait_event(mddev->sb_wait,
8218 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8219 } else
8220 spin_unlock(&mddev->lock);
8221}
8222EXPORT_SYMBOL_GPL(md_allow_write);
8223
8224#define SYNC_MARKS 10
8225#define SYNC_MARK_STEP (3*HZ)
8226#define UPDATE_FREQUENCY (5*60*HZ)
8227void md_do_sync(struct md_thread *thread)
8228{
8229 struct mddev *mddev = thread->mddev;
8230 struct mddev *mddev2;
8231 unsigned int currspeed = 0,
8232 window;
8233 sector_t max_sectors,j, io_sectors, recovery_done;
8234 unsigned long mark[SYNC_MARKS];
8235 unsigned long update_time;
8236 sector_t mark_cnt[SYNC_MARKS];
8237 int last_mark,m;
8238 struct list_head *tmp;
8239 sector_t last_check;
8240 int skipped = 0;
8241 struct md_rdev *rdev;
8242 char *desc, *action = NULL;
8243 struct blk_plug plug;
8244 int ret;
8245
	/* just in case thread restarts... */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8248 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8249 return;
8250 if (mddev->ro) {
8251 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8252 return;
8253 }
8254
8255 if (mddev_is_clustered(mddev)) {
8256 ret = md_cluster_ops->resync_start(mddev);
8257 if (ret)
8258 goto skip;
8259
8260 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8261 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8262 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8263 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8264 && ((unsigned long long)mddev->curr_resync_completed
8265 < (unsigned long long)mddev->resync_max_sectors))
8266 goto skip;
8267 }
8268
8269 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8270 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8271 desc = "data-check";
8272 action = "check";
8273 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8274 desc = "requested-resync";
8275 action = "repair";
8276 } else
8277 desc = "resync";
8278 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8279 desc = "reshape";
8280 else
8281 desc = "recovery";
8282
8283 mddev->last_sync_action = action ?: desc;
8284
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 */
8301 do {
8302 int mddev2_minor = -1;
8303 mddev->curr_resync = 2;
8304
8305 try_again:
8306 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8307 goto skip;
8308 for_each_mddev(mddev2, tmp) {
8309 if (mddev2 == mddev)
8310 continue;
8311 if (!mddev->parallel_resync
8312 && mddev2->curr_resync
8313 && match_mddev_units(mddev, mddev2)) {
8314 DEFINE_WAIT(wq);
8315 if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
8317 mddev->curr_resync = 1;
8318 wake_up(&resync_wait);
8319 }
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/* no need to wait here, we can wait the next
					 * time 'round when curr_resync == 2
					 */
					continue;
				/* We need to wait 'interruptible' so as not to
				 * contribute to the load average, and not to
				 * be caught by 'softlockup'
				 */
				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8330 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8331 mddev2->curr_resync >= mddev->curr_resync) {
8332 if (mddev2_minor != mddev2->md_minor) {
8333 mddev2_minor = mddev2->md_minor;
8334 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8335 desc, mdname(mddev),
8336 mdname(mddev2));
8337 }
8338 mddev_put(mddev2);
8339 if (signal_pending(current))
8340 flush_signals(current);
8341 schedule();
8342 finish_wait(&resync_wait, &wq);
8343 goto try_again;
8344 }
8345 finish_wait(&resync_wait, &wq);
8346 }
8347 }
8348 } while (mddev->curr_resync < 2);
8349
8350 j = 0;
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
		max_sectors = mddev->resync_max_sectors;
8356 atomic64_set(&mddev->resync_mismatches, 0);
		/* we don't use the checkpoint if there's a bitmap */
8358 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8359 j = mddev->resync_min;
8360 else if (!mddev->bitmap)
8361 j = mddev->recovery_cp;
8362
8363 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8364 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restarting the reshape
		 * from the very beginning.
		 */
		if (mddev_is_clustered(mddev) &&
8371 mddev->reshape_position != MaxSector)
8372 j = mddev->reshape_position;
	} else {
		/* recovery follows the physical size of devices */
8375 max_sectors = mddev->dev_sectors;
8376 j = MaxSector;
8377 rcu_read_lock();
8378 rdev_for_each_rcu(rdev, mddev)
8379 if (rdev->raid_disk >= 0 &&
8380 !test_bit(Journal, &rdev->flags) &&
8381 !test_bit(Faulty, &rdev->flags) &&
8382 !test_bit(In_sync, &rdev->flags) &&
8383 rdev->recovery_offset < j)
8384 j = rdev->recovery_offset;
8385 rcu_read_unlock();

		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
		if (mddev->bitmap) {
8396 mddev->pers->quiesce(mddev, 1);
8397 mddev->pers->quiesce(mddev, 0);
8398 }
8399 }
8400
8401 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8402 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8403 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8404 speed_max(mddev), desc);
8405
8406 is_mddev_idle(mddev, 1);
8407
8408 io_sectors = 0;
8409 for (m = 0; m < SYNC_MARKS; m++) {
8410 mark[m] = jiffies;
8411 mark_cnt[m] = io_sectors;
8412 }
8413 last_mark = 0;
8414 mddev->resync_mark = mark[last_mark];
8415 mddev->resync_mark_cnt = mark_cnt[last_mark];
8416
	/*
	 * Tune reconstruction:
	 */
	window = 32*(PAGE_SIZE/512);
8421 pr_debug("md: using %dk window, over a total of %lluk.\n",
8422 window/2, (unsigned long long)max_sectors/2);
8423
8424 atomic_set(&mddev->recovery_active, 0);
8425 last_check = 0;
8426
8427 if (j>2) {
8428 pr_debug("md: resuming %s of %s from checkpoint.\n",
8429 desc, mdname(mddev));
8430 mddev->curr_resync = j;
8431 } else
8432 mddev->curr_resync = 3;
8433 mddev->curr_resync_completed = j;
8434 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8435 md_new_event(mddev);
8436 update_time = jiffies;
8437
8438 blk_start_plug(&plug);
8439 while (j < max_sectors) {
8440 sector_t sectors;
8441
8442 skipped = 0;
8443
8444 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8445 ((mddev->curr_resync > mddev->curr_resync_completed &&
8446 (mddev->curr_resync - mddev->curr_resync_completed)
8447 > (max_sectors >> 4)) ||
8448 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8449 (j - mddev->curr_resync_completed)*2
8450 >= mddev->resync_max - mddev->curr_resync_completed ||
8451 mddev->curr_resync_completed > mddev->resync_max
8452 )) {
			/* time to update curr_resync_completed */
			wait_event(mddev->recovery_wait,
8455 atomic_read(&mddev->recovery_active) == 0);
8456 mddev->curr_resync_completed = j;
8457 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8458 j > mddev->recovery_cp)
8459 mddev->recovery_cp = j;
8460 update_time = jiffies;
8461 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8462 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8463 }
8464
8465 while (j >= mddev->resync_max &&
8466 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
			 */
			flush_signals(current); /* just in case */
8472 wait_event_interruptible(mddev->recovery_wait,
8473 mddev->resync_max > j
8474 || test_bit(MD_RECOVERY_INTR,
8475 &mddev->recovery));
8476 }
8477
8478 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8479 break;
8480
8481 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8482 if (sectors == 0) {
8483 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8484 break;
8485 }
8486
8487 if (!skipped) {
8488 io_sectors += sectors;
8489 atomic_add(sectors, &mddev->recovery_active);
8490 }
8491
8492 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8493 break;
8494
8495 j += sectors;
		if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
			j = max_sectors;
8499 if (j > 2)
8500 mddev->curr_resync = j;
8501 mddev->curr_mark_cnt = io_sectors;
		if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
			md_new_event(mddev);
8507
8508 if (last_check + window > io_sectors || j == max_sectors)
8509 continue;
8510
8511 last_check = io_sectors;
8512 repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP)) {
			/* step marks */
8515 int next = (last_mark+1) % SYNC_MARKS;
8516
8517 mddev->resync_mark = mark[next];
8518 mddev->resync_mark_cnt = mark_cnt[next];
8519 mark[next] = jiffies;
8520 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8521 last_mark = next;
8522 }
8523
8524 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8525 break;

		/*
		 * this loop exits only if we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
		cond_resched();
8536
8537 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8538 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8539 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8540
8541 if (currspeed > speed_min(mddev)) {
8542 if (currspeed > speed_max(mddev)) {
8543 msleep(500);
8544 goto repeat;
8545 }
			if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
8551 wait_event(mddev->recovery_wait,
8552 !atomic_read(&mddev->recovery_active));
8553 }
8554 }
8555 }
	pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
8557 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8558 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
8562 blk_finish_plug(&plug);
8563 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8564
8565 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8566 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8567 mddev->curr_resync > 3) {
8568 mddev->curr_resync_completed = mddev->curr_resync;
8569 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8570 }
8571 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8572
8573 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8574 mddev->curr_resync > 3) {
8575 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8576 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8577 if (mddev->curr_resync >= mddev->recovery_cp) {
8578 pr_debug("md: checkpointing %s of %s.\n",
8579 desc, mdname(mddev));
8580 if (test_bit(MD_RECOVERY_ERROR,
8581 &mddev->recovery))
8582 mddev->recovery_cp =
8583 mddev->curr_resync_completed;
8584 else
8585 mddev->recovery_cp =
8586 mddev->curr_resync;
8587 }
8588 } else
8589 mddev->recovery_cp = MaxSector;
8590 } else {
8591 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8592 mddev->curr_resync = MaxSector;
8593 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8594 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8595 rcu_read_lock();
8596 rdev_for_each_rcu(rdev, mddev)
8597 if (rdev->raid_disk >= 0 &&
8598 mddev->delta_disks >= 0 &&
8599 !test_bit(Journal, &rdev->flags) &&
8600 !test_bit(Faulty, &rdev->flags) &&
8601 !test_bit(In_sync, &rdev->flags) &&
8602 rdev->recovery_offset < mddev->curr_resync)
8603 rdev->recovery_offset = mddev->curr_resync;
8604 rcu_read_unlock();
8605 }
8606 }
8607 }
8608 skip:
	/* set CHANGE_PENDING here since maybe another update is needed,
	 * so other nodes are informed. It should be harmless for normal
	 * raid */
8612 set_mask_bits(&mddev->sb_flags, 0,
8613 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8614
8615 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8616 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8617 mddev->delta_disks > 0 &&
8618 mddev->pers->finish_reshape &&
8619 mddev->pers->size &&
8620 mddev->queue) {
8621 mddev_lock_nointr(mddev);
8622 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8623 mddev_unlock(mddev);
8624 if (!mddev_is_clustered(mddev)) {
8625 set_capacity(mddev->gendisk, mddev->array_sectors);
8626 revalidate_disk(mddev->gendisk);
8627 }
8628 }
8629
8630 spin_lock(&mddev->lock);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
8633 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8634 mddev->resync_min = 0;
8635 mddev->resync_max = MaxSector;
8636 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8637 mddev->resync_min = mddev->curr_resync_completed;
8638 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8639 mddev->curr_resync = 0;
8640 spin_unlock(&mddev->lock);
8641
8642 wake_up(&resync_wait);
8643 md_wakeup_thread(mddev->thread);
8644 return;
8645}
8646EXPORT_SYMBOL_GPL(md_do_sync);
8647
8648static int remove_and_add_spares(struct mddev *mddev,
8649 struct md_rdev *this)
8650{
8651 struct md_rdev *rdev;
8652 int spares = 0;
8653 int removed = 0;
8654 bool remove_some = false;
8655
	if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		/* Mustn't remove devices when resync thread is running */
		return 0;
8659
8660 rdev_for_each(rdev, mddev) {
8661 if ((this == NULL || rdev == this) &&
8662 rdev->raid_disk >= 0 &&
8663 !test_bit(Blocked, &rdev->flags) &&
8664 test_bit(Faulty, &rdev->flags) &&
8665 atomic_read(&rdev->nr_pending)==0) {
			/* Mark the candidate now; the single synchronize_rcu()
			 * below then guarantees no RCU reader still holds a
			 * reference to any of the flagged devices before
			 * ->hot_remove_disk() is attempted.
			 */
			remove_some = true;
8672 set_bit(RemoveSynchronized, &rdev->flags);
8673 }
8674 }
8675
8676 if (remove_some)
8677 synchronize_rcu();
8678 rdev_for_each(rdev, mddev) {
8679 if ((this == NULL || rdev == this) &&
8680 rdev->raid_disk >= 0 &&
8681 !test_bit(Blocked, &rdev->flags) &&
8682 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8683 (!test_bit(In_sync, &rdev->flags) &&
8684 !test_bit(Journal, &rdev->flags))) &&
8685 atomic_read(&rdev->nr_pending)==0)) {
8686 if (mddev->pers->hot_remove_disk(
8687 mddev, rdev) == 0) {
8688 sysfs_unlink_rdev(mddev, rdev);
8689 rdev->saved_raid_disk = rdev->raid_disk;
8690 rdev->raid_disk = -1;
8691 removed++;
8692 }
8693 }
8694 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8695 clear_bit(RemoveSynchronized, &rdev->flags);
8696 }
8697
8698 if (removed && mddev->kobj.sd)
8699 sysfs_notify(&mddev->kobj, NULL, "degraded");
8700
8701 if (this && removed)
8702 goto no_add;
8703
8704 rdev_for_each(rdev, mddev) {
8705 if (this && this != rdev)
8706 continue;
8707 if (test_bit(Candidate, &rdev->flags))
8708 continue;
8709 if (rdev->raid_disk >= 0 &&
8710 !test_bit(In_sync, &rdev->flags) &&
8711 !test_bit(Journal, &rdev->flags) &&
8712 !test_bit(Faulty, &rdev->flags))
8713 spares++;
8714 if (rdev->raid_disk >= 0)
8715 continue;
8716 if (test_bit(Faulty, &rdev->flags))
8717 continue;
8718 if (!test_bit(Journal, &rdev->flags)) {
8719 if (mddev->ro &&
8720 ! (rdev->saved_raid_disk >= 0 &&
8721 !test_bit(Bitmap_sync, &rdev->flags)))
8722 continue;
8723
8724 rdev->recovery_offset = 0;
8725 }
8726 if (mddev->pers->
8727 hot_add_disk(mddev, rdev) == 0) {
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
8730 if (!test_bit(Journal, &rdev->flags))
8731 spares++;
8732 md_new_event(mddev);
8733 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8734 }
8735 }
8736no_add:
8737 if (removed)
8738 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8739 return spares;
8740}
8741
8742static void md_start_sync(struct work_struct *ws)
8743{
8744 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8745
8746 mddev->sync_thread = md_register_thread(md_do_sync,
8747 mddev,
8748 "resync");
8749 if (!mddev->sync_thread) {
8750 pr_warn("%s: could not start resync thread...\n",
8751 mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8754 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8755 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8756 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8757 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8758 wake_up(&resync_wait);
8759 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8760 &mddev->recovery))
8761 if (mddev->sysfs_action)
8762 sysfs_notify_dirent_safe(mddev->sysfs_action);
8763 } else
8764 md_wakeup_thread(mddev->sync_thread);
8765 sysfs_notify_dirent_safe(mddev->sysfs_action);
8766 md_new_event(mddev);
8767}
8768
/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakes up this thread, which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there is no recovery thread, check if any spares are available
 *     (remove_and_add_spares) and start a new sync thread if so, or if
 *     a resync/reshape is still needed.
 */
8791void md_check_recovery(struct mddev *mddev)
8792{
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
8798 smp_mb__after_atomic();
8799 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8800 md_update_sb(mddev, 0);
8801 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8802 wake_up(&mddev->sb_wait);
8803 }
8804
8805 if (mddev->suspended)
8806 return;
8807
8808 if (mddev->bitmap)
8809 md_bitmap_daemon_work(mddev);
8810
8811 if (signal_pending(current)) {
8812 if (mddev->pers->sync_request && !mddev->external) {
8813 pr_debug("md: %s in immediate safe mode\n",
8814 mdname(mddev));
8815 mddev->safemode = 2;
8816 }
8817 flush_signals(current);
8818 }
8819
8820 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8821 return;
8822 if ( ! (
8823 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8824 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8825 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8826 (mddev->external == 0 && mddev->safemode == 1) ||
8827 (mddev->safemode == 2
8828 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8829 ))
8830 return;
8831
8832 if (mddev_trylock(mddev)) {
8833 int spares = 0;
8834
8835 if (!mddev->external && mddev->safemode == 1)
8836 mddev->safemode = 0;
8837
8838 if (mddev->ro) {
8839 struct md_rdev *rdev;
			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8860 md_reap_sync_thread(mddev);
8861 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8862 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8863 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8864 goto unlock;
8865 }
8866
8867 if (mddev_is_clustered(mddev)) {
8868 struct md_rdev *rdev;
			/* kick the device if another node issued a
			 * remove disk.
			 */
			rdev_for_each(rdev, mddev) {
8873 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8874 rdev->raid_disk < 0)
8875 md_kick_rdev_from_array(rdev);
8876 }
8877 }
8878
8879 if (!mddev->external && !mddev->in_sync) {
8880 spin_lock(&mddev->lock);
8881 set_in_sync(mddev);
8882 spin_unlock(&mddev->lock);
8883 }
8884
8885 if (mddev->sb_flags)
8886 md_update_sb(mddev, 0);
8887
8888 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8889 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8892 goto unlock;
8893 }
8894 if (mddev->sync_thread) {
8895 md_reap_sync_thread(mddev);
8896 goto unlock;
8897 }

		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
		mddev->curr_resync_completed = 0;
8902 spin_lock(&mddev->lock);
8903 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8904 spin_unlock(&mddev->lock);
8905
		/* Clear some bits that don't mean anything, but
		 * might be left set
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8909 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8910
8911 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8912 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8913 goto not_running;
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

		if (mddev->reshape_position != MaxSector) {
8922 if (mddev->pers->check_reshape == NULL ||
8923 mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto not_running;
8926 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8927 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8928 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8929 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8930 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8931 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8932 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8933 } else if (mddev->recovery_cp < MaxSector) {
8934 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8935 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
			goto not_running;
8939
8940 if (mddev->pers->sync_request) {
			if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
				md_bitmap_write_all(mddev->bitmap);
8947 }
8948 INIT_WORK(&mddev->del_work, md_start_sync);
8949 queue_work(md_misc_wq, &mddev->del_work);
8950 goto unlock;
8951 }
8952 not_running:
8953 if (!mddev->sync_thread) {
8954 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8955 wake_up(&resync_wait);
8956 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8957 &mddev->recovery))
8958 if (mddev->sysfs_action)
8959 sysfs_notify_dirent_safe(mddev->sysfs_action);
8960 }
8961 unlock:
8962 wake_up(&mddev->sb_wait);
8963 mddev_unlock(mddev);
8964 }
8965}
8966EXPORT_SYMBOL(md_check_recovery);
8967
8968void md_reap_sync_thread(struct mddev *mddev)
8969{
8970 struct md_rdev *rdev;
8971 sector_t old_dev_sectors = mddev->dev_sectors;
8972 bool is_reshaped = false;

	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
8976 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8977 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success...*/
		/* activate any spares */
		if (mddev->pers->spare_active(mddev)) {
8981 sysfs_notify(&mddev->kobj, NULL,
8982 "degraded");
8983 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8984 }
8985 }
8986 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8987 mddev->pers->finish_reshape) {
8988 mddev->pers->finish_reshape(mddev);
8989 if (mddev_is_clustered(mddev))
8990 is_reshaped = true;
8991 }

	/* If the array is degraded, keep the saved_raid_disk info so a
	 * previously removed device can be re-added; otherwise clear it.
	 */
	if (!mddev->degraded)
		rdev_for_each(rdev, mddev)
			rdev->saved_raid_disk = -1;
8999
	md_update_sb(mddev, 1);

	/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9005 md_cluster_ops->resync_finish(mddev);
9006 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9007 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9008 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9009 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9010 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9011 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update size across cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped
9018 && !test_bit(MD_CLOSING, &mddev->flags))
9019 md_cluster_ops->update_size(mddev, old_dev_sectors);
9020 wake_up(&resync_wait);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9023 sysfs_notify_dirent_safe(mddev->sysfs_action);
9024 md_new_event(mddev);
9025 if (mddev->event_work.func)
9026 queue_work(md_misc_wq, &mddev->event_work);
9027}
9028EXPORT_SYMBOL(md_reap_sync_thread);
9029
9030void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9031{
9032 sysfs_notify_dirent_safe(rdev->sysfs_state);
9033 wait_event_timeout(rdev->blocked_wait,
9034 !test_bit(Blocked, &rdev->flags) &&
9035 !test_bit(BlockedBadBlocks, &rdev->flags),
9036 msecs_to_jiffies(5000));
9037 rdev_dec_pending(rdev, mddev);
9038}
9039EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9040
9041void md_finish_reshape(struct mddev *mddev)
{
	/* called by personality module when reshape completes. */
	struct md_rdev *rdev;
9045
9046 rdev_for_each(rdev, mddev) {
9047 if (rdev->data_offset > rdev->new_data_offset)
9048 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9049 else
9050 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9051 rdev->data_offset = rdev->new_data_offset;
9052 }
9053}
9054EXPORT_SYMBOL(md_finish_reshape);
9055

/* Bad block management */

/* Returns 1 on success, 0 on failure */
9059int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9060 int is_new)
9061{
9062 struct mddev *mddev = rdev->mddev;
9063 int rv;
9064 if (is_new)
9065 s += rdev->new_data_offset;
9066 else
9067 s += rdev->data_offset;
9068 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9069 if (rv == 0) {
		/* Make sure they get written out promptly */
9071 if (test_bit(ExternalBbl, &rdev->flags))
9072 sysfs_notify(&rdev->kobj, NULL,
9073 "unacknowledged_bad_blocks");
9074 sysfs_notify_dirent_safe(rdev->sysfs_state);
9075 set_mask_bits(&mddev->sb_flags, 0,
9076 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9077 md_wakeup_thread(rdev->mddev->thread);
9078 return 1;
9079 } else
9080 return 0;
9081}
9082EXPORT_SYMBOL_GPL(rdev_set_badblocks);

int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			 int is_new)
{
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_clear(&rdev->badblocks, s, sectors);
	if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
		sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
	return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
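
/*
 * Illustrative sketch (not part of the original file): a typical write-error
 * path in a personality records the failed range and only fails the whole
 * device when the bad-block table is full (rdev_set_badblocks() returning 0).
 * "handle_write_error" is a hypothetical name:
 *
 *	static void handle_write_error(struct mddev *mddev,
 *				       struct md_rdev *rdev,
 *				       sector_t sector, int sectors)
 *	{
 *		if (!rdev_set_badblocks(rdev, sector, sectors, 0))
 *			md_error(mddev, rdev);
 *	}
 *
 * A later successful rewrite of the same range would then call
 * rdev_clear_badblocks(rdev, sector, sectors, 0).
 */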

static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	struct mddev *mddev;
	int need_delay = 0;

	for_each_mddev(mddev, tmp) {
		if (mddev_trylock(mddev)) {
			if (mddev->pers)
				__md_stop_writes(mddev);
			if (mddev->persistent)
				mddev->safemode = 2;
			mddev_unlock(mddev);
		}
		need_delay = 1;
	}
	/*
	 * certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
	if (need_delay)
		mdelay(1000*1);

	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}

static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
		goto err_md;

	if ((ret = register_blkdev(0, "mdp")) < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}
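
/*
 * Note (summary, not from the original source): after md_init() the driver
 * owns two block majors: MD_MAJOR (9) for classic /dev/mdN arrays and the
 * dynamically allocated mdp_major for partitionable arrays, conventionally
 * named /dev/md_dN with partitions md_dNpM.  md_probe() instantiates the
 * gendisk on first access to either region.
 */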

static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If size is changed in another node then we need to
	 * do resize as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each(rdev2, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the roles changed: 0xffff == spare, 0xfffe == faulty,
		 * 0xfffd == journal (MD_DISK_ROLE_* in md_p.h) */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
				pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			}
			else
				clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/*
			 * got activated except reshape is happening.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* wakeup mddev->thread here, so array could
				 * perform resync with the new activated disk */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* device faulty
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
		update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/*
	 * Since mddev->delta_disks has already updated in update_raid_disks,
	 * so it is time to check reshape.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * reshape is happening in the remote node, we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = le64_to_cpu(sb->reshape_position);
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* reshape is just done in another node. */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
				__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);
	/*
	 * The superblock carries a valid recovery_offset only when the
	 * corresponding feature bit is set.
	 */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify(&mddev->kobj, NULL, "degraded");

	put_page(swapout);
	return 0;
}

void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
			break;
	}

	if (!rdev || rdev->desc_nr != nr) {
		pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdev's to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

static void autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		/* drop the lock: md_import_device() may sleep and do I/O */
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}
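
/*
 * Usage note (summary, not from the original source): md_autodetect_dev() is
 * called by the partition-scanning code for partitions of type 0xfd ("Linux
 * raid autodetect"), and autostart_arrays() then assembles them at boot when
 * md is built in.  Booting with raid=noautodetect suppresses this scan.
 */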

#endif

static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	blk_unregister_region(MKDEV(MD_MAJOR, 0), 512);
	blk_unregister_region(MKDEV(mdp_major, 0), 1U << MINORBITS);

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
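
/*
 * Usage sketch (assuming the usual module name md_mod): the parameters above
 * appear under /sys/module/md_mod/parameters/, e.g.
 *
 *	echo 1 > /sys/module/md_mod/parameters/start_ro
 *	md_mod.start_ro=1        (as a kernel command-line option)
 *
 * start_ro makes newly assembled arrays start auto-read-only until the first
 * write; new_array is write-only and lets userspace (e.g. mdadm) pre-create
 * a named array device.
 */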

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);