/*
 * md.c : Multiple Devices driver for Linux
 */
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif
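
/* pers_list is a list of registered personalities protected by pers_lock.
 * pers_lock does extra service to protect accesses to mddev->thread
 * when the mutex cannot be held (see mddev_unlock() below).
 */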
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
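
/* Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */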
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
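
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more guaranteed speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */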
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;
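
/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with
 * device-close.
 * The preferred method is to write to the "new_array" module
 * parameter, which avoids those races.
 * Setting create_on_open to false disables the original mechanism
 * so all the races disappear.
 */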
static bool create_on_open = true;
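
/*
 * Allocate a bio from this array's bio_set, falling back to the
 * global bio pool when the array (or its bio_set) does not exist yet.
 */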
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !mddev->bio_set)
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !mddev->sync_set)
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set);
}
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);
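
/*
 * Enables to iterate over all existing md arrays
 * all_mddevs_lock protects this list.
 */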
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
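
/*
 * Iterate over all used mddevs in the system.  We take care to grab
 * all_mddevs_lock whenever navigating the list, and to always hold a
 * refcount on the current mddev while the lock is dropped.  Any code
 * which breaks out of this loop early still owns a reference to the
 * current mddev and must mddev_put() it.
 *
 * A usage sketch (the loop body runs with a reference held):
 *
 *	struct mddev *mddev;
 *	struct list_head *tmp;
 *
 *	for_each_mddev(mddev, tmp) {
 *		pr_info("%s\n", mdname(mddev));
 *	}
 */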
#define for_each_mddev(_mddev,_tmp)					\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
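
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */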
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;
	int cpu;

	blk_queue_split(q, &bio);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}
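
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 */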
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}
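
/*
 * Generic flush handling for md
 */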
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when the request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to
	 * avoid a deadlock, because other bios passed md_handle_request
	 * suspend check could wait for this and below md_handle_request
	 * could wait for those bios because of suspend check
	 */
	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio);
	else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio,
			    mddev->lock);
	mddev->flush_bio = bio;
	spin_unlock_irq(&mddev->lock);

	INIT_WORK(&mddev->flush_work, submit_flushes);
	queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	struct bio_set *bs = NULL, *sync_bs = NULL;

	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);
		bs = mddev->bio_set;
		sync_bs = mddev->sync_set;
		mddev->bio_set = NULL;
		mddev->sync_set = NULL;
		if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
	spin_unlock(&all_mddevs_lock);
	if (bs)
		bioset_free(bs);
	if (sync_bs)
		bioset_free(sync_bs);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex if
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the removal is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel) == 0)
			return pers;
	}
	return NULL;
}

static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: super_written gets error=%d\n", bio->bi_status);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = md_bio_alloc_sync(mddev);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}

int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = md_bio_alloc_sync(rdev->mddev);
	int ret;

	if (metadata_op && rdev->meta_bdev)
		bio_set_dev(bio, rdev->meta_bdev);
	else
		bio_set_dev(bio, rdev->bdev);
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_status;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev, b));
	return -EINVAL;
}

static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1), GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2), GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32 *)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* Old Alpha kernels stored the folded checksum back into the
	 * superblock instead of the raw value.  Keep doing that here
	 * so checksums stay compatible with arrays created by them.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}
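
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on rdev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - rdev has a superblock that is compatible with refdev
 *      1 - rdev has a superblock that is compatible and newer than refdev
 *          so rdev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *rdev)
 *      Verify that rdev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       rdev should be merged in.  Subsequent calls check that rdev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *rdev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 */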
struct super_type {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
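
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that
 * do not support bitmaps.  It prints an error message and returns non-zero
 * if mddev has a bitmap.  Otherwise, it returns 0.
 */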
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);
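
/*
 * load_super for 0.90.0
 */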
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!md_uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		if (!md_sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
	    sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}
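
/*
 * validate_super for 0.90.0
 */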
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12, &sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now
			 */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}
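
/*
 * sync_super for 0.90.0
 */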
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active = 0, working = 0, failed = 0, spare = 0, nr_disks = 0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12, 4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}
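
/*
 * rdev_size_change for 0.90.0
 */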
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
	    rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}
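
/*
 * version 1 superblock
 */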
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32 *)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16 *) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret)
		return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev, b),
				bdevname(refdev->bdev, b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;

	/* make rdev->sb match mddev and rdev data. */
	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}
	/* Note: recovery_offset and journal_tail share space */
	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks */ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i = 0; i < max_dev; i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
			    cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can safely move any data
	 */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata, so stay
	 * 36K beyond the start of the superblock, beyond the bitmap if
	 * one is stored there, and beyond the bad-block log if present.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};

static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);
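
/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array.  It only succeeds if all working and active component
 * devices are integrity capable with matching profiles.
 */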
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
		pr_err("md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);
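
/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */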
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_rdev;
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_rdev = bdev_get_integrity(rdev->bdev);
	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		pr_err("%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
	    mddev->pers)
		return -EROFS;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
	rcu_read_lock();
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers)
			choice = mddev->raid_disks;
		while (md_find_rdev_nr_rcu(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
			rcu_read_unlock();
			return -EBUSY;
		}
	}
	rcu_read_unlock();
	if (!test_bit(Journal, &rdev->flags) &&
	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		pr_warn("md: %s: array is limited to %d devices\n",
			mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev, b);
	strreplace(b, '/', '!');

	rdev->mddev = mddev;
	pr_debug("md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled++;

	return 0;

 fail:
	pr_warn("md: failed to register dev-%s for %s\n",
		b, mdname(mddev));
	return err;
}

static void md_delayed_delete(struct work_struct *ws)
{
	struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
	kobject_del(&rdev->kobj);
	kobject_put(&rdev->kobj);
}

static void unbind_rdev_from_array(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev, b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	rdev->badblocks.count = 0;
	/* We need to delay this, otherwise we can deadlock when
	 * writing to 'remove' to "dev/state".  We also need
	 * to delay it due to rcu usage.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}
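
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */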
2280static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2281{
2282 int err = 0;
2283 struct block_device *bdev;
2284 char b[BDEVNAME_SIZE];
2285
2286 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2287 shared ? (struct md_rdev *)lock_rdev : rdev);
2288 if (IS_ERR(bdev)) {
2289 pr_warn("md: could not open %s.\n", __bdevname(dev, b));
2290 return PTR_ERR(bdev);
2291 }
2292 rdev->bdev = bdev;
2293 return err;
2294}
2295
2296static void unlock_rdev(struct md_rdev *rdev)
2297{
2298 struct block_device *bdev = rdev->bdev;
2299 rdev->bdev = NULL;
2300 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2301}
2302
2303void md_autodetect_dev(dev_t dev);
2304
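/*
 * Fully release an rdev that is no longer bound to an array: free its
 * superblock resources, drop the exclusive bdev claim taken in
 * lock_rdev(), and put the final kobject reference.
 */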
2305static void export_rdev(struct md_rdev *rdev)
2306{
2307 char b[BDEVNAME_SIZE];
2308
2309 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2310 md_rdev_clear(rdev);
2311#ifndef MODULE
2312 if (test_bit(AutoDetected, &rdev->flags))
2313 md_autodetect_dev(rdev->bdev->bd_dev);
2314#endif
2315 unlock_rdev(rdev);
2316 kobject_put(&rdev->kobj);
2317}
2318
2319void md_kick_rdev_from_array(struct md_rdev *rdev)
2320{
2321 unbind_rdev_from_array(rdev);
2322 export_rdev(rdev);
2323}
2324EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2325
2326static void export_array(struct mddev *mddev)
2327{
2328 struct md_rdev *rdev;
2329
2330 while (!list_empty(&mddev->disks)) {
2331 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2332 same_set);
2333 md_kick_rdev_from_array(rdev);
2334 }
2335 mddev->raid_disks = 0;
2336 mddev->major_version = 0;
2337}
2338
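/*
 * Attempt to mark the array clean by switching writes_pending to
 * atomic mode and checking that it has drained to zero.  Returns the
 * resulting ->in_sync value.  Caller must hold mddev->lock.
 */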
2339static bool set_in_sync(struct mddev *mddev)
2340{
2341 lockdep_assert_held(&mddev->lock);
2342 if (!mddev->in_sync) {
2343 mddev->sync_checkers++;
2344 spin_unlock(&mddev->lock);
2345 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2346 spin_lock(&mddev->lock);
2347 if (!mddev->in_sync &&
2348 percpu_ref_is_zero(&mddev->writes_pending)) {
2349 mddev->in_sync = 1;
2350 /*
2351 * Ensure ->in_sync is visible before we clear
2352 * ->sync_checkers.
2353 */
2354 smp_mb();
2355 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2356 sysfs_notify_dirent_safe(mddev->sysfs_state);
2357 }
2358 if (--mddev->sync_checkers == 0)
2359 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2360 }
2361 if (mddev->safemode == 1)
2362 mddev->safemode = 0;
2363 return mddev->in_sync;
2364}
2365
2366static void sync_sbs(struct mddev *mddev, int nospares)
2367{
2368 /* Update each superblock (in-memory image), but
2369 * if we are allowed to, skip spares which already
2370 * have the right event counter, or have one earlier
2371 * (which would mean they aren't being marked as dirty
2372 * with the rest of the array)
2373 */
2374 struct md_rdev *rdev;
2375 rdev_for_each(rdev, mddev) {
2376 if (rdev->sb_events == mddev->events ||
2377 (nospares &&
2378 rdev->raid_disk < 0 &&
2379 rdev->sb_events+1 == mddev->events)) {
2380 /* Don't update this superblock */
2381 rdev->sb_loaded = 2;
2382 } else {
2383 sync_super(mddev, rdev);
2384 rdev->sb_loaded = 1;
2385 }
2386 }
2387}
2388
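/*
 * Compare the on-disk superblock of a working member with the current
 * in-memory state; used by clustered md to decide whether a metadata
 * update can be skipped.
 */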
2389static bool does_sb_need_changing(struct mddev *mddev)
2390{
2391 struct md_rdev *rdev;
2392 struct mdp_superblock_1 *sb;
2393 int role;
2394
2395 /* Find a good rdev */
2396 rdev_for_each(rdev, mddev)
2397 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2398 break;
2399
2400 /* No good device found. */
2401 if (!rdev)
2402 return false;
2403
2404 sb = page_address(rdev->sb_page);
2405 /* Check if a device has become faulty or a spare become active */
2406 rdev_for_each(rdev, mddev) {
2407 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2408 /* Device activated? */
2409 if (role == 0xffff && rdev->raid_disk >=0 &&
2410 !test_bit(Faulty, &rdev->flags))
2411 return true;
2412 /* Device turned faulty? */
2413 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2414 return true;
2415 }
2416
2417 /* Check if any mddev parameters have changed */
2418 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2419 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2420 (mddev->layout != le32_to_cpu(sb->layout)) ||
2421 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2422 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2423 return true;
2424
2425 return false;
2426}
2427
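/*
 * Write the in-memory superblocks out to all member devices, retrying
 * until no further change flags have been set in ->sb_flags meanwhile.
 */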
2428void md_update_sb(struct mddev *mddev, int force_change)
2429{
2430 struct md_rdev *rdev;
2431 int sync_req;
2432 int nospares = 0;
2433 int any_badblocks_changed = 0;
2434 int ret = -1;
2435
2436 if (mddev->ro) {
2437 if (force_change)
2438 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2439 return;
2440 }
2441
2442repeat:
2443 if (mddev_is_clustered(mddev)) {
2444 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2445 force_change = 1;
2446 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2447 nospares = 1;
2448 ret = md_cluster_ops->metadata_update_start(mddev);
2449 /* Has someone else already updated the sb? */
2450 if (!does_sb_need_changing(mddev)) {
2451 if (ret == 0)
2452 md_cluster_ops->metadata_update_cancel(mddev);
2453 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2454 BIT(MD_SB_CHANGE_DEVS) |
2455 BIT(MD_SB_CHANGE_CLEAN));
2456 return;
2457 }
2458 }
2459
2460 /*
2461 * First make sure individual recovery_offsets are correct.
2462 * curr_resync_completed can only be used during recovery.
2463 * During reshape/resync it might use array-addresses rather
2464 * than device addresses.
2465 */
2466 rdev_for_each(rdev, mddev) {
2467 if (rdev->raid_disk >= 0 &&
2468 mddev->delta_disks >= 0 &&
2469 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2470 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2471 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2472 !test_bit(Journal, &rdev->flags) &&
2473 !test_bit(In_sync, &rdev->flags) &&
2474 mddev->curr_resync_completed > rdev->recovery_offset)
2475 rdev->recovery_offset = mddev->curr_resync_completed;
2476
2477 }
2478 if (!mddev->persistent) {
2479 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2480 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2481 if (!mddev->external) {
2482 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2483 rdev_for_each(rdev, mddev) {
2484 if (rdev->badblocks.changed) {
2485 rdev->badblocks.changed = 0;
2486 ack_all_badblocks(&rdev->badblocks);
2487 md_error(mddev, rdev);
2488 }
2489 clear_bit(Blocked, &rdev->flags);
2490 clear_bit(BlockedBadBlocks, &rdev->flags);
2491 wake_up(&rdev->blocked_wait);
2492 }
2493 }
2494 wake_up(&mddev->sb_wait);
2495 return;
2496 }
2497
2498 spin_lock(&mddev->lock);
2499
2500 mddev->utime = ktime_get_real_seconds();
2501
2502 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2503 force_change = 1;
2504 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2505 /* just a clean<->dirty transition, possibly leave spares alone,
2506 * though if events isn't the right even/odd, we will have to do
2507 * spares after all
2508 */
2509 nospares = 1;
2510 if (force_change)
2511 nospares = 0;
2512 if (mddev->degraded)
2513 /* If the array is degraded, then skipping spares is both
2514 * dangerous and fairly pointless.
2515 * Dangerous because a device that was removed from the array
2516 * might have an event_count that still looks up-to-date,
2517 * so it can be re-added without a resync.
2518 * Pointless because if there are any spares to skip,
2519 * then a recovery will happen and soon that array won't
2520 * be degraded any more and the spare can go back to sleep.
2521 */
2522 nospares = 0;
2523
2524 sync_req = mddev->in_sync;
2525
2526 /* If this is just a dirty<->clean transition, and the array is clean
2527 * and 'events' is odd, we can roll back to the previous clean state */
2528 if (nospares
2529 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2530 && mddev->can_decrease_events
2531 && mddev->events != 1) {
2532 mddev->events--;
2533 mddev->can_decrease_events = 0;
2534 } else {
2535 /* otherwise we have to go forward and ... */
2536 mddev->events ++;
2537 mddev->can_decrease_events = nospares;
2538 }
2539
2540 /*
2541 * This 64-bit counter should never wrap.
2542 * Either we are in around ~1 trillion A.C., assuming
2543 * 1 reboot per second, or we have a bug...
2544 */
2545 WARN_ON(mddev->events == 0);
2546
2547 rdev_for_each(rdev, mddev) {
2548 if (rdev->badblocks.changed)
2549 any_badblocks_changed++;
2550 if (test_bit(Faulty, &rdev->flags))
2551 set_bit(FaultRecorded, &rdev->flags);
2552 }
2553
2554 sync_sbs(mddev, nospares);
2555 spin_unlock(&mddev->lock);
2556
2557 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2558 mdname(mddev), mddev->in_sync);
2559
2560 if (mddev->queue)
2561 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2562rewrite:
2563 bitmap_update_sb(mddev->bitmap);
2564 rdev_for_each(rdev, mddev) {
2565 char b[BDEVNAME_SIZE];
2566
2567 if (rdev->sb_loaded != 1)
2568 continue;
2569
2570 if (!test_bit(Faulty, &rdev->flags)) {
2571 md_super_write(mddev,rdev,
2572 rdev->sb_start, rdev->sb_size,
2573 rdev->sb_page);
2574 pr_debug("md: (write) %s's sb offset: %llu\n",
2575 bdevname(rdev->bdev, b),
2576 (unsigned long long)rdev->sb_start);
2577 rdev->sb_events = mddev->events;
2578 if (rdev->badblocks.size) {
2579 md_super_write(mddev, rdev,
2580 rdev->badblocks.sector,
2581 rdev->badblocks.size << 9,
2582 rdev->bb_page);
2583 rdev->badblocks.size = 0;
2584 }
2585
2586 } else
2587 pr_debug("md: %s (skipping faulty)\n",
2588 bdevname(rdev->bdev, b));
2589
2590 if (mddev->level == LEVEL_MULTIPATH)
2591 /* only need to write one superblock... */
2592 break;
2593 }
2594 if (md_super_wait(mddev) < 0)
2595 goto rewrite;
2596 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2597
2598 if (mddev_is_clustered(mddev) && ret == 0)
2599 md_cluster_ops->metadata_update_finish(mddev);
2600
2601 if (mddev->in_sync != sync_req ||
2602 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2603 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2604 /* have to write it out again */
2605 goto repeat;
2606 wake_up(&mddev->sb_wait);
2607 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2608 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2609
2610 rdev_for_each(rdev, mddev) {
2611 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2612 clear_bit(Blocked, &rdev->flags);
2613
2614 if (any_badblocks_changed)
2615 ack_all_badblocks(&rdev->badblocks);
2616 clear_bit(BlockedBadBlocks, &rdev->flags);
2617 wake_up(&rdev->blocked_wait);
2618 }
2619}
2620EXPORT_SYMBOL(md_update_sb);
2621
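/*
 * Hot-add an rdev that has already been bound to the array, then kick
 * the recovery thread so the device can be integrated.
 */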
2622static int add_bound_rdev(struct md_rdev *rdev)
2623{
2624 struct mddev *mddev = rdev->mddev;
2625 int err = 0;
2626 bool add_journal = test_bit(Journal, &rdev->flags);
2627
2628 if (!mddev->pers->hot_remove_disk || add_journal) {
2629 /* If there is hot_add_disk but no hot_remove_disk
2630 * then added disks for geometry changes,
2631 * and should be added immediately.
2632 */
2633 super_types[mddev->major_version].
2634 validate_super(mddev, rdev);
2635 if (add_journal)
2636 mddev_suspend(mddev);
2637 err = mddev->pers->hot_add_disk(mddev, rdev);
2638 if (add_journal)
2639 mddev_resume(mddev);
2640 if (err) {
2641 md_kick_rdev_from_array(rdev);
2642 return err;
2643 }
2644 }
2645 sysfs_notify_dirent_safe(rdev->sysfs_state);
2646
2647 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2648 if (mddev->degraded)
2649 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2650 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2651 md_new_event(mddev);
2652 md_wakeup_thread(mddev->thread);
2653 return 0;
2654}
2655
2656/* words written to sysfs files may, or may not, be \n terminated.
2657 * We want to accept either form. For this we use cmd_match.
2658 */
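/* e.g. cmd_match("frozen\n", "frozen") == 1; cmd_match("frozenx", "frozen") == 0 */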
2659static int cmd_match(const char *cmd, const char *str)
2660{
2661 /* See if cmd, written into a sysfs file, matches
2662 * str.  They must either be the same, or cmd can
2663 * have a trailing newline
2664 */
2665 while (*cmd && *str && *cmd == *str) {
2666 cmd++;
2667 str++;
2668 }
2669 if (*cmd == '\n')
2670 cmd++;
2671 if (*str || *cmd)
2672 return 0;
2673 return 1;
2674}
2675
2676struct rdev_sysfs_entry {
2677 struct attribute attr;
2678 ssize_t (*show)(struct md_rdev *, char *);
2679 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2680};
2681
2682static ssize_t
2683state_show(struct md_rdev *rdev, char *page)
2684{
2685 char *sep = ",";
2686 size_t len = 0;
2687 unsigned long flags = READ_ONCE(rdev->flags);
2688
2689 if (test_bit(Faulty, &flags) ||
2690 (!test_bit(ExternalBbl, &flags) &&
2691 rdev->badblocks.unacked_exist))
2692 len += sprintf(page+len, "faulty%s", sep);
2693 if (test_bit(In_sync, &flags))
2694 len += sprintf(page+len, "in_sync%s", sep);
2695 if (test_bit(Journal, &flags))
2696 len += sprintf(page+len, "journal%s", sep);
2697 if (test_bit(WriteMostly, &flags))
2698 len += sprintf(page+len, "write_mostly%s", sep);
2699 if (test_bit(Blocked, &flags) ||
2700 (rdev->badblocks.unacked_exist
2701 && !test_bit(Faulty, &flags)))
2702 len += sprintf(page+len, "blocked%s", sep);
2703 if (!test_bit(Faulty, &flags) &&
2704 !test_bit(Journal, &flags) &&
2705 !test_bit(In_sync, &flags))
2706 len += sprintf(page+len, "spare%s", sep);
2707 if (test_bit(WriteErrorSeen, &flags))
2708 len += sprintf(page+len, "write_error%s", sep);
2709 if (test_bit(WantReplacement, &flags))
2710 len += sprintf(page+len, "want_replacement%s", sep);
2711 if (test_bit(Replacement, &flags))
2712 len += sprintf(page+len, "replacement%s", sep);
2713 if (test_bit(ExternalBbl, &flags))
2714 len += sprintf(page+len, "external_bbl%s", sep);
2715 if (test_bit(FailFast, &flags))
2716 len += sprintf(page+len, "failfast%s", sep);
2717
2718 if (len)
2719 len -= strlen(sep);
2720
2721 return len+sprintf(page+len, "\n");
2722}
2723
2724static ssize_t
2725state_store(struct md_rdev *rdev, const char *buf, size_t len)
2726{
2727 /* can write
2728 *  faulty  - simulates an error on the device
2729 *  remove  - disconnects the device
2730 *  writemostly - sets write_mostly
2731 *  -writemostly - clears write_mostly
2732 *  blocked - sets the Blocked flags
2733 *  -blocked - clears the Blocked and possibly simulates an error
2734 *  insync - sets Insync providing device isn't active
2735 *  -insync - clear Insync for a device with a slot assigned,
2736 *            so that it gets rebuilt based on bitmap
2737 *  write_error - sets WriteErrorSeen
2738 *  -write_error - clears WriteErrorSeen
2739 *  {,-}failfast - set/clear FailFast
2740 */
2741 int err = -EINVAL;
2742 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2743 md_error(rdev->mddev, rdev);
2744 if (test_bit(Faulty, &rdev->flags))
2745 err = 0;
2746 else
2747 err = -EBUSY;
2748 } else if (cmd_match(buf, "remove")) {
2749 if (rdev->mddev->pers) {
2750 clear_bit(Blocked, &rdev->flags);
2751 remove_and_add_spares(rdev->mddev, rdev);
2752 }
2753 if (rdev->raid_disk >= 0)
2754 err = -EBUSY;
2755 else {
2756 struct mddev *mddev = rdev->mddev;
2757 err = 0;
2758 if (mddev_is_clustered(mddev))
2759 err = md_cluster_ops->remove_disk(mddev, rdev);
2760
2761 if (err == 0) {
2762 md_kick_rdev_from_array(rdev);
2763 if (mddev->pers) {
2764 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2765 md_wakeup_thread(mddev->thread);
2766 }
2767 md_new_event(mddev);
2768 }
2769 }
2770 } else if (cmd_match(buf, "writemostly")) {
2771 set_bit(WriteMostly, &rdev->flags);
2772 err = 0;
2773 } else if (cmd_match(buf, "-writemostly")) {
2774 clear_bit(WriteMostly, &rdev->flags);
2775 err = 0;
2776 } else if (cmd_match(buf, "blocked")) {
2777 set_bit(Blocked, &rdev->flags);
2778 err = 0;
2779 } else if (cmd_match(buf, "-blocked")) {
2780 if (!test_bit(Faulty, &rdev->flags) &&
2781 !test_bit(ExternalBbl, &rdev->flags) &&
2782 rdev->badblocks.unacked_exist) {
2783 /* metadata handler doesn't understand badblocks,
2784 * so we need to fail the device
2785 */
2786 md_error(rdev->mddev, rdev);
2787 }
2788 clear_bit(Blocked, &rdev->flags);
2789 clear_bit(BlockedBadBlocks, &rdev->flags);
2790 wake_up(&rdev->blocked_wait);
2791 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2792 md_wakeup_thread(rdev->mddev->thread);
2793
2794 err = 0;
2795 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2796 set_bit(In_sync, &rdev->flags);
2797 err = 0;
2798 } else if (cmd_match(buf, "failfast")) {
2799 set_bit(FailFast, &rdev->flags);
2800 err = 0;
2801 } else if (cmd_match(buf, "-failfast")) {
2802 clear_bit(FailFast, &rdev->flags);
2803 err = 0;
2804 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2805 !test_bit(Journal, &rdev->flags)) {
2806 if (rdev->mddev->pers == NULL) {
2807 clear_bit(In_sync, &rdev->flags);
2808 rdev->saved_raid_disk = rdev->raid_disk;
2809 rdev->raid_disk = -1;
2810 err = 0;
2811 }
2812 } else if (cmd_match(buf, "write_error")) {
2813 set_bit(WriteErrorSeen, &rdev->flags);
2814 err = 0;
2815 } else if (cmd_match(buf, "-write_error")) {
2816 clear_bit(WriteErrorSeen, &rdev->flags);
2817 err = 0;
2818 } else if (cmd_match(buf, "want_replacement")) {
2819 /* Any non-spare device that is not a replacement can
2820 * become want_replacement at any time, but we then need to
2821 * check if recovery is needed.
2822 */
2823 if (rdev->raid_disk >= 0 &&
2824 !test_bit(Journal, &rdev->flags) &&
2825 !test_bit(Replacement, &rdev->flags))
2826 set_bit(WantReplacement, &rdev->flags);
2827 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2828 md_wakeup_thread(rdev->mddev->thread);
2829 err = 0;
2830 } else if (cmd_match(buf, "-want_replacement")) {
2831 /* Clearing 'want_replacement' is always allowed.
2832 * Once replacement starts it is too late though.
2833 */
2834 err = 0;
2835 clear_bit(WantReplacement, &rdev->flags);
2836 } else if (cmd_match(buf, "replacement")) {
2837 /* Can only set a device as a replacement when array has not
2838 * yet been started.  Once running, replacement is automatic
2839 * from spares, or by assigning 'slot'.
2840 */
2841 if (rdev->mddev->pers)
2842 err = -EBUSY;
2843 else {
2844 set_bit(Replacement, &rdev->flags);
2845 err = 0;
2846 }
2847 } else if (cmd_match(buf, "-replacement")) {
2848 /* Similarly, can only clear Replacement before start */
2849 if (rdev->mddev->pers)
2850 err = -EBUSY;
2851 else {
2852 clear_bit(Replacement, &rdev->flags);
2853 err = 0;
2854 }
2855 } else if (cmd_match(buf, "re-add")) {
2856 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2857 /* clear_bit is performed _after_ all the devices
2858 * have their local Faulty bit cleared. If any writes
2859 * happen in the meantime in the local node, they
2860 * will land in the local bitmap, which will be synced
2861 * by this node eventually
2862 */
2863 if (!mddev_is_clustered(rdev->mddev) ||
2864 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2865 clear_bit(Faulty, &rdev->flags);
2866 err = add_bound_rdev(rdev);
2867 }
2868 } else
2869 err = -EBUSY;
2870 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2871 set_bit(ExternalBbl, &rdev->flags);
2872 rdev->badblocks.shift = 0;
2873 err = 0;
2874 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2875 clear_bit(ExternalBbl, &rdev->flags);
2876 err = 0;
2877 }
2878 if (!err)
2879 sysfs_notify_dirent_safe(rdev->sysfs_state);
2880 return err ? err : len;
2881}
2882static struct rdev_sysfs_entry rdev_state =
2883__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2884
2885static ssize_t
2886errors_show(struct md_rdev *rdev, char *page)
2887{
2888 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2889}
2890
2891static ssize_t
2892errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2893{
2894 unsigned int n;
2895 int rv;
2896
2897 rv = kstrtouint(buf, 10, &n);
2898 if (rv < 0)
2899 return rv;
2900 atomic_set(&rdev->corrected_errors, n);
2901 return len;
2902}
2903static struct rdev_sysfs_entry rdev_errors =
2904__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2905
2906static ssize_t
2907slot_show(struct md_rdev *rdev, char *page)
2908{
2909 if (test_bit(Journal, &rdev->flags))
2910 return sprintf(page, "journal\n");
2911 else if (rdev->raid_disk < 0)
2912 return sprintf(page, "none\n");
2913 else
2914 return sprintf(page, "%d\n", rdev->raid_disk);
2915}
2916
2917static ssize_t
2918slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2919{
2920 int slot;
2921 int err;
2922
2923 if (test_bit(Journal, &rdev->flags))
2924 return -EBUSY;
2925 if (strncmp(buf, "none", 4)==0)
2926 slot = -1;
2927 else {
2928 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2929 if (err < 0)
2930 return err;
2931 }
2932 if (rdev->mddev->pers && slot == -1) {
2933 /* Setting 'slot' on an active array requires also
2934 * updating the 'rd%d' link, and communicating
2935 * with the personality with ->hot_*_disk.
2936 * For now we only support removing
2937 * failed/spare devices.  This normally happens automatically,
2938 * but not when the metadata is externally managed.
2939 */
2940 if (rdev->raid_disk == -1)
2941 return -EEXIST;
2942 /* personality does all needed checks */
2943 if (rdev->mddev->pers->hot_remove_disk == NULL)
2944 return -EINVAL;
2945 clear_bit(Blocked, &rdev->flags);
2946 remove_and_add_spares(rdev->mddev, rdev);
2947 if (rdev->raid_disk >= 0)
2948 return -EBUSY;
2949 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2950 md_wakeup_thread(rdev->mddev->thread);
2951 } else if (rdev->mddev->pers) {
2952 /* Activating a spare .. or possibly reactivating
2953 * if we ever get bitmaps working here.
2954 */
2955 int err;
2956
2957 if (rdev->raid_disk != -1)
2958 return -EBUSY;
2959
2960 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2961 return -EBUSY;
2962
2963 if (rdev->mddev->pers->hot_add_disk == NULL)
2964 return -EINVAL;
2965
2966 if (slot >= rdev->mddev->raid_disks &&
2967 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2968 return -ENOSPC;
2969
2970 rdev->raid_disk = slot;
2971 if (test_bit(In_sync, &rdev->flags))
2972 rdev->saved_raid_disk = slot;
2973 else
2974 rdev->saved_raid_disk = -1;
2975 clear_bit(In_sync, &rdev->flags);
2976 clear_bit(Bitmap_sync, &rdev->flags);
2977 err = rdev->mddev->pers->
2978 hot_add_disk(rdev->mddev, rdev);
2979 if (err) {
2980 rdev->raid_disk = -1;
2981 return err;
2982 } else
2983 sysfs_notify_dirent_safe(rdev->sysfs_state);
2984 if (sysfs_link_rdev(rdev->mddev, rdev))
2985 /* failure here is OK */;
2986 /* don't wakeup anyone, leave that to userspace. */
2987 } else {
2988 if (slot >= rdev->mddev->raid_disks &&
2989 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2990 return -ENOSPC;
2991 rdev->raid_disk = slot;
2992 /* assume it is working */
2993 clear_bit(Faulty, &rdev->flags);
2994 clear_bit(WriteMostly, &rdev->flags);
2995 set_bit(In_sync, &rdev->flags);
2996 sysfs_notify_dirent_safe(rdev->sysfs_state);
2997 }
2998 return len;
2999}
3000
3001static struct rdev_sysfs_entry rdev_slot =
3002__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3003
3004static ssize_t
3005offset_show(struct md_rdev *rdev, char *page)
3006{
3007 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3008}
3009
3010static ssize_t
3011offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3012{
3013 unsigned long long offset;
3014 if (kstrtoull(buf, 10, &offset) < 0)
3015 return -EINVAL;
3016 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3017 return -EBUSY;
3018 if (rdev->sectors && rdev->mddev->external)
3019 /* Must set offset before size, so overlap checks
3020 * can be sane */
3021 return -EBUSY;
3022 rdev->data_offset = offset;
3023 rdev->new_data_offset = offset;
3024 return len;
3025}
3026
3027static struct rdev_sysfs_entry rdev_offset =
3028__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3029
3030static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3031{
3032 return sprintf(page, "%llu\n",
3033 (unsigned long long)rdev->new_data_offset);
3034}
3035
3036static ssize_t new_offset_store(struct md_rdev *rdev,
3037 const char *buf, size_t len)
3038{
3039 unsigned long long new_offset;
3040 struct mddev *mddev = rdev->mddev;
3041
3042 if (kstrtoull(buf, 10, &new_offset) < 0)
3043 return -EINVAL;
3044
3045 if (mddev->sync_thread ||
3046 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3047 return -EBUSY;
3048 if (new_offset == rdev->data_offset)
3049 /* reset is always permitted */
3050 ;
3051 else if (new_offset > rdev->data_offset) {
3052 /* must not push array size beyond rdev_sectors */
3053 if (new_offset - rdev->data_offset
3054 + mddev->dev_sectors > rdev->sectors)
3055 return -E2BIG;
3056 }
3057 /* Metadata worries about other space details. */
3058
3059 /* decreasing the offset is inconsistent with a backwards
3060 * reshape.
3061 */
3062 if (new_offset < rdev->data_offset &&
3063 mddev->reshape_backwards)
3064 return -EINVAL;
3065
3066 /* Increasing the offset is inconsistent with a forwards
3067 * reshape.
3068 */
3069 if (new_offset > rdev->data_offset &&
3070 !mddev->reshape_backwards)
3071 return -EINVAL;
3072
3073 if (mddev->pers && mddev->persistent &&
3074 !super_types[mddev->major_version]
3075 .allow_new_offset(rdev, new_offset))
3076 return -E2BIG;
3077 rdev->new_data_offset = new_offset;
3078 if (new_offset > rdev->data_offset)
3079 mddev->reshape_backwards = 1;
3080 else if (new_offset < rdev->data_offset)
3081 mddev->reshape_backwards = 0;
3082
3083 return len;
3084}
3085static struct rdev_sysfs_entry rdev_new_offset =
3086__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3087
3088static ssize_t
3089rdev_size_show(struct md_rdev *rdev, char *page)
3090{
3091 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3092}
3093
3094static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3095{
3096 /* check if two start/length pairs overlap */
3097 if (s1+l1 <= s2)
3098 return 0;
3099 if (s2+l2 <= s1)
3100 return 0;
3101 return 1;
3102}
3103
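/*
 * Parse a block count (in 1K units) from a sysfs buffer and convert it
 * to 512-byte sectors, rejecting values whose doubling would overflow.
 */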
3104static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3105{
3106 unsigned long long blocks;
3107 sector_t new;
3108
3109 if (kstrtoull(buf, 10, &blocks) < 0)
3110 return -EINVAL;
3111
3112 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3113 return -EINVAL; /* sector conversion overflow */
3114
3115 new = blocks * 2;
3116 if (new != blocks * 2)
3117 return -EINVAL; /* unsigned long long to sector_t overflow */
3118
3119 *sectors = new;
3120 return 0;
3121}
3122
3123static ssize_t
3124rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3125{
3126 struct mddev *my_mddev = rdev->mddev;
3127 sector_t oldsectors = rdev->sectors;
3128 sector_t sectors;
3129
3130 if (test_bit(Journal, &rdev->flags))
3131 return -EBUSY;
3132 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3133 return -EINVAL;
3134 if (rdev->data_offset != rdev->new_data_offset)
3135 return -EINVAL;
3136 if (my_mddev->pers && rdev->raid_disk >= 0) {
3137 if (my_mddev->persistent) {
3138 sectors = super_types[my_mddev->major_version].
3139 rdev_size_change(rdev, sectors);
3140 if (!sectors)
3141 return -EBUSY;
3142 } else if (!sectors)
3143 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3144 rdev->data_offset;
3145 if (!my_mddev->pers->resize)
3146 /* Cannot change size for RAID0 or Linear etc */
3147 return -EINVAL;
3148 }
3149 if (sectors < my_mddev->dev_sectors)
3150 return -EINVAL; /* component must fit device */
3151
3152 rdev->sectors = sectors;
3153 if (sectors > oldsectors && my_mddev->external) {
3154 /* Need to check that all other rdevs with the same
3155 * ->bdev do not overlap.  'rcu' is sufficient to walk
3156 * the rdev lists safely.
3157 * This check does not provide a hard guarantee, it
3158 * just helps avoid dangerous mistakes.
3159 */
3160 struct mddev *mddev;
3161 int overlap = 0;
3162 struct list_head *tmp;
3163
3164 rcu_read_lock();
3165 for_each_mddev(mddev, tmp) {
3166 struct md_rdev *rdev2;
3167
3168 rdev_for_each(rdev2, mddev)
3169 if (rdev->bdev == rdev2->bdev &&
3170 rdev != rdev2 &&
3171 overlaps(rdev->data_offset, rdev->sectors,
3172 rdev2->data_offset,
3173 rdev2->sectors)) {
3174 overlap = 1;
3175 break;
3176 }
3177 if (overlap) {
3178 mddev_put(mddev);
3179 break;
3180 }
3181 }
3182 rcu_read_unlock();
3183 if (overlap) {
3184 /* Someone else could have slipped in a size
3185 * change here, but doing so is just silly.
3186 * We put oldsectors back because we *know* it is
3187 * safe, and trust userspace not to race with
3188 * itself
3189 */
3190 rdev->sectors = oldsectors;
3191 return -EBUSY;
3192 }
3193 }
3194 return len;
3195}
3196
3197static struct rdev_sysfs_entry rdev_size =
3198__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3199
3200static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3201{
3202 unsigned long long recovery_start = rdev->recovery_offset;
3203
3204 if (test_bit(In_sync, &rdev->flags) ||
3205 recovery_start == MaxSector)
3206 return sprintf(page, "none\n");
3207
3208 return sprintf(page, "%llu\n", recovery_start);
3209}
3210
3211static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3212{
3213 unsigned long long recovery_start;
3214
3215 if (cmd_match(buf, "none"))
3216 recovery_start = MaxSector;
3217 else if (kstrtoull(buf, 10, &recovery_start))
3218 return -EINVAL;
3219
3220 if (rdev->mddev->pers &&
3221 rdev->raid_disk >= 0)
3222 return -EBUSY;
3223
3224 rdev->recovery_offset = recovery_start;
3225 if (recovery_start == MaxSector)
3226 set_bit(In_sync, &rdev->flags);
3227 else
3228 clear_bit(In_sync, &rdev->flags);
3229 return len;
3230}
3231
3232static struct rdev_sysfs_entry rdev_recovery_start =
3233__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3234
3235/* sysfs access to bad-blocks list.
3236 * We present two files.
3237 * 'bad-blocks' lists sector numbers and lengths of ranges that
3238 *    are recorded as bad.  The list is truncated to fit within
3239 *    the one-page limit of sysfs.
3240 *    Writing "sector length" to this file adds an acknowledged
3241 *    bad block list.
3242 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
3243 *    been acknowledged.  Writing to this file adds bad blocks
3244 *    without acknowledging them.  This is largely for testing.
3245 */
3246static ssize_t bb_show(struct md_rdev *rdev, char *page)
3247{
3248 return badblocks_show(&rdev->badblocks, page, 0);
3249}
3250static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3251{
3252 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3253
3254 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3255 wake_up(&rdev->blocked_wait);
3256 return rv;
3257}
3258static struct rdev_sysfs_entry rdev_bad_blocks =
3259__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3260
3261static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3262{
3263 return badblocks_show(&rdev->badblocks, page, 1);
3264}
3265static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3266{
3267 return badblocks_store(&rdev->badblocks, page, len, 1);
3268}
3269static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3270__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3271
3272static ssize_t
3273ppl_sector_show(struct md_rdev *rdev, char *page)
3274{
3275 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3276}
3277
3278static ssize_t
3279ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3280{
3281 unsigned long long sector;
3282
3283 if (kstrtoull(buf, 10, &sector) < 0)
3284 return -EINVAL;
3285 if (sector != (sector_t)sector)
3286 return -EINVAL;
3287
3288 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3289 rdev->raid_disk >= 0)
3290 return -EBUSY;
3291
3292 if (rdev->mddev->persistent) {
3293 if (rdev->mddev->major_version == 0)
3294 return -EINVAL;
3295 if ((sector > rdev->sb_start &&
3296 sector - rdev->sb_start > S16_MAX) ||
3297 (sector < rdev->sb_start &&
3298 rdev->sb_start - sector > -S16_MIN))
3299 return -EINVAL;
3300 rdev->ppl.offset = sector - rdev->sb_start;
3301 } else if (!rdev->mddev->external) {
3302 return -EBUSY;
3303 }
3304 rdev->ppl.sector = sector;
3305 return len;
3306}
3307
3308static struct rdev_sysfs_entry rdev_ppl_sector =
3309__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3310
3311static ssize_t
3312ppl_size_show(struct md_rdev *rdev, char *page)
3313{
3314 return sprintf(page, "%u\n", rdev->ppl.size);
3315}
3316
3317static ssize_t
3318ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3319{
3320 unsigned int size;
3321
3322 if (kstrtouint(buf, 10, &size) < 0)
3323 return -EINVAL;
3324
3325 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3326 rdev->raid_disk >= 0)
3327 return -EBUSY;
3328
3329 if (rdev->mddev->persistent) {
3330 if (rdev->mddev->major_version == 0)
3331 return -EINVAL;
3332 if (size > U16_MAX)
3333 return -EINVAL;
3334 } else if (!rdev->mddev->external) {
3335 return -EBUSY;
3336 }
3337 rdev->ppl.size = size;
3338 return len;
3339}
3340
3341static struct rdev_sysfs_entry rdev_ppl_size =
3342__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3343
3344static struct attribute *rdev_default_attrs[] = {
3345 &rdev_state.attr,
3346 &rdev_errors.attr,
3347 &rdev_slot.attr,
3348 &rdev_offset.attr,
3349 &rdev_new_offset.attr,
3350 &rdev_size.attr,
3351 &rdev_recovery_start.attr,
3352 &rdev_bad_blocks.attr,
3353 &rdev_unack_bad_blocks.attr,
3354 &rdev_ppl_sector.attr,
3355 &rdev_ppl_size.attr,
3356 NULL,
3357};
3358static ssize_t
3359rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3360{
3361 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3362 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3363
3364 if (!entry->show)
3365 return -EIO;
3366 if (!rdev->mddev)
3367 return -EBUSY;
3368 return entry->show(rdev, page);
3369}
3370
3371static ssize_t
3372rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3373 const char *page, size_t length)
3374{
3375 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3376 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3377 ssize_t rv;
3378 struct mddev *mddev = rdev->mddev;
3379
3380 if (!entry->store)
3381 return -EIO;
3382 if (!capable(CAP_SYS_ADMIN))
3383 return -EACCES;
3384 rv = mddev ? mddev_lock(mddev): -EBUSY;
3385 if (!rv) {
3386 if (rdev->mddev == NULL)
3387 rv = -EBUSY;
3388 else
3389 rv = entry->store(rdev, page, length);
3390 mddev_unlock(mddev);
3391 }
3392 return rv;
3393}
3394
3395static void rdev_free(struct kobject *ko)
3396{
3397 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3398 kfree(rdev);
3399}
3400static const struct sysfs_ops rdev_sysfs_ops = {
3401 .show = rdev_attr_show,
3402 .store = rdev_attr_store,
3403};
3404static struct kobj_type rdev_ktype = {
3405 .release = rdev_free,
3406 .sysfs_ops = &rdev_sysfs_ops,
3407 .default_attrs = rdev_default_attrs,
3408};
3409
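/*
 * Initialise a freshly allocated rdev to safe defaults and allocate
 * its bad-block list.
 */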
3410int md_rdev_init(struct md_rdev *rdev)
3411{
3412 rdev->desc_nr = -1;
3413 rdev->saved_raid_disk = -1;
3414 rdev->raid_disk = -1;
3415 rdev->flags = 0;
3416 rdev->data_offset = 0;
3417 rdev->new_data_offset = 0;
3418 rdev->sb_events = 0;
3419 rdev->last_read_error = 0;
3420 rdev->sb_loaded = 0;
3421 rdev->bb_page = NULL;
3422 atomic_set(&rdev->nr_pending, 0);
3423 atomic_set(&rdev->read_errors, 0);
3424 atomic_set(&rdev->corrected_errors, 0);
3425
3426 INIT_LIST_HEAD(&rdev->same_set);
3427 init_waitqueue_head(&rdev->blocked_wait);
3428
3429 /* Add space to store bad block list.
3430 * This reserves the space even on arrays where it cannot
3431 * be used - I wonder if that matters
3432 */
3433 return badblocks_init(&rdev->badblocks, 0);
3434}
3435EXPORT_SYMBOL_GPL(md_rdev_init);
3436
3437/*
3438 * Import a device. If 'super_format' >= 0, then sanity check the superblock
3439 *
3440 * mark the device faulty if:
3441 *   - the device is nonexistent (zero size)
3442 *   - the device has no valid superblock
3443 *
3444 * a faulty rdev _never_ has rdev->sb set.
3445 */
3446static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3447{
3448 char b[BDEVNAME_SIZE];
3449 int err;
3450 struct md_rdev *rdev;
3451 sector_t size;
3452
3453 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3454 if (!rdev)
3455 return ERR_PTR(-ENOMEM);
3456
3457 err = md_rdev_init(rdev);
3458 if (err)
3459 goto abort_free;
3460 err = alloc_disk_sb(rdev);
3461 if (err)
3462 goto abort_free;
3463
3464 err = lock_rdev(rdev, newdev, super_format == -2);
3465 if (err)
3466 goto abort_free;
3467
3468 kobject_init(&rdev->kobj, &rdev_ktype);
3469
3470 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3471 if (!size) {
3472 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3473 bdevname(rdev->bdev,b));
3474 err = -EINVAL;
3475 goto abort_free;
3476 }
3477
3478 if (super_format >= 0) {
3479 err = super_types[super_format].
3480 load_super(rdev, NULL, super_minor);
3481 if (err == -EINVAL) {
3482 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3483 bdevname(rdev->bdev,b),
3484 super_format, super_minor);
3485 goto abort_free;
3486 }
3487 if (err < 0) {
3488 pr_warn("md: could not read %s's sb, not importing!\n",
3489 bdevname(rdev->bdev,b));
3490 goto abort_free;
3491 }
3492 }
3493
3494 return rdev;
3495
3496abort_free:
3497 if (rdev->bdev)
3498 unlock_rdev(rdev);
3499 md_rdev_clear(rdev);
3500 kfree(rdev);
3501 return ERR_PTR(err);
3502}
3503
3504
3505/*
3506 * Check a full RAID array for plausibility
3507 */
3508static void analyze_sbs(struct mddev *mddev)
3509{
3510 int i;
3511 struct md_rdev *rdev, *freshest, *tmp;
3512 char b[BDEVNAME_SIZE];
3513
3514 freshest = NULL;
3515 rdev_for_each_safe(rdev, tmp, mddev)
3516 switch (super_types[mddev->major_version].
3517 load_super(rdev, freshest, mddev->minor_version)) {
3518 case 1:
3519 freshest = rdev;
3520 break;
3521 case 0:
3522 break;
3523 default:
3524 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3525 bdevname(rdev->bdev,b));
3526 md_kick_rdev_from_array(rdev);
3527 }
3528
3529 super_types[mddev->major_version].
3530 validate_super(mddev, freshest);
3531
3532 i = 0;
3533 rdev_for_each_safe(rdev, tmp, mddev) {
3534 if (mddev->max_disks &&
3535 (rdev->desc_nr >= mddev->max_disks ||
3536 i > mddev->max_disks)) {
3537 pr_warn("md: %s: %s: only %d devices permitted\n",
3538 mdname(mddev), bdevname(rdev->bdev, b),
3539 mddev->max_disks);
3540 md_kick_rdev_from_array(rdev);
3541 continue;
3542 }
3543 if (rdev != freshest) {
3544 if (super_types[mddev->major_version].
3545 validate_super(mddev, rdev)) {
3546 pr_warn("md: kicking non-fresh %s from array!\n",
3547 bdevname(rdev->bdev,b));
3548 md_kick_rdev_from_array(rdev);
3549 continue;
3550 }
3551 }
3552 if (mddev->level == LEVEL_MULTIPATH) {
3553 rdev->desc_nr = i++;
3554 rdev->raid_disk = rdev->desc_nr;
3555 set_bit(In_sync, &rdev->flags);
3556 } else if (rdev->raid_disk >=
3557 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3558 !test_bit(Journal, &rdev->flags)) {
3559 rdev->raid_disk = -1;
3560 clear_bit(In_sync, &rdev->flags);
3561 }
3562 }
3563}
3564
3565/* Read a fixed-point number.
3566 * Numbers in sysfs attributes should be in "standard" units where
3567 * possible, so time should be in seconds.
3568 * However we internally use a much smaller unit such as
3569 * milliseconds or jiffies.
3570 * This function takes a decimal number with a possible fractional
3571 * component, and produces an integer which is the result of
3572 * multiplying that number by 10^'scale'.
3573 * all without any floating-point arithmetic.
3574 */
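/* e.g. strict_strtoul_scaled("1.5", &res, 3) stores 1500 in *res. */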
3575int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3576{
3577 unsigned long result = 0;
3578 long decimals = -1;
3579 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3580 if (*cp == '.')
3581 decimals = 0;
3582 else if (decimals < scale) {
3583 unsigned int value;
3584 value = *cp - '0';
3585 result = result * 10 + value;
3586 if (decimals >= 0)
3587 decimals++;
3588 }
3589 cp++;
3590 }
3591 if (*cp == '\n')
3592 cp++;
3593 if (*cp)
3594 return -EINVAL;
3595 if (decimals < 0)
3596 decimals = 0;
3597 while (decimals < scale) {
3598 result *= 10;
3599 decimals ++;
3600 }
3601 *res = result;
3602 return 0;
3603}
3604
3605static ssize_t
3606safe_delay_show(struct mddev *mddev, char *page)
3607{
3608 int msec = (mddev->safemode_delay*1000)/HZ;
3609 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3610}
3611static ssize_t
3612safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3613{
3614 unsigned long msec;
3615
3616 if (mddev_is_clustered(mddev)) {
3617 pr_warn("md: Safemode is disabled for clustered mode\n");
3618 return -EINVAL;
3619 }
3620
3621 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3622 return -EINVAL;
3623 if (msec == 0)
3624 mddev->safemode_delay = 0;
3625 else {
3626 unsigned long old_delay = mddev->safemode_delay;
3627 unsigned long new_delay = (msec*HZ)/1000;
3628
3629 if (new_delay == 0)
3630 new_delay = 1;
3631 mddev->safemode_delay = new_delay;
3632 if (new_delay < old_delay || old_delay == 0)
3633 mod_timer(&mddev->safemode_timer, jiffies+1);
3634 }
3635 return len;
3636}
3637static struct md_sysfs_entry md_safe_delay =
3638__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3639
3640static ssize_t
3641level_show(struct mddev *mddev, char *page)
3642{
3643 struct md_personality *p;
3644 int ret;
3645 spin_lock(&mddev->lock);
3646 p = mddev->pers;
3647 if (p)
3648 ret = sprintf(page, "%s\n", p->name);
3649 else if (mddev->clevel[0])
3650 ret = sprintf(page, "%s\n", mddev->clevel);
3651 else if (mddev->level != LEVEL_NONE)
3652 ret = sprintf(page, "%d\n", mddev->level);
3653 else
3654 ret = 0;
3655 spin_unlock(&mddev->lock);
3656 return ret;
3657}
3658
3659static ssize_t
3660level_store(struct mddev *mddev, const char *buf, size_t len)
3661{
3662 char clevel[16];
3663 ssize_t rv;
3664 size_t slen = len;
3665 struct md_personality *pers, *oldpers;
3666 long level;
3667 void *priv, *oldpriv;
3668 struct md_rdev *rdev;
3669
3670 if (slen == 0 || slen >= sizeof(clevel))
3671 return -EINVAL;
3672
3673 rv = mddev_lock(mddev);
3674 if (rv)
3675 return rv;
3676
3677 if (mddev->pers == NULL) {
3678 strncpy(mddev->clevel, buf, slen);
3679 if (mddev->clevel[slen-1] == '\n')
3680 slen--;
3681 mddev->clevel[slen] = 0;
3682 mddev->level = LEVEL_NONE;
3683 rv = len;
3684 goto out_unlock;
3685 }
3686 rv = -EROFS;
3687 if (mddev->ro)
3688 goto out_unlock;
3689
3690
3691 /* request to change the personality.  Need to ensure:
3692 *  - array is not engaged in resync/recovery/reshape
3693 *  - old personality can be suspended
3694 *  - new personality will access other array.
3695 */
3696 rv = -EBUSY;
3697 if (mddev->sync_thread ||
3698 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3699 mddev->reshape_position != MaxSector ||
3700 mddev->sysfs_active)
3701 goto out_unlock;
3702
3703 rv = -EINVAL;
3704 if (!mddev->pers->quiesce) {
3705 pr_warn("md: %s: %s does not support online personality change\n",
3706 mdname(mddev), mddev->pers->name);
3707 goto out_unlock;
3708 }
3709
3710 /* Now find the new personality */
3711 strncpy(clevel, buf, slen);
3712 if (clevel[slen-1] == '\n')
3713 slen--;
3714 clevel[slen] = 0;
3715 if (kstrtol(clevel, 10, &level))
3716 level = LEVEL_NONE;
3717
3718 if (request_module("md-%s", clevel) != 0)
3719 request_module("md-level-%s", clevel);
3720 spin_lock(&pers_lock);
3721 pers = find_pers(level, clevel);
3722 if (!pers || !try_module_get(pers->owner)) {
3723 spin_unlock(&pers_lock);
3724 pr_warn("md: personality %s not loaded\n", clevel);
3725 rv = -EINVAL;
3726 goto out_unlock;
3727 }
3728 spin_unlock(&pers_lock);
3729
3730 if (pers == mddev->pers) {
3731 /* Nothing to do! */
3732 module_put(pers->owner);
3733 rv = len;
3734 goto out_unlock;
3735 }
3736 if (!pers->takeover) {
3737 module_put(pers->owner);
3738 pr_warn("md: %s: %s does not support personality takeover\n",
3739 mdname(mddev), clevel);
3740 rv = -EINVAL;
3741 goto out_unlock;
3742 }
3743
3744 rdev_for_each(rdev, mddev)
3745 rdev->new_raid_disk = rdev->raid_disk;
3746
3747 /* ->takeover must set new_* and/or delta_disks
3748 * if it succeeds, and may set them when it fails.
3749 */
3750 priv = pers->takeover(mddev);
3751 if (IS_ERR(priv)) {
3752 mddev->new_level = mddev->level;
3753 mddev->new_layout = mddev->layout;
3754 mddev->new_chunk_sectors = mddev->chunk_sectors;
3755 mddev->raid_disks -= mddev->delta_disks;
3756 mddev->delta_disks = 0;
3757 mddev->reshape_backwards = 0;
3758 module_put(pers->owner);
3759 pr_warn("md: %s: %s would not accept array\n",
3760 mdname(mddev), clevel);
3761 rv = PTR_ERR(priv);
3762 goto out_unlock;
3763 }
3764
3765 /* Looks like we have a winner */
3766 mddev_suspend(mddev);
3767 mddev_detach(mddev);
3768
3769 spin_lock(&mddev->lock);
3770 oldpers = mddev->pers;
3771 oldpriv = mddev->private;
3772 mddev->pers = pers;
3773 mddev->private = priv;
3774 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3775 mddev->level = mddev->new_level;
3776 mddev->layout = mddev->new_layout;
3777 mddev->chunk_sectors = mddev->new_chunk_sectors;
3778 mddev->delta_disks = 0;
3779 mddev->reshape_backwards = 0;
3780 mddev->degraded = 0;
3781 spin_unlock(&mddev->lock);
3782
3783 if (oldpers->sync_request == NULL &&
3784 mddev->external) {
3785 /* We are converting from a no-redundancy array
3786 * to a redundancy array and metadata is managed
3787 * externally so we need to be sure that writes
3788 * won't block due to a need to update the
3789 * metadata until external management is
3790 * started.
3791 */
3792 mddev->in_sync = 0;
3793 mddev->safemode_delay = 0;
3794 mddev->safemode = 0;
3795 }
3796
3797 oldpers->free(mddev, oldpriv);
3798
3799 if (oldpers->sync_request == NULL &&
3800 pers->sync_request != NULL) {
3801 /* need to add the md_redundancy_group */
3802 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3803 pr_warn("md: cannot register extra attributes for %s\n",
3804 mdname(mddev));
3805 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3806 }
3807 if (oldpers->sync_request != NULL &&
3808 pers->sync_request == NULL) {
3809 /* need to remove the md_redundancy_group */
3810 if (mddev->to_remove == NULL)
3811 mddev->to_remove = &md_redundancy_group;
3812 }
3813
3814 module_put(oldpers->owner);
3815
3816 rdev_for_each(rdev, mddev) {
3817 if (rdev->raid_disk < 0)
3818 continue;
3819 if (rdev->new_raid_disk >= mddev->raid_disks)
3820 rdev->new_raid_disk = -1;
3821 if (rdev->new_raid_disk == rdev->raid_disk)
3822 continue;
3823 sysfs_unlink_rdev(mddev, rdev);
3824 }
3825 rdev_for_each(rdev, mddev) {
3826 if (rdev->raid_disk < 0)
3827 continue;
3828 if (rdev->new_raid_disk == rdev->raid_disk)
3829 continue;
3830 rdev->raid_disk = rdev->new_raid_disk;
3831 if (rdev->raid_disk < 0)
3832 clear_bit(In_sync, &rdev->flags);
3833 else {
3834 if (sysfs_link_rdev(mddev, rdev))
3835 pr_warn("md: cannot register rd%d for %s after level change\n",
3836 rdev->raid_disk, mdname(mddev));
3837 }
3838 }
3839
3840 if (pers->sync_request == NULL) {
3841 /* this is now an array without redundancy, so
3842 * it must always be in_sync
3843 */
3844 mddev->in_sync = 1;
3845 del_timer_sync(&mddev->safemode_timer);
3846 }
3847 blk_set_stacking_limits(&mddev->queue->limits);
3848 pers->run(mddev);
3849 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3850 mddev_resume(mddev);
3851 if (!mddev->thread)
3852 md_update_sb(mddev, 1);
3853 sysfs_notify(&mddev->kobj, NULL, "level");
3854 md_new_event(mddev);
3855 rv = len;
3856out_unlock:
3857 mddev_unlock(mddev);
3858 return rv;
3859}
3860
3861static struct md_sysfs_entry md_level =
3862__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3863
3864static ssize_t
3865layout_show(struct mddev *mddev, char *page)
3866{
3867 /* just a number, not meaningful for all levels */
3868 if (mddev->reshape_position != MaxSector &&
3869 mddev->layout != mddev->new_layout)
3870 return sprintf(page, "%d (%d)\n",
3871 mddev->new_layout, mddev->layout);
3872 return sprintf(page, "%d\n", mddev->layout);
3873}
3874
3875static ssize_t
3876layout_store(struct mddev *mddev, const char *buf, size_t len)
3877{
3878 unsigned int n;
3879 int err;
3880
3881 err = kstrtouint(buf, 10, &n);
3882 if (err < 0)
3883 return err;
3884 err = mddev_lock(mddev);
3885 if (err)
3886 return err;
3887
3888 if (mddev->pers) {
3889 if (mddev->pers->check_reshape == NULL)
3890 err = -EBUSY;
3891 else if (mddev->ro)
3892 err = -EROFS;
3893 else {
3894 mddev->new_layout = n;
3895 err = mddev->pers->check_reshape(mddev);
3896 if (err)
3897 mddev->new_layout = mddev->layout;
3898 }
3899 } else {
3900 mddev->new_layout = n;
3901 if (mddev->reshape_position == MaxSector)
3902 mddev->layout = n;
3903 }
3904 mddev_unlock(mddev);
3905 return err ?: len;
3906}
3907static struct md_sysfs_entry md_layout =
3908__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3909
3910static ssize_t
3911raid_disks_show(struct mddev *mddev, char *page)
3912{
3913 if (mddev->raid_disks == 0)
3914 return 0;
3915 if (mddev->reshape_position != MaxSector &&
3916 mddev->delta_disks != 0)
3917 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3918 mddev->raid_disks - mddev->delta_disks);
3919 return sprintf(page, "%d\n", mddev->raid_disks);
3920}
3921
3922static int update_raid_disks(struct mddev *mddev, int raid_disks);
3923
3924static ssize_t
3925raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3926{
3927 unsigned int n;
3928 int err;
3929
3930 err = kstrtouint(buf, 10, &n);
3931 if (err < 0)
3932 return err;
3933
3934 err = mddev_lock(mddev);
3935 if (err)
3936 return err;
3937 if (mddev->pers)
3938 err = update_raid_disks(mddev, n);
3939 else if (mddev->reshape_position != MaxSector) {
3940 struct md_rdev *rdev;
3941 int olddisks = mddev->raid_disks - mddev->delta_disks;
3942
3943 err = -EINVAL;
3944 rdev_for_each(rdev, mddev) {
3945 if (olddisks < n &&
3946 rdev->data_offset < rdev->new_data_offset)
3947 goto out_unlock;
3948 if (olddisks > n &&
3949 rdev->data_offset > rdev->new_data_offset)
3950 goto out_unlock;
3951 }
3952 err = 0;
3953 mddev->delta_disks = n - olddisks;
3954 mddev->raid_disks = n;
3955 mddev->reshape_backwards = (mddev->delta_disks < 0);
3956 } else
3957 mddev->raid_disks = n;
3958out_unlock:
3959 mddev_unlock(mddev);
3960 return err ? err : len;
3961}
3962static struct md_sysfs_entry md_raid_disks =
3963__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3964
3965static ssize_t
3966chunk_size_show(struct mddev *mddev, char *page)
3967{
3968 if (mddev->reshape_position != MaxSector &&
3969 mddev->chunk_sectors != mddev->new_chunk_sectors)
3970 return sprintf(page, "%d (%d)\n",
3971 mddev->new_chunk_sectors << 9,
3972 mddev->chunk_sectors << 9);
3973 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3974}
3975
3976static ssize_t
3977chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3978{
3979 unsigned long n;
3980 int err;
3981
3982 err = kstrtoul(buf, 10, &n);
3983 if (err < 0)
3984 return err;
3985
3986 err = mddev_lock(mddev);
3987 if (err)
3988 return err;
3989 if (mddev->pers) {
3990 if (mddev->pers->check_reshape == NULL)
3991 err = -EBUSY;
3992 else if (mddev->ro)
3993 err = -EROFS;
3994 else {
3995 mddev->new_chunk_sectors = n >> 9;
3996 err = mddev->pers->check_reshape(mddev);
3997 if (err)
3998 mddev->new_chunk_sectors = mddev->chunk_sectors;
3999 }
4000 } else {
4001 mddev->new_chunk_sectors = n >> 9;
4002 if (mddev->reshape_position == MaxSector)
4003 mddev->chunk_sectors = n >> 9;
4004 }
4005 mddev_unlock(mddev);
4006 return err ?: len;
4007}
4008static struct md_sysfs_entry md_chunk_size =
4009__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4010
4011static ssize_t
4012resync_start_show(struct mddev *mddev, char *page)
4013{
4014 if (mddev->recovery_cp == MaxSector)
4015 return sprintf(page, "none\n");
4016 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4017}
4018
4019static ssize_t
4020resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4021{
4022 unsigned long long n;
4023 int err;
4024
4025 if (cmd_match(buf, "none"))
4026 n = MaxSector;
4027 else {
4028 err = kstrtoull(buf, 10, &n);
4029 if (err < 0)
4030 return err;
4031 if (n != (sector_t)n)
4032 return -EINVAL;
4033 }
4034
4035 err = mddev_lock(mddev);
4036 if (err)
4037 return err;
4038 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4039 err = -EBUSY;
4040
4041 if (!err) {
4042 mddev->recovery_cp = n;
4043 if (mddev->pers)
4044 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4045 }
4046 mddev_unlock(mddev);
4047 return err ?: len;
4048}
4049static struct md_sysfs_entry md_resync_start =
4050__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4051 resync_start_show, resync_start_store);
4052
4053/*
4054 * The array state can be:
4055 *
4056 * clear
4057 *     No devices, no size, no level
4058 *     Equivalent to STOP_ARRAY ioctl
4059 * inactive
4060 *     May have some settings, but array is not active
4061 *        all IO results in error
4062 *     When written, doesn't tear down array, but just stops it
4063 * suspended (not supported yet)
4064 *     All IO requests will block. The array can be reconfigured.
4065 *     Writing this, if accepted, will block until array is quiescent
4066 * readonly
4067 *     no resync can happen.  no superblocks get written.
4068 *     write requests fail
4069 * read-auto
4070 *     like readonly, but behaves like 'clean' on a write request.
4071 *
4072 * clean - no pending writes, but otherwise active.
4073 *     When written to inactive array, starts without resync
4074 *     If a write request arrives then
4075 *       if metadata is known, mark 'dirty' and switch to 'active'.
4076 *       if not known, block and switch to write-pending
4077 *     If written to an active array that has pending writes, then fails.
4078 * active
4079 *     fully active: IO and resync can be happening.
4080 *     When written to inactive array, starts with resync
4081 *
4082 * write-pending
4083 *     clean, but writes are blocked waiting for 'active' to be written.
4084 *
4085 * active-idle
4086 *     like active, but no writes have been seen for a while (100msec).
4087 */
4088
4089enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4090 write_pending, active_idle, bad_word};
4091static char *array_states[] = {
4092 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4093 "write-pending", "active-idle", NULL };
4094
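/* Return the index of the first matching word, or the length of the
 * list (== bad_word for array_states) if nothing matches. */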
4095static int match_word(const char *word, char **list)
4096{
4097 int n;
4098 for (n=0; list[n]; n++)
4099 if (cmd_match(word, list[n]))
4100 break;
4101 return n;
4102}
4103
4104static ssize_t
4105array_state_show(struct mddev *mddev, char *page)
4106{
4107 enum array_state st = inactive;
4108
4109 if (mddev->pers)
4110 switch(mddev->ro) {
4111 case 1:
4112 st = readonly;
4113 break;
4114 case 2:
4115 st = read_auto;
4116 break;
4117 case 0:
4118 spin_lock(&mddev->lock);
4119 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4120 st = write_pending;
4121 else if (mddev->in_sync)
4122 st = clean;
4123 else if (mddev->safemode)
4124 st = active_idle;
4125 else
4126 st = active;
4127 spin_unlock(&mddev->lock);
4128 }
4129 else {
4130 if (list_empty(&mddev->disks) &&
4131 mddev->raid_disks == 0 &&
4132 mddev->dev_sectors == 0)
4133 st = clear;
4134 else
4135 st = inactive;
4136 }
4137 return sprintf(page, "%s\n", array_states[st]);
4138}
4139
4140static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4141static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4142static int do_md_run(struct mddev *mddev);
4143static int restart_array(struct mddev *mddev);
4144
4145static ssize_t
4146array_state_store(struct mddev *mddev, const char *buf, size_t len)
4147{
4148 int err = 0;
4149 enum array_state st = match_word(buf, array_states);
4150
4151 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4152 /* don't take reconfig_mutex when toggling between
4153 * clean and active
4154 */
4155 spin_lock(&mddev->lock);
4156 if (st == active) {
4157 restart_array(mddev);
4158 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4159 md_wakeup_thread(mddev->thread);
4160 wake_up(&mddev->sb_wait);
4161 } else {
4162 restart_array(mddev);
4163 if (!set_in_sync(mddev))
4164 err = -EBUSY;
4165 }
4166 if (!err)
4167 sysfs_notify_dirent_safe(mddev->sysfs_state);
4168 spin_unlock(&mddev->lock);
4169 return err ?: len;
4170 }
4171 err = mddev_lock(mddev);
4172 if (err)
4173 return err;
4174 err = -EINVAL;
4175 switch(st) {
4176 case bad_word:
4177 break;
4178 case clear:
4179 /* stopping an active array */
4180 err = do_md_stop(mddev, 0, NULL);
4181 break;
4182 case inactive:
4183 /* stopping an active array */
4184 if (mddev->pers)
4185 err = do_md_stop(mddev, 2, NULL);
4186 else
4187 err = 0; /* already inactive */
4188 break;
4189 case suspended:
4190 break;
4191 case readonly:
4192 if (mddev->pers)
4193 err = md_set_readonly(mddev, NULL);
4194 else {
4195 mddev->ro = 1;
4196 set_disk_ro(mddev->gendisk, 1);
4197 err = do_md_run(mddev);
4198 }
4199 break;
4200 case read_auto:
4201 if (mddev->pers) {
4202 if (mddev->ro == 0)
4203 err = md_set_readonly(mddev, NULL);
4204 else if (mddev->ro == 1)
4205 err = restart_array(mddev);
4206 if (err == 0) {
4207 mddev->ro = 2;
4208 set_disk_ro(mddev->gendisk, 0);
4209 }
4210 } else {
4211 mddev->ro = 2;
4212 err = do_md_run(mddev);
4213 }
4214 break;
4215 case clean:
4216 if (mddev->pers) {
4217 err = restart_array(mddev);
4218 if (err)
4219 break;
4220 spin_lock(&mddev->lock);
4221 if (!set_in_sync(mddev))
4222 err = -EBUSY;
4223 spin_unlock(&mddev->lock);
4224 } else
4225 err = -EINVAL;
4226 break;
4227 case active:
4228 if (mddev->pers) {
4229 err = restart_array(mddev);
4230 if (err)
4231 break;
4232 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4233 wake_up(&mddev->sb_wait);
4234 err = 0;
4235 } else {
4236 mddev->ro = 0;
4237 set_disk_ro(mddev->gendisk, 0);
4238 err = do_md_run(mddev);
4239 }
4240 break;
4241 case write_pending:
4242 case active_idle:
4243 /* these cannot be set */
4244 break;
4245 }
4246
4247 if (!err) {
4248 if (mddev->hold_active == UNTIL_IOCTL)
4249 mddev->hold_active = 0;
4250 sysfs_notify_dirent_safe(mddev->sysfs_state);
4251 }
4252 mddev_unlock(mddev);
4253 return err ?: len;
4254}
4255static struct md_sysfs_entry md_array_state =
4256__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4257
4258static ssize_t
4259max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4260 return sprintf(page, "%d\n",
4261 atomic_read(&mddev->max_corr_read_errors));
4262}
4263
4264static ssize_t
4265max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4266{
4267 unsigned int n;
4268 int rv;
4269
4270 rv = kstrtouint(buf, 10, &n);
4271 if (rv < 0)
4272 return rv;
4273 atomic_set(&mddev->max_corr_read_errors, n);
4274 return len;
4275}
4276
4277static struct md_sysfs_entry max_corr_read_errors =
4278__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4279 max_corrected_read_errors_store);
4280
4281static ssize_t
4282null_show(struct mddev *mddev, char *page)
4283{
4284 return -EINVAL;
4285}
4286
4287static ssize_t
4288new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4289{
4290 /* buf must be %d:%d\n? giving major and minor numbers */
4291 /* The new device is added to the array.
4292 * If the array has a persistent superblock, we read the
4293 * superblock to initialise info and check validity.
4294 * Otherwise, only checking done is that in bind_rdev_to_array,
4295 * which mainly checks size.
4296 */
4297 char *e;
4298 int major = simple_strtoul(buf, &e, 10);
4299 int minor;
4300 dev_t dev;
4301 struct md_rdev *rdev;
4302 int err;
4303
4304 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4305 return -EINVAL;
4306 minor = simple_strtoul(e+1, &e, 10);
4307 if (*e && *e != '\n')
4308 return -EINVAL;
4309 dev = MKDEV(major, minor);
4310 if (major != MAJOR(dev) ||
4311 minor != MINOR(dev))
4312 return -EOVERFLOW;
4313
4314 flush_workqueue(md_misc_wq);
4315
4316 err = mddev_lock(mddev);
4317 if (err)
4318 return err;
4319 if (mddev->persistent) {
4320 rdev = md_import_device(dev, mddev->major_version,
4321 mddev->minor_version);
4322 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4323 struct md_rdev *rdev0
4324 = list_entry(mddev->disks.next,
4325 struct md_rdev, same_set);
4326 err = super_types[mddev->major_version]
4327 .load_super(rdev, rdev0, mddev->minor_version);
4328 if (err < 0)
4329 goto out;
4330 }
4331 } else if (mddev->external)
4332 rdev = md_import_device(dev, -2, -1);
4333 else
4334 rdev = md_import_device(dev, -1, -1);
4335
4336 if (IS_ERR(rdev)) {
4337 mddev_unlock(mddev);
4338 return PTR_ERR(rdev);
4339 }
4340 err = bind_rdev_to_array(rdev, mddev);
4341 out:
4342 if (err)
4343 export_rdev(rdev);
4344 mddev_unlock(mddev);
4345 if (!err)
4346 md_new_event(mddev);
4347 return err ? err : len;
4348}
4349
4350static struct md_sysfs_entry md_new_device =
4351__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4352
4353static ssize_t
4354bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4355{
4356 char *end;
4357 unsigned long chunk, end_chunk;
4358 int err;
4359
4360 err = mddev_lock(mddev);
4361 if (err)
4362 return err;
4363 if (!mddev->bitmap)
4364 goto out;
4365 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
4366 while (*buf) {
4367 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4368 if (buf == end) break;
4369 if (*end == '-') {
4370 buf = end + 1;
4371 end_chunk = simple_strtoul(buf, &end, 0);
4372 if (buf == end) break;
4373 }
4374 if (*end && !isspace(*end)) break;
4375 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4376 buf = skip_spaces(end);
4377 }
4378 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
4379out:
4380 mddev_unlock(mddev);
4381 return len;
4382}
4383
4384static struct md_sysfs_entry md_bitmap =
4385__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4386
4387static ssize_t
4388size_show(struct mddev *mddev, char *page)
4389{
4390 return sprintf(page, "%llu\n",
4391 (unsigned long long)mddev->dev_sectors / 2);
4392}
4393
4394static int update_size(struct mddev *mddev, sector_t num_sectors);
4395
4396static ssize_t
4397size_store(struct mddev *mddev, const char *buf, size_t len)
4398{
4399 /* If array is inactive, we can reduce the component size, but
4400 * not increase it (except from 0).
4401 * If array is active, we can try an on-line resize
4402 */
4403 sector_t sectors;
4404 int err = strict_blocks_to_sectors(buf, &sectors);
4405
4406 if (err < 0)
4407 return err;
4408 err = mddev_lock(mddev);
4409 if (err)
4410 return err;
4411 if (mddev->pers) {
4412 err = update_size(mddev, sectors);
4413 if (err == 0)
4414 md_update_sb(mddev, 1);
4415 } else {
4416 if (mddev->dev_sectors == 0 ||
4417 mddev->dev_sectors > sectors)
4418 mddev->dev_sectors = sectors;
4419 else
4420 err = -ENOSPC;
4421 }
4422 mddev_unlock(mddev);
4423 return err ? err : len;
4424}
4425
4426static struct md_sysfs_entry md_size =
4427__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4428
4429/* Metadata version.
4430 * This is one of
4431 *   'none' for arrays with no metadata (good luck...)
4432 *   'external' for arrays with externally managed metadata,
4433 * or N.M for internally known formats
4434 */
4435static ssize_t
4436metadata_show(struct mddev *mddev, char *page)
4437{
4438 if (mddev->persistent)
4439 return sprintf(page, "%d.%d\n",
4440 mddev->major_version, mddev->minor_version);
4441 else if (mddev->external)
4442 return sprintf(page, "external:%s\n", mddev->metadata_type);
4443 else
4444 return sprintf(page, "none\n");
4445}
4446
4447static ssize_t
4448metadata_store(struct mddev *mddev, const char *buf, size_t len)
4449{
4450 int major, minor;
4451 char *e;
4452 int err;
4453
4454 /* Changing the details of 'external' metadata is
4455 * always permitted.  Otherwise there must be
4456 * no devices attached to the array.
4457 */
4458 err = mddev_lock(mddev);
4459 if (err)
4460 return err;
4461 err = -EBUSY;
4462 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4463 ;
4464 else if (!list_empty(&mddev->disks))
4465 goto out_unlock;
4466
4467 err = 0;
4468 if (cmd_match(buf, "none")) {
4469 mddev->persistent = 0;
4470 mddev->external = 0;
4471 mddev->major_version = 0;
4472 mddev->minor_version = 90;
4473 goto out_unlock;
4474 }
4475 if (strncmp(buf, "external:", 9) == 0) {
4476 size_t namelen = len-9;
4477 if (namelen >= sizeof(mddev->metadata_type))
4478 namelen = sizeof(mddev->metadata_type)-1;
4479 strncpy(mddev->metadata_type, buf+9, namelen);
4480 mddev->metadata_type[namelen] = 0;
4481 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4482 mddev->metadata_type[--namelen] = 0;
4483 mddev->persistent = 0;
4484 mddev->external = 1;
4485 mddev->major_version = 0;
4486 mddev->minor_version = 90;
4487 goto out_unlock;
4488 }
4489 major = simple_strtoul(buf, &e, 10);
4490 err = -EINVAL;
4491 if (e==buf || *e != '.')
4492 goto out_unlock;
4493 buf = e+1;
4494 minor = simple_strtoul(buf, &e, 10);
4495 if (e==buf || (*e && *e != '\n') )
4496 goto out_unlock;
4497 err = -ENOENT;
4498 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4499 goto out_unlock;
4500 mddev->major_version = major;
4501 mddev->minor_version = minor;
4502 mddev->persistent = 1;
4503 mddev->external = 0;
4504 err = 0;
4505out_unlock:
4506 mddev_unlock(mddev);
4507 return err ?: len;
4508}
4509
4510static struct md_sysfs_entry md_metadata =
4511__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4512
4513static ssize_t
4514action_show(struct mddev *mddev, char *page)
4515{
4516 char *type = "idle";
4517 unsigned long recovery = mddev->recovery;
4518 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4519 type = "frozen";
4520 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4521 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4522 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4523 type = "reshape";
4524 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4525 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4526 type = "resync";
4527 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4528 type = "check";
4529 else
4530 type = "repair";
4531 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4532 type = "recover";
4533 else if (mddev->reshape_position != MaxSector)
4534 type = "reshape";
4535 }
4536 return sprintf(page, "%s\n", type);
4537}
4538
4539static ssize_t
4540action_store(struct mddev *mddev, const char *page, size_t len)
4541{
4542 if (!mddev->pers || !mddev->pers->sync_request)
4543 return -EINVAL;
4544
4545
4546 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4547 if (cmd_match(page, "frozen"))
4548 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4549 else
4550 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4551 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4552 mddev_lock(mddev) == 0) {
4553 flush_workqueue(md_misc_wq);
4554 if (mddev->sync_thread) {
4555 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4556 md_reap_sync_thread(mddev);
4557 }
4558 mddev_unlock(mddev);
4559 }
4560 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4561 return -EBUSY;
4562 else if (cmd_match(page, "resync"))
4563 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4564 else if (cmd_match(page, "recover")) {
4565 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4566 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4567 } else if (cmd_match(page, "reshape")) {
4568 int err;
4569 if (mddev->pers->start_reshape == NULL)
4570 return -EINVAL;
4571 err = mddev_lock(mddev);
4572 if (!err) {
4573 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4574 err = -EBUSY;
4575 else {
4576 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4577 err = mddev->pers->start_reshape(mddev);
4578 }
4579 mddev_unlock(mddev);
4580 }
4581 if (err)
4582 return err;
4583 sysfs_notify(&mddev->kobj, NULL, "degraded");
4584 } else {
4585 if (cmd_match(page, "check"))
4586 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4587 else if (!cmd_match(page, "repair"))
4588 return -EINVAL;
4589 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4590 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4591 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4592 }
4593 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4597 mddev->ro = 0;
4598 md_wakeup_thread(mddev->sync_thread);
4599 }
4600 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4601 md_wakeup_thread(mddev->thread);
4602 sysfs_notify_dirent_safe(mddev->sysfs_action);
4603 return len;
4604}
4605
4606static struct md_sysfs_entry md_scan_mode =
4607__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
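
/*
 * Sketch of driving a scrub through sync_action (md0 assumed):
 *
 *   echo check > /sys/block/md0/md/sync_action    # read and compare only
 *   echo repair > /sys/block/md0/md/sync_action   # compare and rewrite
 *   echo idle > /sys/block/md0/md/sync_action     # interrupt a running sync
 *   cat /sys/block/md0/md/sync_action
 */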
4608
4609static ssize_t
4610last_sync_action_show(struct mddev *mddev, char *page)
4611{
4612 return sprintf(page, "%s\n", mddev->last_sync_action);
4613}
4614
4615static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4616
4617static ssize_t
4618mismatch_cnt_show(struct mddev *mddev, char *page)
4619{
4620 return sprintf(page, "%llu\n",
4621 (unsigned long long)
4622 atomic64_read(&mddev->resync_mismatches));
4623}
4624
4625static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4626
4627static ssize_t
4628sync_min_show(struct mddev *mddev, char *page)
4629{
4630 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4631 mddev->sync_speed_min ? "local": "system");
4632}
4633
4634static ssize_t
4635sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4636{
4637 unsigned int min;
4638 int rv;
4639
4640 if (strncmp(buf, "system", 6)==0) {
4641 min = 0;
4642 } else {
4643 rv = kstrtouint(buf, 10, &min);
4644 if (rv < 0)
4645 return rv;
4646 if (min == 0)
4647 return -EINVAL;
4648 }
4649 mddev->sync_speed_min = min;
4650 return len;
4651}
4652
4653static struct md_sysfs_entry md_sync_min =
4654__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4655
4656static ssize_t
4657sync_max_show(struct mddev *mddev, char *page)
4658{
4659 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4660 mddev->sync_speed_max ? "local": "system");
4661}
4662
4663static ssize_t
4664sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4665{
4666 unsigned int max;
4667 int rv;
4668
4669 if (strncmp(buf, "system", 6)==0) {
4670 max = 0;
4671 } else {
4672 rv = kstrtouint(buf, 10, &max);
4673 if (rv < 0)
4674 return rv;
4675 if (max == 0)
4676 return -EINVAL;
4677 }
4678 mddev->sync_speed_max = max;
4679 return len;
4680}
4681
4682static struct md_sysfs_entry md_sync_max =
4683__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
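
/*
 * Example (values illustrative): these entries override the global
 * /proc/sys/dev/raid/speed_limit_{min,max} for a single array:
 *
 *   echo 50000 > /sys/block/md0/md/sync_speed_max   # cap at ~50 MB/s
 *   echo system > /sys/block/md0/md/sync_speed_max  # back to the global limit
 */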
4684
4685static ssize_t
4686degraded_show(struct mddev *mddev, char *page)
4687{
4688 return sprintf(page, "%d\n", mddev->degraded);
4689}
4690static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4691
4692static ssize_t
4693sync_force_parallel_show(struct mddev *mddev, char *page)
4694{
4695 return sprintf(page, "%d\n", mddev->parallel_resync);
4696}
4697
4698static ssize_t
4699sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4700{
4701 long n;
4702
4703 if (kstrtol(buf, 10, &n))
4704 return -EINVAL;
4705
4706 if (n != 0 && n != 1)
4707 return -EINVAL;
4708
4709 mddev->parallel_resync = n;
4710
4711 if (mddev->sync_thread)
4712 wake_up(&resync_wait);
4713
4714 return len;
4715}
4716
4717
4718static struct md_sysfs_entry md_sync_force_parallel =
4719__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4720 sync_force_parallel_show, sync_force_parallel_store);
4721
4722static ssize_t
4723sync_speed_show(struct mddev *mddev, char *page)
4724{
4725 unsigned long resync, dt, db;
4726 if (mddev->curr_resync == 0)
4727 return sprintf(page, "none\n");
4728 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4729 dt = (jiffies - mddev->resync_mark) / HZ;
4730 if (!dt) dt++;
4731 db = resync - mddev->resync_mark_cnt;
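	/* db is sectors completed since the last mark, dt is seconds;
	 * dividing by 2 converts 512-byte sectors to KiB/s, matching
	 * the units of sync_speed_{min,max}. */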
4732 return sprintf(page, "%lu\n", db/dt/2);
4733}
4734
4735static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4736
4737static ssize_t
4738sync_completed_show(struct mddev *mddev, char *page)
4739{
4740 unsigned long long max_sectors, resync;
4741
4742 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4743 return sprintf(page, "none\n");
4744
4745 if (mddev->curr_resync == 1 ||
4746 mddev->curr_resync == 2)
4747 return sprintf(page, "delayed\n");
4748
4749 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4750 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4751 max_sectors = mddev->resync_max_sectors;
4752 else
4753 max_sectors = mddev->dev_sectors;
4754
4755 resync = mddev->curr_resync_completed;
4756 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4757}
4758
4759static struct md_sysfs_entry md_sync_completed =
4760 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4761
4762static ssize_t
4763min_sync_show(struct mddev *mddev, char *page)
4764{
4765 return sprintf(page, "%llu\n",
4766 (unsigned long long)mddev->resync_min);
4767}
4768static ssize_t
4769min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4770{
4771 unsigned long long min;
4772 int err;
4773
4774 if (kstrtoull(buf, 10, &min))
4775 return -EINVAL;
4776
4777 spin_lock(&mddev->lock);
4778 err = -EINVAL;
4779 if (min > mddev->resync_max)
4780 goto out_unlock;
4781
4782 err = -EBUSY;
4783 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4784 goto out_unlock;
4785
	/* Round down to multiple of 4K for safety */
4787 mddev->resync_min = round_down(min, 8);
4788 err = 0;
4789
4790out_unlock:
4791 spin_unlock(&mddev->lock);
4792 return err ?: len;
4793}
4794
4795static struct md_sysfs_entry md_min_sync =
4796__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4797
4798static ssize_t
4799max_sync_show(struct mddev *mddev, char *page)
4800{
4801 if (mddev->resync_max == MaxSector)
4802 return sprintf(page, "max\n");
4803 else
4804 return sprintf(page, "%llu\n",
4805 (unsigned long long)mddev->resync_max);
4806}
4807static ssize_t
4808max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4809{
4810 int err;
4811 spin_lock(&mddev->lock);
4812 if (strncmp(buf, "max", 3) == 0)
4813 mddev->resync_max = MaxSector;
4814 else {
4815 unsigned long long max;
4816 int chunk;
4817
4818 err = -EINVAL;
4819 if (kstrtoull(buf, 10, &max))
4820 goto out_unlock;
4821 if (max < mddev->resync_min)
4822 goto out_unlock;
4823
4824 err = -EBUSY;
4825 if (max < mddev->resync_max &&
4826 mddev->ro == 0 &&
4827 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4828 goto out_unlock;
4829
		/* Must be a multiple of chunk_size */
4831 chunk = mddev->chunk_sectors;
4832 if (chunk) {
4833 sector_t temp = max;
4834
4835 err = -EINVAL;
4836 if (sector_div(temp, chunk))
4837 goto out_unlock;
4838 }
4839 mddev->resync_max = max;
4840 }
4841 wake_up(&mddev->recovery_wait);
4842 err = 0;
4843out_unlock:
4844 spin_unlock(&mddev->lock);
4845 return err ?: len;
4846}
4847
4848static struct md_sysfs_entry md_max_sync =
4849__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
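
/*
 * Illustrative use of sync_min/sync_max to scrub only a sector window
 * (sync_max must be a multiple of the chunk size, as checked above):
 *
 *   echo 0 > /sys/block/md0/md/sync_min
 *   echo 20971520 > /sys/block/md0/md/sync_max    # first 10 GiB only
 *   echo check > /sys/block/md0/md/sync_action
 */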
4850
4851static ssize_t
4852suspend_lo_show(struct mddev *mddev, char *page)
4853{
4854 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4855}
4856
4857static ssize_t
4858suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4859{
4860 unsigned long long new;
4861 int err;
4862
4863 err = kstrtoull(buf, 10, &new);
4864 if (err < 0)
4865 return err;
4866 if (new != (sector_t)new)
4867 return -EINVAL;
4868
4869 err = mddev_lock(mddev);
4870 if (err)
4871 return err;
4872 err = -EINVAL;
4873 if (mddev->pers == NULL ||
4874 mddev->pers->quiesce == NULL)
4875 goto unlock;
4876 mddev_suspend(mddev);
4877 mddev->suspend_lo = new;
4878 mddev_resume(mddev);
4879
4880 err = 0;
4881unlock:
4882 mddev_unlock(mddev);
4883 return err ?: len;
4884}
4885static struct md_sysfs_entry md_suspend_lo =
4886__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4887
4888static ssize_t
4889suspend_hi_show(struct mddev *mddev, char *page)
4890{
4891 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4892}
4893
4894static ssize_t
4895suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4896{
4897 unsigned long long new;
4898 int err;
4899
4900 err = kstrtoull(buf, 10, &new);
4901 if (err < 0)
4902 return err;
4903 if (new != (sector_t)new)
4904 return -EINVAL;
4905
4906 err = mddev_lock(mddev);
4907 if (err)
4908 return err;
4909 err = -EINVAL;
4910 if (mddev->pers == NULL)
4911 goto unlock;
4912
4913 mddev_suspend(mddev);
4914 mddev->suspend_hi = new;
4915 mddev_resume(mddev);
4916
4917 err = 0;
4918unlock:
4919 mddev_unlock(mddev);
4920 return err ?: len;
4921}
4922static struct md_sysfs_entry md_suspend_hi =
4923__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
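
/*
 * Sketch (bounds illustrative): external metadata managers can hold off
 * I/O to the sector range [suspend_lo, suspend_hi) while they relocate
 * data, then clear the window again:
 *
 *   echo 0 > /sys/block/md0/md/suspend_lo
 *   echo 204800 > /sys/block/md0/md/suspend_hi
 *   ...move data...
 *   echo 0 > /sys/block/md0/md/suspend_hi
 */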
4924
4925static ssize_t
4926reshape_position_show(struct mddev *mddev, char *page)
4927{
4928 if (mddev->reshape_position != MaxSector)
4929 return sprintf(page, "%llu\n",
4930 (unsigned long long)mddev->reshape_position);
4931 strcpy(page, "none\n");
4932 return 5;
4933}
4934
4935static ssize_t
4936reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4937{
4938 struct md_rdev *rdev;
4939 unsigned long long new;
4940 int err;
4941
4942 err = kstrtoull(buf, 10, &new);
4943 if (err < 0)
4944 return err;
4945 if (new != (sector_t)new)
4946 return -EINVAL;
4947 err = mddev_lock(mddev);
4948 if (err)
4949 return err;
4950 err = -EBUSY;
4951 if (mddev->pers)
4952 goto unlock;
4953 mddev->reshape_position = new;
4954 mddev->delta_disks = 0;
4955 mddev->reshape_backwards = 0;
4956 mddev->new_level = mddev->level;
4957 mddev->new_layout = mddev->layout;
4958 mddev->new_chunk_sectors = mddev->chunk_sectors;
4959 rdev_for_each(rdev, mddev)
4960 rdev->new_data_offset = rdev->data_offset;
4961 err = 0;
4962unlock:
4963 mddev_unlock(mddev);
4964 return err ?: len;
4965}
4966
4967static struct md_sysfs_entry md_reshape_position =
4968__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4969 reshape_position_store);
4970
4971static ssize_t
4972reshape_direction_show(struct mddev *mddev, char *page)
4973{
4974 return sprintf(page, "%s\n",
4975 mddev->reshape_backwards ? "backwards" : "forwards");
4976}
4977
4978static ssize_t
4979reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4980{
4981 int backwards = 0;
4982 int err;
4983
4984 if (cmd_match(buf, "forwards"))
4985 backwards = 0;
4986 else if (cmd_match(buf, "backwards"))
4987 backwards = 1;
4988 else
4989 return -EINVAL;
4990 if (mddev->reshape_backwards == backwards)
4991 return len;
4992
4993 err = mddev_lock(mddev);
4994 if (err)
4995 return err;
4996
4997 if (mddev->delta_disks)
4998 err = -EBUSY;
4999 else if (mddev->persistent &&
5000 mddev->major_version == 0)
5001 err = -EINVAL;
5002 else
5003 mddev->reshape_backwards = backwards;
5004 mddev_unlock(mddev);
5005 return err ?: len;
5006}
5007
5008static struct md_sysfs_entry md_reshape_direction =
5009__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5010 reshape_direction_store);
5011
5012static ssize_t
5013array_size_show(struct mddev *mddev, char *page)
5014{
5015 if (mddev->external_size)
5016 return sprintf(page, "%llu\n",
5017 (unsigned long long)mddev->array_sectors/2);
5018 else
5019 return sprintf(page, "default\n");
5020}
5021
5022static ssize_t
5023array_size_store(struct mddev *mddev, const char *buf, size_t len)
5024{
5025 sector_t sectors;
5026 int err;
5027
5028 err = mddev_lock(mddev);
5029 if (err)
5030 return err;
5031
	/* cluster raid doesn't support change array_sectors */
5033 if (mddev_is_clustered(mddev)) {
5034 mddev_unlock(mddev);
5035 return -EINVAL;
5036 }
5037
5038 if (strncmp(buf, "default", 7) == 0) {
5039 if (mddev->pers)
5040 sectors = mddev->pers->size(mddev, 0, 0);
5041 else
5042 sectors = mddev->array_sectors;
5043
5044 mddev->external_size = 0;
5045 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5047 err = -EINVAL;
5048 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5049 err = -E2BIG;
5050 else
5051 mddev->external_size = 1;
5052 }
5053
5054 if (!err) {
5055 mddev->array_sectors = sectors;
5056 if (mddev->pers) {
5057 set_capacity(mddev->gendisk, mddev->array_sectors);
5058 revalidate_disk(mddev->gendisk);
5059 }
5060 }
5061 mddev_unlock(mddev);
5062 return err ?: len;
5063}
5064
5065static struct md_sysfs_entry md_array_size =
5066__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5067 array_size_store);
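
/*
 * Example (size illustrative): clamp the exported array size below the
 * personality's default, or return to the computed default:
 *
 *   echo 10485760 > /sys/block/md0/md/array_size   # 10 GiB, in KiB
 *   echo default > /sys/block/md0/md/array_size
 */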
5068
5069static ssize_t
5070consistency_policy_show(struct mddev *mddev, char *page)
5071{
5072 int ret;
5073
5074 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5075 ret = sprintf(page, "journal\n");
5076 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5077 ret = sprintf(page, "ppl\n");
5078 } else if (mddev->bitmap) {
5079 ret = sprintf(page, "bitmap\n");
5080 } else if (mddev->pers) {
5081 if (mddev->pers->sync_request)
5082 ret = sprintf(page, "resync\n");
5083 else
5084 ret = sprintf(page, "none\n");
5085 } else {
5086 ret = sprintf(page, "unknown\n");
5087 }
5088
5089 return ret;
5090}
5091
5092static ssize_t
5093consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5094{
5095 int err = 0;
5096
5097 if (mddev->pers) {
5098 if (mddev->pers->change_consistency_policy)
5099 err = mddev->pers->change_consistency_policy(mddev, buf);
5100 else
5101 err = -EBUSY;
5102 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5103 set_bit(MD_HAS_PPL, &mddev->flags);
5104 } else {
5105 err = -EINVAL;
5106 }
5107
5108 return err ? err : len;
5109}
5110
5111static struct md_sysfs_entry md_consistency_policy =
5112__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5113 consistency_policy_store);
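
/*
 * Example: switching a running array away from full resync, assuming its
 * personality implements change_consistency_policy (raid5 supports ppl):
 *
 *   cat /sys/block/md0/md/consistency_policy     # e.g. "resync"
 *   echo ppl > /sys/block/md0/md/consistency_policy
 */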
5114
5115static struct attribute *md_default_attrs[] = {
5116 &md_level.attr,
5117 &md_layout.attr,
5118 &md_raid_disks.attr,
5119 &md_chunk_size.attr,
5120 &md_size.attr,
5121 &md_resync_start.attr,
5122 &md_metadata.attr,
5123 &md_new_device.attr,
5124 &md_safe_delay.attr,
5125 &md_array_state.attr,
5126 &md_reshape_position.attr,
5127 &md_reshape_direction.attr,
5128 &md_array_size.attr,
5129 &max_corr_read_errors.attr,
5130 &md_consistency_policy.attr,
5131 NULL,
5132};
5133
5134static struct attribute *md_redundancy_attrs[] = {
5135 &md_scan_mode.attr,
5136 &md_last_scan_mode.attr,
5137 &md_mismatches.attr,
5138 &md_sync_min.attr,
5139 &md_sync_max.attr,
5140 &md_sync_speed.attr,
5141 &md_sync_force_parallel.attr,
5142 &md_sync_completed.attr,
5143 &md_min_sync.attr,
5144 &md_max_sync.attr,
5145 &md_suspend_lo.attr,
5146 &md_suspend_hi.attr,
5147 &md_bitmap.attr,
5148 &md_degraded.attr,
5149 NULL,
5150};
5151static struct attribute_group md_redundancy_group = {
5152 .name = NULL,
5153 .attrs = md_redundancy_attrs,
5154};
5155
5156static ssize_t
5157md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5158{
5159 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5160 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5161 ssize_t rv;
5162
5163 if (!entry->show)
5164 return -EIO;
5165 spin_lock(&all_mddevs_lock);
5166 if (list_empty(&mddev->all_mddevs)) {
5167 spin_unlock(&all_mddevs_lock);
5168 return -EBUSY;
5169 }
5170 mddev_get(mddev);
5171 spin_unlock(&all_mddevs_lock);
5172
5173 rv = entry->show(mddev, page);
5174 mddev_put(mddev);
5175 return rv;
5176}
5177
5178static ssize_t
5179md_attr_store(struct kobject *kobj, struct attribute *attr,
5180 const char *page, size_t length)
5181{
5182 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5183 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5184 ssize_t rv;
5185
5186 if (!entry->store)
5187 return -EIO;
5188 if (!capable(CAP_SYS_ADMIN))
5189 return -EACCES;
5190 spin_lock(&all_mddevs_lock);
5191 if (list_empty(&mddev->all_mddevs)) {
5192 spin_unlock(&all_mddevs_lock);
5193 return -EBUSY;
5194 }
5195 mddev_get(mddev);
5196 spin_unlock(&all_mddevs_lock);
5197 rv = entry->store(mddev, page, length);
5198 mddev_put(mddev);
5199 return rv;
5200}
5201
5202static void md_free(struct kobject *ko)
5203{
5204 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5205
5206 if (mddev->sysfs_state)
5207 sysfs_put(mddev->sysfs_state);
5208
5209 if (mddev->gendisk)
5210 del_gendisk(mddev->gendisk);
5211 if (mddev->queue)
5212 blk_cleanup_queue(mddev->queue);
5213 if (mddev->gendisk)
5214 put_disk(mddev->gendisk);
5215 percpu_ref_exit(&mddev->writes_pending);
5216
5217 kfree(mddev);
5218}
5219
5220static const struct sysfs_ops md_sysfs_ops = {
5221 .show = md_attr_show,
5222 .store = md_attr_store,
5223};
5224static struct kobj_type md_ktype = {
5225 .release = md_free,
5226 .sysfs_ops = &md_sysfs_ops,
5227 .default_attrs = md_default_attrs,
5228};
5229
5230int mdp_major = 0;
5231
5232static void mddev_delayed_delete(struct work_struct *ws)
5233{
5234 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5235
5236 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5237 kobject_del(&mddev->kobj);
5238 kobject_put(&mddev->kobj);
5239}
5240
5241static void no_op(struct percpu_ref *r) {}
5242
5243int mddev_init_writes_pending(struct mddev *mddev)
5244{
5245 if (mddev->writes_pending.percpu_count_ptr)
5246 return 0;
5247 if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
5248 return -ENOMEM;
5249
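	/* percpu_ref_init() starts the count at 1; drop that initial
	 * reference so writes_pending tracks only in-flight writes. */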
5250 percpu_ref_put(&mddev->writes_pending);
5251 return 0;
5252}
5253EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5254
5255static int md_alloc(dev_t dev, char *name)
5256{
	/*
	 * If dev is zero, name is the name of a device to allocate with
	 * an arbitrary minor number.  It will be "md_???"
	 * If dev is non-zero it must be a device number with a MAJOR of
	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
	 * the device is being created by opening a node in /dev.
	 * If "name" is not NULL, the device is being created by
	 * writing to /sys/module/md_mod/parameters/new_array.
	 */
5266 static DEFINE_MUTEX(disks_mutex);
5267 struct mddev *mddev = mddev_find(dev);
5268 struct gendisk *disk;
5269 int partitioned;
5270 int shift;
5271 int unit;
5272 int error;
5273
5274 if (!mddev)
5275 return -ENODEV;
5276
5277 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5278 shift = partitioned ? MdpMinorShift : 0;
5279 unit = MINOR(mddev->unit) >> shift;
5280
	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
5284 flush_workqueue(md_misc_wq);
5285
5286 mutex_lock(&disks_mutex);
5287 error = -EEXIST;
5288 if (mddev->gendisk)
5289 goto abort;
5290
5291 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5294 struct mddev *mddev2;
5295 spin_lock(&all_mddevs_lock);
5296
5297 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5298 if (mddev2->gendisk &&
5299 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5300 spin_unlock(&all_mddevs_lock);
5301 goto abort;
5302 }
5303 spin_unlock(&all_mddevs_lock);
5304 }
5305 if (name && dev)
		/*
		 * Creating /dev/mdNNN via "newarray", so adjust hold_active.
		 */
5309 mddev->hold_active = UNTIL_STOP;
5310
5311 error = -ENOMEM;
5312 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5313 if (!mddev->queue)
5314 goto abort;
5315 mddev->queue->queuedata = mddev;
5316
5317 blk_queue_make_request(mddev->queue, md_make_request);
5318 blk_set_stacking_limits(&mddev->queue->limits);
5319
5320 disk = alloc_disk(1 << shift);
5321 if (!disk) {
5322 blk_cleanup_queue(mddev->queue);
5323 mddev->queue = NULL;
5324 goto abort;
5325 }
5326 disk->major = MAJOR(mddev->unit);
5327 disk->first_minor = unit << shift;
5328 if (name)
5329 strcpy(disk->disk_name, name);
5330 else if (partitioned)
5331 sprintf(disk->disk_name, "md_d%d", unit);
5332 else
5333 sprintf(disk->disk_name, "md%d", unit);
5334 disk->fops = &md_fops;
5335 disk->private_data = mddev;
5336 disk->queue = mddev->queue;
5337 blk_queue_write_cache(mddev->queue, true, true);
5338
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really remove it now.
	 */
5342 disk->flags |= GENHD_FL_EXT_DEVT;
5343 mddev->gendisk = disk;
5344
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far. */
5347 mutex_lock(&mddev->open_mutex);
5348 add_disk(disk);
5349
5350 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5351 &disk_to_dev(disk)->kobj, "%s", "md");
5352 if (error) {
		/* This shouldn't be possible, but as kobject_init_and_add
		 * is marked __must_check, we must do something with the result
		 */
5356 pr_debug("md: cannot register %s/md - name in use\n",
5357 disk->disk_name);
5358 error = 0;
5359 }
5360 if (mddev->kobj.sd &&
5361 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5362 pr_debug("pointless warning\n");
5363 mutex_unlock(&mddev->open_mutex);
5364 abort:
5365 mutex_unlock(&disks_mutex);
5366 if (!error && mddev->kobj.sd) {
5367 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5368 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5369 }
5370 mddev_put(mddev);
5371 return error;
5372}
5373
5374static struct kobject *md_probe(dev_t dev, int *part, void *data)
5375{
5376 if (create_on_open)
5377 md_alloc(dev, NULL);
5378 return NULL;
5379}
5380
5381static int add_named_array(const char *val, const struct kernel_param *kp)
5382{
	/*
	 * val must be "md_*" or "mdNNN".
	 * For "md_*" we allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 * For "mdNNN" we allocate an array with the minor number NNN
	 * which must not already be in use.
	 */
5390 int len = strlen(val);
5391 char buf[DISK_NAME_LEN];
5392 unsigned long devnum;
5393
5394 while (len && val[len-1] == '\n')
5395 len--;
5396 if (len >= DISK_NAME_LEN)
5397 return -E2BIG;
5398 strlcpy(buf, val, len+1);
5399 if (strncmp(buf, "md_", 3) == 0)
5400 return md_alloc(0, buf);
5401 if (strncmp(buf, "md", 2) == 0 &&
5402 isdigit(buf[2]) &&
5403 kstrtoul(buf+2, 10, &devnum) == 0 &&
5404 devnum <= MINORMASK)
5405 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5406
5407 return -EINVAL;
5408}
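
/*
 * Example (names illustrative): these strings arrive via the md_mod
 * 'new_array' module parameter:
 *
 *   echo md_home > /sys/module/md_mod/parameters/new_array   # named array
 *   echo md127 > /sys/module/md_mod/parameters/new_array     # numbered array
 */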
5409
5410static void md_safemode_timeout(struct timer_list *t)
5411{
5412 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5413
5414 mddev->safemode = 1;
5415 if (mddev->external)
5416 sysfs_notify_dirent_safe(mddev->sysfs_state);
5417
5418 md_wakeup_thread(mddev->thread);
5419}
5420
5421static int start_dirty_degraded;
5422
5423int md_run(struct mddev *mddev)
5424{
5425 int err;
5426 struct md_rdev *rdev;
5427 struct md_personality *pers;
5428
5429 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
5431 return -EINVAL;
5432
5433 if (mddev->pers)
5434 return -EBUSY;
5435
5436 if (mddev->sysfs_active)
5437 return -EBUSY;
5438
	/*
	 * Analyze all RAID superblock(s)
	 */
5442 if (!mddev->raid_disks) {
5443 if (!mddev->persistent)
5444 return -EINVAL;
5445 analyze_sbs(mddev);
5446 }
5447
5448 if (mddev->level != LEVEL_NONE)
5449 request_module("md-level-%d", mddev->level);
5450 else if (mddev->clevel[0])
5451 request_module("md-%s", mddev->clevel);
5452
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5458 mddev->has_superblocks = false;
5459 rdev_for_each(rdev, mddev) {
5460 if (test_bit(Faulty, &rdev->flags))
5461 continue;
5462 sync_blockdev(rdev->bdev);
5463 invalidate_bdev(rdev->bdev);
5464 if (mddev->ro != 1 &&
5465 (bdev_read_only(rdev->bdev) ||
5466 bdev_read_only(rdev->meta_bdev))) {
5467 mddev->ro = 1;
5468 if (mddev->gendisk)
5469 set_disk_ro(mddev->gendisk, 1);
5470 }
5471
5472 if (rdev->sb_page)
5473 mddev->has_superblocks = true;
5474
		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
5479 if (rdev->meta_bdev) {
5480 ;
5481 } else if (rdev->data_offset < rdev->sb_start) {
5482 if (mddev->dev_sectors &&
5483 rdev->data_offset + mddev->dev_sectors
5484 > rdev->sb_start) {
5485 pr_warn("md: %s: data overlaps metadata\n",
5486 mdname(mddev));
5487 return -EINVAL;
5488 }
5489 } else {
5490 if (rdev->sb_start + rdev->sb_size/512
5491 > rdev->data_offset) {
5492 pr_warn("md: %s: metadata overlaps data\n",
5493 mdname(mddev));
5494 return -EINVAL;
5495 }
5496 }
5497 sysfs_notify_dirent_safe(rdev->sysfs_state);
5498 }
5499
5500 if (mddev->bio_set == NULL) {
5501 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5502 if (!mddev->bio_set)
5503 return -ENOMEM;
5504 }
5505 if (mddev->sync_set == NULL) {
5506 mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5507 if (!mddev->sync_set) {
5508 err = -ENOMEM;
5509 goto abort;
5510 }
5511 }
5512
5513 spin_lock(&pers_lock);
5514 pers = find_pers(mddev->level, mddev->clevel);
5515 if (!pers || !try_module_get(pers->owner)) {
5516 spin_unlock(&pers_lock);
5517 if (mddev->level != LEVEL_NONE)
5518 pr_warn("md: personality for level %d is not loaded!\n",
5519 mddev->level);
5520 else
5521 pr_warn("md: personality for level %s is not loaded!\n",
5522 mddev->clevel);
5523 err = -EINVAL;
5524 goto abort;
5525 }
5526 spin_unlock(&pers_lock);
5527 if (mddev->level != pers->level) {
5528 mddev->level = pers->level;
5529 mddev->new_level = pers->level;
5530 }
5531 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5532
5533 if (mddev->reshape_position != MaxSector &&
5534 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5536 module_put(pers->owner);
5537 err = -EINVAL;
5538 goto abort;
5539 }
5540
5541 if (pers->sync_request) {
		/* Warn if this is a dangerous combination of devices,
		 * e.g. two member devices on the same physical disk,
		 * where redundancy would be compromised.
		 */
5545 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5546 struct md_rdev *rdev2;
5547 int warned = 0;
5548
5549 rdev_for_each(rdev, mddev)
5550 rdev_for_each(rdev2, mddev) {
5551 if (rdev < rdev2 &&
5552 rdev->bdev->bd_contains ==
5553 rdev2->bdev->bd_contains) {
5554 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5555 mdname(mddev),
5556 bdevname(rdev->bdev,b),
5557 bdevname(rdev2->bdev,b2));
5558 warned = 1;
5559 }
5560 }
5561
5562 if (warned)
5563 pr_warn("True protection against single-disk failure might be compromised.\n");
5564 }
5565
5566 mddev->recovery = 0;
5567
5568 mddev->resync_max_sectors = mddev->dev_sectors;
5569
5570 mddev->ok_start_degraded = start_dirty_degraded;
5571
5572 if (start_readonly && mddev->ro == 0)
5573 mddev->ro = 2;
5574
5575 err = pers->run(mddev);
5576 if (err)
5577 pr_warn("md: pers->run() failed ...\n");
5578 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5579 WARN_ONCE(!mddev->external_size,
5580 "%s: default size too small, but 'external_size' not in effect?\n",
5581 __func__);
5582 pr_warn("md: invalid array_size %llu > default size %llu\n",
5583 (unsigned long long)mddev->array_sectors / 2,
5584 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5585 err = -EINVAL;
5586 }
5587 if (err == 0 && pers->sync_request &&
5588 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5589 struct bitmap *bitmap;
5590
5591 bitmap = bitmap_create(mddev, -1);
5592 if (IS_ERR(bitmap)) {
5593 err = PTR_ERR(bitmap);
5594 pr_warn("%s: failed to create bitmap (%d)\n",
5595 mdname(mddev), err);
5596 } else
5597 mddev->bitmap = bitmap;
5598
5599 }
5600 if (err) {
5601 mddev_detach(mddev);
5602 if (mddev->private)
5603 pers->free(mddev, mddev->private);
5604 mddev->private = NULL;
5605 module_put(pers->owner);
5606 bitmap_destroy(mddev);
5607 goto abort;
5608 }
5609 if (mddev->queue) {
5610 bool nonrot = true;
5611
5612 rdev_for_each(rdev, mddev) {
5613 if (rdev->raid_disk >= 0 &&
5614 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5615 nonrot = false;
5616 break;
5617 }
5618 }
5619 if (mddev->degraded)
5620 nonrot = false;
5621 if (nonrot)
5622 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5623 else
5624 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5625 mddev->queue->backing_dev_info->congested_data = mddev;
5626 mddev->queue->backing_dev_info->congested_fn = md_congested;
5627 }
5628 if (pers->sync_request) {
5629 if (mddev->kobj.sd &&
5630 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5631 pr_warn("md: cannot register extra attributes for %s\n",
5632 mdname(mddev));
5633 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5634 } else if (mddev->ro == 2)
5635 mddev->ro = 0;
5636
5637 atomic_set(&mddev->max_corr_read_errors,
5638 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5639 mddev->safemode = 0;
5640 if (mddev_is_clustered(mddev))
5641 mddev->safemode_delay = 0;
5642 else
5643 mddev->safemode_delay = (200 * HZ)/1000 +1;
5644 mddev->in_sync = 1;
5645 smp_wmb();
5646 spin_lock(&mddev->lock);
5647 mddev->pers = pers;
5648 spin_unlock(&mddev->lock);
5649 rdev_for_each(rdev, mddev)
5650 if (rdev->raid_disk >= 0)
5651 if (sysfs_link_rdev(mddev, rdev))
				; /* failure here is OK */
5653
5654 if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
5658 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5659 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5660
5661 if (mddev->sb_flags)
5662 md_update_sb(mddev, 0);
5663
5664 md_new_event(mddev);
5665 sysfs_notify_dirent_safe(mddev->sysfs_state);
5666 sysfs_notify_dirent_safe(mddev->sysfs_action);
5667 sysfs_notify(&mddev->kobj, NULL, "degraded");
5668 return 0;
5669
5670abort:
5671 if (mddev->bio_set) {
5672 bioset_free(mddev->bio_set);
5673 mddev->bio_set = NULL;
5674 }
5675 if (mddev->sync_set) {
5676 bioset_free(mddev->sync_set);
5677 mddev->sync_set = NULL;
5678 }
5679
5680 return err;
5681}
5682EXPORT_SYMBOL_GPL(md_run);
5683
5684static int do_md_run(struct mddev *mddev)
5685{
5686 int err;
5687
5688 err = md_run(mddev);
5689 if (err)
5690 goto out;
5691 err = bitmap_load(mddev);
5692 if (err) {
5693 bitmap_destroy(mddev);
5694 goto out;
5695 }
5696
5697 if (mddev_is_clustered(mddev))
5698 md_allow_write(mddev);
5699
	/* run start up tasks that require md_thread */
5701 md_start(mddev);
5702
5703 md_wakeup_thread(mddev->thread);
5704 md_wakeup_thread(mddev->sync_thread);
5705
5706 set_capacity(mddev->gendisk, mddev->array_sectors);
5707 revalidate_disk(mddev->gendisk);
5708 mddev->changed = 1;
5709 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5710out:
5711 return err;
5712}
5713
5714int md_start(struct mddev *mddev)
5715{
5716 int ret = 0;
5717
5718 if (mddev->pers->start) {
5719 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5720 md_wakeup_thread(mddev->thread);
5721 ret = mddev->pers->start(mddev);
5722 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
5723 md_wakeup_thread(mddev->sync_thread);
5724 }
5725 return ret;
5726}
5727EXPORT_SYMBOL_GPL(md_start);
5728
5729static int restart_array(struct mddev *mddev)
5730{
5731 struct gendisk *disk = mddev->gendisk;
5732 struct md_rdev *rdev;
5733 bool has_journal = false;
5734 bool has_readonly = false;
5735
	/* Complain if it has no devices */
5737 if (list_empty(&mddev->disks))
5738 return -ENXIO;
5739 if (!mddev->pers)
5740 return -EINVAL;
5741 if (!mddev->ro)
5742 return -EBUSY;
5743
5744 rcu_read_lock();
5745 rdev_for_each_rcu(rdev, mddev) {
5746 if (test_bit(Journal, &rdev->flags) &&
5747 !test_bit(Faulty, &rdev->flags))
5748 has_journal = true;
5749 if (bdev_read_only(rdev->bdev))
5750 has_readonly = true;
5751 }
5752 rcu_read_unlock();
5753 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart rw with journal missing/faulty */
5755 return -EINVAL;
5756 if (has_readonly)
5757 return -EROFS;
5758
5759 mddev->safemode = 0;
5760 mddev->ro = 0;
5761 set_disk_ro(disk, 0);
5762 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5763
5764 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5765 md_wakeup_thread(mddev->thread);
5766 md_wakeup_thread(mddev->sync_thread);
5767 sysfs_notify_dirent_safe(mddev->sysfs_state);
5768 return 0;
5769}
5770
5771static void md_clean(struct mddev *mddev)
5772{
5773 mddev->array_sectors = 0;
5774 mddev->external_size = 0;
5775 mddev->dev_sectors = 0;
5776 mddev->raid_disks = 0;
5777 mddev->recovery_cp = 0;
5778 mddev->resync_min = 0;
5779 mddev->resync_max = MaxSector;
5780 mddev->reshape_position = MaxSector;
5781 mddev->external = 0;
5782 mddev->persistent = 0;
5783 mddev->level = LEVEL_NONE;
5784 mddev->clevel[0] = 0;
5785 mddev->flags = 0;
5786 mddev->sb_flags = 0;
5787 mddev->ro = 0;
5788 mddev->metadata_type[0] = 0;
5789 mddev->chunk_sectors = 0;
5790 mddev->ctime = mddev->utime = 0;
5791 mddev->layout = 0;
5792 mddev->max_disks = 0;
5793 mddev->events = 0;
5794 mddev->can_decrease_events = 0;
5795 mddev->delta_disks = 0;
5796 mddev->reshape_backwards = 0;
5797 mddev->new_level = LEVEL_NONE;
5798 mddev->new_layout = 0;
5799 mddev->new_chunk_sectors = 0;
5800 mddev->curr_resync = 0;
5801 atomic64_set(&mddev->resync_mismatches, 0);
5802 mddev->suspend_lo = mddev->suspend_hi = 0;
5803 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5804 mddev->recovery = 0;
5805 mddev->in_sync = 0;
5806 mddev->changed = 0;
5807 mddev->degraded = 0;
5808 mddev->safemode = 0;
5809 mddev->private = NULL;
5810 mddev->cluster_info = NULL;
5811 mddev->bitmap_info.offset = 0;
5812 mddev->bitmap_info.default_offset = 0;
5813 mddev->bitmap_info.default_space = 0;
5814 mddev->bitmap_info.chunksize = 0;
5815 mddev->bitmap_info.daemon_sleep = 0;
5816 mddev->bitmap_info.max_write_behind = 0;
5817 mddev->bitmap_info.nodes = 0;
5818}
5819
5820static void __md_stop_writes(struct mddev *mddev)
5821{
5822 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5823 flush_workqueue(md_misc_wq);
5824 if (mddev->sync_thread) {
5825 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5826 md_reap_sync_thread(mddev);
5827 }
5828
5829 del_timer_sync(&mddev->safemode_timer);
5830
5831 if (mddev->pers && mddev->pers->quiesce) {
5832 mddev->pers->quiesce(mddev, 1);
5833 mddev->pers->quiesce(mddev, 0);
5834 }
5835 bitmap_flush(mddev);
5836
5837 if (mddev->ro == 0 &&
5838 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
5839 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
5841 if (!mddev_is_clustered(mddev))
5842 mddev->in_sync = 1;
5843 md_update_sb(mddev, 1);
5844 }
5845}
5846
5847void md_stop_writes(struct mddev *mddev)
5848{
5849 mddev_lock_nointr(mddev);
5850 __md_stop_writes(mddev);
5851 mddev_unlock(mddev);
5852}
5853EXPORT_SYMBOL_GPL(md_stop_writes);
5854
5855static void mddev_detach(struct mddev *mddev)
5856{
5857 bitmap_wait_behind_writes(mddev);
5858 if (mddev->pers && mddev->pers->quiesce) {
5859 mddev->pers->quiesce(mddev, 1);
5860 mddev->pers->quiesce(mddev, 0);
5861 }
5862 md_unregister_thread(&mddev->thread);
5863 if (mddev->queue)
5864 blk_sync_queue(mddev->queue);
5865}
5866
5867static void __md_stop(struct mddev *mddev)
5868{
5869 struct md_personality *pers = mddev->pers;
5870 bitmap_destroy(mddev);
5871 mddev_detach(mddev);
5872
5873 flush_workqueue(md_misc_wq);
5874 spin_lock(&mddev->lock);
5875 mddev->pers = NULL;
5876 spin_unlock(&mddev->lock);
5877 pers->free(mddev, mddev->private);
5878 mddev->private = NULL;
5879 if (pers->sync_request && mddev->to_remove == NULL)
5880 mddev->to_remove = &md_redundancy_group;
5881 module_put(pers->owner);
5882 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5883}
5884
5885void md_stop(struct mddev *mddev)
5886{
	/* stop the array and free any attached data structures.
	 * This is called from dm-raid
	 */
5890 __md_stop(mddev);
5891 if (mddev->bio_set) {
5892 bioset_free(mddev->bio_set);
5893 mddev->bio_set = NULL;
5894 }
5895 if (mddev->sync_set) {
5896 bioset_free(mddev->sync_set);
5897 mddev->sync_set = NULL;
5898 }
5899}
5900
5901EXPORT_SYMBOL_GPL(md_stop);
5902
5903static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5904{
5905 int err = 0;
5906 int did_freeze = 0;
5907
5908 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5909 did_freeze = 1;
5910 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5911 md_wakeup_thread(mddev->thread);
5912 }
5913 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5914 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5915 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5918 wake_up_process(mddev->sync_thread->tsk);
5919
5920 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5921 return -EBUSY;
5922 mddev_unlock(mddev);
5923 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5924 &mddev->recovery));
5925 wait_event(mddev->sb_wait,
5926 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
5927 mddev_lock_nointr(mddev);
5928
5929 mutex_lock(&mddev->open_mutex);
5930 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5931 mddev->sync_thread ||
5932 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5933 pr_warn("md: %s still in use.\n",mdname(mddev));
5934 if (did_freeze) {
5935 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5936 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5937 md_wakeup_thread(mddev->thread);
5938 }
5939 err = -EBUSY;
5940 goto out;
5941 }
5942 if (mddev->pers) {
5943 __md_stop_writes(mddev);
5944
5945 err = -ENXIO;
5946 if (mddev->ro==1)
5947 goto out;
5948 mddev->ro = 1;
5949 set_disk_ro(mddev->gendisk, 1);
5950 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5951 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5952 md_wakeup_thread(mddev->thread);
5953 sysfs_notify_dirent_safe(mddev->sysfs_state);
5954 err = 0;
5955 }
5956out:
5957 mutex_unlock(&mddev->open_mutex);
5958 return err;
5959}
5960
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
5965static int do_md_stop(struct mddev *mddev, int mode,
5966 struct block_device *bdev)
5967{
5968 struct gendisk *disk = mddev->gendisk;
5969 struct md_rdev *rdev;
5970 int did_freeze = 0;
5971
5972 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5973 did_freeze = 1;
5974 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5975 md_wakeup_thread(mddev->thread);
5976 }
5977 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5978 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5979 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
5982 wake_up_process(mddev->sync_thread->tsk);
5983
5984 mddev_unlock(mddev);
5985 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5986 !test_bit(MD_RECOVERY_RUNNING,
5987 &mddev->recovery)));
5988 mddev_lock_nointr(mddev);
5989
5990 mutex_lock(&mddev->open_mutex);
5991 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5992 mddev->sysfs_active ||
5993 mddev->sync_thread ||
5994 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5995 pr_warn("md: %s still in use.\n",mdname(mddev));
5996 mutex_unlock(&mddev->open_mutex);
5997 if (did_freeze) {
5998 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5999 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6000 md_wakeup_thread(mddev->thread);
6001 }
6002 return -EBUSY;
6003 }
6004 if (mddev->pers) {
6005 if (mddev->ro)
6006 set_disk_ro(disk, 0);
6007
6008 __md_stop_writes(mddev);
6009 __md_stop(mddev);
6010 mddev->queue->backing_dev_info->congested_fn = NULL;
6011
		/* tell userspace to handle 'inactive' */
6013 sysfs_notify_dirent_safe(mddev->sysfs_state);
6014
6015 rdev_for_each(rdev, mddev)
6016 if (rdev->raid_disk >= 0)
6017 sysfs_unlink_rdev(mddev, rdev);
6018
6019 set_capacity(disk, 0);
6020 mutex_unlock(&mddev->open_mutex);
6021 mddev->changed = 1;
6022 revalidate_disk(disk);
6023
6024 if (mddev->ro)
6025 mddev->ro = 0;
6026 } else
6027 mutex_unlock(&mddev->open_mutex);
6028
	/*
	 * Free resources if final stop
	 */
6031 if (mode == 0) {
6032 pr_info("md: %s stopped.\n", mdname(mddev));
6033
6034 if (mddev->bitmap_info.file) {
6035 struct file *f = mddev->bitmap_info.file;
6036 spin_lock(&mddev->lock);
6037 mddev->bitmap_info.file = NULL;
6038 spin_unlock(&mddev->lock);
6039 fput(f);
6040 }
6041 mddev->bitmap_info.offset = 0;
6042
6043 export_array(mddev);
6044
6045 md_clean(mddev);
6046 if (mddev->hold_active == UNTIL_STOP)
6047 mddev->hold_active = 0;
6048 }
6049 md_new_event(mddev);
6050 sysfs_notify_dirent_safe(mddev->sysfs_state);
6051 return 0;
6052}
6053
6054#ifndef MODULE
6055static void autorun_array(struct mddev *mddev)
6056{
6057 struct md_rdev *rdev;
6058 int err;
6059
6060 if (list_empty(&mddev->disks))
6061 return;
6062
6063 pr_info("md: running: ");
6064
6065 rdev_for_each(rdev, mddev) {
6066 char b[BDEVNAME_SIZE];
6067 pr_cont("<%s>", bdevname(rdev->bdev,b));
6068 }
6069 pr_cont("\n");
6070
6071 err = do_md_run(mddev);
6072 if (err) {
6073 pr_warn("md: do_md_run() returned %d\n", err);
6074 do_md_stop(mddev, 0, NULL);
6075 }
6076}
6077
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
6090static void autorun_devices(int part)
6091{
6092 struct md_rdev *rdev0, *rdev, *tmp;
6093 struct mddev *mddev;
6094 char b[BDEVNAME_SIZE];
6095
6096 pr_info("md: autorun ...\n");
6097 while (!list_empty(&pending_raid_disks)) {
6098 int unit;
6099 dev_t dev;
6100 LIST_HEAD(candidates);
6101 rdev0 = list_entry(pending_raid_disks.next,
6102 struct md_rdev, same_set);
6103
6104 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6105 INIT_LIST_HEAD(&candidates);
6106 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6107 if (super_90_load(rdev, rdev0, 0) >= 0) {
6108 pr_debug("md: adding %s ...\n",
6109 bdevname(rdev->bdev,b));
6110 list_move(&rdev->same_set, &candidates);
6111 }
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
6117 if (part) {
6118 dev = MKDEV(mdp_major,
6119 rdev0->preferred_minor << MdpMinorShift);
6120 unit = MINOR(dev) >> MdpMinorShift;
6121 } else {
6122 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6123 unit = MINOR(dev);
6124 }
6125 if (rdev0->preferred_minor != unit) {
6126 pr_warn("md: unit number in %s is bad: %d\n",
6127 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6128 break;
6129 }
6130
6131 md_probe(dev, NULL, NULL);
6132 mddev = mddev_find(dev);
6133 if (!mddev || !mddev->gendisk) {
6134 if (mddev)
6135 mddev_put(mddev);
6136 break;
6137 }
6138 if (mddev_lock(mddev))
6139 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6140 else if (mddev->raid_disks || mddev->major_version
6141 || !list_empty(&mddev->disks)) {
6142 pr_warn("md: %s already running, cannot run %s\n",
6143 mdname(mddev), bdevname(rdev0->bdev,b));
6144 mddev_unlock(mddev);
6145 } else {
6146 pr_debug("md: created %s\n", mdname(mddev));
6147 mddev->persistent = 1;
6148 rdev_for_each_list(rdev, tmp, &candidates) {
6149 list_del_init(&rdev->same_set);
6150 if (bind_rdev_to_array(rdev, mddev))
6151 export_rdev(rdev);
6152 }
6153 autorun_array(mddev);
6154 mddev_unlock(mddev);
6155 }
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
6159 rdev_for_each_list(rdev, tmp, &candidates) {
6160 list_del_init(&rdev->same_set);
6161 export_rdev(rdev);
6162 }
6163 mddev_put(mddev);
6164 }
6165 pr_info("md: ... autorun DONE.\n");
6166}
6167#endif
6168
6169static int get_version(void __user *arg)
6170{
6171 mdu_version_t ver;
6172
6173 ver.major = MD_MAJOR_VERSION;
6174 ver.minor = MD_MINOR_VERSION;
6175 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6176
6177 if (copy_to_user(arg, &ver, sizeof(ver)))
6178 return -EFAULT;
6179
6180 return 0;
6181}
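
/*
 * Userspace sketch (ioctl constants from linux/raid/md_u.h; error
 * handling elided):
 *
 *   mdu_version_t ver;
 *   int fd = open("/dev/md0", O_RDONLY);
 *   ioctl(fd, RAID_VERSION, &ver);
 *   printf("%d.%d.%d\n", ver.major, ver.minor, ver.patchlevel);
 */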
6182
6183static int get_array_info(struct mddev *mddev, void __user *arg)
6184{
6185 mdu_array_info_t info;
6186 int nr,working,insync,failed,spare;
6187 struct md_rdev *rdev;
6188
6189 nr = working = insync = failed = spare = 0;
6190 rcu_read_lock();
6191 rdev_for_each_rcu(rdev, mddev) {
6192 nr++;
6193 if (test_bit(Faulty, &rdev->flags))
6194 failed++;
6195 else {
6196 working++;
6197 if (test_bit(In_sync, &rdev->flags))
6198 insync++;
6199 else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
6201 ;
6202 else
6203 spare++;
6204 }
6205 }
6206 rcu_read_unlock();
6207
6208 info.major_version = mddev->major_version;
6209 info.minor_version = mddev->minor_version;
6210 info.patch_version = MD_PATCHLEVEL_VERSION;
6211 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6212 info.level = mddev->level;
6213 info.size = mddev->dev_sectors / 2;
6214 if (info.size != mddev->dev_sectors / 2)
6215 info.size = -1;
6216 info.nr_disks = nr;
6217 info.raid_disks = mddev->raid_disks;
6218 info.md_minor = mddev->md_minor;
6219 info.not_persistent= !mddev->persistent;
6220
6221 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6222 info.state = 0;
6223 if (mddev->in_sync)
6224 info.state = (1<<MD_SB_CLEAN);
6225 if (mddev->bitmap && mddev->bitmap_info.offset)
6226 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6227 if (mddev_is_clustered(mddev))
6228 info.state |= (1<<MD_SB_CLUSTERED);
6229 info.active_disks = insync;
6230 info.working_disks = working;
6231 info.failed_disks = failed;
6232 info.spare_disks = spare;
6233
6234 info.layout = mddev->layout;
6235 info.chunk_size = mddev->chunk_sectors << 9;
6236
6237 if (copy_to_user(arg, &info, sizeof(info)))
6238 return -EFAULT;
6239
6240 return 0;
6241}
6242
6243static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6244{
6245 mdu_bitmap_file_t *file = NULL;
6246 char *ptr;
6247 int err;
6248
6249 file = kzalloc(sizeof(*file), GFP_NOIO);
6250 if (!file)
6251 return -ENOMEM;
6252
6253 err = 0;
6254 spin_lock(&mddev->lock);
6255
6256 if (mddev->bitmap_info.file) {
6257 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6258 sizeof(file->pathname));
6259 if (IS_ERR(ptr))
6260 err = PTR_ERR(ptr);
6261 else
6262 memmove(file->pathname, ptr,
6263 sizeof(file->pathname)-(ptr-file->pathname));
6264 }
6265 spin_unlock(&mddev->lock);
6266
6267 if (err == 0 &&
6268 copy_to_user(arg, file, sizeof(*file)))
6269 err = -EFAULT;
6270
6271 kfree(file);
6272 return err;
6273}
6274
6275static int get_disk_info(struct mddev *mddev, void __user * arg)
6276{
6277 mdu_disk_info_t info;
6278 struct md_rdev *rdev;
6279
6280 if (copy_from_user(&info, arg, sizeof(info)))
6281 return -EFAULT;
6282
6283 rcu_read_lock();
6284 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6285 if (rdev) {
6286 info.major = MAJOR(rdev->bdev->bd_dev);
6287 info.minor = MINOR(rdev->bdev->bd_dev);
6288 info.raid_disk = rdev->raid_disk;
6289 info.state = 0;
6290 if (test_bit(Faulty, &rdev->flags))
6291 info.state |= (1<<MD_DISK_FAULTY);
6292 else if (test_bit(In_sync, &rdev->flags)) {
6293 info.state |= (1<<MD_DISK_ACTIVE);
6294 info.state |= (1<<MD_DISK_SYNC);
6295 }
6296 if (test_bit(Journal, &rdev->flags))
6297 info.state |= (1<<MD_DISK_JOURNAL);
6298 if (test_bit(WriteMostly, &rdev->flags))
6299 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6300 if (test_bit(FailFast, &rdev->flags))
6301 info.state |= (1<<MD_DISK_FAILFAST);
6302 } else {
6303 info.major = info.minor = 0;
6304 info.raid_disk = -1;
6305 info.state = (1<<MD_DISK_REMOVED);
6306 }
6307 rcu_read_unlock();
6308
6309 if (copy_to_user(arg, &info, sizeof(info)))
6310 return -EFAULT;
6311
6312 return 0;
6313}
6314
6315static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6316{
6317 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6318 struct md_rdev *rdev;
6319 dev_t dev = MKDEV(info->major,info->minor);
6320
6321 if (mddev_is_clustered(mddev) &&
6322 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6323 pr_warn("%s: Cannot add to clustered mddev.\n",
6324 mdname(mddev));
6325 return -EINVAL;
6326 }
6327
6328 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6329 return -EOVERFLOW;
6330
6331 if (!mddev->raid_disks) {
6332 int err;
6333
6334 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6335 if (IS_ERR(rdev)) {
6336 pr_warn("md: md_import_device returned %ld\n",
6337 PTR_ERR(rdev));
6338 return PTR_ERR(rdev);
6339 }
6340 if (!list_empty(&mddev->disks)) {
6341 struct md_rdev *rdev0
6342 = list_entry(mddev->disks.next,
6343 struct md_rdev, same_set);
6344 err = super_types[mddev->major_version]
6345 .load_super(rdev, rdev0, mddev->minor_version);
6346 if (err < 0) {
6347 pr_warn("md: %s has different UUID to %s\n",
6348 bdevname(rdev->bdev,b),
6349 bdevname(rdev0->bdev,b2));
6350 export_rdev(rdev);
6351 return -EINVAL;
6352 }
6353 }
6354 err = bind_rdev_to_array(rdev, mddev);
6355 if (err)
6356 export_rdev(rdev);
6357 return err;
6358 }
6359
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
6365 if (mddev->pers) {
6366 int err;
6367 if (!mddev->pers->hot_add_disk) {
6368 pr_warn("%s: personality does not support diskops!\n",
6369 mdname(mddev));
6370 return -EINVAL;
6371 }
6372 if (mddev->persistent)
6373 rdev = md_import_device(dev, mddev->major_version,
6374 mddev->minor_version);
6375 else
6376 rdev = md_import_device(dev, -1, -1);
6377 if (IS_ERR(rdev)) {
6378 pr_warn("md: md_import_device returned %ld\n",
6379 PTR_ERR(rdev));
6380 return PTR_ERR(rdev);
6381 }
6382
6383 if (!mddev->persistent) {
6384 if (info->state & (1<<MD_DISK_SYNC) &&
6385 info->raid_disk < mddev->raid_disks) {
6386 rdev->raid_disk = info->raid_disk;
6387 set_bit(In_sync, &rdev->flags);
6388 clear_bit(Bitmap_sync, &rdev->flags);
6389 } else
6390 rdev->raid_disk = -1;
6391 rdev->saved_raid_disk = rdev->raid_disk;
6392 } else
6393 super_types[mddev->major_version].
6394 validate_super(mddev, rdev);
6395 if ((info->state & (1<<MD_DISK_SYNC)) &&
6396 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
6400 export_rdev(rdev);
6401 return -EINVAL;
6402 }
6403
		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6405 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6406 set_bit(WriteMostly, &rdev->flags);
6407 else
6408 clear_bit(WriteMostly, &rdev->flags);
6409 if (info->state & (1<<MD_DISK_FAILFAST))
6410 set_bit(FailFast, &rdev->flags);
6411 else
6412 clear_bit(FailFast, &rdev->flags);
6413
6414 if (info->state & (1<<MD_DISK_JOURNAL)) {
6415 struct md_rdev *rdev2;
6416 bool has_journal = false;
6417
			/* make sure no existing journal disk */
6419 rdev_for_each(rdev2, mddev) {
6420 if (test_bit(Journal, &rdev2->flags)) {
6421 has_journal = true;
6422 break;
6423 }
6424 }
6425 if (has_journal || mddev->bitmap) {
6426 export_rdev(rdev);
6427 return -EBUSY;
6428 }
6429 set_bit(Journal, &rdev->flags);
6430 }
6431
		/*
		 * check whether the device appears in other cluster nodes
		 */
6434 if (mddev_is_clustered(mddev)) {
6435 if (info->state & (1 << MD_DISK_CANDIDATE))
6436 set_bit(Candidate, &rdev->flags);
6437 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6439 err = md_cluster_ops->add_new_disk(mddev, rdev);
6440 if (err) {
6441 export_rdev(rdev);
6442 return err;
6443 }
6444 }
6445 }
6446
6447 rdev->raid_disk = -1;
6448 err = bind_rdev_to_array(rdev, mddev);
6449
6450 if (err)
6451 export_rdev(rdev);
6452
6453 if (mddev_is_clustered(mddev)) {
6454 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6455 if (!err) {
6456 err = md_cluster_ops->new_disk_ack(mddev,
6457 err == 0);
6458 if (err)
6459 md_kick_rdev_from_array(rdev);
6460 }
6461 } else {
6462 if (err)
6463 md_cluster_ops->add_new_disk_cancel(mddev);
6464 else
6465 err = add_bound_rdev(rdev);
6466 }
6467
6468 } else if (!err)
6469 err = add_bound_rdev(rdev);
6470
6471 return err;
6472 }
6473
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6477 if (mddev->major_version != 0) {
6478 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6479 return -EINVAL;
6480 }
6481
6482 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6483 int err;
6484 rdev = md_import_device(dev, -1, 0);
6485 if (IS_ERR(rdev)) {
6486 pr_warn("md: error, md_import_device() returned %ld\n",
6487 PTR_ERR(rdev));
6488 return PTR_ERR(rdev);
6489 }
6490 rdev->desc_nr = info->number;
6491 if (info->raid_disk < mddev->raid_disks)
6492 rdev->raid_disk = info->raid_disk;
6493 else
6494 rdev->raid_disk = -1;
6495
6496 if (rdev->raid_disk < mddev->raid_disks)
6497 if (info->state & (1<<MD_DISK_SYNC))
6498 set_bit(In_sync, &rdev->flags);
6499
6500 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6501 set_bit(WriteMostly, &rdev->flags);
6502 if (info->state & (1<<MD_DISK_FAILFAST))
6503 set_bit(FailFast, &rdev->flags);
6504
6505 if (!mddev->persistent) {
6506 pr_debug("md: nonpersistent superblock ...\n");
6507 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6508 } else
6509 rdev->sb_start = calc_dev_sboffset(rdev);
6510 rdev->sectors = rdev->sb_start;
6511
6512 err = bind_rdev_to_array(rdev, mddev);
6513 if (err) {
6514 export_rdev(rdev);
6515 return err;
6516 }
6517 }
6518
6519 return 0;
6520}
6521
6522static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6523{
6524 char b[BDEVNAME_SIZE];
6525 struct md_rdev *rdev;
6526
6527 rdev = find_rdev(mddev, dev);
6528 if (!rdev)
6529 return -ENXIO;
6530
6531 if (rdev->raid_disk < 0)
6532 goto kick_rdev;
6533
6534 clear_bit(Blocked, &rdev->flags);
6535 remove_and_add_spares(mddev, rdev);
6536
6537 if (rdev->raid_disk >= 0)
6538 goto busy;
6539
6540kick_rdev:
6541 if (mddev_is_clustered(mddev))
6542 md_cluster_ops->remove_disk(mddev, rdev);
6543
6544 md_kick_rdev_from_array(rdev);
6545 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6546 if (mddev->thread)
6547 md_wakeup_thread(mddev->thread);
6548 else
6549 md_update_sb(mddev, 1);
6550 md_new_event(mddev);
6551
6552 return 0;
6553busy:
6554 pr_debug("md: cannot remove active disk %s from %s ...\n",
6555 bdevname(rdev->bdev,b), mdname(mddev));
6556 return -EBUSY;
6557}
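
/*
 * Sketch: this path backs the HOT_REMOVE_DISK ioctl, e.g. from
 *   mdadm /dev/md0 --remove /dev/sdb1
 * and fails with -EBUSY while the device is still an active member.
 */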
6558
6559static int hot_add_disk(struct mddev *mddev, dev_t dev)
6560{
6561 char b[BDEVNAME_SIZE];
6562 int err;
6563 struct md_rdev *rdev;
6564
6565 if (!mddev->pers)
6566 return -ENODEV;
6567
6568 if (mddev->major_version != 0) {
6569 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6570 mdname(mddev));
6571 return -EINVAL;
6572 }
6573 if (!mddev->pers->hot_add_disk) {
6574 pr_warn("%s: personality does not support diskops!\n",
6575 mdname(mddev));
6576 return -EINVAL;
6577 }
6578
6579 rdev = md_import_device(dev, -1, 0);
6580 if (IS_ERR(rdev)) {
6581 pr_warn("md: error, md_import_device() returned %ld\n",
6582 PTR_ERR(rdev));
6583 return -EINVAL;
6584 }
6585
6586 if (mddev->persistent)
6587 rdev->sb_start = calc_dev_sboffset(rdev);
6588 else
6589 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6590
6591 rdev->sectors = rdev->sb_start;
6592
6593 if (test_bit(Faulty, &rdev->flags)) {
6594 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6595 bdevname(rdev->bdev,b), mdname(mddev));
6596 err = -EINVAL;
6597 goto abort_export;
6598 }
6599
6600 clear_bit(In_sync, &rdev->flags);
6601 rdev->desc_nr = -1;
6602 rdev->saved_raid_disk = -1;
6603 err = bind_rdev_to_array(rdev, mddev);
6604 if (err)
6605 goto abort_export;
6606
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
6612 rdev->raid_disk = -1;
6613
6614 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6615 if (!mddev->thread)
6616 md_update_sb(mddev, 1);
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
6621 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6622 md_wakeup_thread(mddev->thread);
6623 md_new_event(mddev);
6624 return 0;
6625
6626abort_export:
6627 export_rdev(rdev);
6628 return err;
6629}
6630
6631static int set_bitmap_file(struct mddev *mddev, int fd)
6632{
6633 int err = 0;
6634
6635 if (mddev->pers) {
6636 if (!mddev->pers->quiesce || !mddev->thread)
6637 return -EBUSY;
6638 if (mddev->recovery || mddev->sync_thread)
6639 return -EBUSY;
		/* we should be able to change the bitmap.. */
6641 }
6642
6643 if (fd >= 0) {
6644 struct inode *inode;
6645 struct file *f;
6646
6647 if (mddev->bitmap || mddev->bitmap_info.file)
6648 return -EEXIST;
6649 f = fget(fd);
6650
6651 if (f == NULL) {
6652 pr_warn("%s: error: failed to get bitmap file\n",
6653 mdname(mddev));
6654 return -EBADF;
6655 }
6656
6657 inode = f->f_mapping->host;
6658 if (!S_ISREG(inode->i_mode)) {
6659 pr_warn("%s: error: bitmap file must be a regular file\n",
6660 mdname(mddev));
6661 err = -EBADF;
6662 } else if (!(f->f_mode & FMODE_WRITE)) {
6663 pr_warn("%s: error: bitmap file must open for write\n",
6664 mdname(mddev));
6665 err = -EBADF;
6666 } else if (atomic_read(&inode->i_writecount) != 1) {
6667 pr_warn("%s: error: bitmap file is already in use\n",
6668 mdname(mddev));
6669 err = -EBUSY;
6670 }
6671 if (err) {
6672 fput(f);
6673 return err;
6674 }
6675 mddev->bitmap_info.file = f;
6676 mddev->bitmap_info.offset = 0;
6677 } else if (mddev->bitmap == NULL)
6678 return -ENOENT;
6679 err = 0;
6680 if (mddev->pers) {
6681 if (fd >= 0) {
6682 struct bitmap *bitmap;
6683
6684 bitmap = bitmap_create(mddev, -1);
6685 mddev_suspend(mddev);
6686 if (!IS_ERR(bitmap)) {
6687 mddev->bitmap = bitmap;
6688 err = bitmap_load(mddev);
6689 } else
6690 err = PTR_ERR(bitmap);
6691 if (err) {
6692 bitmap_destroy(mddev);
6693 fd = -1;
6694 }
6695 mddev_resume(mddev);
6696 } else if (fd < 0) {
6697 mddev_suspend(mddev);
6698 bitmap_destroy(mddev);
6699 mddev_resume(mddev);
6700 }
6701 }
6702 if (fd < 0) {
6703 struct file *f = mddev->bitmap_info.file;
6704 if (f) {
6705 spin_lock(&mddev->lock);
6706 mddev->bitmap_info.file = NULL;
6707 spin_unlock(&mddev->lock);
6708 fput(f);
6709 }
6710 }
6711
6712 return err;
6713}
6714
/*
 * set_array_info is used two different ways.
 * The original usage is when creating a new array:
 *  raid_disks is > 0 and, together with level, size, not_persistent,
 *  layout and chunksize, determines the shape of the array.
 *  This always creates an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array:
 *  in this case raid_disks is 0, and the major_version field is
 *  used to determine which style of superblock is to be found on
 *  the devices.
 *  The minor and patch _version numbers are also kept in case the
 *  superblock handler wishes to interpret them.
 */
6728static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
6729{
6730
6731 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
6733 if (info->major_version < 0 ||
6734 info->major_version >= ARRAY_SIZE(super_types) ||
6735 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
6737 pr_warn("md: superblock version %d not known\n",
6738 info->major_version);
6739 return -EINVAL;
6740 }
6741 mddev->major_version = info->major_version;
6742 mddev->minor_version = info->minor_version;
6743 mddev->patch_version = info->patch_version;
6744 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
6748 mddev->ctime = ktime_get_real_seconds();
6749 return 0;
6750 }
6751 mddev->major_version = MD_MAJOR_VERSION;
6752 mddev->minor_version = MD_MINOR_VERSION;
6753 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6754 mddev->ctime = ktime_get_real_seconds();
6755
6756 mddev->level = info->level;
6757 mddev->clevel[0] = 0;
6758 mddev->dev_sectors = 2 * (sector_t)info->size;
6759 mddev->raid_disks = info->raid_disks;
6760
6761
6762
6763 if (info->state & (1<<MD_SB_CLEAN))
6764 mddev->recovery_cp = MaxSector;
6765 else
6766 mddev->recovery_cp = 0;
6767 mddev->persistent = ! info->not_persistent;
6768 mddev->external = 0;
6769
6770 mddev->layout = info->layout;
6771 mddev->chunk_sectors = info->chunk_size >> 9;
6772
6773 if (mddev->persistent) {
6774 mddev->max_disks = MD_SB_DISKS;
6775 mddev->flags = 0;
6776 mddev->sb_flags = 0;
6777 }
6778 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6779
6780 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6781 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6782 mddev->bitmap_info.offset = 0;
6783
6784 mddev->reshape_position = MaxSector;
6785
6786
6787
6788
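	/* generate a 128-bit UUID for the new array */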
6789 get_random_bytes(mddev->uuid, 16);
6790
6791 mddev->new_level = mddev->level;
6792 mddev->new_chunk_sectors = mddev->chunk_sectors;
6793 mddev->new_layout = mddev->layout;
6794 mddev->delta_disks = 0;
6795 mddev->reshape_backwards = 0;
6796
6797 return 0;
6798}
6799
6800void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6801{
6802 lockdep_assert_held(&mddev->reconfig_mutex);
6803
6804 if (mddev->external_size)
6805 return;
6806
6807 mddev->array_sectors = array_sectors;
6808}
6809EXPORT_SYMBOL(md_set_array_sectors);
6810
6811static int update_size(struct mddev *mddev, sector_t num_sectors)
6812{
6813 struct md_rdev *rdev;
6814 int rv;
6815 int fit = (num_sectors == 0);
6816 sector_t old_dev_sectors = mddev->dev_sectors;
6817
6818 if (mddev->pers->resize == NULL)
6819 return -EINVAL;
6820
6828
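	/*
	 * "num_sectors" is the number of sectors of each device that is
	 * used, which only makes sense for arrays with redundancy.  It
	 * may only change while no resync or reconstruction is running,
	 * and the new size must fit on every component device.  A
	 * num_sectors of zero means "use the largest size that fits".
	 */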
6829 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6830 mddev->sync_thread)
6831 return -EBUSY;
6832 if (mddev->ro)
6833 return -EROFS;
6834
6835 rdev_for_each(rdev, mddev) {
6836 sector_t avail = rdev->sectors;
6837
6838 if (fit && (num_sectors == 0 || num_sectors > avail))
6839 num_sectors = avail;
6840 if (avail < num_sectors)
6841 return -ENOSPC;
6842 }
6843 rv = mddev->pers->resize(mddev, num_sectors);
6844 if (!rv) {
6845 if (mddev_is_clustered(mddev))
6846 md_cluster_ops->update_size(mddev, old_dev_sectors);
6847 else if (mddev->queue) {
6848 set_capacity(mddev->gendisk, mddev->array_sectors);
6849 revalidate_disk(mddev->gendisk);
6850 }
6851 }
6852 return rv;
6853}
6854
6855static int update_raid_disks(struct mddev *mddev, int raid_disks)
6856{
6857 int rv;
6858 struct md_rdev *rdev;
6859
6860 if (mddev->pers->check_reshape == NULL)
6861 return -EINVAL;
6862 if (mddev->ro)
6863 return -EROFS;
6864 if (raid_disks <= 0 ||
6865 (mddev->max_disks && raid_disks >= mddev->max_disks))
6866 return -EINVAL;
6867 if (mddev->sync_thread ||
6868 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6869 mddev->reshape_position != MaxSector)
6870 return -EBUSY;
6871
6872 rdev_for_each(rdev, mddev) {
6873 if (mddev->raid_disks < raid_disks &&
6874 rdev->data_offset < rdev->new_data_offset)
6875 return -EINVAL;
6876 if (mddev->raid_disks > raid_disks &&
6877 rdev->data_offset > rdev->new_data_offset)
6878 return -EINVAL;
6879 }
6880
6881 mddev->delta_disks = raid_disks - mddev->raid_disks;
6882 if (mddev->delta_disks < 0)
6883 mddev->reshape_backwards = 1;
6884 else if (mddev->delta_disks > 0)
6885 mddev->reshape_backwards = 0;
6886
6887 rv = mddev->pers->check_reshape(mddev);
6888 if (rv < 0) {
6889 mddev->delta_disks = 0;
6890 mddev->reshape_backwards = 0;
6891 }
6892 return rv;
6893}
6894
6902
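/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, persistence and chunk_size cannot be
 * changed.  Of size, raid_disks, layout and bitmap-presence, exactly
 * one may be changed per call.
 */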
6903static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6904{
6905 int rv = 0;
6906 int cnt = 0;
6907 int state = 0;
6908
6909
6910 if (mddev->bitmap && mddev->bitmap_info.offset)
6911 state |= (1 << MD_SB_BITMAP_PRESENT);
6912
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state ^ info->state) & 0xfffffe00))
		return -EINVAL;
6925
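	/* count how many properties are being changed; allow at most one */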
6926 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6927 cnt++;
6928 if (mddev->raid_disks != info->raid_disks)
6929 cnt++;
6930 if (mddev->layout != info->layout)
6931 cnt++;
6932 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6933 cnt++;
6934 if (cnt == 0)
6935 return 0;
6936 if (cnt > 1)
6937 return -EINVAL;
6938
6939 if (mddev->layout != info->layout) {
6940
6941
6942
6943
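		/*
		 * Changing the layout needs nothing at the md level; the
		 * personality does all the work through check_reshape().
		 */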
6944 if (mddev->pers->check_reshape == NULL)
6945 return -EINVAL;
6946 else {
6947 mddev->new_layout = info->layout;
6948 rv = mddev->pers->check_reshape(mddev);
6949 if (rv)
6950 mddev->new_layout = mddev->layout;
6951 return rv;
6952 }
6953 }
6954 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6955 rv = update_size(mddev, (sector_t)info->size * 2);
6956
6957 if (mddev->raid_disks != info->raid_disks)
6958 rv = update_raid_disks(mddev, info->raid_disks);
6959
6960 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6961 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6962 rv = -EINVAL;
6963 goto err;
6964 }
6965 if (mddev->recovery || mddev->sync_thread) {
6966 rv = -EBUSY;
6967 goto err;
6968 }
6969 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6970 struct bitmap *bitmap;
6971
6972 if (mddev->bitmap) {
6973 rv = -EEXIST;
6974 goto err;
6975 }
6976 if (mddev->bitmap_info.default_offset == 0) {
6977 rv = -EINVAL;
6978 goto err;
6979 }
6980 mddev->bitmap_info.offset =
6981 mddev->bitmap_info.default_offset;
6982 mddev->bitmap_info.space =
6983 mddev->bitmap_info.default_space;
6984 bitmap = bitmap_create(mddev, -1);
6985 mddev_suspend(mddev);
6986 if (!IS_ERR(bitmap)) {
6987 mddev->bitmap = bitmap;
6988 rv = bitmap_load(mddev);
6989 } else
6990 rv = PTR_ERR(bitmap);
6991 if (rv)
6992 bitmap_destroy(mddev);
6993 mddev_resume(mddev);
6994 } else {
6995
6996 if (!mddev->bitmap) {
6997 rv = -ENOENT;
6998 goto err;
6999 }
7000 if (mddev->bitmap->storage.file) {
7001 rv = -EINVAL;
7002 goto err;
7003 }
7004 if (mddev->bitmap_info.nodes) {
7005
7006 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7007 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7008 rv = -EPERM;
7009 md_cluster_ops->unlock_all_bitmaps(mddev);
7010 goto err;
7011 }
7012
7013 mddev->bitmap_info.nodes = 0;
7014 md_cluster_ops->leave(mddev);
7015 }
7016 mddev_suspend(mddev);
7017 bitmap_destroy(mddev);
7018 mddev_resume(mddev);
7019 mddev->bitmap_info.offset = 0;
7020 }
7021 }
7022 md_update_sb(mddev, 1);
7023 return rv;
7024err:
7025 return rv;
7026}
7027
7028static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7029{
7030 struct md_rdev *rdev;
7031 int err = 0;
7032
7033 if (mddev->pers == NULL)
7034 return -ENODEV;
7035
7036 rcu_read_lock();
7037 rdev = md_find_rdev_rcu(mddev, dev);
7038 if (!rdev)
7039 err = -ENODEV;
7040 else {
7041 md_error(mddev, rdev);
7042 if (!test_bit(Faulty, &rdev->flags))
7043 err = -EBUSY;
7044 }
7045 rcu_read_unlock();
7046 return err;
7047}
7048
7054
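/*
 * There is no sensible CHS geometry for an md array, so fake one:
 * 2 heads and 4 sectors, with however many cylinders that implies.
 */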
7055static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7056{
7057 struct mddev *mddev = bdev->bd_disk->private_data;
7058
7059 geo->heads = 2;
7060 geo->sectors = 4;
7061 geo->cylinders = mddev->array_sectors / 8;
7062 return 0;
7063}
7064
7065static inline bool md_ioctl_valid(unsigned int cmd)
7066{
7067 switch (cmd) {
7068 case ADD_NEW_DISK:
7069 case BLKROSET:
7070 case GET_ARRAY_INFO:
7071 case GET_BITMAP_FILE:
7072 case GET_DISK_INFO:
7073 case HOT_ADD_DISK:
7074 case HOT_REMOVE_DISK:
7075 case RAID_AUTORUN:
7076 case RAID_VERSION:
7077 case RESTART_ARRAY_RW:
7078 case RUN_ARRAY:
7079 case SET_ARRAY_INFO:
7080 case SET_BITMAP_FILE:
7081 case SET_DISK_FAULTY:
7082 case STOP_ARRAY:
7083 case STOP_ARRAY_RO:
7084 case CLUSTERED_DISK_NACK:
7085 return true;
7086 default:
7087 return false;
7088 }
7089}
7090
7091static int md_ioctl(struct block_device *bdev, fmode_t mode,
7092 unsigned int cmd, unsigned long arg)
7093{
7094 int err = 0;
7095 void __user *argp = (void __user *)arg;
7096 struct mddev *mddev = NULL;
7097 int ro;
7098 bool did_set_md_closing = false;
7099
7100 if (!md_ioctl_valid(cmd))
7101 return -ENOTTY;
7102
7103 switch (cmd) {
7104 case RAID_VERSION:
7105 case GET_ARRAY_INFO:
7106 case GET_DISK_INFO:
7107 break;
7108 default:
7109 if (!capable(CAP_SYS_ADMIN))
7110 return -EACCES;
7111 }
7112
7113
7114
7115
7116
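	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */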
7117 switch (cmd) {
7118 case RAID_VERSION:
7119 err = get_version(argp);
7120 goto out;
7121
7122#ifndef MODULE
7123 case RAID_AUTORUN:
7124 err = 0;
7125 autostart_arrays(arg);
7126 goto out;
7127#endif
7128 default:;
7129 }
7130
7131
7132
7133
7134
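	/*
	 * Commands creating/starting a new array:
	 */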
7135 mddev = bdev->bd_disk->private_data;
7136
7137 if (!mddev) {
7138 BUG();
7139 goto out;
7140 }
7141
7142
7143 switch (cmd) {
7144 case GET_ARRAY_INFO:
7145 if (!mddev->raid_disks && !mddev->external)
7146 err = -ENODEV;
7147 else
7148 err = get_array_info(mddev, argp);
7149 goto out;
7150
7151 case GET_DISK_INFO:
7152 if (!mddev->raid_disks && !mddev->external)
7153 err = -ENODEV;
7154 else
7155 err = get_disk_info(mddev, argp);
7156 goto out;
7157
7158 case SET_DISK_FAULTY:
7159 err = set_disk_faulty(mddev, new_decode_dev(arg));
7160 goto out;
7161
7162 case GET_BITMAP_FILE:
7163 err = get_bitmap_file(mddev, argp);
7164 goto out;
7165
7166 }
7167
7168 if (cmd == ADD_NEW_DISK)
7169
7170 flush_workqueue(md_misc_wq);
7171
7172 if (cmd == HOT_REMOVE_DISK)
7173
7174 wait_event_interruptible_timeout(mddev->sb_wait,
7175 !test_bit(MD_RECOVERY_NEEDED,
7176 &mddev->recovery),
7177 msecs_to_jiffies(5000));
7178 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
7179
7180
7181
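		/*
		 * Flush the page cache, and make sure no-one else has the
		 * array open, before marking it as closing.
		 */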
7182 mutex_lock(&mddev->open_mutex);
7183 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7184 mutex_unlock(&mddev->open_mutex);
7185 err = -EBUSY;
7186 goto out;
7187 }
7188 WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
7189 set_bit(MD_CLOSING, &mddev->flags);
7190 did_set_md_closing = true;
7191 mutex_unlock(&mddev->open_mutex);
7192 sync_blockdev(bdev);
7193 }
7194 err = mddev_lock(mddev);
7195 if (err) {
7196 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7197 err, cmd);
7198 goto out;
7199 }
7200
7201 if (cmd == SET_ARRAY_INFO) {
7202 mdu_array_info_t info;
7203 if (!arg)
7204 memset(&info, 0, sizeof(info));
7205 else if (copy_from_user(&info, argp, sizeof(info))) {
7206 err = -EFAULT;
7207 goto unlock;
7208 }
7209 if (mddev->pers) {
7210 err = update_array_info(mddev, &info);
7211 if (err) {
7212 pr_warn("md: couldn't update array info. %d\n", err);
7213 goto unlock;
7214 }
7215 goto unlock;
7216 }
7217 if (!list_empty(&mddev->disks)) {
7218 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7219 err = -EBUSY;
7220 goto unlock;
7221 }
7222 if (mddev->raid_disks) {
7223 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7224 err = -EBUSY;
7225 goto unlock;
7226 }
7227 err = set_array_info(mddev, &info);
7228 if (err) {
7229 pr_warn("md: couldn't set array info. %d\n", err);
7230 goto unlock;
7231 }
7232 goto unlock;
7233 }
7234
7235
7236
7237
7238
7239
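	/*
	 * An unconfigured array only accepts the commands that configure
	 * or tear it down.
	 */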
7240 if ((!mddev->raid_disks && !mddev->external)
7241 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7242 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7243 && cmd != GET_BITMAP_FILE) {
7244 err = -ENODEV;
7245 goto unlock;
7246 }
7247
7248
7249
7250
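	/*
	 * Commands even a read-only array can execute:
	 */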
7251 switch (cmd) {
7252 case RESTART_ARRAY_RW:
7253 err = restart_array(mddev);
7254 goto unlock;
7255
7256 case STOP_ARRAY:
7257 err = do_md_stop(mddev, 0, bdev);
7258 goto unlock;
7259
7260 case STOP_ARRAY_RO:
7261 err = md_set_readonly(mddev, bdev);
7262 goto unlock;
7263
7264 case HOT_REMOVE_DISK:
7265 err = hot_remove_disk(mddev, new_decode_dev(arg));
7266 goto unlock;
7267
7268 case ADD_NEW_DISK:
7269
7270
7271
7272
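		/*
		 * ADD_NEW_DISK is supported here only when re-adding a
		 * preexisting device, so require mddev->pers and
		 * MD_DISK_SYNC.
		 */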
7273 if (mddev->pers) {
7274 mdu_disk_info_t info;
7275 if (copy_from_user(&info, argp, sizeof(info)))
7276 err = -EFAULT;
7277 else if (!(info.state & (1<<MD_DISK_SYNC)))
7278
7279 break;
7280 else
7281 err = add_new_disk(mddev, &info);
7282 goto unlock;
7283 }
7284 break;
7285
7286 case BLKROSET:
7287 if (get_user(ro, (int __user *)(arg))) {
7288 err = -EFAULT;
7289 goto unlock;
7290 }
7291 err = -EINVAL;
7292
7293
7294
7295
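		/*
		 * If the bdev is going read-only the value of mddev->ro
		 * does not matter: no writes are coming.
		 */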
7296 if (ro)
7297 goto unlock;
7298
7299
7300 if (mddev->ro != 1)
7301 goto unlock;
7302
7303
7304
7305
7306 if (mddev->pers) {
7307 err = restart_array(mddev);
7308 if (err == 0) {
7309 mddev->ro = 2;
7310 set_disk_ro(mddev->gendisk, 0);
7311 }
7312 }
7313 goto unlock;
7314 }
7315
7316
7317
7318
7319
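	/*
	 * The remaining ioctls change the state of the array, so switch
	 * an auto-read-only array back to read-write first and refuse
	 * genuinely read-only arrays.
	 */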
7320 if (mddev->ro && mddev->pers) {
7321 if (mddev->ro == 2) {
7322 mddev->ro = 0;
7323 sysfs_notify_dirent_safe(mddev->sysfs_state);
7324 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7325
7326
7327
7328
7329 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7330 mddev_unlock(mddev);
7331 wait_event(mddev->sb_wait,
7332 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7333 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7334 mddev_lock_nointr(mddev);
7335 }
7336 } else {
7337 err = -EROFS;
7338 goto unlock;
7339 }
7340 }
7341
7342 switch (cmd) {
7343 case ADD_NEW_DISK:
7344 {
7345 mdu_disk_info_t info;
7346 if (copy_from_user(&info, argp, sizeof(info)))
7347 err = -EFAULT;
7348 else
7349 err = add_new_disk(mddev, &info);
7350 goto unlock;
7351 }
7352
7353 case CLUSTERED_DISK_NACK:
7354 if (mddev_is_clustered(mddev))
7355 md_cluster_ops->new_disk_ack(mddev, false);
7356 else
7357 err = -EINVAL;
7358 goto unlock;
7359
7360 case HOT_ADD_DISK:
7361 err = hot_add_disk(mddev, new_decode_dev(arg));
7362 goto unlock;
7363
7364 case RUN_ARRAY:
7365 err = do_md_run(mddev);
7366 goto unlock;
7367
7368 case SET_BITMAP_FILE:
7369 err = set_bitmap_file(mddev, (int)arg);
7370 goto unlock;
7371
7372 default:
7373 err = -EINVAL;
7374 goto unlock;
7375 }
7376
7377unlock:
7378 if (mddev->hold_active == UNTIL_IOCTL &&
7379 err != -EINVAL)
7380 mddev->hold_active = 0;
7381 mddev_unlock(mddev);
7382out:
	if (did_set_md_closing)
7384 clear_bit(MD_CLOSING, &mddev->flags);
7385 return err;
7386}
7387#ifdef CONFIG_COMPAT
7388static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7389 unsigned int cmd, unsigned long arg)
7390{
7391 switch (cmd) {
7392 case HOT_REMOVE_DISK:
7393 case HOT_ADD_DISK:
7394 case SET_DISK_FAULTY:
7395 case SET_BITMAP_FILE:
7396
7397 break;
7398 default:
7399 arg = (unsigned long)compat_ptr(arg);
7400 break;
7401 }
7402
7403 return md_ioctl(bdev, mode, cmd, arg);
7404}
7405#endif
7406
7407static int md_open(struct block_device *bdev, fmode_t mode)
7408{
7412
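	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */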
7413 struct mddev *mddev = mddev_find(bdev->bd_dev);
7414 int err;
7415
7416 if (!mddev)
7417 return -ENODEV;
7418
7419 if (mddev->gendisk != bdev->bd_disk) {
7420
7421
7422
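		/*
		 * We are racing with mddev_put() which is discarding this
		 * bd_disk: wait until bd_disk is definitely gone, then
		 * retry the open from the top.
		 */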
7423 mddev_put(mddev);
7424
7425 flush_workqueue(md_misc_wq);
7426
7427 return -ERESTARTSYS;
7428 }
7429 BUG_ON(mddev != bdev->bd_disk->private_data);
7430
7431 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7432 goto out;
7433
7434 if (test_bit(MD_CLOSING, &mddev->flags)) {
7435 mutex_unlock(&mddev->open_mutex);
7436 err = -ENODEV;
7437 goto out;
7438 }
7439
7440 err = 0;
7441 atomic_inc(&mddev->openers);
7442 mutex_unlock(&mddev->open_mutex);
7443
7444 check_disk_change(bdev);
7445 out:
7446 if (err)
7447 mddev_put(mddev);
7448 return err;
7449}
7450
7451static void md_release(struct gendisk *disk, fmode_t mode)
7452{
7453 struct mddev *mddev = disk->private_data;
7454
7455 BUG_ON(!mddev);
7456 atomic_dec(&mddev->openers);
7457 mddev_put(mddev);
7458}
7459
7460static int md_media_changed(struct gendisk *disk)
7461{
7462 struct mddev *mddev = disk->private_data;
7463
7464 return mddev->changed;
7465}
7466
7467static int md_revalidate(struct gendisk *disk)
7468{
7469 struct mddev *mddev = disk->private_data;
7470
7471 mddev->changed = 0;
7472 return 0;
7473}
7474static const struct block_device_operations md_fops =
7475{
7476 .owner = THIS_MODULE,
7477 .open = md_open,
7478 .release = md_release,
7479 .ioctl = md_ioctl,
7480#ifdef CONFIG_COMPAT
7481 .compat_ioctl = md_compat_ioctl,
7482#endif
7483 .getgeo = md_getgeo,
7484 .media_changed = md_media_changed,
7485 .revalidate_disk= md_revalidate,
7486};
7487
7488static int md_thread(void *arg)
7489{
7490 struct md_thread *thread = arg;
7491
7503
7504 allow_signal(SIGKILL);
7505 while (!kthread_should_stop()) {
7511
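		/*
		 * Wait interruptibly so that we don't add to the load
		 * average; that means any pending signal must be flushed
		 * first.
		 */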
7512 if (signal_pending(current))
7513 flush_signals(current);
7514
7515 wait_event_interruptible_timeout
7516 (thread->wqueue,
7517 test_bit(THREAD_WAKEUP, &thread->flags)
7518 || kthread_should_stop() || kthread_should_park(),
7519 thread->timeout);
7520
7521 clear_bit(THREAD_WAKEUP, &thread->flags);
7522 if (kthread_should_park())
7523 kthread_parkme();
7524 if (!kthread_should_stop())
7525 thread->run(thread);
7526 }
7527
7528 return 0;
7529}
7530
7531void md_wakeup_thread(struct md_thread *thread)
7532{
7533 if (thread) {
7534 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7535 set_bit(THREAD_WAKEUP, &thread->flags);
7536 wake_up(&thread->wqueue);
7537 }
7538}
7539EXPORT_SYMBOL(md_wakeup_thread);
7540
7541struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7542 struct mddev *mddev, const char *name)
7543{
7544 struct md_thread *thread;
7545
7546 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7547 if (!thread)
7548 return NULL;
7549
7550 init_waitqueue_head(&thread->wqueue);
7551
7552 thread->run = run;
7553 thread->mddev = mddev;
7554 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7555 thread->tsk = kthread_run(md_thread, thread,
7556 "%s_%s",
7557 mdname(thread->mddev),
7558 name);
7559 if (IS_ERR(thread->tsk)) {
7560 kfree(thread);
7561 return NULL;
7562 }
7563 return thread;
7564}
7565EXPORT_SYMBOL(md_register_thread);
7566
7567void md_unregister_thread(struct md_thread **threadp)
7568{
7569 struct md_thread *thread = *threadp;
7570 if (!thread)
7571 return;
7572 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7573
7574
7575
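	/*
	 * Locking ensures that mddev_unlock does not wake up a
	 * non-existent thread.
	 */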
7576 spin_lock(&pers_lock);
7577 *threadp = NULL;
7578 spin_unlock(&pers_lock);
7579
7580 kthread_stop(thread->tsk);
7581 kfree(thread);
7582}
7583EXPORT_SYMBOL(md_unregister_thread);
7584
7585void md_error(struct mddev *mddev, struct md_rdev *rdev)
7586{
7587 if (!rdev || test_bit(Faulty, &rdev->flags))
7588 return;
7589
7590 if (!mddev->pers || !mddev->pers->error_handler)
7591 return;
	mddev->pers->error_handler(mddev, rdev);
7593 if (mddev->degraded)
7594 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7595 sysfs_notify_dirent_safe(rdev->sysfs_state);
7596 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7597 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7598 md_wakeup_thread(mddev->thread);
7599 if (mddev->event_work.func)
7600 queue_work(md_misc_wq, &mddev->event_work);
7601 md_new_event(mddev);
7602}
7603EXPORT_SYMBOL(md_error);
7604
7605
7606
7607static void status_unused(struct seq_file *seq)
7608{
7609 int i = 0;
7610 struct md_rdev *rdev;
7611
7612 seq_printf(seq, "unused devices: ");
7613
7614 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7615 char b[BDEVNAME_SIZE];
7616 i++;
7617 seq_printf(seq, "%s ",
7618 bdevname(rdev->bdev,b));
7619 }
7620 if (!i)
7621 seq_printf(seq, "<none>");
7622
7623 seq_printf(seq, "\n");
7624}
7625
7626static int status_resync(struct seq_file *seq, struct mddev *mddev)
7627{
7628 sector_t max_sectors, resync, res;
7629 unsigned long dt, db;
7630 sector_t rt;
7631 int scale;
7632 unsigned int per_milli;
7633
7634 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7635 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7636 max_sectors = mddev->resync_max_sectors;
7637 else
7638 max_sectors = mddev->dev_sectors;
7639
7640 resync = mddev->curr_resync;
7641 if (resync <= 3) {
7642 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7643
7644 resync = max_sectors;
7645 } else if (resync > max_sectors)
7646 resync = max_sectors;
7647 else
7648 resync -= atomic_read(&mddev->recovery_active);
7649
7650 if (resync == 0) {
7651 if (mddev->recovery_cp < MaxSector) {
7652 seq_printf(seq, "\tresync=PENDING");
7653 return 1;
7654 }
7655 return 0;
7656 }
7657 if (resync < 3) {
7658 seq_printf(seq, "\tresync=DELAYED");
7659 return 1;
7660 }
7661
7662 WARN_ON(max_sectors == 0);
7663
7664
7665
7666
7667
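	/*
	 * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t
	 * and (max_sectors>>scale) fits in a u32, as sector_div()
	 * requires; it must therefore be at least 10.
	 */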
7668 scale = 10;
7669 if (sizeof(sector_t) > sizeof(unsigned long)) {
7670 while ( max_sectors/2 > (1ULL<<(scale+32)))
7671 scale++;
7672 }
7673 res = (resync>>scale)*1000;
7674 sector_div(res, (u32)((max_sectors>>scale)+1));
7675
7676 per_milli = res;
7677 {
7678 int i, x = per_milli/50, y = 20-x;
7679 seq_printf(seq, "[");
7680 for (i = 0; i < x; i++)
7681 seq_printf(seq, "=");
7682 seq_printf(seq, ">");
7683 for (i = 0; i < y; i++)
7684 seq_printf(seq, ".");
7685 seq_printf(seq, "] ");
7686 }
7687 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7688 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7689 "reshape" :
7690 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7691 "check" :
7692 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7693 "resync" : "recovery"))),
7694 per_milli/10, per_milli % 10,
7695 (unsigned long long) resync/2,
7696 (unsigned long long) max_sectors/2);
7697
7711
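	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, so divide before multiplying in case the
	 * value is close to the limit; the divisor (db) is scaled by 32
	 * to keep precision near the end of the resync.
	 */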
7712 dt = ((jiffies - mddev->resync_mark) / HZ);
7713 if (!dt) dt++;
7714 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7715 - mddev->resync_mark_cnt;
7716
7717 rt = max_sectors - resync;
7718 sector_div(rt, db/32+1);
7719 rt *= dt;
7720 rt >>= 5;
7721
7722 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7723 ((unsigned long)rt % 60)/6);
7724
7725 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7726 return 1;
7727}
7728
7729static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7730{
7731 struct list_head *tmp;
7732 loff_t l = *pos;
7733 struct mddev *mddev;
7734
7735 if (l >= 0x10000)
7736 return NULL;
7737 if (!l--)
7738
7739 return (void*)1;
7740
7741 spin_lock(&all_mddevs_lock);
7742 list_for_each(tmp,&all_mddevs)
7743 if (!l--) {
7744 mddev = list_entry(tmp, struct mddev, all_mddevs);
7745 mddev_get(mddev);
7746 spin_unlock(&all_mddevs_lock);
7747 return mddev;
7748 }
7749 spin_unlock(&all_mddevs_lock);
7750 if (!l--)
7751 return (void*)2;
7752 return NULL;
7753}
7754
7755static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7756{
7757 struct list_head *tmp;
7758 struct mddev *next_mddev, *mddev = v;
7759
7760 ++*pos;
7761 if (v == (void*)2)
7762 return NULL;
7763
7764 spin_lock(&all_mddevs_lock);
7765 if (v == (void*)1)
7766 tmp = all_mddevs.next;
7767 else
7768 tmp = mddev->all_mddevs.next;
7769 if (tmp != &all_mddevs)
7770 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7771 else {
7772 next_mddev = (void*)2;
7773 *pos = 0x10000;
7774 }
7775 spin_unlock(&all_mddevs_lock);
7776
7777 if (v != (void*)1)
7778 mddev_put(mddev);
7779 return next_mddev;
7780
7781}
7782
7783static void md_seq_stop(struct seq_file *seq, void *v)
7784{
7785 struct mddev *mddev = v;
7786
7787 if (mddev && v != (void*)1 && v != (void*)2)
7788 mddev_put(mddev);
7789}
7790
7791static int md_seq_show(struct seq_file *seq, void *v)
7792{
7793 struct mddev *mddev = v;
7794 sector_t sectors;
7795 struct md_rdev *rdev;
7796
7797 if (v == (void*)1) {
7798 struct md_personality *pers;
7799 seq_printf(seq, "Personalities : ");
7800 spin_lock(&pers_lock);
7801 list_for_each_entry(pers, &pers_list, list)
7802 seq_printf(seq, "[%s] ", pers->name);
7803
7804 spin_unlock(&pers_lock);
7805 seq_printf(seq, "\n");
7806 seq->poll_event = atomic_read(&md_event_count);
7807 return 0;
7808 }
7809 if (v == (void*)2) {
7810 status_unused(seq);
7811 return 0;
7812 }
7813
7814 spin_lock(&mddev->lock);
7815 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7816 seq_printf(seq, "%s : %sactive", mdname(mddev),
7817 mddev->pers ? "" : "in");
7818 if (mddev->pers) {
7819 if (mddev->ro==1)
7820 seq_printf(seq, " (read-only)");
7821 if (mddev->ro==2)
7822 seq_printf(seq, " (auto-read-only)");
7823 seq_printf(seq, " %s", mddev->pers->name);
7824 }
7825
7826 sectors = 0;
7827 rcu_read_lock();
7828 rdev_for_each_rcu(rdev, mddev) {
7829 char b[BDEVNAME_SIZE];
7830 seq_printf(seq, " %s[%d]",
7831 bdevname(rdev->bdev,b), rdev->desc_nr);
7832 if (test_bit(WriteMostly, &rdev->flags))
7833 seq_printf(seq, "(W)");
7834 if (test_bit(Journal, &rdev->flags))
7835 seq_printf(seq, "(J)");
7836 if (test_bit(Faulty, &rdev->flags)) {
7837 seq_printf(seq, "(F)");
7838 continue;
7839 }
7840 if (rdev->raid_disk < 0)
7841 seq_printf(seq, "(S)");
7842 if (test_bit(Replacement, &rdev->flags))
7843 seq_printf(seq, "(R)");
7844 sectors += rdev->sectors;
7845 }
7846 rcu_read_unlock();
7847
7848 if (!list_empty(&mddev->disks)) {
7849 if (mddev->pers)
7850 seq_printf(seq, "\n %llu blocks",
7851 (unsigned long long)
7852 mddev->array_sectors / 2);
7853 else
7854 seq_printf(seq, "\n %llu blocks",
7855 (unsigned long long)sectors / 2);
7856 }
7857 if (mddev->persistent) {
7858 if (mddev->major_version != 0 ||
7859 mddev->minor_version != 90) {
7860 seq_printf(seq," super %d.%d",
7861 mddev->major_version,
7862 mddev->minor_version);
7863 }
7864 } else if (mddev->external)
7865 seq_printf(seq, " super external:%s",
7866 mddev->metadata_type);
7867 else
7868 seq_printf(seq, " super non-persistent");
7869
7870 if (mddev->pers) {
7871 mddev->pers->status(seq, mddev);
7872 seq_printf(seq, "\n ");
7873 if (mddev->pers->sync_request) {
7874 if (status_resync(seq, mddev))
7875 seq_printf(seq, "\n ");
7876 }
7877 } else
7878 seq_printf(seq, "\n ");
7879
7880 bitmap_status(seq, mddev->bitmap);
7881
7882 seq_printf(seq, "\n");
7883 }
7884 spin_unlock(&mddev->lock);
7885
7886 return 0;
7887}
7888
7889static const struct seq_operations md_seq_ops = {
7890 .start = md_seq_start,
7891 .next = md_seq_next,
7892 .stop = md_seq_stop,
7893 .show = md_seq_show,
7894};
7895
7896static int md_seq_open(struct inode *inode, struct file *file)
7897{
7898 struct seq_file *seq;
7899 int error;
7900
7901 error = seq_open(file, &md_seq_ops);
7902 if (error)
7903 return error;
7904
7905 seq = file->private_data;
7906 seq->poll_event = atomic_read(&md_event_count);
7907 return error;
7908}
7909
7910static int md_unloading;
7911static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
7912{
7913 struct seq_file *seq = filp->private_data;
7914 __poll_t mask;
7915
7916 if (md_unloading)
7917 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
7918 poll_wait(filp, &md_event_waiters, wait);
7919
7920
7921 mask = EPOLLIN | EPOLLRDNORM;
7922
7923 if (seq->poll_event != atomic_read(&md_event_count))
7924 mask |= EPOLLERR | EPOLLPRI;
7925 return mask;
7926}
7927
7928static const struct file_operations md_seq_fops = {
7929 .owner = THIS_MODULE,
7930 .open = md_seq_open,
7931 .read = seq_read,
7932 .llseek = seq_lseek,
7933 .release = seq_release,
7934 .poll = mdstat_poll,
7935};
7936
7937int register_md_personality(struct md_personality *p)
7938{
7939 pr_debug("md: %s personality registered for level %d\n",
7940 p->name, p->level);
7941 spin_lock(&pers_lock);
7942 list_add_tail(&p->list, &pers_list);
7943 spin_unlock(&pers_lock);
7944 return 0;
7945}
7946EXPORT_SYMBOL(register_md_personality);
7947
7948int unregister_md_personality(struct md_personality *p)
7949{
7950 pr_debug("md: %s personality unregistered\n", p->name);
7951 spin_lock(&pers_lock);
7952 list_del_init(&p->list);
7953 spin_unlock(&pers_lock);
7954 return 0;
7955}
7956EXPORT_SYMBOL(unregister_md_personality);
7957
7958int register_md_cluster_operations(struct md_cluster_operations *ops,
7959 struct module *module)
7960{
7961 int ret = 0;
7962 spin_lock(&pers_lock);
7963 if (md_cluster_ops != NULL)
7964 ret = -EALREADY;
7965 else {
7966 md_cluster_ops = ops;
7967 md_cluster_mod = module;
7968 }
7969 spin_unlock(&pers_lock);
7970 return ret;
7971}
7972EXPORT_SYMBOL(register_md_cluster_operations);
7973
7974int unregister_md_cluster_operations(void)
7975{
7976 spin_lock(&pers_lock);
7977 md_cluster_ops = NULL;
7978 spin_unlock(&pers_lock);
7979 return 0;
7980}
7981EXPORT_SYMBOL(unregister_md_cluster_operations);
7982
7983int md_setup_cluster(struct mddev *mddev, int nodes)
7984{
7985 if (!md_cluster_ops)
7986 request_module("md-cluster");
7987 spin_lock(&pers_lock);
7988
7989 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
7991 spin_unlock(&pers_lock);
7992 return -ENOENT;
7993 }
7994 spin_unlock(&pers_lock);
7995
7996 return md_cluster_ops->join(mddev, nodes);
7997}
7998
7999void md_cluster_stop(struct mddev *mddev)
8000{
8001 if (!md_cluster_ops)
8002 return;
8003 md_cluster_ops->leave(mddev);
8004 module_put(md_cluster_mod);
8005}
8006
8007static int is_mddev_idle(struct mddev *mddev, int init)
8008{
8009 struct md_rdev *rdev;
8010 int idle;
8011 int curr_events;
8012
8013 idle = 1;
8014 rcu_read_lock();
8015 rdev_for_each_rcu(rdev, mddev) {
8016 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
8017 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
8018 (int)part_stat_read(&disk->part0, sectors[1]) -
8019 atomic_read(&disk->sync_io);
8041
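		/*
		 * sync_io is incremented when a sync request starts, while
		 * disk_stats counts completions, so resync activity makes
		 * curr_events smaller and ordinary IO makes it larger.
		 * Once curr_events exceeds last_events by more than a small
		 * margin, the array is treated as non-idle and the resync
		 * throttles back.
		 */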
8042 if (init || curr_events - rdev->last_events > 64) {
8043 rdev->last_events = curr_events;
8044 idle = 0;
8045 }
8046 }
8047 rcu_read_unlock();
8048 return idle;
8049}
8050
8051void md_done_sync(struct mddev *mddev, int blocks, int ok)
8052{
8053
8054 atomic_sub(blocks, &mddev->recovery_active);
8055 wake_up(&mddev->recovery_wait);
8056 if (!ok) {
8057 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8058 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8059 md_wakeup_thread(mddev->thread);
8060
8061 }
8062}
8063EXPORT_SYMBOL(md_done_sync);
8064
8071
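/*
 * md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. the 'active' flag in
 * the superblock) before writing, schedule a superblock update and wait
 * for it to complete.  A return of 'false' means the write wasn't
 * recorded and cannot proceed because the array is being suspended.
 */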
8072bool md_write_start(struct mddev *mddev, struct bio *bi)
8073{
8074 int did_change = 0;
8075
8076 if (bio_data_dir(bi) != WRITE)
8077 return true;
8078
8079 BUG_ON(mddev->ro == 1);
8080 if (mddev->ro == 2) {
8081
8082 mddev->ro = 0;
8083 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8084 md_wakeup_thread(mddev->thread);
8085 md_wakeup_thread(mddev->sync_thread);
8086 did_change = 1;
8087 }
8088 rcu_read_lock();
8089 percpu_ref_get(&mddev->writes_pending);
8090 smp_mb();
8091 if (mddev->safemode == 1)
8092 mddev->safemode = 0;
8093
8094 if (mddev->in_sync || mddev->sync_checkers) {
8095 spin_lock(&mddev->lock);
8096 if (mddev->in_sync) {
8097 mddev->in_sync = 0;
8098 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8099 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8100 md_wakeup_thread(mddev->thread);
8101 did_change = 1;
8102 }
8103 spin_unlock(&mddev->lock);
8104 }
8105 rcu_read_unlock();
8106 if (did_change)
8107 sysfs_notify_dirent_safe(mddev->sysfs_state);
8108 if (!mddev->has_superblocks)
8109 return true;
8110 wait_event(mddev->sb_wait,
8111 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8112 mddev->suspended);
8113 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8114 percpu_ref_put(&mddev->writes_pending);
8115 return false;
8116 }
8117 return true;
8118}
8119EXPORT_SYMBOL(md_write_start);
8120
8128
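/*
 * md_write_inc can only be called when md_write_start() has already
 * been called at least once for the current request.  It increments the
 * pending-write counter and is useful when a single request is split
 * into several parts; each part needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call inside a spinlocked
 * region.
 */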
8129void md_write_inc(struct mddev *mddev, struct bio *bi)
8130{
8131 if (bio_data_dir(bi) != WRITE)
8132 return;
8133 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8134 percpu_ref_get(&mddev->writes_pending);
8135}
8136EXPORT_SYMBOL(md_write_inc);
8137
8138void md_write_end(struct mddev *mddev)
8139{
8140 percpu_ref_put(&mddev->writes_pending);
8141
8142 if (mddev->safemode == 2)
8143 md_wakeup_thread(mddev->thread);
8144 else if (mddev->safemode_delay)
8145
8146
8147
8148 mod_timer(&mddev->safemode_timer,
8149 roundup(jiffies, mddev->safemode_delay) +
8150 mddev->safemode_delay);
8151}
8152
8153EXPORT_SYMBOL(md_write_end);
8154
8160
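/*
 * md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 */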
8161void md_allow_write(struct mddev *mddev)
8162{
8163 if (!mddev->pers)
8164 return;
8165 if (mddev->ro)
8166 return;
8167 if (!mddev->pers->sync_request)
8168 return;
8169
8170 spin_lock(&mddev->lock);
8171 if (mddev->in_sync) {
8172 mddev->in_sync = 0;
8173 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8174 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8175 if (mddev->safemode_delay &&
8176 mddev->safemode == 0)
8177 mddev->safemode = 1;
8178 spin_unlock(&mddev->lock);
8179 md_update_sb(mddev, 0);
8180 sysfs_notify_dirent_safe(mddev->sysfs_state);
8181
8182 wait_event(mddev->sb_wait,
8183 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8184 } else
8185 spin_unlock(&mddev->lock);
8186}
8187EXPORT_SYMBOL_GPL(md_allow_write);
8188
8189#define SYNC_MARKS 10
8190#define SYNC_MARK_STEP (3*HZ)
8191#define UPDATE_FREQUENCY (5*60*HZ)
8192void md_do_sync(struct md_thread *thread)
8193{
8194 struct mddev *mddev = thread->mddev;
8195 struct mddev *mddev2;
8196 unsigned int currspeed = 0,
8197 window;
	sector_t max_sectors, j, io_sectors, recovery_done;
	unsigned long mark[SYNC_MARKS];
	unsigned long update_time;
	sector_t mark_cnt[SYNC_MARKS];
	int last_mark, m;
8203 struct list_head *tmp;
8204 sector_t last_check;
8205 int skipped = 0;
8206 struct md_rdev *rdev;
8207 char *desc, *action = NULL;
8208 struct blk_plug plug;
8209 int ret;
8210
8211
8212 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8213 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8214 return;
8215 if (mddev->ro) {
8216 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8217 return;
8218 }
8219
8220 if (mddev_is_clustered(mddev)) {
8221 ret = md_cluster_ops->resync_start(mddev);
8222 if (ret)
8223 goto skip;
8224
8225 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8226 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8227 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8228 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8229 && ((unsigned long long)mddev->curr_resync_completed
8230 < (unsigned long long)mddev->resync_max_sectors))
8231 goto skip;
8232 }
8233
8234 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8235 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8236 desc = "data-check";
8237 action = "check";
8238 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8239 desc = "requested-resync";
8240 action = "repair";
8241 } else
8242 desc = "resync";
8243 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8244 desc = "reshape";
8245 else
8246 desc = "recovery";
8247
8248 mddev->last_sync_action = action ?: desc;
8249
8265
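	/*
	 * curr_resync is overloaded here:
	 *   0     == not engaged in resync at all
	 *   2     == checking that there is no conflict with another sync
	 *   1     == like 2, but we have yielded to a conflicting resync
	 *   other == active in resync, at this many sectors
	 *
	 * Before starting, set curr_resync to 2 and check that every
	 * "conflicting" array has a curr_resync less than ours.  If one
	 * is the same or higher, wait on resync_wait; to avoid deadlock,
	 * drop to 1 when yielding (chosen arbitrarily by mddev address),
	 * which means checking must restart from the beginning.
	 */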
8266 do {
8267 int mddev2_minor = -1;
8268 mddev->curr_resync = 2;
8269
8270 try_again:
8271 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8272 goto skip;
8273 for_each_mddev(mddev2, tmp) {
8274 if (mddev2 == mddev)
8275 continue;
8276 if (!mddev->parallel_resync
8277 && mddev2->curr_resync
8278 && match_mddev_units(mddev, mddev2)) {
8279 DEFINE_WAIT(wq);
8280 if (mddev < mddev2 && mddev->curr_resync == 2) {
8281
8282 mddev->curr_resync = 1;
8283 wake_up(&resync_wait);
8284 }
8285 if (mddev > mddev2 && mddev->curr_resync == 1)
8286
8287
8288
8289 continue;
8290
8291
8292
8293
8294 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8295 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8296 mddev2->curr_resync >= mddev->curr_resync) {
8297 if (mddev2_minor != mddev2->md_minor) {
8298 mddev2_minor = mddev2->md_minor;
8299 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8300 desc, mdname(mddev),
8301 mdname(mddev2));
8302 }
8303 mddev_put(mddev2);
8304 if (signal_pending(current))
8305 flush_signals(current);
8306 schedule();
8307 finish_wait(&resync_wait, &wq);
8308 goto try_again;
8309 }
8310 finish_wait(&resync_wait, &wq);
8311 }
8312 }
8313 } while (mddev->curr_resync < 2);
8314
8315 j = 0;
8316 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8317
8318
8319
8320 max_sectors = mddev->resync_max_sectors;
8321 atomic64_set(&mddev->resync_mismatches, 0);
8322
8323 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8324 j = mddev->resync_min;
8325 else if (!mddev->bitmap)
8326 j = mddev->recovery_cp;
8327
8328 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8329 max_sectors = mddev->resync_max_sectors;
8330 else {
8331
8332 max_sectors = mddev->dev_sectors;
8333 j = MaxSector;
8334 rcu_read_lock();
8335 rdev_for_each_rcu(rdev, mddev)
8336 if (rdev->raid_disk >= 0 &&
8337 !test_bit(Journal, &rdev->flags) &&
8338 !test_bit(Faulty, &rdev->flags) &&
8339 !test_bit(In_sync, &rdev->flags) &&
8340 rdev->recovery_offset < j)
8341 j = rdev->recovery_offset;
8342 rcu_read_unlock();
8351
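		/*
		 * If there is a bitmap, make sure all writes that started
		 * before we added a spare complete before the recovery
		 * starts; otherwise a write could finish and set a bitmap
		 * bit after recovery has already checked and skipped that
		 * region.
		 */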
8352 if (mddev->bitmap) {
8353 mddev->pers->quiesce(mddev, 1);
8354 mddev->pers->quiesce(mddev, 0);
8355 }
8356 }
8357
8358 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8359 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8360 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8361 speed_max(mddev), desc);
8362
8363 is_mddev_idle(mddev, 1);
8364
8365 io_sectors = 0;
8366 for (m = 0; m < SYNC_MARKS; m++) {
8367 mark[m] = jiffies;
8368 mark_cnt[m] = io_sectors;
8369 }
8370 last_mark = 0;
8371 mddev->resync_mark = mark[last_mark];
8372 mddev->resync_mark_cnt = mark_cnt[last_mark];
8373
8374
8375
8376
8377 window = 32*(PAGE_SIZE/512);
8378 pr_debug("md: using %dk window, over a total of %lluk.\n",
8379 window/2, (unsigned long long)max_sectors/2);
8380
8381 atomic_set(&mddev->recovery_active, 0);
8382 last_check = 0;
8383
8384 if (j>2) {
8385 pr_debug("md: resuming %s of %s from checkpoint.\n",
8386 desc, mdname(mddev));
8387 mddev->curr_resync = j;
8388 } else
8389 mddev->curr_resync = 3;
8390 mddev->curr_resync_completed = j;
8391 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8392 md_new_event(mddev);
8393 update_time = jiffies;
8394
8395 blk_start_plug(&plug);
8396 while (j < max_sectors) {
8397 sector_t sectors;
8398
8399 skipped = 0;
8400
8401 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8402 ((mddev->curr_resync > mddev->curr_resync_completed &&
8403 (mddev->curr_resync - mddev->curr_resync_completed)
8404 > (max_sectors >> 4)) ||
8405 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8406 (j - mddev->curr_resync_completed)*2
8407 >= mddev->resync_max - mddev->curr_resync_completed ||
8408 mddev->curr_resync_completed > mddev->resync_max
8409 )) {
8410
8411 wait_event(mddev->recovery_wait,
8412 atomic_read(&mddev->recovery_active) == 0);
8413 mddev->curr_resync_completed = j;
8414 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8415 j > mddev->recovery_cp)
8416 mddev->recovery_cp = j;
8417 update_time = jiffies;
8418 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8419 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8420 }
8421
8422 while (j >= mddev->resync_max &&
8423 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8424
8425
8426
8427
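			/*
			 * resync_max is controlled by userspace, so we can
			 * block indefinitely; wait interruptibly to avoid
			 * triggering warnings.
			 */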
8428 flush_signals(current);
8429 wait_event_interruptible(mddev->recovery_wait,
8430 mddev->resync_max > j
8431 || test_bit(MD_RECOVERY_INTR,
8432 &mddev->recovery));
8433 }
8434
8435 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8436 break;
8437
8438 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8439 if (sectors == 0) {
8440 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8441 break;
8442 }
8443
8444 if (!skipped) {
8445 io_sectors += sectors;
8446 atomic_add(sectors, &mddev->recovery_active);
8447 }
8448
8449 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8450 break;
8451
8452 j += sectors;
8453 if (j > max_sectors)
8454
8455 j = max_sectors;
8456 if (j > 2)
8457 mddev->curr_resync = j;
8458 mddev->curr_mark_cnt = io_sectors;
8459 if (last_check == 0)
8460
8461
8462
8463 md_new_event(mddev);
8464
8465 if (last_check + window > io_sectors || j == max_sectors)
8466 continue;
8467
8468 last_check = io_sectors;
8469 repeat:
8470 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8471
8472 int next = (last_mark+1) % SYNC_MARKS;
8473
8474 mddev->resync_mark = mark[next];
8475 mddev->resync_mark_cnt = mark_cnt[next];
8476 mark[next] = jiffies;
8477 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8478 last_mark = next;
8479 }
8480
8481 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8482 break;
8483
8491
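		/*
		 * This loop exits only if we are slower than the 'hard'
		 * speed limit, or the system was IO-idle for a jiffy.
		 * The system might be busy CPU-wise, but we only care
		 * about not overloading the IO subsystem.
		 */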
8492 cond_resched();
8493
8494 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8495 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8496 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8497
8498 if (currspeed > speed_min(mddev)) {
8499 if (currspeed > speed_max(mddev)) {
8500 msleep(500);
8501 goto repeat;
8502 }
8503 if (!is_mddev_idle(mddev, 0)) {
8504
8505
8506
8507
8508 wait_event(mddev->recovery_wait,
8509 !atomic_read(&mddev->recovery_active));
8510 }
8511 }
8512 }
	pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
		test_bit(MD_RECOVERY_INTR, &mddev->recovery)
		? "interrupted" : "done");
8516
8517
8518
8519 blk_finish_plug(&plug);
8520 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8521
8522 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8523 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8524 mddev->curr_resync > 3) {
8525 mddev->curr_resync_completed = mddev->curr_resync;
8526 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8527 }
8528 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8529
8530 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8531 mddev->curr_resync > 3) {
8532 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8533 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8534 if (mddev->curr_resync >= mddev->recovery_cp) {
8535 pr_debug("md: checkpointing %s of %s.\n",
8536 desc, mdname(mddev));
8537 if (test_bit(MD_RECOVERY_ERROR,
8538 &mddev->recovery))
8539 mddev->recovery_cp =
8540 mddev->curr_resync_completed;
8541 else
8542 mddev->recovery_cp =
8543 mddev->curr_resync;
8544 }
8545 } else
8546 mddev->recovery_cp = MaxSector;
8547 } else {
8548 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8549 mddev->curr_resync = MaxSector;
8550 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8551 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
8552 rcu_read_lock();
8553 rdev_for_each_rcu(rdev, mddev)
8554 if (rdev->raid_disk >= 0 &&
8555 mddev->delta_disks >= 0 &&
8556 !test_bit(Journal, &rdev->flags) &&
8557 !test_bit(Faulty, &rdev->flags) &&
8558 !test_bit(In_sync, &rdev->flags) &&
8559 rdev->recovery_offset < mddev->curr_resync)
8560 rdev->recovery_offset = mddev->curr_resync;
8561 rcu_read_unlock();
8562 }
8563 }
8564 }
8565 skip:
8566
8567
8568
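	/*
	 * Set CHANGE_PENDING here since another update may be needed, so
	 * other nodes are informed; it is harmless for non-clustered raid.
	 */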
8569 set_mask_bits(&mddev->sb_flags, 0,
8570 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8571
8572 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8573 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8574 mddev->delta_disks > 0 &&
8575 mddev->pers->finish_reshape &&
8576 mddev->pers->size &&
8577 mddev->queue) {
8578 mddev_lock_nointr(mddev);
8579 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
8580 mddev_unlock(mddev);
8581 set_capacity(mddev->gendisk, mddev->array_sectors);
8582 revalidate_disk(mddev->gendisk);
8583 }
8584
8585 spin_lock(&mddev->lock);
8586 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8587
8588 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8589 mddev->resync_min = 0;
8590 mddev->resync_max = MaxSector;
8591 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8592 mddev->resync_min = mddev->curr_resync_completed;
8593 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8594 mddev->curr_resync = 0;
8595 spin_unlock(&mddev->lock);
8596
8597 wake_up(&resync_wait);
8598 md_wakeup_thread(mddev->thread);
8599 return;
8600}
8601EXPORT_SYMBOL_GPL(md_do_sync);
8602
8603static int remove_and_add_spares(struct mddev *mddev,
8604 struct md_rdev *this)
8605{
8606 struct md_rdev *rdev;
8607 int spares = 0;
8608 int removed = 0;
8609 bool remove_some = false;
8610
8611 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
8612
8613 return 0;
8614
8615 rdev_for_each(rdev, mddev) {
8616 if ((this == NULL || rdev == this) &&
8617 rdev->raid_disk >= 0 &&
8618 !test_bit(Blocked, &rdev->flags) &&
8619 test_bit(Faulty, &rdev->flags) &&
8620 atomic_read(&rdev->nr_pending)==0) {
8621
8622
8623
8624
8625
8626 remove_some = true;
8627 set_bit(RemoveSynchronized, &rdev->flags);
8628 }
8629 }
8630
8631 if (remove_some)
8632 synchronize_rcu();
8633 rdev_for_each(rdev, mddev) {
8634 if ((this == NULL || rdev == this) &&
8635 rdev->raid_disk >= 0 &&
8636 !test_bit(Blocked, &rdev->flags) &&
8637 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8638 (!test_bit(In_sync, &rdev->flags) &&
8639 !test_bit(Journal, &rdev->flags))) &&
8640 atomic_read(&rdev->nr_pending)==0)) {
8641 if (mddev->pers->hot_remove_disk(
8642 mddev, rdev) == 0) {
8643 sysfs_unlink_rdev(mddev, rdev);
8644 rdev->raid_disk = -1;
8645 removed++;
8646 }
8647 }
8648 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8649 clear_bit(RemoveSynchronized, &rdev->flags);
8650 }
8651
8652 if (removed && mddev->kobj.sd)
8653 sysfs_notify(&mddev->kobj, NULL, "degraded");
8654
8655 if (this && removed)
8656 goto no_add;
8657
8658 rdev_for_each(rdev, mddev) {
8659 if (this && this != rdev)
8660 continue;
8661 if (test_bit(Candidate, &rdev->flags))
8662 continue;
8663 if (rdev->raid_disk >= 0 &&
8664 !test_bit(In_sync, &rdev->flags) &&
8665 !test_bit(Journal, &rdev->flags) &&
8666 !test_bit(Faulty, &rdev->flags))
8667 spares++;
8668 if (rdev->raid_disk >= 0)
8669 continue;
8670 if (test_bit(Faulty, &rdev->flags))
8671 continue;
8672 if (!test_bit(Journal, &rdev->flags)) {
8673 if (mddev->ro &&
8674 ! (rdev->saved_raid_disk >= 0 &&
8675 !test_bit(Bitmap_sync, &rdev->flags)))
8676 continue;
8677
8678 rdev->recovery_offset = 0;
8679 }
		if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
			/* failure to create the sysfs link is not fatal */
			if (sysfs_link_rdev(mddev, rdev))
				;
8684 if (!test_bit(Journal, &rdev->flags))
8685 spares++;
8686 md_new_event(mddev);
8687 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8688 }
8689 }
8690no_add:
8691 if (removed)
8692 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8693 return spares;
8694}
8695
8696static void md_start_sync(struct work_struct *ws)
8697{
8698 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8699
8700 mddev->sync_thread = md_register_thread(md_do_sync,
8701 mddev,
8702 "resync");
8703 if (!mddev->sync_thread) {
8704 pr_warn("%s: could not start resync thread...\n",
8705 mdname(mddev));
8706
8707 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8708 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8709 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8710 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8711 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8712 wake_up(&resync_wait);
8713 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8714 &mddev->recovery))
8715 if (mddev->sysfs_action)
8716 sysfs_notify_dirent_safe(mddev->sysfs_action);
8717 } else
8718 md_wakeup_thread(mddev->sync_thread);
8719 sysfs_notify_dirent_safe(mddev->sysfs_action);
8720 md_new_event(mddev);
8721}
8722
8744
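/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself; when resync is needed it sets
 * MD_RECOVERY_RUNNING in ->recovery and forks off ->sync_thread to do
 * the work.  When that thread finishes it sets MD_RECOVERY_DONE and
 * wakes this thread, which reaps it and finishes up.
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it;
 *  2/ if a recovery thread is running, don't disturb it;
 *  3/ if recovery has finished, clean up, possibly marking spares active;
 *  4/ if there are any faulty devices, remove them;
 *  5/ if the array is degraded, try to add spares;
 *  6/ if the array has spares or is not in-sync, start a resync thread.
 */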
8745void md_check_recovery(struct mddev *mddev)
8746{
8747 if (mddev->suspended)
8748 return;
8749
8750 if (mddev->bitmap)
8751 bitmap_daemon_work(mddev);
8752
8753 if (signal_pending(current)) {
8754 if (mddev->pers->sync_request && !mddev->external) {
8755 pr_debug("md: %s in immediate safe mode\n",
8756 mdname(mddev));
8757 mddev->safemode = 2;
8758 }
8759 flush_signals(current);
8760 }
8761
8762 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8763 return;
8764 if ( ! (
8765 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8766 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8767 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8768 (mddev->external == 0 && mddev->safemode == 1) ||
8769 (mddev->safemode == 2
8770 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8771 ))
8772 return;
8773
8774 if (mddev_trylock(mddev)) {
8775 int spares = 0;
8776
8777 if (!mddev->external && mddev->safemode == 1)
8778 mddev->safemode = 0;
8779
8780 if (mddev->ro) {
8781 struct md_rdev *rdev;
8782 if (!mddev->external && mddev->in_sync)
8783
8784
8785
8786
8787
8788 rdev_for_each(rdev, mddev)
8789 clear_bit(Blocked, &rdev->flags);
8790
8791
8792
8793
8794
8795
8796
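			/*
			 * On a read-only array we can remove failed devices
			 * and add devices that are already in-sync; such
			 * spares can be activated immediately.
			 */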
8797 remove_and_add_spares(mddev, NULL);
8798
8799
8800
8801 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8802 md_reap_sync_thread(mddev);
8803 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8804 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8805 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8806 goto unlock;
8807 }
8808
8809 if (mddev_is_clustered(mddev)) {
8810 struct md_rdev *rdev;
8811
8812
8813
8814 rdev_for_each(rdev, mddev) {
8815 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8816 rdev->raid_disk < 0)
8817 md_kick_rdev_from_array(rdev);
8818 }
8819 }
8820
8821 if (!mddev->external && !mddev->in_sync) {
8822 spin_lock(&mddev->lock);
8823 set_in_sync(mddev);
8824 spin_unlock(&mddev->lock);
8825 }
8826
8827 if (mddev->sb_flags)
8828 md_update_sb(mddev, 0);
8829
8830 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8831 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
8832
8833 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8834 goto unlock;
8835 }
8836 if (mddev->sync_thread) {
8837 md_reap_sync_thread(mddev);
8838 goto unlock;
8839 }
8840
8841
8842
8843 mddev->curr_resync_completed = 0;
8844 spin_lock(&mddev->lock);
8845 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8846 spin_unlock(&mddev->lock);
8847
8848
8849
8850 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8851 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8852
8853 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8854 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8855 goto not_running;
8862
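		/*
		 * No recovery is running: remove any failed drives, then
		 * add spares if possible.  Spares are also removed and
		 * re-added, to allow the personality to fail the re-add.
		 */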
8863 if (mddev->reshape_position != MaxSector) {
8864 if (mddev->pers->check_reshape == NULL ||
8865 mddev->pers->check_reshape(mddev) != 0)
8866
8867 goto not_running;
8868 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8869 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8870 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8871 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8872 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8873 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8874 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8875 } else if (mddev->recovery_cp < MaxSector) {
8876 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8877 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8878 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
8879
8880 goto not_running;
8881
8882 if (mddev->pers->sync_request) {
8883 if (spares) {
8884
8885
8886
8887
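				/*
				 * We are adding devices to an array with a
				 * bitmap, so make sure all bitmap pages get
				 * written out.
				 */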
8888 bitmap_write_all(mddev->bitmap);
8889 }
8890 INIT_WORK(&mddev->del_work, md_start_sync);
8891 queue_work(md_misc_wq, &mddev->del_work);
8892 goto unlock;
8893 }
8894 not_running:
8895 if (!mddev->sync_thread) {
8896 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8897 wake_up(&resync_wait);
8898 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8899 &mddev->recovery))
8900 if (mddev->sysfs_action)
8901 sysfs_notify_dirent_safe(mddev->sysfs_action);
8902 }
8903 unlock:
8904 wake_up(&mddev->sb_wait);
8905 mddev_unlock(mddev);
8906 } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
8907
8908
8909
8910 set_bit(MD_UPDATING_SB, &mddev->flags);
8911 smp_mb__after_atomic();
8912 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
8913 md_update_sb(mddev, 0);
8914 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
8915 wake_up(&mddev->sb_wait);
8916 }
8917}
8918EXPORT_SYMBOL(md_check_recovery);
8919
8920void md_reap_sync_thread(struct mddev *mddev)
8921{
8922 struct md_rdev *rdev;
8923
8924
8925 md_unregister_thread(&mddev->sync_thread);
8926 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8927 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8928
8929
8930 if (mddev->pers->spare_active(mddev)) {
8931 sysfs_notify(&mddev->kobj, NULL,
8932 "degraded");
8933 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8934 }
8935 }
8936 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8937 mddev->pers->finish_reshape)
8938 mddev->pers->finish_reshape(mddev);
8939
8940
8941
8942
8943 if (!mddev->degraded)
8944 rdev_for_each(rdev, mddev)
8945 rdev->saved_raid_disk = -1;
8946
8947 md_update_sb(mddev, 1);
8948
8949
8950
8951 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
8952 md_cluster_ops->resync_finish(mddev);
8953 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8954 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8955 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8956 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8957 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8958 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8959 wake_up(&resync_wait);
8960
8961 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8962 sysfs_notify_dirent_safe(mddev->sysfs_action);
8963 md_new_event(mddev);
8964 if (mddev->event_work.func)
8965 queue_work(md_misc_wq, &mddev->event_work);
8966}
8967EXPORT_SYMBOL(md_reap_sync_thread);
8968
8969void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8970{
8971 sysfs_notify_dirent_safe(rdev->sysfs_state);
8972 wait_event_timeout(rdev->blocked_wait,
8973 !test_bit(Blocked, &rdev->flags) &&
8974 !test_bit(BlockedBadBlocks, &rdev->flags),
8975 msecs_to_jiffies(5000));
8976 rdev_dec_pending(rdev, mddev);
8977}
8978EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8979
8980void md_finish_reshape(struct mddev *mddev)
8981{
8982
8983 struct md_rdev *rdev;
8984
8985 rdev_for_each(rdev, mddev) {
8986 if (rdev->data_offset > rdev->new_data_offset)
8987 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8988 else
8989 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8990 rdev->data_offset = rdev->new_data_offset;
8991 }
8992}
8993EXPORT_SYMBOL(md_finish_reshape);
8994
8995
8996
8997
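/* Bad block management */

/* Returns 1 on success, 0 on failure */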
8998int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8999 int is_new)
9000{
9001 struct mddev *mddev = rdev->mddev;
9002 int rv;
9003 if (is_new)
9004 s += rdev->new_data_offset;
9005 else
9006 s += rdev->data_offset;
9007 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9008 if (rv == 0) {
9009
9010 if (test_bit(ExternalBbl, &rdev->flags))
9011 sysfs_notify(&rdev->kobj, NULL,
9012 "unacknowledged_bad_blocks");
9013 sysfs_notify_dirent_safe(rdev->sysfs_state);
9014 set_mask_bits(&mddev->sb_flags, 0,
9015 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9016 md_wakeup_thread(rdev->mddev->thread);
9017 return 1;
9018 } else
9019 return 0;
9020}
9021EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9022
9023int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9024 int is_new)
9025{
9026 int rv;
9027 if (is_new)
9028 s += rdev->new_data_offset;
9029 else
9030 s += rdev->data_offset;
9031 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9032 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9033 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
9034 return rv;
9035}
9036EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9037
9038static int md_notify_reboot(struct notifier_block *this,
9039 unsigned long code, void *x)
9040{
9041 struct list_head *tmp;
9042 struct mddev *mddev;
9043 int need_delay = 0;
9044
9045 for_each_mddev(mddev, tmp) {
9046 if (mddev_trylock(mddev)) {
9047 if (mddev->pers)
9048 __md_stop_writes(mddev);
9049 if (mddev->persistent)
9050 mddev->safemode = 2;
9051 mddev_unlock(mddev);
9052 }
9053 need_delay = 1;
9054 }
9055
9056
9057
9058
9059
9060
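	/*
	 * Certain more exotic SCSI devices are known to be volatile with
	 * respect to too-early system reboots; give the arrays a moment
	 * to settle after stopping writes.
	 */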
9061 if (need_delay)
9062 mdelay(1000*1);
9063
9064 return NOTIFY_DONE;
9065}
9066
9067static struct notifier_block md_notifier = {
9068 .notifier_call = md_notify_reboot,
9069 .next = NULL,
9070 .priority = INT_MAX,
9071};
9072
9073static void md_geninit(void)
9074{
9075 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9076
9077 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
9078}
9079
static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
		goto err_md;

	if ((ret = register_blkdev(0, "mdp")) < 0)
		goto err_mdp;
	mdp_major = ret;

	blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
			    md_probe, NULL, NULL);
	blk_register_region(MKDEV(mdp_major, 0), 1UL << MINORBITS, THIS_MODULE,
			    md_probe, NULL, NULL);

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

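/* Fold superblock changes made by another cluster node (array size,
 * device roles, raid_disks, event count) into the local view of the array.
 */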
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If size is changed in another node then we need to
	 * do resize as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each(rdev2, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the role changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) { /* MD_DISK_ROLE_FAULTY */
				pr_info("md: Removing Candidate device %s because add failed\n",
					bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			} else
				clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/* got activated */
			if (rdev2->raid_disk == -1 &&
			    role != 0xffff /* MD_DISK_ROLE_SPARE */) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* wakeup mddev->thread here, so array could
				 * perform resync with the new activated disk */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* device faulty (or journal)
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
		update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/* Finally set the event to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

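/* Re-read the superblock of @rdev from disk into a fresh page,
 * restoring the old cached copy if the reload fails.
 */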
static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
			__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);

	/* Pick up any recovery progress recorded by the other node */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify(&mddev->kobj, NULL, "degraded");

	put_page(swapout);
	return 0;
}

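/* Called by md-cluster when another node has updated a superblock:
 * reload rdev @nr, apply the changes, then refresh the remaining rdevs.
 */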
void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
			break;
	}

	if (!rdev || rdev->desc_nr != nr) {
		pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdev's to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

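/* Record a device discovered at boot so autostart_arrays() can scan it. */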
void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

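/* Import every device remembered by md_autodetect_dev() and try to
 * assemble arrays from whatever superblocks they carry.
 */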
static void autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		/* md_import_device() can sleep, so drop the mutex around it */
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}

#endif /* !MODULE */

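/* Module unload: unregister everything md_init() set up, wake any
 * pollers of /proc/mdstat, and retire the remaining arrays.
 */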
static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	blk_unregister_region(MKDEV(MD_MAJOR, 0), 512);
	blk_unregister_region(MKDEV(mdp_major, 0), 1U << MINORBITS);

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}

static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);