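/*
 * md.c : Multiple Devices driver for Linux
 */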
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif

/*
 * pers_list is a list of registered personalities, protected by
 * pers_lock.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
struct module *md_cluster_mod;
EXPORT_SYMBOL(md_cluster_mod);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);

/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array. We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more _guaranteed_ speed. Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle. There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;

/*
 * Allocate or clone a bio from this mddev's private bio_set when one
 * exists; fall back to the global bio pools otherwise.
 */
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	struct bio *b;

	if (!mddev || !mddev->bio_set)
		return bio_alloc(gfp_mask, nr_iovecs);

	b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
	if (!b)
		return NULL;
	return b;
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
			    struct mddev *mddev)
{
	if (!mddev || !mddev->bio_set)
		return bio_clone(bio, gfp_mask);

	return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);

/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
 * Enables to iterate over all existing md arrays.
 * all_mddevs_lock protects this list.
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while still holding a
 * reference to the current mddev must mddev_put() it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
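
/*
 * Usage sketch: walk every active array, holding a temporary
 * reference across each iteration (do_something() is a placeholder):
 *
 *	struct mddev *mddev;
 *	struct list_head *tmp;
 *
 *	for_each_mddev(mddev, tmp)
 *		do_something(mddev);
 *
 * Code that breaks out of the loop early still holds a reference to
 * the current mddev and must drop it with mddev_put().
 */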

/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending 'background' functions.  In this case
 * we wait, or schedule the request to be retried later.
 */
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	struct mddev *mddev = q->queuedata;
	unsigned int sectors;
	int cpu;

	blk_queue_split(q, &bio, q->bio_split);

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}
	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_error = -EROFS;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}
	smp_rmb(); /* Ensure implications of 'active' are visible */
	rcu_read_lock();
	if (mddev->suspended) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!mddev->suspended)
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;
	mddev->pers->make_request(mddev, bio);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
	part_stat_unlock();

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);

	return BLK_QC_T_NONE;
}

/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);

	del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
}
EXPORT_SYMBOL_GPL(mddev_resume);
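
/*
 * Congestion reporting: a suspended array always reports itself
 * congested; otherwise the personality's ->congested method, if any,
 * decides.
 */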
int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}

/*
 * Generic flush handling for md
 */
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we've checked if we need to go again.
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bi->bi_bdev = rdev->bdev;
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	if (bio->bi_iter.bi_size == 0)
		/* an empty barrier - all done */
		bio_endio(bio);
	else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		mddev->pers->make_request(mddev, bio);
	}

	mddev->flush_bio = NULL;
	wake_up(&mddev->sb_wait);
}
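
/*
 * Flush sequence, in brief: md_flush_request() parks the incoming
 * REQ_PREFLUSH bio in ->flush_bio, submit_flushes() issues an empty
 * flush to every active rdev, and once they have all completed
 * md_submit_flush_data() resubmits the original bio with REQ_PREFLUSH
 * cleared (or completes it directly if it carried no data).
 */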

void md_flush_request(struct mddev *mddev, struct bio *bio)
{
	spin_lock_irq(&mddev->lock);
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio,
			    mddev->lock);
	mddev->flush_bio = bio;
	spin_unlock_irq(&mddev->lock);

	INIT_WORK(&mddev->flush_work, submit_flushes);
	queue_work(md_wq, &mddev->flush_work);
}
EXPORT_SYMBOL(md_flush_request);

void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct mddev *mddev = cb->data;
	md_wakeup_thread(mddev->thread);
	kfree(cb);
}
EXPORT_SYMBOL(md_unplug);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	struct bio_set *bs = NULL;

	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);
		bs = mddev->bio_set;
		mddev->bio_set = NULL;
		if (mddev->gendisk) {
			/* We did a probe so need to clean up.  Call
			 * queue_work inside the spinlock so that
			 * flush_workqueue() after mddev_find will
			 * succeed in waiting for the work to be done.
			 */
			INIT_WORK(&mddev->del_work, mddev_delayed_delete);
			queue_work(md_misc_wq, &mddev->del_work);
		} else
			kfree(mddev);
	}
	spin_unlock(&all_mddevs_lock);
	if (bs)
		bioset_free(bs);
}

static void md_safemode_timeout(unsigned long data);

void mddev_init(struct mddev *mddev)
{
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	setup_timer(&mddev->safemode_timer, md_safemode_timeout,
		    (unsigned long) mddev);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);
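
/*
 * Find or allocate the mddev for a given unit number.  A unit of 0
 * asks for a brand new, unused unit (the minor is picked starting at
 * 512); the kzalloc()-then-retry structure keeps the allocation out
 * of all_mddevs_lock.
 */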
static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev, *new = NULL;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1<<MdpMinorShift)-1);

 retry:
	spin_lock(&all_mddevs_lock);

	if (unit) {
		list_for_each_entry(mddev, &all_mddevs, all_mddevs)
			if (mddev->unit == unit) {
				mddev_get(mddev);
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return mddev;
			}

		if (new) {
			list_add(&new->all_mddevs, &all_mddevs);
			spin_unlock(&all_mddevs_lock);
			new->hold_active = UNTIL_IOCTL;
			return new;
		}
	} else if (new) {
		/* find an unused unit number */
		static int next_minor = 512;
		int start = next_minor;
		int is_free = 0;
		int dev = 0;
		while (!is_free) {
			dev = MKDEV(MD_MAJOR, next_minor);
			next_minor++;
			if (next_minor > MINORMASK)
				next_minor = 0;
			if (next_minor == start) {
				/* Oh dear, all in use. */
				spin_unlock(&all_mddevs_lock);
				kfree(new);
				return NULL;
			}

			is_free = 1;
			list_for_each_entry(mddev, &all_mddevs, all_mddevs)
				if (mddev->unit == dev) {
					is_free = 0;
					break;
				}
		}
		new->unit = dev;
		new->md_minor = MINOR(dev);
		new->hold_active = UNTIL_STOP;
		list_add(&new->all_mddevs, &all_mddevs);
		spin_unlock(&all_mddevs_lock);
		return new;
	}
	spin_unlock(&all_mddevs_lock);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;

	new->unit = unit;
	if (MAJOR(unit) == MD_MAJOR)
		new->md_minor = MINOR(unit);
	else
		new->md_minor = MINOR(unit) >> MdpMinorShift;

	mddev_init(new);

	goto retry;
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the removal is happening,
		 * and anything else which might set ->to_remove or
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				mddev->sysfs_action = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel)==0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_error) {
		pr_err("md: super_written gets error=%d\n", bio->bi_error);
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		   sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);

	atomic_inc(&rdev->nr_pending);

	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}
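
/*
 * Synchronous single-page IO helper for metadata and recovery reads.
 * With metadata_op the sector is relative to sb_start; otherwise it is
 * relative to the (old or new) data offset, honouring any reshape that
 * is in progress.
 */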
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
	int ret;

	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
		rdev->meta_bdev : rdev->bdev;
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_error;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev,b));
	return -EINVAL;
}

static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}
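
/*
 * md_csum_fold() folds a 32-bit sum down to 16 bits, adding the carry
 * back in, which is the comparison form used when checking v0.90
 * superblock checksums below.
 */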
static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32*)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * testing, and that removes any differences).  However as we
	 * know that csum_partial always returned a 16bit value on
	 * alphas, do a fold to maximise conformity to previous behaviour.
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}

/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface to them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * reading and updating of superblocks.
 * Interface methods are:
 *   int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
 *      loads and validates a superblock on dev.
 *      if refdev != NULL, compare superblocks on both devices
 *    Return:
 *      0 - dev has a superblock that is compatible with refdev
 *      1 - dev has a superblock that is compatible and newer than refdev
 *          so dev should be used as the refdev in future
 *     -EINVAL superblock incompatible or invalid
 *     -othererror e.g. -EIO
 *
 *   int validate_super(struct mddev *mddev, struct md_rdev *dev)
 *      Verify that dev is acceptable into mddev.
 *       The first time, mddev->raid_disks will be 0, and data from
 *       dev should be merged in.  Subsequent calls check that dev
 *       is new enough.  Return 0 or -EINVAL
 *
 *   void sync_super(struct mddev *mddev, struct md_rdev *dev)
 *     Update the superblock for rdev with data in mddev
 *     This does not write to disc.
 *
 */
struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};

/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);

/*
 * load_super for 0.90.0
 */
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev,b2));
			goto abort;
		}
		if (!sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)rdev->sectors >= (2ULL << 32) &&
	    sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}

/*
 * validate_super for 0.90.0
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}

/*
 * sync_super for 0.90.0
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words  = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}

/*
 * rdev_size_change for 0.90.0
 */
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
	    rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}

/*
 * version 1 superblock
 */
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32*)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16*) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}
1396
1397static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1398{
1399 struct mdp_superblock_1 *sb;
1400 int ret;
1401 sector_t sb_start;
1402 sector_t sectors;
1403 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1404 int bmask;
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414 switch(minor_version) {
1415 case 0:
1416 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1417 sb_start -= 8*2;
1418 sb_start &= ~(sector_t)(4*2-1);
1419 break;
1420 case 1:
1421 sb_start = 0;
1422 break;
1423 case 2:
1424 sb_start = 8;
1425 break;
1426 default:
1427 return -EINVAL;
1428 }
1429 rdev->sb_start = sb_start;
1430
1431
1432
1433
1434 ret = read_disk_sb(rdev, 4096);
1435 if (ret) return ret;
1436
1437 sb = page_address(rdev->sb_page);
1438
1439 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1440 sb->major_version != cpu_to_le32(1) ||
1441 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1442 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1443 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1444 return -EINVAL;
1445
1446 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1447 pr_warn("md: invalid superblock checksum on %s\n",
1448 bdevname(rdev->bdev,b));
1449 return -EINVAL;
1450 }
1451 if (le64_to_cpu(sb->data_size) < 10) {
1452 pr_warn("md: data_size too small on %s\n",
1453 bdevname(rdev->bdev,b));
1454 return -EINVAL;
1455 }
1456 if (sb->pad0 ||
1457 sb->pad3[0] ||
1458 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1459
1460 return -EINVAL;
1461
1462 rdev->preferred_minor = 0xffff;
1463 rdev->data_offset = le64_to_cpu(sb->data_offset);
1464 rdev->new_data_offset = rdev->data_offset;
1465 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1466 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1467 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1468 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1469
1470 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1471 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1472 if (rdev->sb_size & bmask)
1473 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1474
1475 if (minor_version
1476 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1477 return -EINVAL;
1478 if (minor_version
1479 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1480 return -EINVAL;
1481
1482 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1483 rdev->desc_nr = -1;
1484 else
1485 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1486
1487 if (!rdev->bb_page) {
1488 rdev->bb_page = alloc_page(GFP_KERNEL);
1489 if (!rdev->bb_page)
1490 return -ENOMEM;
1491 }
1492 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1493 rdev->badblocks.count == 0) {
1494
1495
1496
1497 s32 offset;
1498 sector_t bb_sector;
1499 u64 *bbp;
1500 int i;
1501 int sectors = le16_to_cpu(sb->bblog_size);
1502 if (sectors > (PAGE_SIZE / 512))
1503 return -EINVAL;
1504 offset = le32_to_cpu(sb->bblog_offset);
1505 if (offset == 0)
1506 return -EINVAL;
1507 bb_sector = (long long)offset;
1508 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1509 rdev->bb_page, REQ_OP_READ, 0, true))
1510 return -EIO;
1511 bbp = (u64 *)page_address(rdev->bb_page);
1512 rdev->badblocks.shift = sb->bblog_shift;
1513 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1514 u64 bb = le64_to_cpu(*bbp);
1515 int count = bb & (0x3ff);
1516 u64 sector = bb >> 10;
1517 sector <<= sb->bblog_shift;
1518 count <<= sb->bblog_shift;
1519 if (bb + 1 == 0)
1520 break;
1521 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1522 return -EINVAL;
1523 }
1524 } else if (sb->bblog_offset != 0)
1525 rdev->badblocks.shift = 0;
1526
1527 if (!refdev) {
1528 ret = 1;
1529 } else {
1530 __u64 ev1, ev2;
1531 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1532
1533 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1534 sb->level != refsb->level ||
1535 sb->layout != refsb->layout ||
1536 sb->chunksize != refsb->chunksize) {
1537 pr_warn("md: %s has strangely different superblock to %s\n",
1538 bdevname(rdev->bdev,b),
1539 bdevname(refdev->bdev,b2));
1540 return -EINVAL;
1541 }
1542 ev1 = le64_to_cpu(sb->events);
1543 ev2 = le64_to_cpu(refsb->events);
1544
1545 if (ev1 > ev2)
1546 ret = 1;
1547 else
1548 ret = 0;
1549 }
1550 if (minor_version) {
1551 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1552 sectors -= rdev->data_offset;
1553 } else
1554 sectors = rdev->sb_start;
1555 if (sectors < le64_to_cpu(sb->data_size))
1556 return -EINVAL;
1557 rdev->sectors = le64_to_cpu(sb->data_size);
1558 return ret;
1559}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* make rdev->sb match mddev and rdev data. */

	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}
	/* Note: recovery_offset and journal_tail share space  */
	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks*/ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start;
		sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);
		max_sectors = rdev->sectors + sb_start - rdev->sb_start;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
		rdev->sb_start = sb_start;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on
	 * any metadata, so stay:
	 * 36K beyond start of superblock
	 * beyond end of badblocks
	 * beyond write-intent bitmap
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}

static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};
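
/*
 * super_types[] is indexed by mddev->major_version.  Per the interface
 * notes above, user-space writes the initial superblock; the kernel
 * only loads, validates and re-syncs it through these methods.
 */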
static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
	if (mddev->sync_super) {
		mddev->sync_super(mddev, rdev);
		return;
	}

	BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));

	super_types[mddev->major_version].sync_super(mddev, rdev);
}

static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
	struct md_rdev *rdev, *rdev2;

	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev1) {
		if (test_bit(Faulty, &rdev->flags) ||
		    test_bit(Journal, &rdev->flags) ||
		    rdev->raid_disk == -1)
			continue;
		rdev_for_each_rcu(rdev2, mddev2) {
			if (test_bit(Faulty, &rdev2->flags) ||
			    test_bit(Journal, &rdev2->flags) ||
			    rdev2->raid_disk == -1)
				continue;
			if (rdev->bdev->bd_contains ==
			    rdev2->bdev->bd_contains) {
				rcu_read_unlock();
				return 1;
			}
		}
	}
	rcu_read_unlock();
	return 0;
}

static LIST_HEAD(pending_raid_disks);

/*
 * Try to register data integrity profile for an mddev
 *
 * This is called when an array is started and after a disk has been kicked
 * from the array. It only succeeds if all working and active component devices
 * are integrity capable with matching profiles.
 */
int md_integrity_register(struct mddev *mddev)
{
	struct md_rdev *rdev, *reference = NULL;

	if (list_empty(&mddev->disks))
		return 0; /* nothing to do */
	if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
		return 0; /* shouldn't register, or already is */
	rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (rdev->raid_disk < 0)
			continue;
		if (!reference) {
			/* Use the first rdev as the reference */
			reference = rdev;
			continue;
		}
		/* does this rdev's profile match the reference profile? */
		if (blk_integrity_compare(reference->bdev->bd_disk,
				rdev->bdev->bd_disk) < 0)
			return -EINVAL;
	}
	if (!reference || !bdev_get_integrity(reference->bdev))
		return 0;
	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
	blk_integrity_register(mddev->gendisk,
			       bdev_get_integrity(reference->bdev));

	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
		pr_err("md: failed to create integrity pool for %s\n",
		       mdname(mddev));
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(md_integrity_register);

/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile
 */
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	struct blk_integrity *bi_rdev;
	struct blk_integrity *bi_mddev;
	char name[BDEVNAME_SIZE];

	if (!mddev->gendisk)
		return 0;

	bi_rdev = bdev_get_integrity(rdev->bdev);
	bi_mddev = blk_get_integrity(mddev->gendisk);

	if (!bi_mddev) /* nothing to do */
		return 0;

	if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
		pr_err("%s: incompatible integrity profile for %s\n",
		       mdname(mddev), bdevname(rdev->bdev, name));
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL(md_integrity_add_rdev);

static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	int err;

	/* prevent duplicates */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/* make sure rdev->sectors exceeds mddev->dev_sectors */
	if (!test_bit(Journal, &rdev->flags) &&
	    rdev->sectors &&
	    (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/* Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check number is not in use
	 */
	rcu_read_lock();
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers)
			choice = mddev->raid_disks;
		while (md_find_rdev_nr_rcu(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
			rcu_read_unlock();
			return -EBUSY;
		}
	}
	rcu_read_unlock();
	if (!test_bit(Journal, &rdev->flags) &&
	    mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		pr_warn("md: %s: array is limited to %d devices\n",
			mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	bdevname(rdev->bdev,b);
	strreplace(b, '/', '!');

	rdev->mddev = mddev;
	pr_debug("md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/* May as well allow recovery to be retried once */
	mddev->recovery_disabled++;

	return 0;

 fail:
	pr_warn("md: failed to register dev-%s for %s\n",
		b, mdname(mddev));
	return err;
}

static void md_delayed_delete(struct work_struct *ws)
{
	struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
	kobject_del(&rdev->kobj);
	kobject_put(&rdev->kobj);
}

static void unbind_rdev_from_array(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	rdev->badblocks.count = 0;
	/* Wait for any RCU readers traversing ->same_set to drain, then
	 * defer the kobject teardown to md_delayed_delete() on the
	 * workqueue so it happens outside any locks held here.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}

/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
	int err = 0;
	struct block_device *bdev;
	char b[BDEVNAME_SIZE];

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 shared ? (struct md_rdev *)lock_rdev : rdev);
	if (IS_ERR(bdev)) {
		pr_warn("md: could not open %s.\n", __bdevname(dev, b));
		return PTR_ERR(bdev);
	}
	rdev->bdev = bdev;
	return err;
}

static void unlock_rdev(struct md_rdev *rdev)
{
	struct block_device *bdev = rdev->bdev;
	rdev->bdev = NULL;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void md_autodetect_dev(dev_t dev);

static void export_rdev(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
	md_rdev_clear(rdev);
#ifndef MODULE
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}
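
/*
 * Fully detach an rdev from its array: unbind it from the mddev's
 * device list and sysfs, then release the underlying block device.
 */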
void md_kick_rdev_from_array(struct md_rdev *rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);

static void export_array(struct mddev *mddev)
{
	struct md_rdev *rdev;

	while (!list_empty(&mddev->disks)) {
		rdev = list_first_entry(&mddev->disks, struct md_rdev,
					same_set);
		md_kick_rdev_from_array(rdev);
	}
	mddev->raid_disks = 0;
	mddev->major_version = 0;
}

static void sync_sbs(struct mddev *mddev, int nospares)
{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
	struct md_rdev *rdev;
	rdev_for_each(rdev, mddev) {
		if (rdev->sb_events == mddev->events ||
		    (nospares &&
		     rdev->raid_disk < 0 &&
		     rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
			rdev->sb_loaded = 2;
		} else {
			sync_super(mddev, rdev);
			rdev->sb_loaded = 1;
		}
	}
}
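
/*
 * For clustered md: after another node has updated the metadata, decide
 * whether this node's copy actually differs enough to need writing out.
 */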
2275
2276static bool does_sb_need_changing(struct mddev *mddev)
2277{
2278 struct md_rdev *rdev;
2279 struct mdp_superblock_1 *sb;
2280 int role;
2281
2282
2283 rdev_for_each(rdev, mddev)
2284 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2285 break;
2286
2287
2288 if (!rdev)
2289 return false;
2290
2291 sb = page_address(rdev->sb_page);
2292
2293 rdev_for_each(rdev, mddev) {
2294 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2295 /* Device activated? */
2296 if (role == 0xffff && rdev->raid_disk >= 0 &&
2297 !test_bit(Faulty, &rdev->flags))
2298 return true;
2299 /* Device turned faulty? */
2300 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2301 return true;
2302 }
2303
2304 /* Check if any mddev parameters have changed */
2305 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2306 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2307 (mddev->layout != le64_to_cpu(sb->layout)) ||
2308 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2309 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2310 return true;
2311
2312 return false;
2313}
2314
2315void md_update_sb(struct mddev *mddev, int force_change)
2316{
2317 struct md_rdev *rdev;
2318 int sync_req;
2319 int nospares = 0;
2320 int any_badblocks_changed = 0;
2321 int ret = -1;
2322
2323 if (mddev->ro) {
2324 if (force_change)
2325 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2326 return;
2327 }
2328
2329repeat:
2330 if (mddev_is_clustered(mddev)) {
2331 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2332 force_change = 1;
2333 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2334 nospares = 1;
2335 ret = md_cluster_ops->metadata_update_start(mddev);
2336 /* Has someone else updated the superblock meanwhile? */
2337 if (!does_sb_need_changing(mddev)) {
2338 if (ret == 0)
2339 md_cluster_ops->metadata_update_cancel(mddev);
2340 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2341 BIT(MD_SB_CHANGE_DEVS) |
2342 BIT(MD_SB_CHANGE_CLEAN));
2343 return;
2344 }
2345 }
2346
2347 /* First make sure individual recovery_offsets are correct */
2348 rdev_for_each(rdev, mddev) {
2349 if (rdev->raid_disk >= 0 &&
2350 mddev->delta_disks >= 0 &&
2351 !test_bit(Journal, &rdev->flags) &&
2352 !test_bit(In_sync, &rdev->flags) &&
2353 mddev->curr_resync_completed > rdev->recovery_offset)
2354 rdev->recovery_offset = mddev->curr_resync_completed;
2355
2356 }
2357 if (!mddev->persistent) {
2358 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2359 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2360 if (!mddev->external) {
2361 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2362 rdev_for_each(rdev, mddev) {
2363 if (rdev->badblocks.changed) {
2364 rdev->badblocks.changed = 0;
2365 ack_all_badblocks(&rdev->badblocks);
2366 md_error(mddev, rdev);
2367 }
2368 clear_bit(Blocked, &rdev->flags);
2369 clear_bit(BlockedBadBlocks, &rdev->flags);
2370 wake_up(&rdev->blocked_wait);
2371 }
2372 }
2373 wake_up(&mddev->sb_wait);
2374 return;
2375 }
2376
2377 spin_lock(&mddev->lock);
2378
2379 mddev->utime = ktime_get_real_seconds();
2380
2381 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2382 force_change = 1;
2383 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2384 /* just a clean<->dirty transition, possibly leave spares alone,
2385 * though if events isn't the right even/odd, we will have to do
2386 * spares after all
2387 */
2388 nospares = 1;
2389 if (force_change)
2390 nospares = 0;
2391 if (mddev->degraded)
2392 /* If the array is degraded, then skipping spares is both
2393 * dangerous and fairly pointless.
2394 * Dangerous because a device that was removed from the array
2395 * might have an event count that still looks up-to-date,
2396 * so it can be re-added without a resync.
2397 * Pointless because if there are any spares to skip,
2398 * then a recovery will happen and soon that array
2399 * won't matter any more
2400 */
2401 nospares = 0;
2402
2403 sync_req = mddev->in_sync;
2404
2405 /* If this is just a dirty<->clean transition, and the array is clean
2406 * and 'events' is odd, we can roll back to the previous clean state */
2407 if (nospares
2408 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2409 && mddev->can_decrease_events
2410 && mddev->events != 1) {
2411 mddev->events--;
2412 mddev->can_decrease_events = 0;
2413 } else {
2414 /* otherwise we have to go forward and ... */
2415 mddev->events++;
2416 mddev->can_decrease_events = nospares;
2417 }
2418
2419 /*
2420 * This 64-bit counter should never wrap.
2421 * Either we are in around ~1 trillion A.C., assuming
2422 * 1 reboot per second, or we have a bug...
2423 */
2424 WARN_ON(mddev->events == 0);
2425
2426 rdev_for_each(rdev, mddev) {
2427 if (rdev->badblocks.changed)
2428 any_badblocks_changed++;
2429 if (test_bit(Faulty, &rdev->flags))
2430 set_bit(FaultRecorded, &rdev->flags);
2431 }
2432
2433 sync_sbs(mddev, nospares);
2434 spin_unlock(&mddev->lock);
2435
2436 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2437 mdname(mddev), mddev->in_sync);
2438
2439 if (mddev->queue)
2440 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2441rewrite:
2442 bitmap_update_sb(mddev->bitmap);
2443 rdev_for_each(rdev, mddev) {
2444 char b[BDEVNAME_SIZE];
2445
2446 if (rdev->sb_loaded != 1)
2447 continue;
2448
2449 if (!test_bit(Faulty, &rdev->flags)) {
2450 md_super_write(mddev,rdev,
2451 rdev->sb_start, rdev->sb_size,
2452 rdev->sb_page);
2453 pr_debug("md: (write) %s's sb offset: %llu\n",
2454 bdevname(rdev->bdev, b),
2455 (unsigned long long)rdev->sb_start);
2456 rdev->sb_events = mddev->events;
2457 if (rdev->badblocks.size) {
2458 md_super_write(mddev, rdev,
2459 rdev->badblocks.sector,
2460 rdev->badblocks.size << 9,
2461 rdev->bb_page);
2462 rdev->badblocks.size = 0;
2463 }
2464
2465 } else
2466 pr_debug("md: %s (skipping faulty)\n",
2467 bdevname(rdev->bdev, b));
2468
2469 if (mddev->level == LEVEL_MULTIPATH)
2470 /* only need to write one superblock... */
2471 break;
2472 }
2473 if (md_super_wait(mddev) < 0)
2474 goto rewrite;
2475 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
2476
2477 if (mddev_is_clustered(mddev) && ret == 0)
2478 md_cluster_ops->metadata_update_finish(mddev);
2479
2480 if (mddev->in_sync != sync_req ||
2481 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2482 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2483 /* have to write it out again */
2484 goto repeat;
2485 wake_up(&mddev->sb_wait);
2486 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2487 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2488
2489 rdev_for_each(rdev, mddev) {
2490 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2491 clear_bit(Blocked, &rdev->flags);
2492
2493 if (any_badblocks_changed)
2494 ack_all_badblocks(&rdev->badblocks);
2495 clear_bit(BlockedBadBlocks, &rdev->flags);
2496 wake_up(&rdev->blocked_wait);
2497 }
2498}
2499EXPORT_SYMBOL(md_update_sb);
2500
2501static int add_bound_rdev(struct md_rdev *rdev)
2502{
2503 struct mddev *mddev = rdev->mddev;
2504 int err = 0;
2505 bool add_journal = test_bit(Journal, &rdev->flags);
2506
2507 if (!mddev->pers->hot_remove_disk || add_journal) {
2508 /* If there is hot_add_disk but no hot_remove_disk
2509 * then added disks are for geometry changes,
2510 * and should be added immediately.
2511 */
2512 super_types[mddev->major_version].
2513 validate_super(mddev, rdev);
2514 if (add_journal)
2515 mddev_suspend(mddev);
2516 err = mddev->pers->hot_add_disk(mddev, rdev);
2517 if (add_journal)
2518 mddev_resume(mddev);
2519 if (err) {
2520 md_kick_rdev_from_array(rdev);
2521 return err;
2522 }
2523 }
2524 sysfs_notify_dirent_safe(rdev->sysfs_state);
2525
2526 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2527 if (mddev->degraded)
2528 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2529 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2530 md_new_event(mddev);
2531 md_wakeup_thread(mddev->thread);
2532 return 0;
2533}
2534
2535/* words written to sysfs files may, or may not, be \n terminated.
2536 * We want to accept either; for this we use cmd_match.
2537 */
2538static int cmd_match(const char *cmd, const char *str)
2539{
2540 /* See if cmd, written into a sysfs file, matches
2541 * str.  They must either be the same, or cmd can
2542 * have a trailing newline
2543 */
2544 while (*cmd && *str && *cmd == *str) {
2545 cmd++;
2546 str++;
2547 }
2548 if (*cmd == '\n')
2549 cmd++;
2550 if (*str || *cmd)
2551 return 0;
2552 return 1;
2553}
2554
2555struct rdev_sysfs_entry {
2556 struct attribute attr;
2557 ssize_t (*show)(struct md_rdev *, char *);
2558 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2559};
2560
2561static ssize_t
2562state_show(struct md_rdev *rdev, char *page)
2563{
2564 char *sep = ",";
2565 size_t len = 0;
2566 unsigned long flags = ACCESS_ONCE(rdev->flags);
2567
2568 if (test_bit(Faulty, &flags) ||
2569 (!test_bit(ExternalBbl, &flags) &&
2570 rdev->badblocks.unacked_exist))
2571 len += sprintf(page+len, "faulty%s", sep);
2572 if (test_bit(In_sync, &flags))
2573 len += sprintf(page+len, "in_sync%s", sep);
2574 if (test_bit(Journal, &flags))
2575 len += sprintf(page+len, "journal%s", sep);
2576 if (test_bit(WriteMostly, &flags))
2577 len += sprintf(page+len, "write_mostly%s", sep);
2578 if (test_bit(Blocked, &flags) ||
2579 (rdev->badblocks.unacked_exist
2580 && !test_bit(Faulty, &flags)))
2581 len += sprintf(page+len, "blocked%s", sep);
2582 if (!test_bit(Faulty, &flags) &&
2583 !test_bit(Journal, &flags) &&
2584 !test_bit(In_sync, &flags))
2585 len += sprintf(page+len, "spare%s", sep);
2586 if (test_bit(WriteErrorSeen, &flags))
2587 len += sprintf(page+len, "write_error%s", sep);
2588 if (test_bit(WantReplacement, &flags))
2589 len += sprintf(page+len, "want_replacement%s", sep);
2590 if (test_bit(Replacement, &flags))
2591 len += sprintf(page+len, "replacement%s", sep);
2592 if (test_bit(ExternalBbl, &flags))
2593 len += sprintf(page+len, "external_bbl%s", sep);
2594 if (test_bit(FailFast, &flags))
2595 len += sprintf(page+len, "failfast%s", sep);
2596
2597 if (len)
2598 len -= strlen(sep);
2599
2600 return len+sprintf(page+len, "\n");
2601}
2602
2603static ssize_t
2604state_store(struct md_rdev *rdev, const char *buf, size_t len)
2605{
2606 /* can be set to:
2607 *  faulty  - simulates an error
2608 *  remove  - disconnects the device
2609 *  writemostly - sets write_mostly
2610 *  -writemostly - clears write_mostly
2611 *  blocked - sets the Blocked flags
2612 *  -blocked - clears the Blocked and possibly simulates an error
2613 *  insync - sets Insync providing device isn't active
2614 *  -insync - clear Insync for a device with a slot assigned,
2615 *            so that it gets rebuilt based on bitmap
2616 *  write_error - sets WriteErrorSeen
2617 *  -write_error - clears WriteErrorSeen
2618 *  {,-}failfast - set/clear FailFast
2619 */
2620 int err = -EINVAL;
2621 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2622 md_error(rdev->mddev, rdev);
2623 if (test_bit(Faulty, &rdev->flags))
2624 err = 0;
2625 else
2626 err = -EBUSY;
2627 } else if (cmd_match(buf, "remove")) {
2628 if (rdev->mddev->pers) {
2629 clear_bit(Blocked, &rdev->flags);
2630 remove_and_add_spares(rdev->mddev, rdev);
2631 }
2632 if (rdev->raid_disk >= 0)
2633 err = -EBUSY;
2634 else {
2635 struct mddev *mddev = rdev->mddev;
2636 err = 0;
2637 if (mddev_is_clustered(mddev))
2638 err = md_cluster_ops->remove_disk(mddev, rdev);
2639
2640 if (err == 0) {
2641 md_kick_rdev_from_array(rdev);
2642 if (mddev->pers) {
2643 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2644 md_wakeup_thread(mddev->thread);
2645 }
2646 md_new_event(mddev);
2647 }
2648 }
2649 } else if (cmd_match(buf, "writemostly")) {
2650 set_bit(WriteMostly, &rdev->flags);
2651 err = 0;
2652 } else if (cmd_match(buf, "-writemostly")) {
2653 clear_bit(WriteMostly, &rdev->flags);
2654 err = 0;
2655 } else if (cmd_match(buf, "blocked")) {
2656 set_bit(Blocked, &rdev->flags);
2657 err = 0;
2658 } else if (cmd_match(buf, "-blocked")) {
2659 if (!test_bit(Faulty, &rdev->flags) &&
2660 !test_bit(ExternalBbl, &rdev->flags) &&
2661 rdev->badblocks.unacked_exist) {
2662 /* metadata handler doesn't understand badblocks,
2663 * so we need to fail the device
2664 */
2665 md_error(rdev->mddev, rdev);
2666 }
2667 clear_bit(Blocked, &rdev->flags);
2668 clear_bit(BlockedBadBlocks, &rdev->flags);
2669 wake_up(&rdev->blocked_wait);
2670 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2671 md_wakeup_thread(rdev->mddev->thread);
2672
2673 err = 0;
2674 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2675 set_bit(In_sync, &rdev->flags);
2676 err = 0;
2677 } else if (cmd_match(buf, "failfast")) {
2678 set_bit(FailFast, &rdev->flags);
2679 err = 0;
2680 } else if (cmd_match(buf, "-failfast")) {
2681 clear_bit(FailFast, &rdev->flags);
2682 err = 0;
2683 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
2684 !test_bit(Journal, &rdev->flags)) {
2685 if (rdev->mddev->pers == NULL) {
2686 clear_bit(In_sync, &rdev->flags);
2687 rdev->saved_raid_disk = rdev->raid_disk;
2688 rdev->raid_disk = -1;
2689 err = 0;
2690 }
2691 } else if (cmd_match(buf, "write_error")) {
2692 set_bit(WriteErrorSeen, &rdev->flags);
2693 err = 0;
2694 } else if (cmd_match(buf, "-write_error")) {
2695 clear_bit(WriteErrorSeen, &rdev->flags);
2696 err = 0;
2697 } else if (cmd_match(buf, "want_replacement")) {
2698 /* Any non-spare device that is not a replacement can
2699 * become want_replacement at any time, but we then need to
2700 * check if recovery is needed.
2701 */
2702 if (rdev->raid_disk >= 0 &&
2703 !test_bit(Journal, &rdev->flags) &&
2704 !test_bit(Replacement, &rdev->flags))
2705 set_bit(WantReplacement, &rdev->flags);
2706 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2707 md_wakeup_thread(rdev->mddev->thread);
2708 err = 0;
2709 } else if (cmd_match(buf, "-want_replacement")) {
2710 /* Clearing 'want_replacement' is always allowed.
2711 * Once a replacement starts it is too late though.
2712 */
2713 err = 0;
2714 clear_bit(WantReplacement, &rdev->flags);
2715 } else if (cmd_match(buf, "replacement")) {
2716 /* Can only set a device as a replacement when array has not
2717 * yet been started.  Once running, replacement is automatic
2718 * from spares, or by assigning 'slot'.
2719 */
2720 if (rdev->mddev->pers)
2721 err = -EBUSY;
2722 else {
2723 set_bit(Replacement, &rdev->flags);
2724 err = 0;
2725 }
2726 } else if (cmd_match(buf, "-replacement")) {
2727 /* Similarly, can only clear Replacement before start */
2728 if (rdev->mddev->pers)
2729 err = -EBUSY;
2730 else {
2731 clear_bit(Replacement, &rdev->flags);
2732 err = 0;
2733 }
2734 } else if (cmd_match(buf, "re-add")) {
2735 if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
2736 /* clear_bit is performed _after_ all the devices
2737 * have their local Faulty bit cleared. If any writes
2738 * happen in the meantime in the local node, they
2739 * will land in the local bitmap, which will be
2740 * synced by this node eventually
2741 */
2742 if (!mddev_is_clustered(rdev->mddev) ||
2743 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
2744 clear_bit(Faulty, &rdev->flags);
2745 err = add_bound_rdev(rdev);
2746 }
2747 } else
2748 err = -EBUSY;
2749 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
2750 set_bit(ExternalBbl, &rdev->flags);
2751 rdev->badblocks.shift = 0;
2752 err = 0;
2753 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
2754 clear_bit(ExternalBbl, &rdev->flags);
2755 err = 0;
2756 }
2757 if (!err)
2758 sysfs_notify_dirent_safe(rdev->sysfs_state);
2759 return err ? err : len;
2760}
2761static struct rdev_sysfs_entry rdev_state =
2762__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2763
2764static ssize_t
2765errors_show(struct md_rdev *rdev, char *page)
2766{
2767 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2768}
2769
2770static ssize_t
2771errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2772{
2773 unsigned int n;
2774 int rv;
2775
2776 rv = kstrtouint(buf, 10, &n);
2777 if (rv < 0)
2778 return rv;
2779 atomic_set(&rdev->corrected_errors, n);
2780 return len;
2781}
2782static struct rdev_sysfs_entry rdev_errors =
2783__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2784
2785static ssize_t
2786slot_show(struct md_rdev *rdev, char *page)
2787{
2788 if (test_bit(Journal, &rdev->flags))
2789 return sprintf(page, "journal\n");
2790 else if (rdev->raid_disk < 0)
2791 return sprintf(page, "none\n");
2792 else
2793 return sprintf(page, "%d\n", rdev->raid_disk);
2794}
2795
2796static ssize_t
2797slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2798{
2799 int slot;
2800 int err;
2801
2802 if (test_bit(Journal, &rdev->flags))
2803 return -EBUSY;
2804 if (strncmp(buf, "none", 4)==0)
2805 slot = -1;
2806 else {
2807 err = kstrtouint(buf, 10, (unsigned int *)&slot);
2808 if (err < 0)
2809 return err;
2810 }
2811 if (rdev->mddev->pers && slot == -1) {
2812 /* Setting 'slot' on an active array requires also
2813 * updating the 'rd%d' link, and communicating
2814 * with the personality with ->hot_*_disk.
2815 * For now we only support removing
2816 * failed/spare devices.  This normally happens
2817 * automatically, but not when the metadata is externally managed.
2818 */
2819 if (rdev->raid_disk == -1)
2820 return -EEXIST;
2821 /* personality does all needed checks */
2822 if (rdev->mddev->pers->hot_remove_disk == NULL)
2823 return -EINVAL;
2824 clear_bit(Blocked, &rdev->flags);
2825 remove_and_add_spares(rdev->mddev, rdev);
2826 if (rdev->raid_disk >= 0)
2827 return -EBUSY;
2828 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2829 md_wakeup_thread(rdev->mddev->thread);
2830 } else if (rdev->mddev->pers) {
2831 /* Activating a spare .. or possibly reactivating
2832 * if we ever get bitmaps working here.
2833 */
2834 int err;
2835
2836 if (rdev->raid_disk != -1)
2837 return -EBUSY;
2838
2839 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2840 return -EBUSY;
2841
2842 if (rdev->mddev->pers->hot_add_disk == NULL)
2843 return -EINVAL;
2844
2845 if (slot >= rdev->mddev->raid_disks &&
2846 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2847 return -ENOSPC;
2848
2849 rdev->raid_disk = slot;
2850 if (test_bit(In_sync, &rdev->flags))
2851 rdev->saved_raid_disk = slot;
2852 else
2853 rdev->saved_raid_disk = -1;
2854 clear_bit(In_sync, &rdev->flags);
2855 clear_bit(Bitmap_sync, &rdev->flags);
2856 err = rdev->mddev->pers->
2857 hot_add_disk(rdev->mddev, rdev);
2858 if (err) {
2859 rdev->raid_disk = -1;
2860 return err;
2861 } else
2862 sysfs_notify_dirent_safe(rdev->sysfs_state);
2863 if (sysfs_link_rdev(rdev->mddev, rdev))
2864 /* failure here is OK */;
2865 /* don't wakeup anyone, leave that to userspace. */
2866 } else {
2867 if (slot >= rdev->mddev->raid_disks &&
2868 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2869 return -ENOSPC;
2870 rdev->raid_disk = slot;
2871 /* assume it is working */
2872 clear_bit(Faulty, &rdev->flags);
2873 clear_bit(WriteMostly, &rdev->flags);
2874 set_bit(In_sync, &rdev->flags);
2875 sysfs_notify_dirent_safe(rdev->sysfs_state);
2876 }
2877 return len;
2878}
2879
2880static struct rdev_sysfs_entry rdev_slot =
2881__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2882
2883static ssize_t
2884offset_show(struct md_rdev *rdev, char *page)
2885{
2886 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2887}
2888
2889static ssize_t
2890offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2891{
2892 unsigned long long offset;
2893 if (kstrtoull(buf, 10, &offset) < 0)
2894 return -EINVAL;
2895 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2896 return -EBUSY;
2897 if (rdev->sectors && rdev->mddev->external)
2898 /* Must set offset before size, so overlap checks
2899 * can be sane */
2900 return -EBUSY;
2901 rdev->data_offset = offset;
2902 rdev->new_data_offset = offset;
2903 return len;
2904}
2905
2906static struct rdev_sysfs_entry rdev_offset =
2907__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2908
2909static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2910{
2911 return sprintf(page, "%llu\n",
2912 (unsigned long long)rdev->new_data_offset);
2913}
2914
2915static ssize_t new_offset_store(struct md_rdev *rdev,
2916 const char *buf, size_t len)
2917{
2918 unsigned long long new_offset;
2919 struct mddev *mddev = rdev->mddev;
2920
2921 if (kstrtoull(buf, 10, &new_offset) < 0)
2922 return -EINVAL;
2923
2924 if (mddev->sync_thread ||
2925 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
2926 return -EBUSY;
2927 if (new_offset == rdev->data_offset)
2928 /* reset is always permitted */
2929 ;
2930 else if (new_offset > rdev->data_offset) {
2931 /* must not push array size beyond rdev_sectors */
2932 if (new_offset - rdev->data_offset
2933 + mddev->dev_sectors > rdev->sectors)
2934 return -E2BIG;
2935 }
2936 /* A change of data offset implies a reshape, and must be
2937 * consistent with any reshape direction already established:
2938 * the data start may not move down while the array is
2939 * reshaping backwards ...
2940 */
2941 if (new_offset < rdev->data_offset &&
2942 mddev->reshape_backwards)
2943 return -EINVAL;
2944
2945 /* ... and it may only move up when the reshape direction
2946 * is already backwards.
2947 */
2948 if (new_offset > rdev->data_offset &&
2949 !mddev->reshape_backwards)
2950 return -EINVAL;
2951
2952 if (mddev->pers && mddev->persistent &&
2953 !super_types[mddev->major_version]
2954 .allow_new_offset(rdev, new_offset))
2955 return -E2BIG;
2956 rdev->new_data_offset = new_offset;
2957 if (new_offset > rdev->data_offset)
2958 mddev->reshape_backwards = 1;
2959 else if (new_offset < rdev->data_offset)
2960 mddev->reshape_backwards = 0;
2961
2962 return len;
2963}
2964static struct rdev_sysfs_entry rdev_new_offset =
2965__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2966
2967static ssize_t
2968rdev_size_show(struct md_rdev *rdev, char *page)
2969{
2970 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2971}
2972
2973static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2974{
2975 /* check if two start/length pairs overlap */
2976 if (s1+l1 <= s2)
2977 return 0;
2978 if (s2+l2 <= s1)
2979 return 0;
2980 return 1;
2981}
2982
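/* Parse a block count (1K units) from sysfs and convert it to
 * 512-byte sectors, rejecting values that would overflow during
 * the conversion.
 */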
2983static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2984{
2985 unsigned long long blocks;
2986 sector_t new;
2987
2988 if (kstrtoull(buf, 10, &blocks) < 0)
2989 return -EINVAL;
2990
2991 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2992 return -EINVAL; /* doubling would overflow unsigned long long */
2993
2994 new = blocks * 2;
2995 if (new != blocks * 2)
2996 return -EINVAL; /* sector count does not fit in sector_t */
2997
2998 *sectors = new;
2999 return 0;
3000}
3001
3002static ssize_t
3003rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3004{
3005 struct mddev *my_mddev = rdev->mddev;
3006 sector_t oldsectors = rdev->sectors;
3007 sector_t sectors;
3008
3009 if (test_bit(Journal, &rdev->flags))
3010 return -EBUSY;
3011 if (strict_blocks_to_sectors(buf, &sectors) < 0)
3012 return -EINVAL;
3013 if (rdev->data_offset != rdev->new_data_offset)
3014 return -EINVAL;
3015 if (my_mddev->pers && rdev->raid_disk >= 0) {
3016 if (my_mddev->persistent) {
3017 sectors = super_types[my_mddev->major_version].
3018 rdev_size_change(rdev, sectors);
3019 if (!sectors)
3020 return -EBUSY;
3021 } else if (!sectors)
3022 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3023 rdev->data_offset;
3024 if (!my_mddev->pers->resize)
3025 /* Cannot change size for RAID0 or Linear etc */
3026 return -EINVAL;
3027 }
3028 if (sectors < my_mddev->dev_sectors)
3029 return -EINVAL; /* component must fit device */
3030
3031 rdev->sectors = sectors;
3032 if (sectors > oldsectors && my_mddev->external) {
3033 /* Need to check that all other rdevs with the same
3034 * ->bdev do not overlap.  'rcu' is sufficient to walk
3035 * the rdev lists safely.
3036 * This check does not provide a hard guarantee, it
3037 * just helps avoid dangerous mistakes.
3038 */
3039 struct mddev *mddev;
3040 int overlap = 0;
3041 struct list_head *tmp;
3042
3043 rcu_read_lock();
3044 for_each_mddev(mddev, tmp) {
3045 struct md_rdev *rdev2;
3046
3047 rdev_for_each(rdev2, mddev)
3048 if (rdev->bdev == rdev2->bdev &&
3049 rdev != rdev2 &&
3050 overlaps(rdev->data_offset, rdev->sectors,
3051 rdev2->data_offset,
3052 rdev2->sectors)) {
3053 overlap = 1;
3054 break;
3055 }
3056 if (overlap) {
3057 mddev_put(mddev);
3058 break;
3059 }
3060 }
3061 rcu_read_unlock();
3062 if (overlap) {
3063 /* Someone else could have slipped in a size
3064 * change here, but doing so is just silly.
3065 * We put oldsectors back because we *know* it is
3066 * safe, and trust userspace not to race with
3067 * itself
3068 */
3069 rdev->sectors = oldsectors;
3070 return -EBUSY;
3071 }
3072 }
3073 return len;
3074}
3075
3076static struct rdev_sysfs_entry rdev_size =
3077__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3078
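/* 'recovery_start' records how far recovery of this device has
 * progressed; 'none' means the device is treated as fully in-sync.
 */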
3079static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3080{
3081 unsigned long long recovery_start = rdev->recovery_offset;
3082
3083 if (test_bit(In_sync, &rdev->flags) ||
3084 recovery_start == MaxSector)
3085 return sprintf(page, "none\n");
3086
3087 return sprintf(page, "%llu\n", recovery_start);
3088}
3089
3090static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3091{
3092 unsigned long long recovery_start;
3093
3094 if (cmd_match(buf, "none"))
3095 recovery_start = MaxSector;
3096 else if (kstrtoull(buf, 10, &recovery_start))
3097 return -EINVAL;
3098
3099 if (rdev->mddev->pers &&
3100 rdev->raid_disk >= 0)
3101 return -EBUSY;
3102
3103 rdev->recovery_offset = recovery_start;
3104 if (recovery_start == MaxSector)
3105 set_bit(In_sync, &rdev->flags);
3106 else
3107 clear_bit(In_sync, &rdev->flags);
3108 return len;
3109}
3110
3111static struct rdev_sysfs_entry rdev_recovery_start =
3112__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3113
3114/* sysfs access to bad-blocks list.
3115 * We present two files.
3116 * 'bad-blocks' lists sector numbers and lengths of ranges that
3117 *    are recorded as bad.  The list is truncated to fit within
3118 *    the one-page limit of sysfs.
3119 *    Writing "sector length" to this file adds an acknowledged
3120 *    bad block list.
3121 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
3122 *    been acknowledged.  Writing to this file adds bad blocks
3123 *    without acknowledging them.  This is largely for testing.
3124 */
3125static ssize_t bb_show(struct md_rdev *rdev, char *page)
3126{
3127 return badblocks_show(&rdev->badblocks, page, 0);
3128}
3129static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3130{
3131 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3132
3133 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3134 wake_up(&rdev->blocked_wait);
3135 return rv;
3136}
3137static struct rdev_sysfs_entry rdev_bad_blocks =
3138__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3139
3140static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3141{
3142 return badblocks_show(&rdev->badblocks, page, 1);
3143}
3144static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3145{
3146 return badblocks_store(&rdev->badblocks, page, len, 1);
3147}
3148static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3149__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3150
3151static struct attribute *rdev_default_attrs[] = {
3152 &rdev_state.attr,
3153 &rdev_errors.attr,
3154 &rdev_slot.attr,
3155 &rdev_offset.attr,
3156 &rdev_new_offset.attr,
3157 &rdev_size.attr,
3158 &rdev_recovery_start.attr,
3159 &rdev_bad_blocks.attr,
3160 &rdev_unack_bad_blocks.attr,
3161 NULL,
3162};
3163static ssize_t
3164rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3165{
3166 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3167 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3168
3169 if (!entry->show)
3170 return -EIO;
3171 if (!rdev->mddev)
3172 return -EBUSY;
3173 return entry->show(rdev, page);
3174}
3175
3176static ssize_t
3177rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3178 const char *page, size_t length)
3179{
3180 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3181 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3182 ssize_t rv;
3183 struct mddev *mddev = rdev->mddev;
3184
3185 if (!entry->store)
3186 return -EIO;
3187 if (!capable(CAP_SYS_ADMIN))
3188 return -EACCES;
3189 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3190 if (!rv) {
3191 if (rdev->mddev == NULL)
3192 rv = -EBUSY;
3193 else
3194 rv = entry->store(rdev, page, length);
3195 mddev_unlock(mddev);
3196 }
3197 return rv;
3198}
3199
3200static void rdev_free(struct kobject *ko)
3201{
3202 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3203 kfree(rdev);
3204}
3205static const struct sysfs_ops rdev_sysfs_ops = {
3206 .show = rdev_attr_show,
3207 .store = rdev_attr_store,
3208};
3209static struct kobj_type rdev_ktype = {
3210 .release = rdev_free,
3211 .sysfs_ops = &rdev_sysfs_ops,
3212 .default_attrs = rdev_default_attrs,
3213};
3214
3215int md_rdev_init(struct md_rdev *rdev)
3216{
3217 rdev->desc_nr = -1;
3218 rdev->saved_raid_disk = -1;
3219 rdev->raid_disk = -1;
3220 rdev->flags = 0;
3221 rdev->data_offset = 0;
3222 rdev->new_data_offset = 0;
3223 rdev->sb_events = 0;
3224 rdev->last_read_error = 0;
3225 rdev->sb_loaded = 0;
3226 rdev->bb_page = NULL;
3227 atomic_set(&rdev->nr_pending, 0);
3228 atomic_set(&rdev->read_errors, 0);
3229 atomic_set(&rdev->corrected_errors, 0);
3230
3231 INIT_LIST_HEAD(&rdev->same_set);
3232 init_waitqueue_head(&rdev->blocked_wait);
3233
3234 /* Add space to store bad block list.
3235 * This reserves the space even on arrays where it cannot
3236 * be used - I wonder if that matters
3237 */
3238 return badblocks_init(&rdev->badblocks, 0);
3239}
3240EXPORT_SYMBOL_GPL(md_rdev_init);
3241
3242/*
3243 * Import a device. If 'super_format' >= 0, then sanity check the superblock
3244 *
3245 * mark the device faulty if:
3246 *   - the device is nonexistent (zero size)
3247 *   - the device has no valid superblock
3248 *
3249 * a faulty rdev _never_ has rdev->sb set.
3250 */
3251static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3252{
3253 char b[BDEVNAME_SIZE];
3254 int err;
3255 struct md_rdev *rdev;
3256 sector_t size;
3257
3258 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3259 if (!rdev)
3260 return ERR_PTR(-ENOMEM);
3261
3262 err = md_rdev_init(rdev);
3263 if (err)
3264 goto abort_free;
3265 err = alloc_disk_sb(rdev);
3266 if (err)
3267 goto abort_free;
3268
3269 err = lock_rdev(rdev, newdev, super_format == -2);
3270 if (err)
3271 goto abort_free;
3272
3273 kobject_init(&rdev->kobj, &rdev_ktype);
3274
3275 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3276 if (!size) {
3277 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3278 bdevname(rdev->bdev,b));
3279 err = -EINVAL;
3280 goto abort_free;
3281 }
3282
3283 if (super_format >= 0) {
3284 err = super_types[super_format].
3285 load_super(rdev, NULL, super_minor);
3286 if (err == -EINVAL) {
3287 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3288 bdevname(rdev->bdev,b),
3289 super_format, super_minor);
3290 goto abort_free;
3291 }
3292 if (err < 0) {
3293 pr_warn("md: could not read %s's sb, not importing!\n",
3294 bdevname(rdev->bdev,b));
3295 goto abort_free;
3296 }
3297 }
3298
3299 return rdev;
3300
3301abort_free:
3302 if (rdev->bdev)
3303 unlock_rdev(rdev);
3304 md_rdev_clear(rdev);
3305 kfree(rdev);
3306 return ERR_PTR(err);
3307}
3308
3309/*
3310 * Check a full RAID array for plausibility
3311 */
3312
3313static void analyze_sbs(struct mddev *mddev)
3314{
3315 int i;
3316 struct md_rdev *rdev, *freshest, *tmp;
3317 char b[BDEVNAME_SIZE];
3318
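 /* First pass: read every superblock, remember the freshest one,
 * and kick out any device whose superblock is inconsistent.
 */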
3319 freshest = NULL;
3320 rdev_for_each_safe(rdev, tmp, mddev)
3321 switch (super_types[mddev->major_version].
3322 load_super(rdev, freshest, mddev->minor_version)) {
3323 case 1:
3324 freshest = rdev;
3325 break;
3326 case 0:
3327 break;
3328 default:
3329 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3330 bdevname(rdev->bdev,b));
3331 md_kick_rdev_from_array(rdev);
3332 }
3333
3334 super_types[mddev->major_version].
3335 validate_super(mddev, freshest);
3336
3337 i = 0;
3338 rdev_for_each_safe(rdev, tmp, mddev) {
3339 if (mddev->max_disks &&
3340 (rdev->desc_nr >= mddev->max_disks ||
3341 i > mddev->max_disks)) {
3342 pr_warn("md: %s: %s: only %d devices permitted\n",
3343 mdname(mddev), bdevname(rdev->bdev, b),
3344 mddev->max_disks);
3345 md_kick_rdev_from_array(rdev);
3346 continue;
3347 }
3348 if (rdev != freshest) {
3349 if (super_types[mddev->major_version].
3350 validate_super(mddev, rdev)) {
3351 pr_warn("md: kicking non-fresh %s from array!\n",
3352 bdevname(rdev->bdev,b));
3353 md_kick_rdev_from_array(rdev);
3354 continue;
3355 }
3356 }
3357 if (mddev->level == LEVEL_MULTIPATH) {
3358 rdev->desc_nr = i++;
3359 rdev->raid_disk = rdev->desc_nr;
3360 set_bit(In_sync, &rdev->flags);
3361 } else if (rdev->raid_disk >=
3362 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3363 !test_bit(Journal, &rdev->flags)) {
3364 rdev->raid_disk = -1;
3365 clear_bit(In_sync, &rdev->flags);
3366 }
3367 }
3368}
3369
3370/* Read a fixed-point number.
3371 * Numbers in sysfs attributes should be in "standard" units where
3372 * possible, so time should be in seconds.
3373 * However we internally use a much smaller unit such as
3374 * milliseconds or jiffies.
3375 * This function takes a decimal number with a possible fractional
3376 * component, and produces an integer which is the result of
3377 * multiplying that number by 10^'scale'.
3378 * all without any floating-point arithmetic.
3379 */
3380int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3381{
3382 unsigned long result = 0;
3383 long decimals = -1;
3384 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3385 if (*cp == '.')
3386 decimals = 0;
3387 else if (decimals < scale) {
3388 unsigned int value;
3389 value = *cp - '0';
3390 result = result * 10 + value;
3391 if (decimals >= 0)
3392 decimals++;
3393 }
3394 cp++;
3395 }
3396 if (*cp == '\n')
3397 cp++;
3398 if (*cp)
3399 return -EINVAL;
3400 if (decimals < 0)
3401 decimals = 0;
3402 while (decimals < scale) {
3403 result *= 10;
3404 decimals ++;
3405 }
3406 *res = result;
3407 return 0;
3408}
3409
3410static ssize_t
3411safe_delay_show(struct mddev *mddev, char *page)
3412{
3413 int msec = (mddev->safemode_delay*1000)/HZ;
3414 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3415}
3416static ssize_t
3417safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3418{
3419 unsigned long msec;
3420
3421 if (mddev_is_clustered(mddev)) {
3422 pr_warn("md: Safemode is disabled for clustered mode\n");
3423 return -EINVAL;
3424 }
3425
3426 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3427 return -EINVAL;
3428 if (msec == 0)
3429 mddev->safemode_delay = 0;
3430 else {
3431 unsigned long old_delay = mddev->safemode_delay;
3432 unsigned long new_delay = (msec*HZ)/1000;
3433
3434 if (new_delay == 0)
3435 new_delay = 1;
3436 mddev->safemode_delay = new_delay;
3437 if (new_delay < old_delay || old_delay == 0)
3438 mod_timer(&mddev->safemode_timer, jiffies+1);
3439 }
3440 return len;
3441}
3442static struct md_sysfs_entry md_safe_delay =
3443__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3444
3445static ssize_t
3446level_show(struct mddev *mddev, char *page)
3447{
3448 struct md_personality *p;
3449 int ret;
3450 spin_lock(&mddev->lock);
3451 p = mddev->pers;
3452 if (p)
3453 ret = sprintf(page, "%s\n", p->name);
3454 else if (mddev->clevel[0])
3455 ret = sprintf(page, "%s\n", mddev->clevel);
3456 else if (mddev->level != LEVEL_NONE)
3457 ret = sprintf(page, "%d\n", mddev->level);
3458 else
3459 ret = 0;
3460 spin_unlock(&mddev->lock);
3461 return ret;
3462}
3463
3464static ssize_t
3465level_store(struct mddev *mddev, const char *buf, size_t len)
3466{
3467 char clevel[16];
3468 ssize_t rv;
3469 size_t slen = len;
3470 struct md_personality *pers, *oldpers;
3471 long level;
3472 void *priv, *oldpriv;
3473 struct md_rdev *rdev;
3474
3475 if (slen == 0 || slen >= sizeof(clevel))
3476 return -EINVAL;
3477
3478 rv = mddev_lock(mddev);
3479 if (rv)
3480 return rv;
3481
3482 if (mddev->pers == NULL) {
3483 strncpy(mddev->clevel, buf, slen);
3484 if (mddev->clevel[slen-1] == '\n')
3485 slen--;
3486 mddev->clevel[slen] = 0;
3487 mddev->level = LEVEL_NONE;
3488 rv = len;
3489 goto out_unlock;
3490 }
3491 rv = -EROFS;
3492 if (mddev->ro)
3493 goto out_unlock;
3494
3495 /* request to change the personality.  Need to ensure:
3496 *  - array is not engaged in resync/recovery/reshape
3497 *  - old personality can be suspended
3498 *  - new personality will access other array.
3499 */
3500
3501 rv = -EBUSY;
3502 if (mddev->sync_thread ||
3503 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3504 mddev->reshape_position != MaxSector ||
3505 mddev->sysfs_active)
3506 goto out_unlock;
3507
3508 rv = -EINVAL;
3509 if (!mddev->pers->quiesce) {
3510 pr_warn("md: %s: %s does not support online personality change\n",
3511 mdname(mddev), mddev->pers->name);
3512 goto out_unlock;
3513 }
3514
3515 /* Now find the new personality */
3516 strncpy(clevel, buf, slen);
3517 if (clevel[slen-1] == '\n')
3518 slen--;
3519 clevel[slen] = 0;
3520 if (kstrtol(clevel, 10, &level))
3521 level = LEVEL_NONE;
3522
3523 if (request_module("md-%s", clevel) != 0)
3524 request_module("md-level-%s", clevel);
3525 spin_lock(&pers_lock);
3526 pers = find_pers(level, clevel);
3527 if (!pers || !try_module_get(pers->owner)) {
3528 spin_unlock(&pers_lock);
3529 pr_warn("md: personality %s not loaded\n", clevel);
3530 rv = -EINVAL;
3531 goto out_unlock;
3532 }
3533 spin_unlock(&pers_lock);
3534
3535 if (pers == mddev->pers) {
3536 /* Nothing to do! */
3537 module_put(pers->owner);
3538 rv = len;
3539 goto out_unlock;
3540 }
3541 if (!pers->takeover) {
3542 module_put(pers->owner);
3543 pr_warn("md: %s: %s does not support personality takeover\n",
3544 mdname(mddev), clevel);
3545 rv = -EINVAL;
3546 goto out_unlock;
3547 }
3548
3549 rdev_for_each(rdev, mddev)
3550 rdev->new_raid_disk = rdev->raid_disk;
3551
3552 /* ->takeover must set new_* and/or delta_disks
3553 * if it succeeds, and may set them when it fails.
3554 */
3555 priv = pers->takeover(mddev);
3556 if (IS_ERR(priv)) {
3557 mddev->new_level = mddev->level;
3558 mddev->new_layout = mddev->layout;
3559 mddev->new_chunk_sectors = mddev->chunk_sectors;
3560 mddev->raid_disks -= mddev->delta_disks;
3561 mddev->delta_disks = 0;
3562 mddev->reshape_backwards = 0;
3563 module_put(pers->owner);
3564 pr_warn("md: %s: %s would not accept array\n",
3565 mdname(mddev), clevel);
3566 rv = PTR_ERR(priv);
3567 goto out_unlock;
3568 }
3569
3570 /* Looks like we have a winner */
3571 mddev_suspend(mddev);
3572 mddev_detach(mddev);
3573
3574 spin_lock(&mddev->lock);
3575 oldpers = mddev->pers;
3576 oldpriv = mddev->private;
3577 mddev->pers = pers;
3578 mddev->private = priv;
3579 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3580 mddev->level = mddev->new_level;
3581 mddev->layout = mddev->new_layout;
3582 mddev->chunk_sectors = mddev->new_chunk_sectors;
3583 mddev->delta_disks = 0;
3584 mddev->reshape_backwards = 0;
3585 mddev->degraded = 0;
3586 spin_unlock(&mddev->lock);
3587
3588 if (oldpers->sync_request == NULL &&
3589 mddev->external) {
3590 /* We are converting from a no-redundancy array
3591 * to a redundancy array and metadata is managed
3592 * externally so we need to be sure that writes
3593 * won't block due to a need to transition
3594 *      clean->dirty
3595 * until external management is started.
3596 */
3597 mddev->in_sync = 0;
3598 mddev->safemode_delay = 0;
3599 mddev->safemode = 0;
3600 }
3601
3602 oldpers->free(mddev, oldpriv);
3603
3604 if (oldpers->sync_request == NULL &&
3605 pers->sync_request != NULL) {
3606 /* need to add the md_redundancy_group */
3607 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3608 pr_warn("md: cannot register extra attributes for %s\n",
3609 mdname(mddev));
3610 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3611 }
3612 if (oldpers->sync_request != NULL &&
3613 pers->sync_request == NULL) {
3614 /* need to remove the md_redundancy_group */
3615 if (mddev->to_remove == NULL)
3616 mddev->to_remove = &md_redundancy_group;
3617 }
3618
3619 module_put(oldpers->owner);
3620
3621 rdev_for_each(rdev, mddev) {
3622 if (rdev->raid_disk < 0)
3623 continue;
3624 if (rdev->new_raid_disk >= mddev->raid_disks)
3625 rdev->new_raid_disk = -1;
3626 if (rdev->new_raid_disk == rdev->raid_disk)
3627 continue;
3628 sysfs_unlink_rdev(mddev, rdev);
3629 }
3630 rdev_for_each(rdev, mddev) {
3631 if (rdev->raid_disk < 0)
3632 continue;
3633 if (rdev->new_raid_disk == rdev->raid_disk)
3634 continue;
3635 rdev->raid_disk = rdev->new_raid_disk;
3636 if (rdev->raid_disk < 0)
3637 clear_bit(In_sync, &rdev->flags);
3638 else {
3639 if (sysfs_link_rdev(mddev, rdev))
3640 pr_warn("md: cannot register rd%d for %s after level change\n",
3641 rdev->raid_disk, mdname(mddev));
3642 }
3643 }
3644
3645 if (pers->sync_request == NULL) {
3646 /* this is now an array without redundancy, so
3647 * it must always be in_sync
3648 */
3649 mddev->in_sync = 1;
3650 del_timer_sync(&mddev->safemode_timer);
3651 }
3652 blk_set_stacking_limits(&mddev->queue->limits);
3653 pers->run(mddev);
3654 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3655 mddev_resume(mddev);
3656 if (!mddev->thread)
3657 md_update_sb(mddev, 1);
3658 sysfs_notify(&mddev->kobj, NULL, "level");
3659 md_new_event(mddev);
3660 rv = len;
3661out_unlock:
3662 mddev_unlock(mddev);
3663 return rv;
3664}
3665
3666static struct md_sysfs_entry md_level =
3667__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3668
3669static ssize_t
3670layout_show(struct mddev *mddev, char *page)
3671{
3672 /* just a number, not meaningful for all levels */
3673 if (mddev->reshape_position != MaxSector &&
3674 mddev->layout != mddev->new_layout)
3675 return sprintf(page, "%d (%d)\n",
3676 mddev->new_layout, mddev->layout);
3677 return sprintf(page, "%d\n", mddev->layout);
3678}
3679
3680static ssize_t
3681layout_store(struct mddev *mddev, const char *buf, size_t len)
3682{
3683 unsigned int n;
3684 int err;
3685
3686 err = kstrtouint(buf, 10, &n);
3687 if (err < 0)
3688 return err;
3689 err = mddev_lock(mddev);
3690 if (err)
3691 return err;
3692
3693 if (mddev->pers) {
3694 if (mddev->pers->check_reshape == NULL)
3695 err = -EBUSY;
3696 else if (mddev->ro)
3697 err = -EROFS;
3698 else {
3699 mddev->new_layout = n;
3700 err = mddev->pers->check_reshape(mddev);
3701 if (err)
3702 mddev->new_layout = mddev->layout;
3703 }
3704 } else {
3705 mddev->new_layout = n;
3706 if (mddev->reshape_position == MaxSector)
3707 mddev->layout = n;
3708 }
3709 mddev_unlock(mddev);
3710 return err ?: len;
3711}
3712static struct md_sysfs_entry md_layout =
3713__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3714
3715static ssize_t
3716raid_disks_show(struct mddev *mddev, char *page)
3717{
3718 if (mddev->raid_disks == 0)
3719 return 0;
3720 if (mddev->reshape_position != MaxSector &&
3721 mddev->delta_disks != 0)
3722 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3723 mddev->raid_disks - mddev->delta_disks);
3724 return sprintf(page, "%d\n", mddev->raid_disks);
3725}
3726
3727static int update_raid_disks(struct mddev *mddev, int raid_disks);
3728
3729static ssize_t
3730raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3731{
3732 unsigned int n;
3733 int err;
3734
3735 err = kstrtouint(buf, 10, &n);
3736 if (err < 0)
3737 return err;
3738
3739 err = mddev_lock(mddev);
3740 if (err)
3741 return err;
3742 if (mddev->pers)
3743 err = update_raid_disks(mddev, n);
3744 else if (mddev->reshape_position != MaxSector) {
3745 struct md_rdev *rdev;
3746 int olddisks = mddev->raid_disks - mddev->delta_disks;
3747
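 /* While a reshape is in progress, only allow changes to the device
 * count that are consistent with the direction each device's data
 * offset is moving.
 */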
3748 err = -EINVAL;
3749 rdev_for_each(rdev, mddev) {
3750 if (olddisks < n &&
3751 rdev->data_offset < rdev->new_data_offset)
3752 goto out_unlock;
3753 if (olddisks > n &&
3754 rdev->data_offset > rdev->new_data_offset)
3755 goto out_unlock;
3756 }
3757 err = 0;
3758 mddev->delta_disks = n - olddisks;
3759 mddev->raid_disks = n;
3760 mddev->reshape_backwards = (mddev->delta_disks < 0);
3761 } else
3762 mddev->raid_disks = n;
3763out_unlock:
3764 mddev_unlock(mddev);
3765 return err ? err : len;
3766}
3767static struct md_sysfs_entry md_raid_disks =
3768__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3769
3770static ssize_t
3771chunk_size_show(struct mddev *mddev, char *page)
3772{
3773 if (mddev->reshape_position != MaxSector &&
3774 mddev->chunk_sectors != mddev->new_chunk_sectors)
3775 return sprintf(page, "%d (%d)\n",
3776 mddev->new_chunk_sectors << 9,
3777 mddev->chunk_sectors << 9);
3778 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3779}
3780
3781static ssize_t
3782chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3783{
3784 unsigned long n;
3785 int err;
3786
3787 err = kstrtoul(buf, 10, &n);
3788 if (err < 0)
3789 return err;
3790
3791 err = mddev_lock(mddev);
3792 if (err)
3793 return err;
3794 if (mddev->pers) {
3795 if (mddev->pers->check_reshape == NULL)
3796 err = -EBUSY;
3797 else if (mddev->ro)
3798 err = -EROFS;
3799 else {
3800 mddev->new_chunk_sectors = n >> 9;
3801 err = mddev->pers->check_reshape(mddev);
3802 if (err)
3803 mddev->new_chunk_sectors = mddev->chunk_sectors;
3804 }
3805 } else {
3806 mddev->new_chunk_sectors = n >> 9;
3807 if (mddev->reshape_position == MaxSector)
3808 mddev->chunk_sectors = n >> 9;
3809 }
3810 mddev_unlock(mddev);
3811 return err ?: len;
3812}
3813static struct md_sysfs_entry md_chunk_size =
3814__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3815
3816static ssize_t
3817resync_start_show(struct mddev *mddev, char *page)
3818{
3819 if (mddev->recovery_cp == MaxSector)
3820 return sprintf(page, "none\n");
3821 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3822}
3823
3824static ssize_t
3825resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3826{
3827 unsigned long long n;
3828 int err;
3829
3830 if (cmd_match(buf, "none"))
3831 n = MaxSector;
3832 else {
3833 err = kstrtoull(buf, 10, &n);
3834 if (err < 0)
3835 return err;
3836 if (n != (sector_t)n)
3837 return -EINVAL;
3838 }
3839
3840 err = mddev_lock(mddev);
3841 if (err)
3842 return err;
3843 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3844 err = -EBUSY;
3845
3846 if (!err) {
3847 mddev->recovery_cp = n;
3848 if (mddev->pers)
3849 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
3850 }
3851 mddev_unlock(mddev);
3852 return err ?: len;
3853}
3854static struct md_sysfs_entry md_resync_start =
3855__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3856 resync_start_show, resync_start_store);
3857
3858/*
3859 * The array state can be:
3860 *
3861 * clear
3862 *     No devices, no size, no level
3863 *     Equivalent to STOP_ARRAY ioctl
3864 * inactive
3865 *     May have some settings and devices, but array is not active.
3866 *     An ioctl will wake it up
3867 * suspended (not supported yet)
3868 *     All IO requests will block. The array can be reconfigured.
3869 *     Writing this, if accepted, will block until array is quiescent
3870 * readonly
3871 *     no resync can happen.  no superblocks get written.
3872 *     Write requests fail
3873 * read-auto
3874 *     like readonly, but behaves like 'clean' on a write request.
3875 *
3876 * clean - no pending writes, but otherwise active.
3877 *     When written to inactive array, starts without resync
3878 *     If a write request arrives then
3879 *       if metadata is known, mark 'dirty' and switch to 'active'.
3880 *       if not known, block and switch to write-pending
3881 *     If written to an active array that has pending writes, then fails.
3882 * active
3883 *     fully active: IO and resync can be happening.
3884 *     When written to inactive array, starts with resync
3885 *
3886 * write-pending
3887 *     clean, but writes are blocked waiting for 'active' to be written.
3888 *
3889 * active-idle
3890 *     like active, but no writes have been seen for a while (100msec).
3891 *
3892 */
3893
3894enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3895 write_pending, active_idle, bad_word};
3896static char *array_states[] = {
3897 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3898 "write-pending", "active-idle", NULL };
3899
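/* Return the index of the entry in 'list' matching 'word', or the
 * list length if nothing matches (== bad_word for array_states).
 */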
3900static int match_word(const char *word, char **list)
3901{
3902 int n;
3903 for (n=0; list[n]; n++)
3904 if (cmd_match(word, list[n]))
3905 break;
3906 return n;
3907}
3908
3909static ssize_t
3910array_state_show(struct mddev *mddev, char *page)
3911{
3912 enum array_state st = inactive;
3913
3914 if (mddev->pers)
3915 switch(mddev->ro) {
3916 case 1:
3917 st = readonly;
3918 break;
3919 case 2:
3920 st = read_auto;
3921 break;
3922 case 0:
3923 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
3924 st = write_pending;
3925 else if (mddev->in_sync)
3926 st = clean;
3927 else if (mddev->safemode)
3928 st = active_idle;
3929 else
3930 st = active;
3931 }
3932 else {
3933 if (list_empty(&mddev->disks) &&
3934 mddev->raid_disks == 0 &&
3935 mddev->dev_sectors == 0)
3936 st = clear;
3937 else
3938 st = inactive;
3939 }
3940 return sprintf(page, "%s\n", array_states[st]);
3941}
3942
3943static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
3944static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
3945static int do_md_run(struct mddev *mddev);
3946static int restart_array(struct mddev *mddev);
3947
3948static ssize_t
3949array_state_store(struct mddev *mddev, const char *buf, size_t len)
3950{
3951 int err;
3952 enum array_state st = match_word(buf, array_states);
3953
3954 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3955 /* don't take reconfig_mutex when toggling between
3956 * clean and active
3957 */
3958 spin_lock(&mddev->lock);
3959 if (st == active) {
3960 restart_array(mddev);
3961 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
3962 md_wakeup_thread(mddev->thread);
3963 wake_up(&mddev->sb_wait);
3964 err = 0;
3965 } else {
3966 restart_array(mddev);
3967 if (atomic_read(&mddev->writes_pending) == 0) {
3968 if (mddev->in_sync == 0) {
3969 mddev->in_sync = 1;
3970 if (mddev->safemode == 1)
3971 mddev->safemode = 0;
3972 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
3973 }
3974 err = 0;
3975 } else
3976 err = -EBUSY;
3977 }
3978 if (!err)
3979 sysfs_notify_dirent_safe(mddev->sysfs_state);
3980 spin_unlock(&mddev->lock);
3981 return err ?: len;
3982 }
3983 err = mddev_lock(mddev);
3984 if (err)
3985 return err;
3986 err = -EINVAL;
3987 switch(st) {
3988 case bad_word:
3989 break;
3990 case clear:
3991 /* stopping an active array */
3992 err = do_md_stop(mddev, 0, NULL);
3993 break;
3994 case inactive:
3995 /* stopping an active array */
3996 if (mddev->pers)
3997 err = do_md_stop(mddev, 2, NULL);
3998 else
3999 err = 0; /* already inactive */
4000 break;
4001 case suspended:
4002 break; /* not supported yet */
4003 case readonly:
4004 if (mddev->pers)
4005 err = md_set_readonly(mddev, NULL);
4006 else {
4007 mddev->ro = 1;
4008 set_disk_ro(mddev->gendisk, 1);
4009 err = do_md_run(mddev);
4010 }
4011 break;
4012 case read_auto:
4013 if (mddev->pers) {
4014 if (mddev->ro == 0)
4015 err = md_set_readonly(mddev, NULL);
4016 else if (mddev->ro == 1)
4017 err = restart_array(mddev);
4018 if (err == 0) {
4019 mddev->ro = 2;
4020 set_disk_ro(mddev->gendisk, 0);
4021 }
4022 } else {
4023 mddev->ro = 2;
4024 err = do_md_run(mddev);
4025 }
4026 break;
4027 case clean:
4028 if (mddev->pers) {
4029 err = restart_array(mddev);
4030 if (err)
4031 break;
4032 spin_lock(&mddev->lock);
4033 if (atomic_read(&mddev->writes_pending) == 0) {
4034 if (mddev->in_sync == 0) {
4035 mddev->in_sync = 1;
4036 if (mddev->safemode == 1)
4037 mddev->safemode = 0;
4038 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4039 }
4040 err = 0;
4041 } else
4042 err = -EBUSY;
4043 spin_unlock(&mddev->lock);
4044 } else
4045 err = -EINVAL;
4046 break;
4047 case active:
4048 if (mddev->pers) {
4049 err = restart_array(mddev);
4050 if (err)
4051 break;
4052 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4053 wake_up(&mddev->sb_wait);
4054 err = 0;
4055 } else {
4056 mddev->ro = 0;
4057 set_disk_ro(mddev->gendisk, 0);
4058 err = do_md_run(mddev);
4059 }
4060 break;
4061 case write_pending:
4062 case active_idle:
4063 /* these cannot be set */
4064 break;
4065 }
4066
4067 if (!err) {
4068 if (mddev->hold_active == UNTIL_IOCTL)
4069 mddev->hold_active = 0;
4070 sysfs_notify_dirent_safe(mddev->sysfs_state);
4071 }
4072 mddev_unlock(mddev);
4073 return err ?: len;
4074}
4075static struct md_sysfs_entry md_array_state =
4076__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4077
4078static ssize_t
4079max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4080 return sprintf(page, "%d\n",
4081 atomic_read(&mddev->max_corr_read_errors));
4082}
4083
4084static ssize_t
4085max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4086{
4087 unsigned int n;
4088 int rv;
4089
4090 rv = kstrtouint(buf, 10, &n);
4091 if (rv < 0)
4092 return rv;
4093 atomic_set(&mddev->max_corr_read_errors, n);
4094 return len;
4095}
4096
4097static struct md_sysfs_entry max_corr_read_errors =
4098__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4099 max_corrected_read_errors_store);
4100
4101static ssize_t
4102null_show(struct mddev *mddev, char *page)
4103{
4104 return -EINVAL;
4105}
4106
4107static ssize_t
4108new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4109{
4110 /* buf must be %d:%d\n? giving major and minor numbers */
4111 /* The new device is added to the array.
4112 * If the array has a persistent superblock, we read the
4113 * superblock to initialise info and check validity.
4114 * Otherwise, only checking done is that in bind_rdev_to_array,
4115 * which mainly checks size.
4116 */
4117 char *e;
4118 int major = simple_strtoul(buf, &e, 10);
4119 int minor;
4120 dev_t dev;
4121 struct md_rdev *rdev;
4122 int err;
4123
4124 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4125 return -EINVAL;
4126 minor = simple_strtoul(e+1, &e, 10);
4127 if (*e && *e != '\n')
4128 return -EINVAL;
4129 dev = MKDEV(major, minor);
4130 if (major != MAJOR(dev) ||
4131 minor != MINOR(dev))
4132 return -EOVERFLOW;
4133
4134 flush_workqueue(md_misc_wq);
4135
4136 err = mddev_lock(mddev);
4137 if (err)
4138 return err;
4139 if (mddev->persistent) {
4140 rdev = md_import_device(dev, mddev->major_version,
4141 mddev->minor_version);
4142 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4143 struct md_rdev *rdev0
4144 = list_entry(mddev->disks.next,
4145 struct md_rdev, same_set);
4146 err = super_types[mddev->major_version]
4147 .load_super(rdev, rdev0, mddev->minor_version);
4148 if (err < 0)
4149 goto out;
4150 }
4151 } else if (mddev->external)
4152 rdev = md_import_device(dev, -2, -1);
4153 else
4154 rdev = md_import_device(dev, -1, -1);
4155
4156 if (IS_ERR(rdev)) {
4157 mddev_unlock(mddev);
4158 return PTR_ERR(rdev);
4159 }
4160 err = bind_rdev_to_array(rdev, mddev);
4161 out:
4162 if (err)
4163 export_rdev(rdev);
4164 mddev_unlock(mddev);
4165 return err ? err : len;
4166}
4167
4168static struct md_sysfs_entry md_new_device =
4169__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4170
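/* 'bitmap_set_bits' accepts a chunk number, or a 'start-end' range
 * of chunks, to mark as dirty so those regions get resynced.
 */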
4171static ssize_t
4172bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4173{
4174 char *end;
4175 unsigned long chunk, end_chunk;
4176 int err;
4177
4178 err = mddev_lock(mddev);
4179 if (err)
4180 return err;
4181 if (!mddev->bitmap)
4182 goto out;
4183
4184 while (*buf) {
4185 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4186 if (buf == end) break;
4187 if (*end == '-') {
4188 buf = end + 1;
4189 end_chunk = simple_strtoul(buf, &end, 0);
4190 if (buf == end) break;
4191 }
4192 if (*end && !isspace(*end)) break;
4193 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4194 buf = skip_spaces(end);
4195 }
4196 bitmap_unplug(mddev->bitmap);
4197out:
4198 mddev_unlock(mddev);
4199 return len;
4200}
4201
4202static struct md_sysfs_entry md_bitmap =
4203__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4204
4205static ssize_t
4206size_show(struct mddev *mddev, char *page)
4207{
4208 return sprintf(page, "%llu\n",
4209 (unsigned long long)mddev->dev_sectors / 2);
4210}
4211
4212static int update_size(struct mddev *mddev, sector_t num_sectors);
4213
4214static ssize_t
4215size_store(struct mddev *mddev, const char *buf, size_t len)
4216{
4217 /* If array is inactive, we can reduce the component size, but
4218 * not increase it (except from 0).
4219 * If array is active, we can try an on-line resize
4220 */
4221 sector_t sectors;
4222 int err = strict_blocks_to_sectors(buf, &sectors);
4223
4224 if (err < 0)
4225 return err;
4226 err = mddev_lock(mddev);
4227 if (err)
4228 return err;
4229 if (mddev->pers) {
4230 err = update_size(mddev, sectors);
4231 if (err == 0)
4232 md_update_sb(mddev, 1);
4233 } else {
4234 if (mddev->dev_sectors == 0 ||
4235 mddev->dev_sectors > sectors)
4236 mddev->dev_sectors = sectors;
4237 else
4238 err = -ENOSPC;
4239 }
4240 mddev_unlock(mddev);
4241 return err ? err : len;
4242}
4243
4244static struct md_sysfs_entry md_size =
4245__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4246
4247/* Metadata version:
4248 * This is one of
4249 *   'none' for arrays with no metadata (good luck...)
4250 *   'external' for arrays with externally managed metadata,
4251 * or N.M for internally known formats
4252 */
4253static ssize_t
4254metadata_show(struct mddev *mddev, char *page)
4255{
4256 if (mddev->persistent)
4257 return sprintf(page, "%d.%d\n",
4258 mddev->major_version, mddev->minor_version);
4259 else if (mddev->external)
4260 return sprintf(page, "external:%s\n", mddev->metadata_type);
4261 else
4262 return sprintf(page, "none\n");
4263}
4264
4265static ssize_t
4266metadata_store(struct mddev *mddev, const char *buf, size_t len)
4267{
4268 int major, minor;
4269 char *e;
4270 int err;
4271 /* Changing the details of 'external' metadata is
4272 * always permitted.  Otherwise there must be
4273 * no devices attached to the array.
4274 */
4275
4276 err = mddev_lock(mddev);
4277 if (err)
4278 return err;
4279 err = -EBUSY;
4280 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4281 ;
4282 else if (!list_empty(&mddev->disks))
4283 goto out_unlock;
4284
4285 err = 0;
4286 if (cmd_match(buf, "none")) {
4287 mddev->persistent = 0;
4288 mddev->external = 0;
4289 mddev->major_version = 0;
4290 mddev->minor_version = 90;
4291 goto out_unlock;
4292 }
4293 if (strncmp(buf, "external:", 9) == 0) {
4294 size_t namelen = len-9;
4295 if (namelen >= sizeof(mddev->metadata_type))
4296 namelen = sizeof(mddev->metadata_type)-1;
4297 strncpy(mddev->metadata_type, buf+9, namelen);
4298 mddev->metadata_type[namelen] = 0;
4299 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4300 mddev->metadata_type[--namelen] = 0;
4301 mddev->persistent = 0;
4302 mddev->external = 1;
4303 mddev->major_version = 0;
4304 mddev->minor_version = 90;
4305 goto out_unlock;
4306 }
4307 major = simple_strtoul(buf, &e, 10);
4308 err = -EINVAL;
4309 if (e==buf || *e != '.')
4310 goto out_unlock;
4311 buf = e+1;
4312 minor = simple_strtoul(buf, &e, 10);
4313 if (e==buf || (*e && *e != '\n') )
4314 goto out_unlock;
4315 err = -ENOENT;
4316 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4317 goto out_unlock;
4318 mddev->major_version = major;
4319 mddev->minor_version = minor;
4320 mddev->persistent = 1;
4321 mddev->external = 0;
4322 err = 0;
4323out_unlock:
4324 mddev_unlock(mddev);
4325 return err ?: len;
4326}
4327
4328static struct md_sysfs_entry md_metadata =
4329__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4330
4331static ssize_t
4332action_show(struct mddev *mddev, char *page)
4333{
4334 char *type = "idle";
4335 unsigned long recovery = mddev->recovery;
4336 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4337 type = "frozen";
4338 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4339 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4340 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4341 type = "reshape";
4342 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4343 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4344 type = "resync";
4345 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4346 type = "check";
4347 else
4348 type = "repair";
4349 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4350 type = "recover";
4351 else if (mddev->reshape_position != MaxSector)
4352 type = "reshape";
4353 }
4354 return sprintf(page, "%s\n", type);
4355}
4356
4357static ssize_t
4358action_store(struct mddev *mddev, const char *page, size_t len)
4359{
4360 if (!mddev->pers || !mddev->pers->sync_request)
4361 return -EINVAL;
4362
4363
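 /* 'idle' and 'frozen' both interrupt any running resync/recovery;
 * 'frozen' additionally prevents a new one from being started.
 */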
4364 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4365 if (cmd_match(page, "frozen"))
4366 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4367 else
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4370 mddev_lock(mddev) == 0) {
4371 flush_workqueue(md_misc_wq);
4372 if (mddev->sync_thread) {
4373 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4374 md_reap_sync_thread(mddev);
4375 }
4376 mddev_unlock(mddev);
4377 }
4378 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4379 return -EBUSY;
4380 else if (cmd_match(page, "resync"))
4381 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4382 else if (cmd_match(page, "recover")) {
4383 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4384 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4385 } else if (cmd_match(page, "reshape")) {
4386 int err;
4387 if (mddev->pers->start_reshape == NULL)
4388 return -EINVAL;
4389 err = mddev_lock(mddev);
4390 if (!err) {
4391 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4392 err = -EBUSY;
4393 else {
4394 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4395 err = mddev->pers->start_reshape(mddev);
4396 }
4397 mddev_unlock(mddev);
4398 }
4399 if (err)
4400 return err;
4401 sysfs_notify(&mddev->kobj, NULL, "degraded");
4402 } else {
4403 if (cmd_match(page, "check"))
4404 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4405 else if (!cmd_match(page, "repair"))
4406 return -EINVAL;
4407 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4408 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4409 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4410 }
4411 if (mddev->ro == 2) {
4412 /* A write to sync_action is enough to justify
4413 * canceling read-auto mode
4414 */
4415 mddev->ro = 0;
4416 md_wakeup_thread(mddev->sync_thread);
4417 }
4418 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4419 md_wakeup_thread(mddev->thread);
4420 sysfs_notify_dirent_safe(mddev->sysfs_action);
4421 return len;
4422}
4423
4424static struct md_sysfs_entry md_scan_mode =
4425__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4426
4427static ssize_t
4428last_sync_action_show(struct mddev *mddev, char *page)
4429{
4430 return sprintf(page, "%s\n", mddev->last_sync_action);
4431}
4432
4433static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4434
4435static ssize_t
4436mismatch_cnt_show(struct mddev *mddev, char *page)
4437{
4438 return sprintf(page, "%llu\n",
4439 (unsigned long long)
4440 atomic64_read(&mddev->resync_mismatches));
4441}
4442
4443static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4444
4445static ssize_t
4446sync_min_show(struct mddev *mddev, char *page)
4447{
4448 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4449 mddev->sync_speed_min ? "local": "system");
4450}
4451
4452static ssize_t
4453sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4454{
4455 unsigned int min;
4456 int rv;
4457
4458 if (strncmp(buf, "system", 6)==0) {
4459 min = 0;
4460 } else {
4461 rv = kstrtouint(buf, 10, &min);
4462 if (rv < 0)
4463 return rv;
4464 if (min == 0)
4465 return -EINVAL;
4466 }
4467 mddev->sync_speed_min = min;
4468 return len;
4469}
4470
4471static struct md_sysfs_entry md_sync_min =
4472__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4473
4474static ssize_t
4475sync_max_show(struct mddev *mddev, char *page)
4476{
4477 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4478 mddev->sync_speed_max ? "local": "system");
4479}
4480
4481static ssize_t
4482sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4483{
4484 unsigned int max;
4485 int rv;
4486
4487 if (strncmp(buf, "system", 6)==0) {
4488 max = 0;
4489 } else {
4490 rv = kstrtouint(buf, 10, &max);
4491 if (rv < 0)
4492 return rv;
4493 if (max == 0)
4494 return -EINVAL;
4495 }
4496 mddev->sync_speed_max = max;
4497 return len;
4498}
4499
4500static struct md_sysfs_entry md_sync_max =
4501__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4502
4503static ssize_t
4504degraded_show(struct mddev *mddev, char *page)
4505{
4506 return sprintf(page, "%d\n", mddev->degraded);
4507}
4508static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4509
4510static ssize_t
4511sync_force_parallel_show(struct mddev *mddev, char *page)
4512{
4513 return sprintf(page, "%d\n", mddev->parallel_resync);
4514}
4515
4516static ssize_t
4517sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4518{
4519 long n;
4520
4521 if (kstrtol(buf, 10, &n))
4522 return -EINVAL;
4523
4524 if (n != 0 && n != 1)
4525 return -EINVAL;
4526
4527 mddev->parallel_resync = n;
4528
4529 if (mddev->sync_thread)
4530 wake_up(&resync_wait);
4531
4532 return len;
}

/* force parallel resync, even with shared block devices */
static struct md_sysfs_entry md_sync_force_parallel =
4537__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4538 sync_force_parallel_show, sync_force_parallel_store);
4539
4540static ssize_t
4541sync_speed_show(struct mddev *mddev, char *page)
4542{
4543 unsigned long resync, dt, db;
4544 if (mddev->curr_resync == 0)
4545 return sprintf(page, "none\n");
4546 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4547 dt = (jiffies - mddev->resync_mark) / HZ;
4548 if (!dt) dt++;
4549 db = resync - mddev->resync_mark_cnt;
4550 return sprintf(page, "%lu\n", db/dt/2);
4551}
4552
4553static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4554
4555static ssize_t
4556sync_completed_show(struct mddev *mddev, char *page)
4557{
4558 unsigned long long max_sectors, resync;
4559
4560 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4561 return sprintf(page, "none\n");
4562
	/* curr_resync values of 1 and 2 are sentinels meaning the resync
	 * is queued or yielding to another array, not yet running
	 */
	if (mddev->curr_resync == 1 ||
	    mddev->curr_resync == 2)
		return sprintf(page, "delayed\n");
4566
4567 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4568 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4569 max_sectors = mddev->resync_max_sectors;
4570 else
4571 max_sectors = mddev->dev_sectors;
4572
4573 resync = mddev->curr_resync_completed;
4574 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4575}
4576
4577static struct md_sysfs_entry md_sync_completed =
4578 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4579
4580static ssize_t
4581min_sync_show(struct mddev *mddev, char *page)
4582{
4583 return sprintf(page, "%llu\n",
4584 (unsigned long long)mddev->resync_min);
4585}
4586static ssize_t
4587min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4588{
4589 unsigned long long min;
4590 int err;
4591
4592 if (kstrtoull(buf, 10, &min))
4593 return -EINVAL;
4594
4595 spin_lock(&mddev->lock);
4596 err = -EINVAL;
4597 if (min > mddev->resync_max)
4598 goto out_unlock;
4599
4600 err = -EBUSY;
4601 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		goto out_unlock;

	/* Round down to multiple of 4K for safety */
	mddev->resync_min = round_down(min, 8);
4606 err = 0;
4607
4608out_unlock:
4609 spin_unlock(&mddev->lock);
4610 return err ?: len;
4611}
4612
4613static struct md_sysfs_entry md_min_sync =
4614__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4615
4616static ssize_t
4617max_sync_show(struct mddev *mddev, char *page)
4618{
4619 if (mddev->resync_max == MaxSector)
4620 return sprintf(page, "max\n");
4621 else
4622 return sprintf(page, "%llu\n",
4623 (unsigned long long)mddev->resync_max);
4624}
4625static ssize_t
4626max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4627{
4628 int err;
4629 spin_lock(&mddev->lock);
4630 if (strncmp(buf, "max", 3) == 0)
4631 mddev->resync_max = MaxSector;
4632 else {
4633 unsigned long long max;
4634 int chunk;
4635
4636 err = -EINVAL;
4637 if (kstrtoull(buf, 10, &max))
4638 goto out_unlock;
4639 if (max < mddev->resync_min)
4640 goto out_unlock;
4641
4642 err = -EBUSY;
4643 if (max < mddev->resync_max &&
4644 mddev->ro == 0 &&
4645 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
			goto out_unlock;

		/* Must be a multiple of chunk_size */
		chunk = mddev->chunk_sectors;
4650 if (chunk) {
4651 sector_t temp = max;
4652
4653 err = -EINVAL;
4654 if (sector_div(temp, chunk))
4655 goto out_unlock;
4656 }
4657 mddev->resync_max = max;
4658 }
4659 wake_up(&mddev->recovery_wait);
4660 err = 0;
4661out_unlock:
4662 spin_unlock(&mddev->lock);
4663 return err ?: len;
4664}
4665
4666static struct md_sysfs_entry md_max_sync =
4667__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4668
4669static ssize_t
4670suspend_lo_show(struct mddev *mddev, char *page)
4671{
4672 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4673}
4674
4675static ssize_t
4676suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4677{
4678 unsigned long long old, new;
4679 int err;
4680
4681 err = kstrtoull(buf, 10, &new);
4682 if (err < 0)
4683 return err;
4684 if (new != (sector_t)new)
4685 return -EINVAL;
4686
4687 err = mddev_lock(mddev);
4688 if (err)
4689 return err;
4690 err = -EINVAL;
4691 if (mddev->pers == NULL ||
4692 mddev->pers->quiesce == NULL)
4693 goto unlock;
4694 old = mddev->suspend_lo;
4695 mddev->suspend_lo = new;
	if (new >= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4704 err = 0;
4705unlock:
4706 mddev_unlock(mddev);
4707 return err ?: len;
4708}
4709static struct md_sysfs_entry md_suspend_lo =
4710__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4711
4712static ssize_t
4713suspend_hi_show(struct mddev *mddev, char *page)
4714{
4715 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4716}
4717
4718static ssize_t
4719suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4720{
4721 unsigned long long old, new;
4722 int err;
4723
4724 err = kstrtoull(buf, 10, &new);
4725 if (err < 0)
4726 return err;
4727 if (new != (sector_t)new)
4728 return -EINVAL;
4729
4730 err = mddev_lock(mddev);
4731 if (err)
4732 return err;
4733 err = -EINVAL;
4734 if (mddev->pers == NULL ||
4735 mddev->pers->quiesce == NULL)
4736 goto unlock;
4737 old = mddev->suspend_hi;
4738 mddev->suspend_hi = new;
	if (new <= old)
		/* Shrinking suspended region */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* Expanding suspended region - need to wait */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4747 err = 0;
4748unlock:
4749 mddev_unlock(mddev);
4750 return err ?: len;
4751}
4752static struct md_sysfs_entry md_suspend_hi =
4753__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4754
4755static ssize_t
4756reshape_position_show(struct mddev *mddev, char *page)
4757{
4758 if (mddev->reshape_position != MaxSector)
4759 return sprintf(page, "%llu\n",
4760 (unsigned long long)mddev->reshape_position);
4761 strcpy(page, "none\n");
4762 return 5;
4763}
4764
4765static ssize_t
4766reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4767{
4768 struct md_rdev *rdev;
4769 unsigned long long new;
4770 int err;
4771
4772 err = kstrtoull(buf, 10, &new);
4773 if (err < 0)
4774 return err;
4775 if (new != (sector_t)new)
4776 return -EINVAL;
4777 err = mddev_lock(mddev);
4778 if (err)
4779 return err;
4780 err = -EBUSY;
4781 if (mddev->pers)
4782 goto unlock;
4783 mddev->reshape_position = new;
4784 mddev->delta_disks = 0;
4785 mddev->reshape_backwards = 0;
4786 mddev->new_level = mddev->level;
4787 mddev->new_layout = mddev->layout;
4788 mddev->new_chunk_sectors = mddev->chunk_sectors;
4789 rdev_for_each(rdev, mddev)
4790 rdev->new_data_offset = rdev->data_offset;
4791 err = 0;
4792unlock:
4793 mddev_unlock(mddev);
4794 return err ?: len;
4795}
4796
4797static struct md_sysfs_entry md_reshape_position =
4798__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4799 reshape_position_store);
4800
4801static ssize_t
4802reshape_direction_show(struct mddev *mddev, char *page)
4803{
4804 return sprintf(page, "%s\n",
4805 mddev->reshape_backwards ? "backwards" : "forwards");
4806}
4807
4808static ssize_t
4809reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4810{
4811 int backwards = 0;
4812 int err;
4813
4814 if (cmd_match(buf, "forwards"))
4815 backwards = 0;
4816 else if (cmd_match(buf, "backwards"))
4817 backwards = 1;
4818 else
4819 return -EINVAL;
4820 if (mddev->reshape_backwards == backwards)
4821 return len;
4822
4823 err = mddev_lock(mddev);
4824 if (err)
4825 return err;
4826
4827 if (mddev->delta_disks)
4828 err = -EBUSY;
4829 else if (mddev->persistent &&
4830 mddev->major_version == 0)
4831 err = -EINVAL;
4832 else
4833 mddev->reshape_backwards = backwards;
4834 mddev_unlock(mddev);
4835 return err ?: len;
4836}
4837
4838static struct md_sysfs_entry md_reshape_direction =
4839__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4840 reshape_direction_store);
4841
4842static ssize_t
4843array_size_show(struct mddev *mddev, char *page)
4844{
4845 if (mddev->external_size)
4846 return sprintf(page, "%llu\n",
4847 (unsigned long long)mddev->array_sectors/2);
4848 else
4849 return sprintf(page, "default\n");
4850}
4851
4852static ssize_t
4853array_size_store(struct mddev *mddev, const char *buf, size_t len)
4854{
4855 sector_t sectors;
4856 int err;
4857
	err = mddev_lock(mddev);
	if (err)
		return err;

	/* cluster raid doesn't support changing array_sectors; must not
	 * return with the mddev still locked
	 */
	if (mddev_is_clustered(mddev)) {
		mddev_unlock(mddev);
		return -EINVAL;
	}
4865
4866 if (strncmp(buf, "default", 7) == 0) {
4867 if (mddev->pers)
4868 sectors = mddev->pers->size(mddev, 0, 0);
4869 else
4870 sectors = mddev->array_sectors;
4871
4872 mddev->external_size = 0;
4873 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4875 err = -EINVAL;
4876 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4877 err = -E2BIG;
4878 else
4879 mddev->external_size = 1;
4880 }
4881
4882 if (!err) {
4883 mddev->array_sectors = sectors;
4884 if (mddev->pers) {
4885 set_capacity(mddev->gendisk, mddev->array_sectors);
4886 revalidate_disk(mddev->gendisk);
4887 }
4888 }
4889 mddev_unlock(mddev);
4890 return err ?: len;
4891}
4892
4893static struct md_sysfs_entry md_array_size =
4894__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4895 array_size_store);
4896
4897static struct attribute *md_default_attrs[] = {
4898 &md_level.attr,
4899 &md_layout.attr,
4900 &md_raid_disks.attr,
4901 &md_chunk_size.attr,
4902 &md_size.attr,
4903 &md_resync_start.attr,
4904 &md_metadata.attr,
4905 &md_new_device.attr,
4906 &md_safe_delay.attr,
4907 &md_array_state.attr,
4908 &md_reshape_position.attr,
4909 &md_reshape_direction.attr,
4910 &md_array_size.attr,
4911 &max_corr_read_errors.attr,
4912 NULL,
4913};
4914
4915static struct attribute *md_redundancy_attrs[] = {
4916 &md_scan_mode.attr,
4917 &md_last_scan_mode.attr,
4918 &md_mismatches.attr,
4919 &md_sync_min.attr,
4920 &md_sync_max.attr,
4921 &md_sync_speed.attr,
4922 &md_sync_force_parallel.attr,
4923 &md_sync_completed.attr,
4924 &md_min_sync.attr,
4925 &md_max_sync.attr,
4926 &md_suspend_lo.attr,
4927 &md_suspend_hi.attr,
4928 &md_bitmap.attr,
4929 &md_degraded.attr,
4930 NULL,
4931};
4932static struct attribute_group md_redundancy_group = {
4933 .name = NULL,
4934 .attrs = md_redundancy_attrs,
4935};
4936
4937static ssize_t
4938md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4939{
4940 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4941 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4942 ssize_t rv;
4943
4944 if (!entry->show)
4945 return -EIO;
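	/* make sure the mddev is still on all_mddevs and pin it so it
	 * cannot be freed while the handler runs
	 */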
4946 spin_lock(&all_mddevs_lock);
4947 if (list_empty(&mddev->all_mddevs)) {
4948 spin_unlock(&all_mddevs_lock);
4949 return -EBUSY;
4950 }
4951 mddev_get(mddev);
4952 spin_unlock(&all_mddevs_lock);
4953
4954 rv = entry->show(mddev, page);
4955 mddev_put(mddev);
4956 return rv;
4957}
4958
4959static ssize_t
4960md_attr_store(struct kobject *kobj, struct attribute *attr,
4961 const char *page, size_t length)
4962{
4963 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4964 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4965 ssize_t rv;
4966
4967 if (!entry->store)
4968 return -EIO;
4969 if (!capable(CAP_SYS_ADMIN))
4970 return -EACCES;
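	/* as in md_attr_show: pin a still-registered mddev before
	 * calling the handler
	 */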
4971 spin_lock(&all_mddevs_lock);
4972 if (list_empty(&mddev->all_mddevs)) {
4973 spin_unlock(&all_mddevs_lock);
4974 return -EBUSY;
4975 }
4976 mddev_get(mddev);
4977 spin_unlock(&all_mddevs_lock);
4978 rv = entry->store(mddev, page, length);
4979 mddev_put(mddev);
4980 return rv;
4981}
4982
4983static void md_free(struct kobject *ko)
4984{
4985 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4986
4987 if (mddev->sysfs_state)
4988 sysfs_put(mddev->sysfs_state);
4989
4990 if (mddev->queue)
4991 blk_cleanup_queue(mddev->queue);
4992 if (mddev->gendisk) {
4993 del_gendisk(mddev->gendisk);
4994 put_disk(mddev->gendisk);
4995 }
4996
4997 kfree(mddev);
4998}
4999
5000static const struct sysfs_ops md_sysfs_ops = {
5001 .show = md_attr_show,
5002 .store = md_attr_store,
5003};
5004static struct kobj_type md_ktype = {
5005 .release = md_free,
5006 .sysfs_ops = &md_sysfs_ops,
5007 .default_attrs = md_default_attrs,
5008};
5009
5010int mdp_major = 0;
5011
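/* scheduled from mddev_put(): drop the sysfs kobject of a dead array
 * outside of any locks
 */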
5012static void mddev_delayed_delete(struct work_struct *ws)
5013{
5014 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5015
5016 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5017 kobject_del(&mddev->kobj);
5018 kobject_put(&mddev->kobj);
5019}
5020
5021static int md_alloc(dev_t dev, char *name)
5022{
5023 static DEFINE_MUTEX(disks_mutex);
5024 struct mddev *mddev = mddev_find(dev);
5025 struct gendisk *disk;
5026 int partitioned;
5027 int shift;
5028 int unit;
5029 int error;
5030
5031 if (!mddev)
5032 return -ENODEV;
5033
5034 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5035 shift = partitioned ? MdpMinorShift : 0;
	unit = MINOR(mddev->unit) >> shift;

	/* wait for any previous instance of this device to be
	 * completely removed (mddev_delayed_delete).
	 */
	flush_workqueue(md_misc_wq);
5042
5043 mutex_lock(&disks_mutex);
5044 error = -EEXIST;
5045 if (mddev->gendisk)
5046 goto abort;
5047
	if (name) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
		struct mddev *mddev2;
5052 spin_lock(&all_mddevs_lock);
5053
5054 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5055 if (mddev2->gendisk &&
5056 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5057 spin_unlock(&all_mddevs_lock);
5058 goto abort;
5059 }
5060 spin_unlock(&all_mddevs_lock);
5061 }
5062
5063 error = -ENOMEM;
5064 mddev->queue = blk_alloc_queue(GFP_KERNEL);
5065 if (!mddev->queue)
5066 goto abort;
5067 mddev->queue->queuedata = mddev;
5068
5069 blk_queue_make_request(mddev->queue, md_make_request);
5070 blk_set_stacking_limits(&mddev->queue->limits);
5071
5072 disk = alloc_disk(1 << shift);
5073 if (!disk) {
5074 blk_cleanup_queue(mddev->queue);
5075 mddev->queue = NULL;
5076 goto abort;
5077 }
5078 disk->major = MAJOR(mddev->unit);
5079 disk->first_minor = unit << shift;
5080 if (name)
5081 strcpy(disk->disk_name, name);
5082 else if (partitioned)
5083 sprintf(disk->disk_name, "md_d%d", unit);
5084 else
5085 sprintf(disk->disk_name, "md%d", unit);
5086 disk->fops = &md_fops;
5087 disk->private_data = mddev;
5088 disk->queue = mddev->queue;
	blk_queue_write_cache(mddev->queue, true, true);
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
	disk->flags |= GENHD_FL_EXT_DEVT;
	mddev->gendisk = disk;
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
	mutex_lock(&mddev->open_mutex);
5100 add_disk(disk);
5101
5102 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
5103 &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/* This isn't possible, but as kobject_init_and_add is
		 * marked __must_check, we must do something with the
		 * result
		 */
		pr_debug("md: cannot register %s/md - name in use\n",
5109 disk->disk_name);
5110 error = 0;
5111 }
5112 if (mddev->kobj.sd &&
5113 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5114 pr_debug("pointless warning\n");
5115 mutex_unlock(&mddev->open_mutex);
5116 abort:
5117 mutex_unlock(&disks_mutex);
5118 if (!error && mddev->kobj.sd) {
5119 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5120 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5121 }
5122 mddev_put(mddev);
5123 return error;
5124}
5125
5126static struct kobject *md_probe(dev_t dev, int *part, void *data)
5127{
5128 md_alloc(dev, NULL);
5129 return NULL;
5130}
5131
5132static int add_named_array(const char *val, struct kernel_param *kp)
{
	/* val must be "md_*" where * is not all digits.
	 * We allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 */
	int len = strlen(val);
5139 char buf[DISK_NAME_LEN];
5140
5141 while (len && val[len-1] == '\n')
5142 len--;
5143 if (len >= DISK_NAME_LEN)
5144 return -E2BIG;
5145 strlcpy(buf, val, len+1);
5146 if (strncmp(buf, "md_", 3) != 0)
5147 return -EINVAL;
5148 return md_alloc(0, buf);
5149}
5150
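/* timer callback: if writes have stayed idle, mark the array safe so
 * the superblock can be flagged clean
 */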
5151static void md_safemode_timeout(unsigned long data)
5152{
5153 struct mddev *mddev = (struct mddev *) data;
5154
5155 if (!atomic_read(&mddev->writes_pending)) {
5156 mddev->safemode = 1;
5157 if (mddev->external)
5158 sysfs_notify_dirent_safe(mddev->sysfs_state);
5159 }
5160 md_wakeup_thread(mddev->thread);
5161}
5162
5163static int start_dirty_degraded;
5164
5165int md_run(struct mddev *mddev)
5166{
5167 int err;
5168 struct md_rdev *rdev;
5169 struct md_personality *pers;
5170
	if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
		return -EINVAL;
5174
5175 if (mddev->pers)
5176 return -EBUSY;
5177
5178 if (mddev->sysfs_active)
5179 return -EBUSY;
5180
	/*
	 * Analyze all RAID superblock(s)
	 */
	if (!mddev->raid_disks) {
5185 if (!mddev->persistent)
5186 return -EINVAL;
5187 analyze_sbs(mddev);
5188 }
5189
5190 if (mddev->level != LEVEL_NONE)
5191 request_module("md-level-%d", mddev->level);
5192 else if (mddev->clevel[0])
5193 request_module("md-%s", mddev->clevel);
5194
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
	rdev_for_each(rdev, mddev) {
5201 if (test_bit(Faulty, &rdev->flags))
5202 continue;
5203 sync_blockdev(rdev->bdev);
5204 invalidate_bdev(rdev->bdev);

		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled as well.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to do */;
5212 } else if (rdev->data_offset < rdev->sb_start) {
5213 if (mddev->dev_sectors &&
5214 rdev->data_offset + mddev->dev_sectors
5215 > rdev->sb_start) {
5216 pr_warn("md: %s: data overlaps metadata\n",
5217 mdname(mddev));
5218 return -EINVAL;
5219 }
5220 } else {
5221 if (rdev->sb_start + rdev->sb_size/512
5222 > rdev->data_offset) {
5223 pr_warn("md: %s: metadata overlaps data\n",
5224 mdname(mddev));
5225 return -EINVAL;
5226 }
5227 }
5228 sysfs_notify_dirent_safe(rdev->sysfs_state);
5229 }
5230
5231 if (mddev->bio_set == NULL)
5232 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5233
5234 spin_lock(&pers_lock);
5235 pers = find_pers(mddev->level, mddev->clevel);
5236 if (!pers || !try_module_get(pers->owner)) {
5237 spin_unlock(&pers_lock);
5238 if (mddev->level != LEVEL_NONE)
5239 pr_warn("md: personality for level %d is not loaded!\n",
5240 mddev->level);
5241 else
5242 pr_warn("md: personality for level %s is not loaded!\n",
5243 mddev->clevel);
5244 return -EINVAL;
5245 }
5246 spin_unlock(&pers_lock);
5247 if (mddev->level != pers->level) {
5248 mddev->level = pers->level;
5249 mddev->new_level = pers->level;
5250 }
5251 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5252
	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
		module_put(pers->owner);
5257 return -EINVAL;
5258 }
5259
	if (pers->sync_request) {
		/* Warn if this looks like a dangerous layout: members of a
		 * redundant array sharing the same underlying physical disk.
		 */
		char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5265 struct md_rdev *rdev2;
5266 int warned = 0;
5267
5268 rdev_for_each(rdev, mddev)
5269 rdev_for_each(rdev2, mddev) {
5270 if (rdev < rdev2 &&
5271 rdev->bdev->bd_contains ==
5272 rdev2->bdev->bd_contains) {
5273 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5274 mdname(mddev),
5275 bdevname(rdev->bdev,b),
5276 bdevname(rdev2->bdev,b2));
5277 warned = 1;
5278 }
5279 }
5280
5281 if (warned)
5282 pr_warn("True protection against single-disk failure might be compromised.\n");
5283 }
5284
5285 mddev->recovery = 0;
5286
5287 mddev->resync_max_sectors = mddev->dev_sectors;
5288
5289 mddev->ok_start_degraded = start_dirty_degraded;
5290
	if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */

5299 err = pers->run(mddev);
5300 if (err)
5301 pr_warn("md: pers->run() failed ...\n");
5302 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5303 WARN_ONCE(!mddev->external_size,
5304 "%s: default size too small, but 'external_size' not in effect?\n",
5305 __func__);
5306 pr_warn("md: invalid array_size %llu > default size %llu\n",
5307 (unsigned long long)mddev->array_sectors / 2,
5308 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5309 err = -EINVAL;
5310 }
5311 if (err == 0 && pers->sync_request &&
5312 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5313 struct bitmap *bitmap;
5314
5315 bitmap = bitmap_create(mddev, -1);
5316 if (IS_ERR(bitmap)) {
5317 err = PTR_ERR(bitmap);
5318 pr_warn("%s: failed to create bitmap (%d)\n",
5319 mdname(mddev), err);
5320 } else
5321 mddev->bitmap = bitmap;
5322
5323 }
5324 if (err) {
5325 mddev_detach(mddev);
5326 if (mddev->private)
5327 pers->free(mddev, mddev->private);
5328 mddev->private = NULL;
5329 module_put(pers->owner);
5330 bitmap_destroy(mddev);
5331 return err;
5332 }
5333 if (mddev->queue) {
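		/* mark the whole array non-rotational (SSD) only if every
		 * in-use member device is non-rotational; degraded arrays
		 * are treated as rotational
		 */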
5334 bool nonrot = true;
5335
5336 rdev_for_each(rdev, mddev) {
5337 if (rdev->raid_disk >= 0 &&
5338 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5339 nonrot = false;
5340 break;
5341 }
5342 }
5343 if (mddev->degraded)
5344 nonrot = false;
5345 if (nonrot)
5346 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5347 else
5348 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5349 mddev->queue->backing_dev_info.congested_data = mddev;
5350 mddev->queue->backing_dev_info.congested_fn = md_congested;
5351 }
5352 if (pers->sync_request) {
5353 if (mddev->kobj.sd &&
5354 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5355 pr_warn("md: cannot register extra attributes for %s\n",
5356 mdname(mddev));
5357 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5358 } else if (mddev->ro == 2)
5359 mddev->ro = 0;
5360
5361 atomic_set(&mddev->writes_pending,0);
5362 atomic_set(&mddev->max_corr_read_errors,
5363 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5364 mddev->safemode = 0;
5365 if (mddev_is_clustered(mddev))
5366 mddev->safemode_delay = 0;
5367 else
		mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
5369 mddev->in_sync = 1;
5370 smp_wmb();
5371 spin_lock(&mddev->lock);
5372 mddev->pers = pers;
5373 spin_unlock(&mddev->lock);
	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk >= 0)
			if (sysfs_link_rdev(mddev, rdev))
				/* failure here is OK */;
5378
	if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5384 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5385
5386 if (mddev->sb_flags)
5387 md_update_sb(mddev, 0);
5388
5389 md_new_event(mddev);
5390 sysfs_notify_dirent_safe(mddev->sysfs_state);
5391 sysfs_notify_dirent_safe(mddev->sysfs_action);
5392 sysfs_notify(&mddev->kobj, NULL, "degraded");
5393 return 0;
5394}
5395EXPORT_SYMBOL_GPL(md_run);
5396
5397static int do_md_run(struct mddev *mddev)
5398{
5399 int err;
5400
5401 err = md_run(mddev);
5402 if (err)
5403 goto out;
5404 err = bitmap_load(mddev);
5405 if (err) {
5406 bitmap_destroy(mddev);
5407 goto out;
5408 }
5409
5410 if (mddev_is_clustered(mddev))
5411 md_allow_write(mddev);
5412
5413 md_wakeup_thread(mddev->thread);
5414 md_wakeup_thread(mddev->sync_thread);
5415
5416 set_capacity(mddev->gendisk, mddev->array_sectors);
5417 revalidate_disk(mddev->gendisk);
5418 mddev->changed = 1;
5419 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5420out:
5421 return err;
5422}
5423
5424static int restart_array(struct mddev *mddev)
5425{
5426 struct gendisk *disk = mddev->gendisk;
5427

	/* Complain if it has no devices */
	if (list_empty(&mddev->disks))
		return -ENXIO;
5431 if (!mddev->pers)
5432 return -EINVAL;
5433 if (!mddev->ro)
5434 return -EBUSY;
5435 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5436 struct md_rdev *rdev;
5437 bool has_journal = false;
5438
5439 rcu_read_lock();
5440 rdev_for_each_rcu(rdev, mddev) {
5441 if (test_bit(Journal, &rdev->flags) &&
5442 !test_bit(Faulty, &rdev->flags)) {
5443 has_journal = true;
5444 break;
5445 }
5446 }
5447 rcu_read_unlock();

		/* Don't restart rw with journal missing/faulty */
		if (!has_journal)
			return -EINVAL;
5452 }
5453
5454 mddev->safemode = 0;
5455 mddev->ro = 0;
5456 set_disk_ro(disk, 0);
5457 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
5458
5459 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5460 md_wakeup_thread(mddev->thread);
5461 md_wakeup_thread(mddev->sync_thread);
5462 sysfs_notify_dirent_safe(mddev->sysfs_state);
5463 return 0;
5464}
5465
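/* reset the mddev to a pristine, unconfigured state; the array must
 * already be stopped and all rdevs exported
 */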
5466static void md_clean(struct mddev *mddev)
5467{
5468 mddev->array_sectors = 0;
5469 mddev->external_size = 0;
5470 mddev->dev_sectors = 0;
5471 mddev->raid_disks = 0;
5472 mddev->recovery_cp = 0;
5473 mddev->resync_min = 0;
5474 mddev->resync_max = MaxSector;
5475 mddev->reshape_position = MaxSector;
5476 mddev->external = 0;
5477 mddev->persistent = 0;
5478 mddev->level = LEVEL_NONE;
5479 mddev->clevel[0] = 0;
5480 mddev->flags = 0;
5481 mddev->sb_flags = 0;
5482 mddev->ro = 0;
5483 mddev->metadata_type[0] = 0;
5484 mddev->chunk_sectors = 0;
5485 mddev->ctime = mddev->utime = 0;
5486 mddev->layout = 0;
5487 mddev->max_disks = 0;
5488 mddev->events = 0;
5489 mddev->can_decrease_events = 0;
5490 mddev->delta_disks = 0;
5491 mddev->reshape_backwards = 0;
5492 mddev->new_level = LEVEL_NONE;
5493 mddev->new_layout = 0;
5494 mddev->new_chunk_sectors = 0;
5495 mddev->curr_resync = 0;
5496 atomic64_set(&mddev->resync_mismatches, 0);
5497 mddev->suspend_lo = mddev->suspend_hi = 0;
5498 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5499 mddev->recovery = 0;
5500 mddev->in_sync = 0;
5501 mddev->changed = 0;
5502 mddev->degraded = 0;
5503 mddev->safemode = 0;
5504 mddev->private = NULL;
5505 mddev->cluster_info = NULL;
5506 mddev->bitmap_info.offset = 0;
5507 mddev->bitmap_info.default_offset = 0;
5508 mddev->bitmap_info.default_space = 0;
5509 mddev->bitmap_info.chunksize = 0;
5510 mddev->bitmap_info.daemon_sleep = 0;
5511 mddev->bitmap_info.max_write_behind = 0;
5512 mddev->bitmap_info.nodes = 0;
5513}
5514
5515static void __md_stop_writes(struct mddev *mddev)
5516{
5517 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5518 flush_workqueue(md_misc_wq);
5519 if (mddev->sync_thread) {
5520 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5521 md_reap_sync_thread(mddev);
5522 }
5523
5524 del_timer_sync(&mddev->safemode_timer);
5525
5526 if (mddev->pers && mddev->pers->quiesce) {
5527 mddev->pers->quiesce(mddev, 1);
5528 mddev->pers->quiesce(mddev, 0);
5529 }
5530 bitmap_flush(mddev);
5531
	if (mddev->ro == 0 &&
	    ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
	     mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
		if (!mddev_is_clustered(mddev))
			mddev->in_sync = 1;
5538 md_update_sb(mddev, 1);
5539 }
5540}
5541
5542void md_stop_writes(struct mddev *mddev)
5543{
5544 mddev_lock_nointr(mddev);
5545 __md_stop_writes(mddev);
5546 mddev_unlock(mddev);
5547}
5548EXPORT_SYMBOL_GPL(md_stop_writes);
5549
5550static void mddev_detach(struct mddev *mddev)
5551{
5552 struct bitmap *bitmap = mddev->bitmap;
5553
5554 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5555 pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
			 mdname(mddev));
		/* need to kick something here to make sure I/O goes? */
		wait_event(bitmap->behind_wait,
			   atomic_read(&bitmap->behind_writes) == 0);
5560 }
5561 if (mddev->pers && mddev->pers->quiesce) {
5562 mddev->pers->quiesce(mddev, 1);
5563 mddev->pers->quiesce(mddev, 0);
5564 }
5565 md_unregister_thread(&mddev->thread);
5566 if (mddev->queue)
5567 blk_sync_queue(mddev->queue);
5568}
5569
5570static void __md_stop(struct mddev *mddev)
5571{
5572 struct md_personality *pers = mddev->pers;
5573 mddev_detach(mddev);
5574
5575 flush_workqueue(md_misc_wq);
5576 spin_lock(&mddev->lock);
5577 mddev->pers = NULL;
5578 spin_unlock(&mddev->lock);
5579 pers->free(mddev, mddev->private);
5580 mddev->private = NULL;
5581 if (pers->sync_request && mddev->to_remove == NULL)
5582 mddev->to_remove = &md_redundancy_group;
5583 module_put(pers->owner);
5584 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5585}
5586
5587void md_stop(struct mddev *mddev)
{
	/* stop the array and free an attached data structures.
	 * This is called from dm-raid
	 */
	__md_stop(mddev);
5593 bitmap_destroy(mddev);
5594 if (mddev->bio_set)
5595 bioset_free(mddev->bio_set);
5596}
5597
5598EXPORT_SYMBOL_GPL(md_stop);
5599
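/* switch the array and its gendisk to read-only; fails with -EBUSY if
 * someone else still holds the device open
 */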
5600static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5601{
5602 int err = 0;
5603 int did_freeze = 0;
5604
5605 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5606 did_freeze = 1;
5607 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5608 md_wakeup_thread(mddev->thread);
5609 }
5610 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5611 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
		wake_up_process(mddev->sync_thread->tsk);
5616
5617 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
5618 return -EBUSY;
5619 mddev_unlock(mddev);
5620 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
5621 &mddev->recovery));
5622 wait_event(mddev->sb_wait,
5623 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
5624 mddev_lock_nointr(mddev);
5625
5626 mutex_lock(&mddev->open_mutex);
5627 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5628 mddev->sync_thread ||
5629 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5630 pr_warn("md: %s still in use.\n",mdname(mddev));
5631 if (did_freeze) {
5632 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5633 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5634 md_wakeup_thread(mddev->thread);
5635 }
5636 err = -EBUSY;
5637 goto out;
5638 }
5639 if (mddev->pers) {
5640 __md_stop_writes(mddev);
5641
5642 err = -ENXIO;
5643 if (mddev->ro==1)
5644 goto out;
5645 mddev->ro = 1;
5646 set_disk_ro(mddev->gendisk, 1);
5647 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5648 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5649 md_wakeup_thread(mddev->thread);
5650 sysfs_notify_dirent_safe(mddev->sysfs_state);
5651 err = 0;
5652 }
5653out:
5654 mutex_unlock(&mddev->open_mutex);
5655 return err;
5656}
5657
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
static int do_md_stop(struct mddev *mddev, int mode,
5663 struct block_device *bdev)
5664{
5665 struct gendisk *disk = mddev->gendisk;
5666 struct md_rdev *rdev;
5667 int did_freeze = 0;
5668
5669 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5670 did_freeze = 1;
5671 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5672 md_wakeup_thread(mddev->thread);
5673 }
5674 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5675 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
		wake_up_process(mddev->sync_thread->tsk);
5680
5681 mddev_unlock(mddev);
5682 wait_event(resync_wait, (mddev->sync_thread == NULL &&
5683 !test_bit(MD_RECOVERY_RUNNING,
5684 &mddev->recovery)));
5685 mddev_lock_nointr(mddev);
5686
5687 mutex_lock(&mddev->open_mutex);
5688 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
5689 mddev->sysfs_active ||
5690 mddev->sync_thread ||
5691 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5692 pr_warn("md: %s still in use.\n",mdname(mddev));
5693 mutex_unlock(&mddev->open_mutex);
5694 if (did_freeze) {
5695 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5696 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5697 md_wakeup_thread(mddev->thread);
5698 }
5699 return -EBUSY;
5700 }
5701 if (mddev->pers) {
5702 if (mddev->ro)
5703 set_disk_ro(disk, 0);
5704
5705 __md_stop_writes(mddev);
5706 __md_stop(mddev);
5707 mddev->queue->backing_dev_info.congested_fn = NULL;

		/* tell userspace to handle 'inactive' */
		sysfs_notify_dirent_safe(mddev->sysfs_state);
5711
5712 rdev_for_each(rdev, mddev)
5713 if (rdev->raid_disk >= 0)
5714 sysfs_unlink_rdev(mddev, rdev);
5715
5716 set_capacity(disk, 0);
5717 mutex_unlock(&mddev->open_mutex);
5718 mddev->changed = 1;
5719 revalidate_disk(disk);
5720
5721 if (mddev->ro)
5722 mddev->ro = 0;
5723 } else
5724 mutex_unlock(&mddev->open_mutex);
5725
	/*
	 * Free resources if final stop
	 */
	if (mode == 0) {
5729 pr_info("md: %s stopped.\n", mdname(mddev));
5730
5731 bitmap_destroy(mddev);
5732 if (mddev->bitmap_info.file) {
5733 struct file *f = mddev->bitmap_info.file;
5734 spin_lock(&mddev->lock);
5735 mddev->bitmap_info.file = NULL;
5736 spin_unlock(&mddev->lock);
5737 fput(f);
5738 }
5739 mddev->bitmap_info.offset = 0;
5740
5741 export_array(mddev);
5742
5743 md_clean(mddev);
5744 if (mddev->hold_active == UNTIL_STOP)
5745 mddev->hold_active = 0;
5746 }
5747 md_new_event(mddev);
5748 sysfs_notify_dirent_safe(mddev->sysfs_state);
5749 return 0;
5750}
5751
5752#ifndef MODULE
5753static void autorun_array(struct mddev *mddev)
5754{
5755 struct md_rdev *rdev;
5756 int err;
5757
5758 if (list_empty(&mddev->disks))
5759 return;
5760
5761 pr_info("md: running: ");
5762
5763 rdev_for_each(rdev, mddev) {
5764 char b[BDEVNAME_SIZE];
5765 pr_cont("<%s>", bdevname(rdev->bdev,b));
5766 }
5767 pr_cont("\n");
5768
5769 err = do_md_run(mddev);
5770 if (err) {
5771 pr_warn("md: do_md_run() returned %d\n", err);
5772 do_md_stop(mddev, 0, NULL);
5773 }
5774}
5775
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
static void autorun_devices(int part)
5789{
5790 struct md_rdev *rdev0, *rdev, *tmp;
5791 struct mddev *mddev;
5792 char b[BDEVNAME_SIZE];
5793
5794 pr_info("md: autorun ...\n");
5795 while (!list_empty(&pending_raid_disks)) {
5796 int unit;
5797 dev_t dev;
5798 LIST_HEAD(candidates);
5799 rdev0 = list_entry(pending_raid_disks.next,
5800 struct md_rdev, same_set);
5801
5802 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
5803 INIT_LIST_HEAD(&candidates);
5804 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5805 if (super_90_load(rdev, rdev0, 0) >= 0) {
5806 pr_debug("md: adding %s ...\n",
5807 bdevname(rdev->bdev,b));
5808 list_move(&rdev->same_set, &candidates);
5809 }

		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
		if (part) {
5816 dev = MKDEV(mdp_major,
5817 rdev0->preferred_minor << MdpMinorShift);
5818 unit = MINOR(dev) >> MdpMinorShift;
5819 } else {
5820 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5821 unit = MINOR(dev);
5822 }
5823 if (rdev0->preferred_minor != unit) {
5824 pr_warn("md: unit number in %s is bad: %d\n",
5825 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5826 break;
5827 }
5828
5829 md_probe(dev, NULL, NULL);
5830 mddev = mddev_find(dev);
5831 if (!mddev || !mddev->gendisk) {
5832 if (mddev)
5833 mddev_put(mddev);
5834 break;
5835 }
5836 if (mddev_lock(mddev))
5837 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
5838 else if (mddev->raid_disks || mddev->major_version
5839 || !list_empty(&mddev->disks)) {
5840 pr_warn("md: %s already running, cannot run %s\n",
5841 mdname(mddev), bdevname(rdev0->bdev,b));
5842 mddev_unlock(mddev);
5843 } else {
5844 pr_debug("md: created %s\n", mdname(mddev));
5845 mddev->persistent = 1;
5846 rdev_for_each_list(rdev, tmp, &candidates) {
5847 list_del_init(&rdev->same_set);
5848 if (bind_rdev_to_array(rdev, mddev))
5849 export_rdev(rdev);
5850 }
5851 autorun_array(mddev);
5852 mddev_unlock(mddev);
5853 }
5854
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
		rdev_for_each_list(rdev, tmp, &candidates) {
5858 list_del_init(&rdev->same_set);
5859 export_rdev(rdev);
5860 }
5861 mddev_put(mddev);
5862 }
5863 pr_info("md: ... autorun DONE.\n");
5864}
5865#endif
5866
5867static int get_version(void __user *arg)
5868{
5869 mdu_version_t ver;
5870
5871 ver.major = MD_MAJOR_VERSION;
5872 ver.minor = MD_MINOR_VERSION;
5873 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5874
5875 if (copy_to_user(arg, &ver, sizeof(ver)))
5876 return -EFAULT;
5877
5878 return 0;
5879}
5880
5881static int get_array_info(struct mddev *mddev, void __user *arg)
5882{
5883 mdu_array_info_t info;
5884 int nr,working,insync,failed,spare;
5885 struct md_rdev *rdev;
5886
5887 nr = working = insync = failed = spare = 0;
5888 rcu_read_lock();
5889 rdev_for_each_rcu(rdev, mddev) {
5890 nr++;
5891 if (test_bit(Faulty, &rdev->flags))
5892 failed++;
5893 else {
5894 working++;
5895 if (test_bit(In_sync, &rdev->flags))
5896 insync++;
			else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
				;
5900 else
5901 spare++;
5902 }
5903 }
5904 rcu_read_unlock();
5905
5906 info.major_version = mddev->major_version;
5907 info.minor_version = mddev->minor_version;
5908 info.patch_version = MD_PATCHLEVEL_VERSION;
5909 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
5910 info.level = mddev->level;
5911 info.size = mddev->dev_sectors / 2;
	if (info.size != mddev->dev_sectors / 2) /* overflow */
5913 info.size = -1;
5914 info.nr_disks = nr;
5915 info.raid_disks = mddev->raid_disks;
5916 info.md_minor = mddev->md_minor;
5917 info.not_persistent= !mddev->persistent;
5918
5919 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
5920 info.state = 0;
5921 if (mddev->in_sync)
5922 info.state = (1<<MD_SB_CLEAN);
5923 if (mddev->bitmap && mddev->bitmap_info.offset)
5924 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5925 if (mddev_is_clustered(mddev))
5926 info.state |= (1<<MD_SB_CLUSTERED);
5927 info.active_disks = insync;
5928 info.working_disks = working;
5929 info.failed_disks = failed;
5930 info.spare_disks = spare;
5931
5932 info.layout = mddev->layout;
5933 info.chunk_size = mddev->chunk_sectors << 9;
5934
5935 if (copy_to_user(arg, &info, sizeof(info)))
5936 return -EFAULT;
5937
5938 return 0;
5939}
5940
5941static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5942{
5943 mdu_bitmap_file_t *file = NULL;
5944 char *ptr;
5945 int err;
5946
5947 file = kzalloc(sizeof(*file), GFP_NOIO);
5948 if (!file)
5949 return -ENOMEM;
5950
5951 err = 0;
5952 spin_lock(&mddev->lock);
5953
5954 if (mddev->bitmap_info.file) {
5955 ptr = file_path(mddev->bitmap_info.file, file->pathname,
5956 sizeof(file->pathname));
5957 if (IS_ERR(ptr))
5958 err = PTR_ERR(ptr);
5959 else
5960 memmove(file->pathname, ptr,
5961 sizeof(file->pathname)-(ptr-file->pathname));
5962 }
5963 spin_unlock(&mddev->lock);
5964
5965 if (err == 0 &&
5966 copy_to_user(arg, file, sizeof(*file)))
5967 err = -EFAULT;
5968
5969 kfree(file);
5970 return err;
5971}
5972
5973static int get_disk_info(struct mddev *mddev, void __user * arg)
5974{
5975 mdu_disk_info_t info;
5976 struct md_rdev *rdev;
5977
5978 if (copy_from_user(&info, arg, sizeof(info)))
5979 return -EFAULT;
5980
5981 rcu_read_lock();
5982 rdev = md_find_rdev_nr_rcu(mddev, info.number);
5983 if (rdev) {
5984 info.major = MAJOR(rdev->bdev->bd_dev);
5985 info.minor = MINOR(rdev->bdev->bd_dev);
5986 info.raid_disk = rdev->raid_disk;
5987 info.state = 0;
5988 if (test_bit(Faulty, &rdev->flags))
5989 info.state |= (1<<MD_DISK_FAULTY);
5990 else if (test_bit(In_sync, &rdev->flags)) {
5991 info.state |= (1<<MD_DISK_ACTIVE);
5992 info.state |= (1<<MD_DISK_SYNC);
5993 }
5994 if (test_bit(Journal, &rdev->flags))
5995 info.state |= (1<<MD_DISK_JOURNAL);
5996 if (test_bit(WriteMostly, &rdev->flags))
5997 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5998 if (test_bit(FailFast, &rdev->flags))
5999 info.state |= (1<<MD_DISK_FAILFAST);
6000 } else {
6001 info.major = info.minor = 0;
6002 info.raid_disk = -1;
6003 info.state = (1<<MD_DISK_REMOVED);
6004 }
6005 rcu_read_unlock();
6006
6007 if (copy_to_user(arg, &info, sizeof(info)))
6008 return -EFAULT;
6009
6010 return 0;
6011}
6012
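/* handler for ADD_NEW_DISK: used both while assembling an array (no
 * personality yet) and for hot-adding devices to a running one
 */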
6013static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6014{
6015 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6016 struct md_rdev *rdev;
6017 dev_t dev = MKDEV(info->major,info->minor);
6018
6019 if (mddev_is_clustered(mddev) &&
6020 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6021 pr_warn("%s: Cannot add to clustered mddev.\n",
6022 mdname(mddev));
6023 return -EINVAL;
6024 }
6025
6026 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6027 return -EOVERFLOW;
6028
6029 if (!mddev->raid_disks) {
6030 int err;
6031
6032 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6033 if (IS_ERR(rdev)) {
6034 pr_warn("md: md_import_device returned %ld\n",
6035 PTR_ERR(rdev));
6036 return PTR_ERR(rdev);
6037 }
6038 if (!list_empty(&mddev->disks)) {
6039 struct md_rdev *rdev0
6040 = list_entry(mddev->disks.next,
6041 struct md_rdev, same_set);
6042 err = super_types[mddev->major_version]
6043 .load_super(rdev, rdev0, mddev->minor_version);
6044 if (err < 0) {
6045 pr_warn("md: %s has different UUID to %s\n",
6046 bdevname(rdev->bdev,b),
6047 bdevname(rdev0->bdev,b2));
6048 export_rdev(rdev);
6049 return -EINVAL;
6050 }
6051 }
6052 err = bind_rdev_to_array(rdev, mddev);
6053 if (err)
6054 export_rdev(rdev);
6055 return err;
6056 }
6057
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a superblock
	 * written
	 */
	if (mddev->pers) {
6064 int err;
6065 if (!mddev->pers->hot_add_disk) {
6066 pr_warn("%s: personality does not support diskops!\n",
6067 mdname(mddev));
6068 return -EINVAL;
6069 }
6070 if (mddev->persistent)
6071 rdev = md_import_device(dev, mddev->major_version,
6072 mddev->minor_version);
6073 else
6074 rdev = md_import_device(dev, -1, -1);
6075 if (IS_ERR(rdev)) {
6076 pr_warn("md: md_import_device returned %ld\n",
6077 PTR_ERR(rdev));
6078 return PTR_ERR(rdev);
6079 }
6080
6081 if (!mddev->persistent) {
6082 if (info->state & (1<<MD_DISK_SYNC) &&
6083 info->raid_disk < mddev->raid_disks) {
6084 rdev->raid_disk = info->raid_disk;
6085 set_bit(In_sync, &rdev->flags);
6086 clear_bit(Bitmap_sync, &rdev->flags);
6087 } else
6088 rdev->raid_disk = -1;
6089 rdev->saved_raid_disk = rdev->raid_disk;
6090 } else
6091 super_types[mddev->major_version].
6092 validate_super(mddev, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
		     rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
			export_rdev(rdev);
6099 return -EINVAL;
6100 }

		clear_bit(In_sync, &rdev->flags); /* just to be sure */
6103 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6104 set_bit(WriteMostly, &rdev->flags);
6105 else
6106 clear_bit(WriteMostly, &rdev->flags);
6107 if (info->state & (1<<MD_DISK_FAILFAST))
6108 set_bit(FailFast, &rdev->flags);
6109 else
6110 clear_bit(FailFast, &rdev->flags);
6111
6112 if (info->state & (1<<MD_DISK_JOURNAL)) {
6113 struct md_rdev *rdev2;
6114 bool has_journal = false;

			/* make sure no existing journal disk */
			rdev_for_each(rdev2, mddev) {
6118 if (test_bit(Journal, &rdev2->flags)) {
6119 has_journal = true;
6120 break;
6121 }
6122 }
6123 if (has_journal) {
6124 export_rdev(rdev);
6125 return -EBUSY;
6126 }
6127 set_bit(Journal, &rdev->flags);
6128 }

		/*
		 * check whether the device shows up in other nodes
		 */
		if (mddev_is_clustered(mddev)) {
6133 if (info->state & (1 << MD_DISK_CANDIDATE))
6134 set_bit(Candidate, &rdev->flags);
			else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
				err = md_cluster_ops->add_new_disk(mddev, rdev);
6138 if (err) {
6139 export_rdev(rdev);
6140 return err;
6141 }
6142 }
6143 }
6144
6145 rdev->raid_disk = -1;
6146 err = bind_rdev_to_array(rdev, mddev);
6147
6148 if (err)
6149 export_rdev(rdev);
6150
6151 if (mddev_is_clustered(mddev)) {
6152 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6153 if (!err) {
6154 err = md_cluster_ops->new_disk_ack(mddev,
6155 err == 0);
6156 if (err)
6157 md_kick_rdev_from_array(rdev);
6158 }
6159 } else {
6160 if (err)
6161 md_cluster_ops->add_new_disk_cancel(mddev);
6162 else
6163 err = add_bound_rdev(rdev);
6164 }
6165
6166 } else if (!err)
6167 err = add_bound_rdev(rdev);
6168
6169 return err;
6170 }
6171
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
	if (mddev->major_version != 0) {
6176 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6177 return -EINVAL;
6178 }
6179
6180 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6181 int err;
6182 rdev = md_import_device(dev, -1, 0);
6183 if (IS_ERR(rdev)) {
6184 pr_warn("md: error, md_import_device() returned %ld\n",
6185 PTR_ERR(rdev));
6186 return PTR_ERR(rdev);
6187 }
6188 rdev->desc_nr = info->number;
6189 if (info->raid_disk < mddev->raid_disks)
6190 rdev->raid_disk = info->raid_disk;
6191 else
6192 rdev->raid_disk = -1;
6193
6194 if (rdev->raid_disk < mddev->raid_disks)
6195 if (info->state & (1<<MD_DISK_SYNC))
6196 set_bit(In_sync, &rdev->flags);
6197
6198 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6199 set_bit(WriteMostly, &rdev->flags);
6200 if (info->state & (1<<MD_DISK_FAILFAST))
6201 set_bit(FailFast, &rdev->flags);
6202
6203 if (!mddev->persistent) {
6204 pr_debug("md: nonpersistent superblock ...\n");
6205 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6206 } else
6207 rdev->sb_start = calc_dev_sboffset(rdev);
6208 rdev->sectors = rdev->sb_start;
6209
6210 err = bind_rdev_to_array(rdev, mddev);
6211 if (err) {
6212 export_rdev(rdev);
6213 return err;
6214 }
6215 }
6216
6217 return 0;
6218}
6219
6220static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6221{
6222 char b[BDEVNAME_SIZE];
6223 struct md_rdev *rdev;
6224
6225 rdev = find_rdev(mddev, dev);
6226 if (!rdev)
6227 return -ENXIO;
6228
6229 if (rdev->raid_disk < 0)
6230 goto kick_rdev;
6231
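	/* ask the personality to detach the device; if it still occupies
	 * a slot afterwards it is busy and cannot be removed
	 */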
6232 clear_bit(Blocked, &rdev->flags);
6233 remove_and_add_spares(mddev, rdev);
6234
6235 if (rdev->raid_disk >= 0)
6236 goto busy;
6237
6238kick_rdev:
6239 if (mddev_is_clustered(mddev))
6240 md_cluster_ops->remove_disk(mddev, rdev);
6241
6242 md_kick_rdev_from_array(rdev);
6243 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6244 if (mddev->thread)
6245 md_wakeup_thread(mddev->thread);
6246 else
6247 md_update_sb(mddev, 1);
6248 md_new_event(mddev);
6249
6250 return 0;
6251busy:
6252 pr_debug("md: cannot remove active disk %s from %s ...\n",
6253 bdevname(rdev->bdev,b), mdname(mddev));
6254 return -EBUSY;
6255}
6256
6257static int hot_add_disk(struct mddev *mddev, dev_t dev)
6258{
6259 char b[BDEVNAME_SIZE];
6260 int err;
6261 struct md_rdev *rdev;
6262
6263 if (!mddev->pers)
6264 return -ENODEV;
6265
6266 if (mddev->major_version != 0) {
6267 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6268 mdname(mddev));
6269 return -EINVAL;
6270 }
6271 if (!mddev->pers->hot_add_disk) {
6272 pr_warn("%s: personality does not support diskops!\n",
6273 mdname(mddev));
6274 return -EINVAL;
6275 }
6276
6277 rdev = md_import_device(dev, -1, 0);
6278 if (IS_ERR(rdev)) {
6279 pr_warn("md: error, md_import_device() returned %ld\n",
6280 PTR_ERR(rdev));
6281 return -EINVAL;
6282 }
6283
6284 if (mddev->persistent)
6285 rdev->sb_start = calc_dev_sboffset(rdev);
6286 else
6287 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6288
6289 rdev->sectors = rdev->sb_start;
6290
6291 if (test_bit(Faulty, &rdev->flags)) {
6292 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6293 bdevname(rdev->bdev,b), mdname(mddev));
6294 err = -EINVAL;
6295 goto abort_export;
6296 }
6297
6298 clear_bit(In_sync, &rdev->flags);
6299 rdev->desc_nr = -1;
6300 rdev->saved_raid_disk = -1;
6301 err = bind_rdev_to_array(rdev, mddev);
6302 if (err)
6303 goto abort_export;

	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
	rdev->raid_disk = -1;
6311
6312 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6313 if (!mddev->thread)
6314 md_update_sb(mddev, 1);
6315
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6320 md_wakeup_thread(mddev->thread);
6321 md_new_event(mddev);
6322 return 0;
6323
6324abort_export:
6325 export_rdev(rdev);
6326 return err;
6327}
6328
6329static int set_bitmap_file(struct mddev *mddev, int fd)
6330{
6331 int err = 0;
6332
6333 if (mddev->pers) {
6334 if (!mddev->pers->quiesce || !mddev->thread)
6335 return -EBUSY;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		/* we should be able to change the bitmap.. */
	}
6340
6341 if (fd >= 0) {
6342 struct inode *inode;
6343 struct file *f;
6344
6345 if (mddev->bitmap || mddev->bitmap_info.file)
6346 return -EEXIST;
6347 f = fget(fd);
6348
6349 if (f == NULL) {
6350 pr_warn("%s: error: failed to get bitmap file\n",
6351 mdname(mddev));
6352 return -EBADF;
6353 }
6354
6355 inode = f->f_mapping->host;
6356 if (!S_ISREG(inode->i_mode)) {
6357 pr_warn("%s: error: bitmap file must be a regular file\n",
6358 mdname(mddev));
6359 err = -EBADF;
6360 } else if (!(f->f_mode & FMODE_WRITE)) {
6361 pr_warn("%s: error: bitmap file must open for write\n",
6362 mdname(mddev));
6363 err = -EBADF;
6364 } else if (atomic_read(&inode->i_writecount) != 1) {
6365 pr_warn("%s: error: bitmap file is already in use\n",
6366 mdname(mddev));
6367 err = -EBUSY;
6368 }
6369 if (err) {
6370 fput(f);
6371 return err;
6372 }
6373 mddev->bitmap_info.file = f;
6374 mddev->bitmap_info.offset = 0;
6375 } else if (mddev->bitmap == NULL)
6376 return -ENOENT;
6377 err = 0;
6378 if (mddev->pers) {
6379 mddev->pers->quiesce(mddev, 1);
6380 if (fd >= 0) {
6381 struct bitmap *bitmap;
6382
6383 bitmap = bitmap_create(mddev, -1);
6384 if (!IS_ERR(bitmap)) {
6385 mddev->bitmap = bitmap;
6386 err = bitmap_load(mddev);
6387 } else
6388 err = PTR_ERR(bitmap);
6389 }
6390 if (fd < 0 || err) {
6391 bitmap_destroy(mddev);
6392 fd = -1;
6393 }
6394 mddev->pers->quiesce(mddev, 0);
6395 }
6396 if (fd < 0) {
6397 struct file *f = mddev->bitmap_info.file;
6398 if (f) {
6399 spin_lock(&mddev->lock);
6400 mddev->bitmap_info.file = NULL;
6401 spin_unlock(&mddev->lock);
6402 fput(f);
6403 }
6404 }
6405
6406 return err;
6407}
6408
/*
 * set_array_info is used two different ways
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent,layout,chunksize determine the
 *  shape of the array.
 *  This will always create an array with a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field is
 *  used to determine which style super-blocks are to be found on the
 *  devices.  The minor and patch _version numbers are also kept in case
 *  the super_block handler wishes to interpret them.
 */
static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
{
	if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
		if (info->major_version < 0 ||
6428 info->major_version >= ARRAY_SIZE(super_types) ||
		    super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
			pr_warn("md: superblock version %d not known\n",
6432 info->major_version);
6433 return -EINVAL;
6434 }
6435 mddev->major_version = info->major_version;
6436 mddev->minor_version = info->minor_version;
6437 mddev->patch_version = info->patch_version;
		mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
		mddev->ctime = ktime_get_real_seconds();
6443 return 0;
6444 }
6445 mddev->major_version = MD_MAJOR_VERSION;
6446 mddev->minor_version = MD_MINOR_VERSION;
6447 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6448 mddev->ctime = ktime_get_real_seconds();
6449
6450 mddev->level = info->level;
6451 mddev->clevel[0] = 0;
6452 mddev->dev_sectors = 2 * (sector_t)info->size;
	mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
	if (info->state & (1<<MD_SB_CLEAN))
6458 mddev->recovery_cp = MaxSector;
6459 else
6460 mddev->recovery_cp = 0;
6461 mddev->persistent = ! info->not_persistent;
6462 mddev->external = 0;
6463
6464 mddev->layout = info->layout;
6465 mddev->chunk_sectors = info->chunk_size >> 9;
6466
6467 mddev->max_disks = MD_SB_DISKS;
6468
6469 if (mddev->persistent) {
6470 mddev->flags = 0;
6471 mddev->sb_flags = 0;
6472 }
6473 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6474
6475 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6476 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6477 mddev->bitmap_info.offset = 0;
6478
6479 mddev->reshape_position = MaxSector;

	/*
	 * Generate a 128 bit UUID
	 */
	get_random_bytes(mddev->uuid, 16);
6485
6486 mddev->new_level = mddev->level;
6487 mddev->new_chunk_sectors = mddev->chunk_sectors;
6488 mddev->new_layout = mddev->layout;
6489 mddev->delta_disks = 0;
6490 mddev->reshape_backwards = 0;
6491
6492 return 0;
6493}
6494
6495void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6496{
6497 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6498
6499 if (mddev->external_size)
6500 return;
6501
6502 mddev->array_sectors = array_sectors;
6503}
6504EXPORT_SYMBOL(md_set_array_sectors);
6505
6506static int update_size(struct mddev *mddev, sector_t num_sectors)
6507{
6508 struct md_rdev *rdev;
6509 int rv;
6510 int fit = (num_sectors == 0);

	/* cluster raid doesn't support update size */
	if (mddev_is_clustered(mddev))
		return -EINVAL;
6515
6516 if (mddev->pers->resize == NULL)
6517 return -EINVAL;

	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6528 mddev->sync_thread)
6529 return -EBUSY;
6530 if (mddev->ro)
6531 return -EROFS;
6532
6533 rdev_for_each(rdev, mddev) {
6534 sector_t avail = rdev->sectors;
6535
6536 if (fit && (num_sectors == 0 || num_sectors > avail))
6537 num_sectors = avail;
6538 if (avail < num_sectors)
6539 return -ENOSPC;
6540 }
6541 rv = mddev->pers->resize(mddev, num_sectors);
6542 if (!rv)
6543 revalidate_disk(mddev->gendisk);
6544 return rv;
6545}
6546
6547static int update_raid_disks(struct mddev *mddev, int raid_disks)
6548{
6549 int rv;
6550 struct md_rdev *rdev;
6551
6552 if (mddev->pers->check_reshape == NULL)
6553 return -EINVAL;
6554 if (mddev->ro)
6555 return -EROFS;
6556 if (raid_disks <= 0 ||
6557 (mddev->max_disks && raid_disks >= mddev->max_disks))
6558 return -EINVAL;
6559 if (mddev->sync_thread ||
6560 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
6561 mddev->reshape_position != MaxSector)
6562 return -EBUSY;
6563
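	/* check that each device's new_data_offset moves in a direction
	 * compatible with growing or shrinking the array
	 */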
6564 rdev_for_each(rdev, mddev) {
6565 if (mddev->raid_disks < raid_disks &&
6566 rdev->data_offset < rdev->new_data_offset)
6567 return -EINVAL;
6568 if (mddev->raid_disks > raid_disks &&
6569 rdev->data_offset > rdev->new_data_offset)
6570 return -EINVAL;
6571 }
6572
6573 mddev->delta_disks = raid_disks - mddev->raid_disks;
6574 if (mddev->delta_disks < 0)
6575 mddev->reshape_backwards = 1;
6576 else if (mddev->delta_disks > 0)
6577 mddev->reshape_backwards = 0;
6578
6579 rv = mddev->pers->check_reshape(mddev);
6580 if (rv < 0) {
6581 mddev->delta_disks = 0;
6582 mddev->reshape_backwards = 0;
6583 }
6584 return rv;
6585}
6586
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * Only one change may be requested at a time: the size, the number
 * of raid_disks, the layout, or the presence of an internal bitmap.
 * Everything else must match the current configuration exactly.
 */
static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6596{
6597 int rv = 0;
6598 int cnt = 0;
6599 int state = 0;

	/* calculate expected state, ignoring low bits */
	if (mddev->bitmap && mddev->bitmap_info.offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);
6604
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;

	/* Check there is only one change */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		cnt++;
6620 if (mddev->raid_disks != info->raid_disks)
6621 cnt++;
6622 if (mddev->layout != info->layout)
6623 cnt++;
6624 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6625 cnt++;
6626 if (cnt == 0)
6627 return 0;
6628 if (cnt > 1)
6629 return -EINVAL;
6630
	if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
		if (mddev->pers->check_reshape == NULL)
6637 return -EINVAL;
6638 else {
6639 mddev->new_layout = info->layout;
6640 rv = mddev->pers->check_reshape(mddev);
6641 if (rv)
6642 mddev->new_layout = mddev->layout;
6643 return rv;
6644 }
6645 }
6646 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6647 rv = update_size(mddev, (sector_t)info->size * 2);
6648
6649 if (mddev->raid_disks != info->raid_disks)
6650 rv = update_raid_disks(mddev, info->raid_disks);
6651
6652 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6653 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
6654 rv = -EINVAL;
6655 goto err;
6656 }
6657 if (mddev->recovery || mddev->sync_thread) {
6658 rv = -EBUSY;
6659 goto err;
6660 }
6661 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6662 struct bitmap *bitmap;
6663
6664 if (mddev->bitmap) {
6665 rv = -EEXIST;
6666 goto err;
6667 }
6668 if (mddev->bitmap_info.default_offset == 0) {
6669 rv = -EINVAL;
6670 goto err;
6671 }
6672 mddev->bitmap_info.offset =
6673 mddev->bitmap_info.default_offset;
6674 mddev->bitmap_info.space =
6675 mddev->bitmap_info.default_space;
6676 mddev->pers->quiesce(mddev, 1);
6677 bitmap = bitmap_create(mddev, -1);
6678 if (!IS_ERR(bitmap)) {
6679 mddev->bitmap = bitmap;
6680 rv = bitmap_load(mddev);
6681 } else
6682 rv = PTR_ERR(bitmap);
6683 if (rv)
6684 bitmap_destroy(mddev);
6685 mddev->pers->quiesce(mddev, 0);
		} else {
			/* remove the bitmap */
			if (!mddev->bitmap) {
6689 rv = -ENOENT;
6690 goto err;
6691 }
6692 if (mddev->bitmap->storage.file) {
6693 rv = -EINVAL;
6694 goto err;
6695 }
			if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
				if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
6699 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
6700 rv = -EPERM;
6701 md_cluster_ops->unlock_all_bitmaps(mddev);
6702 goto err;
6703 }
6704
6705 mddev->bitmap_info.nodes = 0;
6706 md_cluster_ops->leave(mddev);
6707 }
6708 mddev->pers->quiesce(mddev, 1);
6709 bitmap_destroy(mddev);
6710 mddev->pers->quiesce(mddev, 0);
6711 mddev->bitmap_info.offset = 0;
6712 }
6713 }
6714 md_update_sb(mddev, 1);
6715 return rv;
6716err:
6717 return rv;
6718}
6719
6720static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6721{
6722 struct md_rdev *rdev;
6723 int err = 0;
6724
6725 if (mddev->pers == NULL)
6726 return -ENODEV;
6727
6728 rcu_read_lock();
6729 rdev = find_rdev_rcu(mddev, dev);
6730 if (!rdev)
6731 err = -ENODEV;
6732 else {
6733 md_error(mddev, rdev);
6734 if (!test_bit(Faulty, &rdev->flags))
6735 err = -EBUSY;
6736 }
6737 rcu_read_unlock();
6738 return err;
6739}
6740
/* report a fixed fake geometry (2 heads, 4 sectors/track) so legacy
 * HDIO_GETGEO callers see something consistent with the capacity
 */
static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6748{
6749 struct mddev *mddev = bdev->bd_disk->private_data;
6750
6751 geo->heads = 2;
6752 geo->sectors = 4;
6753 geo->cylinders = mddev->array_sectors / 8;
6754 return 0;
6755}
6756
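/* whitelist of ioctl commands md implements; everything else is -ENOTTY */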
6757static inline bool md_ioctl_valid(unsigned int cmd)
6758{
6759 switch (cmd) {
6760 case ADD_NEW_DISK:
6761 case BLKROSET:
6762 case GET_ARRAY_INFO:
6763 case GET_BITMAP_FILE:
6764 case GET_DISK_INFO:
6765 case HOT_ADD_DISK:
6766 case HOT_REMOVE_DISK:
6767 case RAID_AUTORUN:
6768 case RAID_VERSION:
6769 case RESTART_ARRAY_RW:
6770 case RUN_ARRAY:
6771 case SET_ARRAY_INFO:
6772 case SET_BITMAP_FILE:
6773 case SET_DISK_FAULTY:
6774 case STOP_ARRAY:
6775 case STOP_ARRAY_RO:
6776 case CLUSTERED_DISK_NACK:
6777 return true;
6778 default:
6779 return false;
6780 }
6781}
6782
6783static int md_ioctl(struct block_device *bdev, fmode_t mode,
6784 unsigned int cmd, unsigned long arg)
6785{
6786 int err = 0;
6787 void __user *argp = (void __user *)arg;
6788 struct mddev *mddev = NULL;
6789 int ro;
6790
6791 if (!md_ioctl_valid(cmd))
6792 return -ENOTTY;
6793
6794 switch (cmd) {
6795 case RAID_VERSION:
6796 case GET_ARRAY_INFO:
6797 case GET_DISK_INFO:
6798 break;
6799 default:
6800 if (!capable(CAP_SYS_ADMIN))
6801 return -EACCES;
6802 }
6803
6804 /*
6805 * Commands dealing with the RAID driver but not any
6806 * particular array:
6807 */
6808 switch (cmd) {
6809 case RAID_VERSION:
6810 err = get_version(argp);
6811 goto out;
6812
6813#ifndef MODULE
6814 case RAID_AUTORUN:
6815 err = 0;
6816 autostart_arrays(arg);
6817 goto out;
6818#endif
6819 default:;
6820 }
6821
6822
6823 /*
6824 * Commands creating/starting a new array:
6825 */
6826 mddev = bdev->bd_disk->private_data;
6827
6828 if (!mddev) {
6829 BUG();
6830 goto out;
6831 }
6832
6833 /* Some actions do not require the mutex */
6834 switch (cmd) {
6835 case GET_ARRAY_INFO:
6836 if (!mddev->raid_disks && !mddev->external)
6837 err = -ENODEV;
6838 else
6839 err = get_array_info(mddev, argp);
6840 goto out;
6841
6842 case GET_DISK_INFO:
6843 if (!mddev->raid_disks && !mddev->external)
6844 err = -ENODEV;
6845 else
6846 err = get_disk_info(mddev, argp);
6847 goto out;
6848
6849 case SET_DISK_FAULTY:
6850 err = set_disk_faulty(mddev, new_decode_dev(arg));
6851 goto out;
6852
6853 case GET_BITMAP_FILE:
6854 err = get_bitmap_file(mddev, argp);
6855 goto out;
6856
6857 }
6858
6859 if (cmd == ADD_NEW_DISK)
6860 /* need to ensure md_delayed_delete() has completed */
6861 flush_workqueue(md_misc_wq);
6862
6863 if (cmd == HOT_REMOVE_DISK)
6864 /* need to ensure recovery thread has run */
6865 wait_event_interruptible_timeout(mddev->sb_wait,
6866 !test_bit(MD_RECOVERY_NEEDED,
6867 &mddev->recovery),
6868 msecs_to_jiffies(5000));
6869 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6870 /* Need to flush page cache, and ensure no-one else opens
6871 * and writes
6872 */
6873 mutex_lock(&mddev->open_mutex);
6874 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
6875 mutex_unlock(&mddev->open_mutex);
6876 err = -EBUSY;
6877 goto out;
6878 }
6879 set_bit(MD_CLOSING, &mddev->flags);
6880 mutex_unlock(&mddev->open_mutex);
6881 sync_blockdev(bdev);
6882 }
6883 err = mddev_lock(mddev);
6884 if (err) {
6885 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
6886 err, cmd);
6887 goto out;
6888 }
6889
6890 if (cmd == SET_ARRAY_INFO) {
6891 mdu_array_info_t info;
6892 if (!arg)
6893 memset(&info, 0, sizeof(info));
6894 else if (copy_from_user(&info, argp, sizeof(info))) {
6895 err = -EFAULT;
6896 goto unlock;
6897 }
6898 if (mddev->pers) {
6899 err = update_array_info(mddev, &info);
6900 if (err) {
6901 pr_warn("md: couldn't update array info. %d\n", err);
6902 goto unlock;
6903 }
6904 goto unlock;
6905 }
6906 if (!list_empty(&mddev->disks)) {
6907 pr_warn("md: array %s already has disks!\n", mdname(mddev));
6908 err = -EBUSY;
6909 goto unlock;
6910 }
6911 if (mddev->raid_disks) {
6912 pr_warn("md: array %s already initialised!\n", mdname(mddev));
6913 err = -EBUSY;
6914 goto unlock;
6915 }
6916 err = set_array_info(mddev, &info);
6917 if (err) {
6918 pr_warn("md: couldn't set array info. %d\n", err);
6919 goto unlock;
6920 }
6921 goto unlock;
6922 }
6923
6924 /*
6925 * Commands querying/configuring an existing array:
6926 */
6927 /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
6928 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
6929 if ((!mddev->raid_disks && !mddev->external)
6930 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6931 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6932 && cmd != GET_BITMAP_FILE) {
6933 err = -ENODEV;
6934 goto unlock;
6935 }
6936
6937 /*
6938 * Commands even a read-only array can execute:
6939 */
6940 switch (cmd) {
6941 case RESTART_ARRAY_RW:
6942 err = restart_array(mddev);
6943 goto unlock;
6944
6945 case STOP_ARRAY:
6946 err = do_md_stop(mddev, 0, bdev);
6947 goto unlock;
6948
6949 case STOP_ARRAY_RO:
6950 err = md_set_readonly(mddev, bdev);
6951 goto unlock;
6952
6953 case HOT_REMOVE_DISK:
6954 err = hot_remove_disk(mddev, new_decode_dev(arg));
6955 goto unlock;
6956
6957 case ADD_NEW_DISK:
6958 /* We can support ADD_NEW_DISK on read-only arrays
6959 * only if we are re-adding a preexisting device.
6960 * So require mddev->pers and MD_DISK_SYNC.
6961 */
6962 if (mddev->pers) {
6963 mdu_disk_info_t info;
6964 if (copy_from_user(&info, argp, sizeof(info)))
6965 err = -EFAULT;
6966 else if (!(info.state & (1<<MD_DISK_SYNC)))
6967 /* Need to clear read-only for this */
6968 break;
6969 else
6970 err = add_new_disk(mddev, &info);
6971 goto unlock;
6972 }
6973 break;
6974
6975 case BLKROSET:
6976 if (get_user(ro, (int __user *)(arg))) {
6977 err = -EFAULT;
6978 goto unlock;
6979 }
6980 err = -EINVAL;
6981
6982 /* if the bdev is going readonly the value of mddev->ro
6983 * does not matter, no writes are coming
6984 */
6985 if (ro)
6986 goto unlock;
6987
6988 /* are we already prepared for writes? */
6989 if (mddev->ro != 1)
6990 goto unlock;
6991
6992 /* transitioning to read-auto need only happen for
6993 * arrays that call md_write_start
6994 */
6995 if (mddev->pers) {
6996 err = restart_array(mddev);
6997 if (err == 0) {
6998 mddev->ro = 2;
6999 set_disk_ro(mddev->gendisk, 0);
7000 }
7001 }
7002 goto unlock;
7003 }
7004
7005 /*
7006 * The remaining ioctls change the state of the array and
7007 * cannot be performed while it is read-only.
7008 */
7009 if (mddev->ro && mddev->pers) {
7010 if (mddev->ro == 2) {
7011 mddev->ro = 0;
7012 sysfs_notify_dirent_safe(mddev->sysfs_state);
7013 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7014 /* mddev_unlock will wake thread */
7015 /* If a device failed while we were read-only, we
7016 * need to make sure the metadata is updated now.
7017 */
7018 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7019 mddev_unlock(mddev);
7020 wait_event(mddev->sb_wait,
7021 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7022 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7023 mddev_lock_nointr(mddev);
7024 }
7025 } else {
7026 err = -EROFS;
7027 goto unlock;
7028 }
7029 }
7030
7031 switch (cmd) {
7032 case ADD_NEW_DISK:
7033 {
7034 mdu_disk_info_t info;
7035 if (copy_from_user(&info, argp, sizeof(info)))
7036 err = -EFAULT;
7037 else
7038 err = add_new_disk(mddev, &info);
7039 goto unlock;
7040 }
7041
7042 case CLUSTERED_DISK_NACK:
7043 if (mddev_is_clustered(mddev))
7044 md_cluster_ops->new_disk_ack(mddev, false);
7045 else
7046 err = -EINVAL;
7047 goto unlock;
7048
7049 case HOT_ADD_DISK:
7050 err = hot_add_disk(mddev, new_decode_dev(arg));
7051 goto unlock;
7052
7053 case RUN_ARRAY:
7054 err = do_md_run(mddev);
7055 goto unlock;
7056
7057 case SET_BITMAP_FILE:
7058 err = set_bitmap_file(mddev, (int)arg);
7059 goto unlock;
7060
7061 default:
7062 err = -EINVAL;
7063 goto unlock;
7064 }
7065
7066unlock:
7067 if (mddev->hold_active == UNTIL_IOCTL &&
7068 err != -EINVAL)
7069 mddev->hold_active = 0;
7070 mddev_unlock(mddev);
7071out:
7072 return err;
7073}
7074#ifdef CONFIG_COMPAT
7075static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7076 unsigned int cmd, unsigned long arg)
7077{
7078 switch (cmd) {
7079 case HOT_REMOVE_DISK:
7080 case HOT_ADD_DISK:
7081 case SET_DISK_FAULTY:
7082 case SET_BITMAP_FILE:
7083 /* These take in integer arg, do not convert */
7084 break;
7085 default:
7086 arg = (unsigned long)compat_ptr(arg);
7087 break;
7088 }
7089
7090 return md_ioctl(bdev, mode, cmd, arg);
7091}
7092#endif
7093
7094static int md_open(struct block_device *bdev, fmode_t mode)
7095{
7096 /*
7097 * Succeed if we can lock the mddev, which confirms that
7098 * it isn't being stopped right now.
7099 */
7100 struct mddev *mddev = mddev_find(bdev->bd_dev);
7101 int err;
7102
7103 if (!mddev)
7104 return -ENODEV;
7105
7106 if (mddev->gendisk != bdev->bd_disk) {
7107 /* we are racing with mddev_put which is discarding this
7108 * bd_disk.
7109 */
7110 mddev_put(mddev);
7111 /* Wait until bdev->bd_disk is definitely gone */
7112 flush_workqueue(md_misc_wq);
7113 /* Then retry the open from the top */
7114 return -ERESTARTSYS;
7115 }
7116 BUG_ON(mddev != bdev->bd_disk->private_data);
7117
7118 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7119 goto out;
7120
7121 if (test_bit(MD_CLOSING, &mddev->flags)) {
7122 mutex_unlock(&mddev->open_mutex);
7123 err = -ENODEV;
7124 goto out;
7125 }
7126
7127 err = 0;
7128 atomic_inc(&mddev->openers);
7129 mutex_unlock(&mddev->open_mutex);
7130
7131 check_disk_change(bdev);
7132 out:
7133 if (err)
7134 mddev_put(mddev);
7135 return err;
7136}
7137
7138static void md_release(struct gendisk *disk, fmode_t mode)
7139{
7140 struct mddev *mddev = disk->private_data;
7141
7142 BUG_ON(!mddev);
7143 atomic_dec(&mddev->openers);
7144 mddev_put(mddev);
7145}
7146
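/* mddev->changed is set when an array is stopped, so the next open
 * revalidates the now-empty device.
 */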
7147static int md_media_changed(struct gendisk *disk)
7148{
7149 struct mddev *mddev = disk->private_data;
7150
7151 return mddev->changed;
7152}
7153
7154static int md_revalidate(struct gendisk *disk)
7155{
7156 struct mddev *mddev = disk->private_data;
7157
7158 mddev->changed = 0;
7159 return 0;
7160}
7161static const struct block_device_operations md_fops =
7162{
7163 .owner = THIS_MODULE,
7164 .open = md_open,
7165 .release = md_release,
7166 .ioctl = md_ioctl,
7167#ifdef CONFIG_COMPAT
7168 .compat_ioctl = md_compat_ioctl,
7169#endif
7170 .getgeo = md_getgeo,
7171 .media_changed = md_media_changed,
7172 .revalidate_disk= md_revalidate,
7173};
7174
7175static int md_thread(void *arg)
7176{
7177 struct md_thread *thread = arg;
7178
7179 /*
7180 * md_thread is a 'system-thread', its priority should be very
7181 * high. We avoid resource deadlocks individually in each
7182 * raid personality. (RAID5 does preallocation) We also use RR and
7183 * the very same RT priority as kswapd, thus we will never get
7184 * into a priority inversion deadlock.
7185 *
7186 * we definitely have to have equal or higher priority than
7187 * bdflush, otherwise bdflush will deadlock if there are too
7188 * many dirty RAID5 blocks.
7189 */
7190
7191 allow_signal(SIGKILL);
7192 while (!kthread_should_stop()) {
7193
7194 /* We need to wait INTERRUPTIBLE so that
7195 * we don't add to the load-average.
7196 * That means we need to be sure no signals are
7197 * pending
7198 */
7199 if (signal_pending(current))
7200 flush_signals(current);
7201
7202 wait_event_interruptible_timeout
7203 (thread->wqueue,
7204 test_bit(THREAD_WAKEUP, &thread->flags)
7205 || kthread_should_stop() || kthread_should_park(),
7206 thread->timeout);
7207
7208 clear_bit(THREAD_WAKEUP, &thread->flags);
7209 if (kthread_should_park())
7210 kthread_parkme();
7211 if (!kthread_should_stop())
7212 thread->run(thread);
7213 }
7214
7215 return 0;
7216}
7217
7218void md_wakeup_thread(struct md_thread *thread)
7219{
7220 if (thread) {
7221 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7222 set_bit(THREAD_WAKEUP, &thread->flags);
7223 wake_up(&thread->wqueue);
7224 }
7225}
7226EXPORT_SYMBOL(md_wakeup_thread);
7227
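/* Start a per-array kernel thread. 'run' is invoked each time the
 * thread is woken via md_wakeup_thread() or its timeout expires;
 * e.g. raid1 registers its main thread with
 * md_register_thread(raid1d, mddev, "raid1").
 */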
7228struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7229 struct mddev *mddev, const char *name)
7230{
7231 struct md_thread *thread;
7232
7233 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7234 if (!thread)
7235 return NULL;
7236
7237 init_waitqueue_head(&thread->wqueue);
7238
7239 thread->run = run;
7240 thread->mddev = mddev;
7241 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7242 thread->tsk = kthread_run(md_thread, thread,
7243 "%s_%s",
7244 mdname(thread->mddev),
7245 name);
7246 if (IS_ERR(thread->tsk)) {
7247 kfree(thread);
7248 return NULL;
7249 }
7250 return thread;
7251}
7252EXPORT_SYMBOL(md_register_thread);
7253
7254void md_unregister_thread(struct md_thread **threadp)
7255{
7256 struct md_thread *thread = *threadp;
7257 if (!thread)
7258 return;
7259 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7260 /* Locking ensures that mddev_unlock does not wake_up a
7261 * non-existent thread
7262 */
7263 spin_lock(&pers_lock);
7264 *threadp = NULL;
7265 spin_unlock(&pers_lock);
7266
7267 kthread_stop(thread->tsk);
7268 kfree(thread);
7269}
7270EXPORT_SYMBOL(md_unregister_thread);
7271
7272void md_error(struct mddev *mddev, struct md_rdev *rdev)
7273{
7274 if (!rdev || test_bit(Faulty, &rdev->flags))
7275 return;
7276
7277 if (!mddev->pers || !mddev->pers->error_handler)
7278 return;
7279 mddev->pers->error_handler(mddev, rdev);
7280 if (mddev->degraded)
7281 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7282 sysfs_notify_dirent_safe(rdev->sysfs_state);
7283 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7284 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7285 md_wakeup_thread(mddev->thread);
7286 if (mddev->event_work.func)
7287 queue_work(md_misc_wq, &mddev->event_work);
7288 md_new_event(mddev);
7289}
7290EXPORT_SYMBOL(md_error);
7291
7292
7293 /* seq_file implementation for /proc/mdstat */
7294static void status_unused(struct seq_file *seq)
7295{
7296 int i = 0;
7297 struct md_rdev *rdev;
7298
7299 seq_printf(seq, "unused devices: ");
7300
7301 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7302 char b[BDEVNAME_SIZE];
7303 i++;
7304 seq_printf(seq, "%s ",
7305 bdevname(rdev->bdev,b));
7306 }
7307 if (!i)
7308 seq_printf(seq, "<none>");
7309
7310 seq_printf(seq, "\n");
7311}
7312
7313static int status_resync(struct seq_file *seq, struct mddev *mddev)
7314{
7315 sector_t max_sectors, resync, res;
7316 unsigned long dt, db;
7317 sector_t rt;
7318 int scale;
7319 unsigned int per_milli;
7320
7321 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7322 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7323 max_sectors = mddev->resync_max_sectors;
7324 else
7325 max_sectors = mddev->dev_sectors;
7326
7327 resync = mddev->curr_resync;
7328 if (resync <= 3) {
7329 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7330 /* Still cleaning up */
7331 resync = max_sectors;
7332 } else
7333 resync -= atomic_read(&mddev->recovery_active);
7334
7335 if (resync == 0) {
7336 if (mddev->recovery_cp < MaxSector) {
7337 seq_printf(seq, "\tresync=PENDING");
7338 return 1;
7339 }
7340 return 0;
7341 }
7342 if (resync < 3) {
7343 seq_printf(seq, "\tresync=DELAYED");
7344 return 1;
7345 }
7346
7347 WARN_ON(max_sectors == 0);
7348
7349 /* Pick 'scale' such that (resync>>scale)*1000 will fit in a
7350 * sector_t, and (max_sectors>>scale) will fit in a u32, as
7351 * those are the requirements for sector_div; so scale >= 10.
7352 */
7353 scale = 10;
7354 if (sizeof(sector_t) > sizeof(unsigned long)) {
7355 while ( max_sectors/2 > (1ULL<<(scale+32)))
7356 scale++;
7357 }
7358 res = (resync>>scale)*1000;
7359 sector_div(res, (u32)((max_sectors>>scale)+1));
7360
7361 per_milli = res;
7362 {
7363 int i, x = per_milli/50, y = 20-x;
7364 seq_printf(seq, "[");
7365 for (i = 0; i < x; i++)
7366 seq_printf(seq, "=");
7367 seq_printf(seq, ">");
7368 for (i = 0; i < y; i++)
7369 seq_printf(seq, ".");
7370 seq_printf(seq, "] ");
7371 }
7372 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
7373 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
7374 "reshape" :
7375 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
7376 "check" :
7377 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
7378 "resync" : "recovery"))),
7379 per_milli/10, per_milli % 10,
7380 (unsigned long long) resync/2,
7381 (unsigned long long) max_sectors/2);
7382
7383 /*
7384 * dt: time from mark until now
7385 * db: sectors written from mark until now
7386 * rt: remaining time
7387 *
7388 * rt is a sector_t, so it could be 32bit or 64bit.
7389 * We divide before we multiply in case it is
7390 * 32bit and close to the limit.
7391 * The divisor (db) is scaled by 32 to avoid losing
7392 * precision when it is small, and the result is
7393 * scaled back by 32 (rt >>= 5) after multiplying
7394 * by dt.
7395 */
7396
7397 dt = ((jiffies - mddev->resync_mark) / HZ);
7398 if (!dt) dt++;
7399 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
7400 - mddev->resync_mark_cnt;
7401
7402 rt = max_sectors - resync;
7403 sector_div(rt, db/32+1);
7404 rt *= dt;
7405 rt >>= 5;
7406
7407 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
7408 ((unsigned long)rt % 60)/6);
7409
7410 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
7411 return 1;
7412}
7413
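/* seq_file iterator for /proc/mdstat. Two sentinels are used:
 * (void*)1 means "emit the header line" and (void*)2 means "emit
 * the trailer listing unused devices"; any other cursor is a real
 * mddev with a reference held across iterations.
 */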
7414static void *md_seq_start(struct seq_file *seq, loff_t *pos)
7415{
7416 struct list_head *tmp;
7417 loff_t l = *pos;
7418 struct mddev *mddev;
7419
7420 if (l >= 0x10000)
7421 return NULL;
7422 if (!l--)
7423 /* header */
7424 return (void*)1;
7425
7426 spin_lock(&all_mddevs_lock);
7427 list_for_each(tmp,&all_mddevs)
7428 if (!l--) {
7429 mddev = list_entry(tmp, struct mddev, all_mddevs);
7430 mddev_get(mddev);
7431 spin_unlock(&all_mddevs_lock);
7432 return mddev;
7433 }
7434 spin_unlock(&all_mddevs_lock);
7435 if (!l--)
7436 return (void*)2;
7437 return NULL;
7438}
7439
7440static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7441{
7442 struct list_head *tmp;
7443 struct mddev *next_mddev, *mddev = v;
7444
7445 ++*pos;
7446 if (v == (void*)2)
7447 return NULL;
7448
7449 spin_lock(&all_mddevs_lock);
7450 if (v == (void*)1)
7451 tmp = all_mddevs.next;
7452 else
7453 tmp = mddev->all_mddevs.next;
7454 if (tmp != &all_mddevs)
7455 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7456 else {
7457 next_mddev = (void*)2;
7458 *pos = 0x10000;
7459 }
7460 spin_unlock(&all_mddevs_lock);
7461
7462 if (v != (void*)1)
7463 mddev_put(mddev);
7464 return next_mddev;
7465
7466}
7467
7468static void md_seq_stop(struct seq_file *seq, void *v)
7469{
7470 struct mddev *mddev = v;
7471
7472 if (mddev && v != (void*)1 && v != (void*)2)
7473 mddev_put(mddev);
7474}
7475
7476static int md_seq_show(struct seq_file *seq, void *v)
7477{
7478 struct mddev *mddev = v;
7479 sector_t sectors;
7480 struct md_rdev *rdev;
7481
7482 if (v == (void*)1) {
7483 struct md_personality *pers;
7484 seq_printf(seq, "Personalities : ");
7485 spin_lock(&pers_lock);
7486 list_for_each_entry(pers, &pers_list, list)
7487 seq_printf(seq, "[%s] ", pers->name);
7488
7489 spin_unlock(&pers_lock);
7490 seq_printf(seq, "\n");
7491 seq->poll_event = atomic_read(&md_event_count);
7492 return 0;
7493 }
7494 if (v == (void*)2) {
7495 status_unused(seq);
7496 return 0;
7497 }
7498
7499 spin_lock(&mddev->lock);
7500 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7501 seq_printf(seq, "%s : %sactive", mdname(mddev),
7502 mddev->pers ? "" : "in");
7503 if (mddev->pers) {
7504 if (mddev->ro==1)
7505 seq_printf(seq, " (read-only)");
7506 if (mddev->ro==2)
7507 seq_printf(seq, " (auto-read-only)");
7508 seq_printf(seq, " %s", mddev->pers->name);
7509 }
7510
7511 sectors = 0;
7512 rcu_read_lock();
7513 rdev_for_each_rcu(rdev, mddev) {
7514 char b[BDEVNAME_SIZE];
7515 seq_printf(seq, " %s[%d]",
7516 bdevname(rdev->bdev,b), rdev->desc_nr);
7517 if (test_bit(WriteMostly, &rdev->flags))
7518 seq_printf(seq, "(W)");
7519 if (test_bit(Journal, &rdev->flags))
7520 seq_printf(seq, "(J)");
7521 if (test_bit(Faulty, &rdev->flags)) {
7522 seq_printf(seq, "(F)");
7523 continue;
7524 }
7525 if (rdev->raid_disk < 0)
7526 seq_printf(seq, "(S)");
7527 if (test_bit(Replacement, &rdev->flags))
7528 seq_printf(seq, "(R)");
7529 sectors += rdev->sectors;
7530 }
7531 rcu_read_unlock();
7532
7533 if (!list_empty(&mddev->disks)) {
7534 if (mddev->pers)
7535 seq_printf(seq, "\n %llu blocks",
7536 (unsigned long long)
7537 mddev->array_sectors / 2);
7538 else
7539 seq_printf(seq, "\n %llu blocks",
7540 (unsigned long long)sectors / 2);
7541 }
7542 if (mddev->persistent) {
7543 if (mddev->major_version != 0 ||
7544 mddev->minor_version != 90) {
7545 seq_printf(seq," super %d.%d",
7546 mddev->major_version,
7547 mddev->minor_version);
7548 }
7549 } else if (mddev->external)
7550 seq_printf(seq, " super external:%s",
7551 mddev->metadata_type);
7552 else
7553 seq_printf(seq, " super non-persistent");
7554
7555 if (mddev->pers) {
7556 mddev->pers->status(seq, mddev);
7557 seq_printf(seq, "\n ");
7558 if (mddev->pers->sync_request) {
7559 if (status_resync(seq, mddev))
7560 seq_printf(seq, "\n ");
7561 }
7562 } else
7563 seq_printf(seq, "\n ");
7564
7565 bitmap_status(seq, mddev->bitmap);
7566
7567 seq_printf(seq, "\n");
7568 }
7569 spin_unlock(&mddev->lock);
7570
7571 return 0;
7572}
7573
7574static const struct seq_operations md_seq_ops = {
7575 .start = md_seq_start,
7576 .next = md_seq_next,
7577 .stop = md_seq_stop,
7578 .show = md_seq_show,
7579};
7580
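/* Snapshot the event count at open time so that mdstat_poll() can
 * detect changes that happen while this instance is open.
 */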
7581static int md_seq_open(struct inode *inode, struct file *file)
7582{
7583 struct seq_file *seq;
7584 int error;
7585
7586 error = seq_open(file, &md_seq_ops);
7587 if (error)
7588 return error;
7589
7590 seq = file->private_data;
7591 seq->poll_event = atomic_read(&md_event_count);
7592 return error;
7593}
7594
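/* Poll support: /proc/mdstat reports POLLERR|POLLPRI once
 * md_event_count moves past the value snapshotted at open, and
 * unconditionally while the module is unloading.
 */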
7595static int md_unloading;
7596static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7597{
7598 struct seq_file *seq = filp->private_data;
7599 int mask;
7600
7601 if (md_unloading)
7602 return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7603 poll_wait(filp, &md_event_waiters, wait);
7604
7605 /* always allow read */
7606 mask = POLLIN | POLLRDNORM;
7607
7608 if (seq->poll_event != atomic_read(&md_event_count))
7609 mask |= POLLERR | POLLPRI;
7610 return mask;
7611}
7612
7613static const struct file_operations md_seq_fops = {
7614 .owner = THIS_MODULE,
7615 .open = md_seq_open,
7616 .read = seq_read,
7617 .llseek = seq_lseek,
7618 .release = seq_release_private,
7619 .poll = mdstat_poll,
7620};
7621
7622int register_md_personality(struct md_personality *p)
7623{
7624 pr_debug("md: %s personality registered for level %d\n",
7625 p->name, p->level);
7626 spin_lock(&pers_lock);
7627 list_add_tail(&p->list, &pers_list);
7628 spin_unlock(&pers_lock);
7629 return 0;
7630}
7631EXPORT_SYMBOL(register_md_personality);
7632
7633int unregister_md_personality(struct md_personality *p)
7634{
7635 pr_debug("md: %s personality unregistered\n", p->name);
7636 spin_lock(&pers_lock);
7637 list_del_init(&p->list);
7638 spin_unlock(&pers_lock);
7639 return 0;
7640}
7641EXPORT_SYMBOL(unregister_md_personality);
7642
7643int register_md_cluster_operations(struct md_cluster_operations *ops,
7644 struct module *module)
7645{
7646 int ret = 0;
7647 spin_lock(&pers_lock);
7648 if (md_cluster_ops != NULL)
7649 ret = -EALREADY;
7650 else {
7651 md_cluster_ops = ops;
7652 md_cluster_mod = module;
7653 }
7654 spin_unlock(&pers_lock);
7655 return ret;
7656}
7657EXPORT_SYMBOL(register_md_cluster_operations);
7658
7659int unregister_md_cluster_operations(void)
7660{
7661 spin_lock(&pers_lock);
7662 md_cluster_ops = NULL;
7663 spin_unlock(&pers_lock);
7664 return 0;
7665}
7666EXPORT_SYMBOL(unregister_md_cluster_operations);
7667
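/* Load md-cluster if it is not already registered, take a reference
 * on the module, and join the cluster. The reference is dropped in
 * md_cluster_stop().
 */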
7668int md_setup_cluster(struct mddev *mddev, int nodes)
7669{
7670 if (!md_cluster_ops)
7671 request_module("md-cluster");
7672 spin_lock(&pers_lock);
7673 /* ensure module won't be unloaded */
7674 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
7675 pr_warn("can't find md-cluster module or get its reference.\n");
7676 spin_unlock(&pers_lock);
7677 return -ENOENT;
7678 }
7679 spin_unlock(&pers_lock);
7680
7681 return md_cluster_ops->join(mddev, nodes);
7682}
7683
7684void md_cluster_stop(struct mddev *mddev)
7685{
7686 if (!md_cluster_ops)
7687 return;
7688 md_cluster_ops->leave(mddev);
7689 module_put(md_cluster_mod);
7690}
7691
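/* Heuristic used to throttle resync: the array is considered idle
 * only if no member disk has seen significant non-sync I/O since
 * the previous call (see the long comment inside the loop).
 */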
7692static int is_mddev_idle(struct mddev *mddev, int init)
7693{
7694 struct md_rdev *rdev;
7695 int idle;
7696 int curr_events;
7697
7698 idle = 1;
7699 rcu_read_lock();
7700 rdev_for_each_rcu(rdev, mddev) {
7701 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7702 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7703 (int)part_stat_read(&disk->part0, sectors[1]) -
7704 atomic_read(&disk->sync_io);
7705 /* sync IO will cause sync_io to increase before the disk_stats
7706 * as sync_io is counted when a request starts, and
7707 * disk_stats is counted when it completes.
7708 * So resync activity will cause curr_events to be smaller than
7709 * when there was no such activity.
7710 * non-sync IO will cause disk_stat to increase without
7711 * increasing sync_io so curr_events will (eventually)
7712 * be larger than it was before. Once it becomes
7713 * substantially larger, the test below will cause
7714 * the array to appear non-idle, and resync will slow
7715 * down.
7716 * If there is a lot of outstanding resync activity when
7717 * we set last_events to curr_events, then all that activity
7718 * completing might cause the array to appear non-idle
7719 * and resync will be slowed down even though there might
7720 * not have been non-resync activity. This will only
7721 * happen once though. 'last_events' will soon reflect
7722 * the state where there is little or no outstanding
7723 * resync requests, and further resync activity will
7724 * always make curr_events less than last_events.
7725 *
7726 */
7727 if (init || curr_events - rdev->last_events > 64) {
7728 rdev->last_events = curr_events;
7729 idle = 0;
7730 }
7731 }
7732 rcu_read_unlock();
7733 return idle;
7734}
7735
7736void md_done_sync(struct mddev *mddev, int blocks, int ok)
7737{
7738 /* another "blocks" (512 byte) blocks have been synced */
7739 atomic_sub(blocks, &mddev->recovery_active);
7740 wake_up(&mddev->recovery_wait);
7741 if (!ok) {
7742 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7743 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7744 md_wakeup_thread(mddev->thread);
7745 /* stop recovery, signal do_sync */
7746 }
7747}
7748EXPORT_SYMBOL(md_done_sync);
7749
7750 /* md_write_start(mddev, bi)
7751 * If we need to update some array metadata (e.g. 'active' flag
7752 * in superblock) before writing, schedule a superblock update
7753 * and wait for it to complete.
7754 */
7755void md_write_start(struct mddev *mddev, struct bio *bi)
7756{
7757 int did_change = 0;
7758 if (bio_data_dir(bi) != WRITE)
7759 return;
7760
7761 BUG_ON(mddev->ro == 1);
7762 if (mddev->ro == 2) {
7763 /* need to switch to read/write */
7764 mddev->ro = 0;
7765 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7766 md_wakeup_thread(mddev->thread);
7767 md_wakeup_thread(mddev->sync_thread);
7768 did_change = 1;
7769 }
7770 atomic_inc(&mddev->writes_pending);
7771 if (mddev->safemode == 1)
7772 mddev->safemode = 0;
7773 if (mddev->in_sync) {
7774 spin_lock(&mddev->lock);
7775 if (mddev->in_sync) {
7776 mddev->in_sync = 0;
7777 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7778 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7779 md_wakeup_thread(mddev->thread);
7780 did_change = 1;
7781 }
7782 spin_unlock(&mddev->lock);
7783 }
7784 if (did_change)
7785 sysfs_notify_dirent_safe(mddev->sysfs_state);
7786 wait_event(mddev->sb_wait,
7787 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7788}
7789EXPORT_SYMBOL(md_write_start);
7790
7791void md_write_end(struct mddev *mddev)
7792{
7793 if (atomic_dec_and_test(&mddev->writes_pending)) {
7794 if (mddev->safemode == 2)
7795 md_wakeup_thread(mddev->thread);
7796 else if (mddev->safemode_delay)
7797 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7798 }
7799}
7800EXPORT_SYMBOL(md_write_end);
7801
7802 /* md_allow_write(mddev)
7803 * Calling this ensures that the array is marked 'active' so that writes
7804 * may proceed without blocking. It is important to call this before
7805 * attempting a GFP_KERNEL allocation while holding the mddev lock.
7806 * Must be called with mddev_lock held.
7807 *
7808 * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until
7809 * external metadata is updated.
7810 */
7811int md_allow_write(struct mddev *mddev)
7812{
7813 if (!mddev->pers)
7814 return 0;
7815 if (mddev->ro)
7816 return 0;
7817 if (!mddev->pers->sync_request)
7818 return 0;
7819
7820 spin_lock(&mddev->lock);
7821 if (mddev->in_sync) {
7822 mddev->in_sync = 0;
7823 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
7824 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
7825 if (mddev->safemode_delay &&
7826 mddev->safemode == 0)
7827 mddev->safemode = 1;
7828 spin_unlock(&mddev->lock);
7829 md_update_sb(mddev, 0);
7830 sysfs_notify_dirent_safe(mddev->sysfs_state);
7831 } else
7832 spin_unlock(&mddev->lock);
7833
7834 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
7835 return -EAGAIN;
7836 else
7837 return 0;
7838}
7839EXPORT_SYMBOL_GPL(md_allow_write);
7840
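/* Speed accounting for md_do_sync(): SYNC_MARKS timestamped
 * progress marks are kept, one taken every SYNC_MARK_STEP jiffies;
 * the oldest mark provides the window over which the current rate
 * and the time estimate shown in /proc/mdstat are computed.
 */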
7841#define SYNC_MARKS 10
7842#define SYNC_MARK_STEP (3*HZ)
7843#define UPDATE_FREQUENCY (5*60*HZ)
7844void md_do_sync(struct md_thread *thread)
7845{
7846 struct mddev *mddev = thread->mddev;
7847 struct mddev *mddev2;
7848 unsigned int currspeed = 0,
7849 window;
7850 sector_t max_sectors,j, io_sectors, recovery_done;
7851 unsigned long mark[SYNC_MARKS];
7852 unsigned long update_time;
7853 sector_t mark_cnt[SYNC_MARKS];
7854 int last_mark,m;
7855 struct list_head *tmp;
7856 sector_t last_check;
7857 int skipped = 0;
7858 struct md_rdev *rdev;
7859 char *desc, *action = NULL;
7860 struct blk_plug plug;
7861 int ret;
7862
7863 /* just in case thread restarts... */
7864 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7865 return;
7866 if (mddev->ro) {
7867 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7868 return;
7869 }
7870
7871 if (mddev_is_clustered(mddev)) {
7872 ret = md_cluster_ops->resync_start(mddev);
7873 if (ret)
7874 goto skip;
7875
7876 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
7877 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
7878 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
7879 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
7880 && ((unsigned long long)mddev->curr_resync_completed
7881 < (unsigned long long)mddev->resync_max_sectors))
7882 goto skip;
7883 }
7884
7885 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7886 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7887 desc = "data-check";
7888 action = "check";
7889 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7890 desc = "requested-resync";
7891 action = "repair";
7892 } else
7893 desc = "resync";
7894 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7895 desc = "reshape";
7896 else
7897 desc = "recovery";
7898
7899 mddev->last_sync_action = action ?: desc;
7900
7901 /* we overload curr_resync somewhat here.
7902 * 0 == not engaged in resync at all
7903 * 2 == checking that there is no conflict with another sync
7904 * 1 == like 2, but have yielded to allow conflicting resync to
7905 * commence
7906 * other == active in resync - this many blocks
7907 *
7908 * Before starting a resync we must have set curr_resync to
7909 * 2, and then checked that every "conflicting" array has curr_resync
7910 * >= ours. If other arrays have a lower curr_resync, they will yield
7911 * to us (but we must set curr_resync to 1 first).
7912 */
7917 do {
7918 int mddev2_minor = -1;
7919 mddev->curr_resync = 2;
7920
7921 try_again:
7922 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7923 goto skip;
7924 for_each_mddev(mddev2, tmp) {
7925 if (mddev2 == mddev)
7926 continue;
7927 if (!mddev->parallel_resync
7928 && mddev2->curr_resync
7929 && match_mddev_units(mddev, mddev2)) {
7930 DEFINE_WAIT(wq);
7931 if (mddev < mddev2 && mddev->curr_resync == 2) {
7932 /* arbitrarily yield */
7933 mddev->curr_resync = 1;
7934 wake_up(&resync_wait);
7935 }
7936 if (mddev > mddev2 && mddev->curr_resync == 1)
7937 /* no need to wait here, we can wait the next
7938 * time 'round when curr_resync == 2
7939 */
7940 continue;
7941
7942 /* We need to wait 'interruptible' so as not to
7943 * contribute to the load average, and not to
7944 * be caught by 'softlockup' */
7945 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7946 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7947 mddev2->curr_resync >= mddev->curr_resync) {
7948 if (mddev2_minor != mddev2->md_minor) {
7949 mddev2_minor = mddev2->md_minor;
7950 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
7951 desc, mdname(mddev),
7952 mdname(mddev2));
7953 }
7954 mddev_put(mddev2);
7955 if (signal_pending(current))
7956 flush_signals(current);
7957 schedule();
7958 finish_wait(&resync_wait, &wq);
7959 goto try_again;
7960 }
7961 finish_wait(&resync_wait, &wq);
7962 }
7963 }
7964 } while (mddev->curr_resync < 2);
7965
7966 j = 0;
7967 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7968 /* resync follows the size requested by the personality,
7969 * which defaults to physical size, but can be virtual size
7970 */
7971 max_sectors = mddev->resync_max_sectors;
7972 atomic64_set(&mddev->resync_mismatches, 0);
7973
7974 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7975 j = mddev->resync_min;
7976 else if (!mddev->bitmap)
7977 j = mddev->recovery_cp;
7978
7979 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7980 max_sectors = mddev->resync_max_sectors;
7981 else {
7982 /* recovery follows the physical size of devices */
7983 max_sectors = mddev->dev_sectors;
7984 j = MaxSector;
7985 rcu_read_lock();
7986 rdev_for_each_rcu(rdev, mddev)
7987 if (rdev->raid_disk >= 0 &&
7988 !test_bit(Journal, &rdev->flags) &&
7989 !test_bit(Faulty, &rdev->flags) &&
7990 !test_bit(In_sync, &rdev->flags) &&
7991 rdev->recovery_offset < j)
7992 j = rdev->recovery_offset;
7993 rcu_read_unlock();
7994
7995 /* If there is a bitmap, we need to make sure all
7996 * writes that started before we added a spare
7997 * complete before we start doing a recovery.
7998 * Otherwise the write might complete and (via
7999 * bitmap_endwrite) set a bit in the bitmap after the
8000 * recovery has checked that bit and skipped that
8001 * region.
8002 */
8003 if (mddev->bitmap) {
8004 mddev->pers->quiesce(mddev, 1);
8005 mddev->pers->quiesce(mddev, 0);
8006 }
8007 }
8008
8009 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8010 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8011 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8012 speed_max(mddev), desc);
8013
8014 is_mddev_idle(mddev, 1);
8015
8016 io_sectors = 0;
8017 for (m = 0; m < SYNC_MARKS; m++) {
8018 mark[m] = jiffies;
8019 mark_cnt[m] = io_sectors;
8020 }
8021 last_mark = 0;
8022 mddev->resync_mark = mark[last_mark];
8023 mddev->resync_mark_cnt = mark_cnt[last_mark];
8024
8025 /*
8026 * Tune reconstruction:
8027 */
8028 window = 32*(PAGE_SIZE/512);
8029 pr_debug("md: using %dk window, over a total of %lluk.\n",
8030 window/2, (unsigned long long)max_sectors/2);
8031
8032 atomic_set(&mddev->recovery_active, 0);
8033 last_check = 0;
8034
8035 if (j>2) {
8036 pr_debug("md: resuming %s of %s from checkpoint.\n",
8037 desc, mdname(mddev));
8038 mddev->curr_resync = j;
8039 } else
8040 mddev->curr_resync = 3;
8041 mddev->curr_resync_completed = j;
8042 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8043 md_new_event(mddev);
8044 update_time = jiffies;
8045
8046 blk_start_plug(&plug);
8047 while (j < max_sectors) {
8048 sector_t sectors;
8049
8050 skipped = 0;
8051
8052 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8053 ((mddev->curr_resync > mddev->curr_resync_completed &&
8054 (mddev->curr_resync - mddev->curr_resync_completed)
8055 > (max_sectors >> 4)) ||
8056 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8057 (j - mddev->curr_resync_completed)*2
8058 >= mddev->resync_max - mddev->curr_resync_completed ||
8059 mddev->curr_resync_completed > mddev->resync_max
8060 )) {
8061 /* time to update curr_resync_completed */
8062 wait_event(mddev->recovery_wait,
8063 atomic_read(&mddev->recovery_active) == 0);
8064 mddev->curr_resync_completed = j;
8065 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8066 j > mddev->recovery_cp)
8067 mddev->recovery_cp = j;
8068 update_time = jiffies;
8069 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8070 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8071 }
8072
8073 while (j >= mddev->resync_max &&
8074 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8075 /* As this condition is controlled by user-space,
8076 * we can block indefinitely, so use '_interruptible'
8077 * to avoid triggering warnings.
8078 */
8079 flush_signals(current);
8080 wait_event_interruptible(mddev->recovery_wait,
8081 mddev->resync_max > j
8082 || test_bit(MD_RECOVERY_INTR,
8083 &mddev->recovery));
8084 }
8085
8086 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8087 break;
8088
8089 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8090 if (sectors == 0) {
8091 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8092 break;
8093 }
8094
8095 if (!skipped) {
8096 io_sectors += sectors;
8097 atomic_add(sectors, &mddev->recovery_active);
8098 }
8099
8100 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8101 break;
8102
8103 j += sectors;
8104 if (j > max_sectors)
8105 /* when skipping, extra large numbers can be returned */
8106 j = max_sectors;
8107 if (j > 2)
8108 mddev->curr_resync = j;
8109 mddev->curr_mark_cnt = io_sectors;
8110 if (last_check == 0)
8111 /* this is the earliest that rebuild will be
8112 * visible in /proc/mdstat
8113 */
8114 md_new_event(mddev);
8115
8116 if (last_check + window > io_sectors || j == max_sectors)
8117 continue;
8118
8119 last_check = io_sectors;
8120 repeat:
8121 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8122 /* step marks */
8123 int next = (last_mark+1) % SYNC_MARKS;
8124
8125 mddev->resync_mark = mark[next];
8126 mddev->resync_mark_cnt = mark_cnt[next];
8127 mark[next] = jiffies;
8128 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8129 last_mark = next;
8130 }
8131
8132 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8133 break;
8134
8135 /*
8136 * this loop exits only if either we are slower than
8137 * the 'hard' speed limit, or the system was IO-idle for
8138 * a jiffy.
8139 * the system might be non-idle CPU-wise, but we only care
8140 * about not overloading the IO subsystem (things like an
8141 * e2fsck being done on the RAID array should execute fast).
8142 */
8143 cond_resched();
8144
8145 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8146 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8147 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8148
8149 if (currspeed > speed_min(mddev)) {
8150 if (currspeed > speed_max(mddev)) {
8151 msleep(500);
8152 goto repeat;
8153 }
8154 if (!is_mddev_idle(mddev, 0)) {
8155 /*
8156 * Give other IO more of a chance.
8157 * The faster the devices, the less we wait.
8158 */
8159 wait_event(mddev->recovery_wait,
8160 !atomic_read(&mddev->recovery_active));
8161 }
8162 }
8163 }
8164 pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
8165 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8166 ? "interrupted" : "done");
8167 /*
8168 * this also signals 'finished resyncing' to md_stop
8169 */
8170 blk_finish_plug(&plug);
8171 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8172
8173 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8174 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8175 mddev->curr_resync > 3) {
8176 mddev->curr_resync_completed = mddev->curr_resync;
8177 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
8178 }
8179 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8180
8181 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
8182 mddev->curr_resync > 3) {
8183 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8184 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8185 if (mddev->curr_resync >= mddev->recovery_cp) {
8186 pr_debug("md: checkpointing %s of %s.\n",
8187 desc, mdname(mddev));
8188 if (test_bit(MD_RECOVERY_ERROR,
8189 &mddev->recovery))
8190 mddev->recovery_cp =
8191 mddev->curr_resync_completed;
8192 else
8193 mddev->recovery_cp =
8194 mddev->curr_resync;
8195 }
8196 } else
8197 mddev->recovery_cp = MaxSector;
8198 } else {
8199 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8200 mddev->curr_resync = MaxSector;
8201 rcu_read_lock();
8202 rdev_for_each_rcu(rdev, mddev)
8203 if (rdev->raid_disk >= 0 &&
8204 mddev->delta_disks >= 0 &&
8205 !test_bit(Journal, &rdev->flags) &&
8206 !test_bit(Faulty, &rdev->flags) &&
8207 !test_bit(In_sync, &rdev->flags) &&
8208 rdev->recovery_offset < mddev->curr_resync)
8209 rdev->recovery_offset = mddev->curr_resync;
8210 rcu_read_unlock();
8211 }
8212 }
8213 skip:
8214 /* set CHANGE_PENDING here since maybe another update is needed,
8215 * so other nodes are informed. It should be harmless for normal
8216 * raid */
8217 set_mask_bits(&mddev->sb_flags, 0,
8218 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
8219
8220 spin_lock(&mddev->lock);
8221 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8222 /* We completed so min/max setting can be forgotten if used. */
8223 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8224 mddev->resync_min = 0;
8225 mddev->resync_max = MaxSector;
8226 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8227 mddev->resync_min = mddev->curr_resync_completed;
8228 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
8229 mddev->curr_resync = 0;
8230 spin_unlock(&mddev->lock);
8231
8232 wake_up(&resync_wait);
8233 md_wakeup_thread(mddev->thread);
8234 return;
8235}
8236EXPORT_SYMBOL_GPL(md_do_sync);
8237
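/* Remove failed devices that have become idle, then try to hot-add
 * any spares. If 'this' is non-NULL only that device is considered
 * for removal. Returns the number of devices that need recovery.
 */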
8238static int remove_and_add_spares(struct mddev *mddev,
8239 struct md_rdev *this)
8240{
8241 struct md_rdev *rdev;
8242 int spares = 0;
8243 int removed = 0;
8244 bool remove_some = false;
8245
8246 rdev_for_each(rdev, mddev) {
8247 if ((this == NULL || rdev == this) &&
8248 rdev->raid_disk >= 0 &&
8249 !test_bit(Blocked, &rdev->flags) &&
8250 test_bit(Faulty, &rdev->flags) &&
8251 atomic_read(&rdev->nr_pending)==0) {
8252 /* Faulty non-Blocked devices with nr_pending == 0
8253 * never get nr_pending incremented,
8254 * never get Faulty cleared, and never get Blocked set.
8255 * So we can synchronize_rcu then remove these
8256 * devices safely now. */
8257 remove_some = true;
8258 set_bit(RemoveSynchronized, &rdev->flags);
8259 }
8260 }
8261
8262 if (remove_some)
8263 synchronize_rcu();
8264 rdev_for_each(rdev, mddev) {
8265 if ((this == NULL || rdev == this) &&
8266 rdev->raid_disk >= 0 &&
8267 !test_bit(Blocked, &rdev->flags) &&
8268 ((test_bit(RemoveSynchronized, &rdev->flags) ||
8269 (!test_bit(In_sync, &rdev->flags) &&
8270 !test_bit(Journal, &rdev->flags))) &&
8271 atomic_read(&rdev->nr_pending)==0)) {
8272 if (mddev->pers->hot_remove_disk(
8273 mddev, rdev) == 0) {
8274 sysfs_unlink_rdev(mddev, rdev);
8275 rdev->raid_disk = -1;
8276 removed++;
8277 }
8278 }
8279 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
8280 clear_bit(RemoveSynchronized, &rdev->flags);
8281 }
8282
8283 if (removed && mddev->kobj.sd)
8284 sysfs_notify(&mddev->kobj, NULL, "degraded");
8285
8286 if (this && removed)
8287 goto no_add;
8288
8289 rdev_for_each(rdev, mddev) {
8290 if (this && this != rdev)
8291 continue;
8292 if (test_bit(Candidate, &rdev->flags))
8293 continue;
8294 if (rdev->raid_disk >= 0 &&
8295 !test_bit(In_sync, &rdev->flags) &&
8296 !test_bit(Journal, &rdev->flags) &&
8297 !test_bit(Faulty, &rdev->flags))
8298 spares++;
8299 if (rdev->raid_disk >= 0)
8300 continue;
8301 if (test_bit(Faulty, &rdev->flags))
8302 continue;
8303 if (!test_bit(Journal, &rdev->flags)) {
8304 if (mddev->ro &&
8305 ! (rdev->saved_raid_disk >= 0 &&
8306 !test_bit(Bitmap_sync, &rdev->flags)))
8307 continue;
8308
8309 rdev->recovery_offset = 0;
8310 }
8311 if (mddev->pers->
8312 hot_add_disk(mddev, rdev) == 0) {
8313 if (sysfs_link_rdev(mddev, rdev))
8314 /* failure here is OK */;
8315 if (!test_bit(Journal, &rdev->flags))
8316 spares++;
8317 md_new_event(mddev);
8318 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8319 }
8320 }
8321no_add:
8322 if (removed)
8323 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8324 return spares;
8325}
8326
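/* Run from md_misc_wq (queued by md_check_recovery()) so that the
 * resync thread is registered without holding the mddev lock.
 */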
8327static void md_start_sync(struct work_struct *ws)
8328{
8329 struct mddev *mddev = container_of(ws, struct mddev, del_work);
8330
8331 mddev->sync_thread = md_register_thread(md_do_sync,
8332 mddev,
8333 "resync");
8334 if (!mddev->sync_thread) {
8335 pr_warn("%s: could not start resync thread...\n",
8336 mdname(mddev));
8337 /* leave the spares where they are, it shouldn't hurt */
8338 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8339 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8340 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8341 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8342 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8343 wake_up(&resync_wait);
8344 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8345 &mddev->recovery))
8346 if (mddev->sysfs_action)
8347 sysfs_notify_dirent_safe(mddev->sysfs_action);
8348 } else
8349 md_wakeup_thread(mddev->sync_thread);
8350 sysfs_notify_dirent_safe(mddev->sysfs_action);
8351 md_new_event(mddev);
8352}
8353
8354 /*
8355 * This routine is regularly called by all per-raid-array threads to
8356 * deal with generic issues like resync and super-block update.
8357 * Raid personalities that don't have a thread (linear/raid0) do not
8358 * need this as they never do any recovery or update the superblock.
8359 *
8360 * It does not do any resync itself, but rather "forks" off other threads
8361 * to do that as needed.
8362 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
8363 * "->recovery" and create a thread at ->sync_thread.
8364 * When the thread finishes it sets MD_RECOVERY_DONE
8365 * and wakes up this thread which will reap the thread and finish up.
8366 * This thread also removes any faulty devices (with nr_pending == 0).
8367 *
8368 * The overall approach is:
8369 * 1/ if the superblock needs updating, update it.
8370 * 2/ If a recovery thread is running, don't do anything else.
8371 * 3/ If recovery has finished, clean up, possibly marking spares active.
8372 * 4/ If there are any faulty devices, remove them.
8373 * 5/ If the array is degraded, try to add spares (if there are any).
8374 * 6/ If the array has spares or is not in-sync, start a resync thread.
8375 */
8376void md_check_recovery(struct mddev *mddev)
8377{
8378 if (mddev->suspended)
8379 return;
8380
8381 if (mddev->bitmap)
8382 bitmap_daemon_work(mddev);
8383
8384 if (signal_pending(current)) {
8385 if (mddev->pers->sync_request && !mddev->external) {
8386 pr_debug("md: %s in immediate safe mode\n",
8387 mdname(mddev));
8388 mddev->safemode = 2;
8389 }
8390 flush_signals(current);
8391 }
8392
8393 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
8394 return;
8395 if ( ! (
8396 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
8397 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8398 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8399 test_bit(MD_RELOAD_SB, &mddev->flags) ||
8400 (mddev->external == 0 && mddev->safemode == 1) ||
8401 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
8402 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
8403 ))
8404 return;
8405
8406 if (mddev_trylock(mddev)) {
8407 int spares = 0;
8408
8409 if (mddev->ro) {
8410 struct md_rdev *rdev;
8411 if (!mddev->external && mddev->in_sync)
8412 /* The 'Blocked' flag is meaningless on a clean
8413 * read-only array, so clear it on every rdev
8414 * to let the failed-device removal below
8415 * proceed.
8416 */
8417 rdev_for_each(rdev, mddev)
8418 clear_bit(Blocked, &rdev->flags);
8419
8420 /* On a read-only array we can:
8421 * - remove failed devices
8422 * - not add already-in_sync devices
8423 * that is the only state change permitted.
8424 */
8425
8426 remove_and_add_spares(mddev, NULL);
8427
8428 /* There is no sync thread here, but md_reap_sync_thread()
8429 * still does the per-device cleanup we need. */
8430 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8431 md_reap_sync_thread(mddev);
8432 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8433 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8434 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8435 goto unlock;
8436 }
8437
8438 if (mddev_is_clustered(mddev)) {
8439 struct md_rdev *rdev;
8440 /* kick the device if another node issued a
8441 * remove disk.
8442 */
8443 rdev_for_each(rdev, mddev) {
8444 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
8445 rdev->raid_disk < 0)
8446 md_kick_rdev_from_array(rdev);
8447 }
8448
8449 if (test_and_clear_bit(MD_RELOAD_SB, &mddev->flags))
8450 md_reload_sb(mddev, mddev->good_device_nr);
8451 }
8452
8453 if (!mddev->external) {
8454 int did_change = 0;
8455 spin_lock(&mddev->lock);
8456 if (mddev->safemode &&
8457 !atomic_read(&mddev->writes_pending) &&
8458 !mddev->in_sync &&
8459 mddev->recovery_cp == MaxSector) {
8460 mddev->in_sync = 1;
8461 did_change = 1;
8462 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8463 }
8464 if (mddev->safemode == 1)
8465 mddev->safemode = 0;
8466 spin_unlock(&mddev->lock);
8467 if (did_change)
8468 sysfs_notify_dirent_safe(mddev->sysfs_state);
8469 }
8470
8471 if (mddev->sb_flags)
8472 md_update_sb(mddev, 0);
8473
8474 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
8475 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
8476 /* resync/recovery still happening */
8477 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8478 goto unlock;
8479 }
8480 if (mddev->sync_thread) {
8481 md_reap_sync_thread(mddev);
8482 goto unlock;
8483 }
8484
8485 /* Set RUNNING before clearing NEEDED to avoid
8486 * any transients in the value of "sync_action". */
8487 mddev->curr_resync_completed = 0;
8488 spin_lock(&mddev->lock);
8489 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8490 spin_unlock(&mddev->lock);
8491
8492 /* Clear some bits that don't mean anything, but
8493 * might be left set */
8494 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
8495 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8496
8497 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
8498 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
8499 goto not_running;
8500
8501 /* no recovery is running.
8502 * remove any failed drives, then
8503 * add spares if possible.
8504 * Spares are also removed and re-added, to allow
8505 * the personality to fail the re-add.
8506 */
8507 if (mddev->reshape_position != MaxSector) {
8508 if (mddev->pers->check_reshape == NULL ||
8509 mddev->pers->check_reshape(mddev) != 0)
8510 /* Cannot proceed */
8511 goto not_running;
8512 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8513 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8514 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
8515 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8516 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8517 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8518 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8519 } else if (mddev->recovery_cp < MaxSector) {
8520 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8521 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8522 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
8523 /* nothing to be done ... */
8524 goto not_running;
8525
8526 if (mddev->pers->sync_request) {
8527 if (spares) {
8528 /* We are adding a device or devices to an array
8529 * which has the bitmap stored on all devices.
8530 * So make sure all bitmap pages get written
8531 */
8532 bitmap_write_all(mddev->bitmap);
8533 }
8534 INIT_WORK(&mddev->del_work, md_start_sync);
8535 queue_work(md_misc_wq, &mddev->del_work);
8536 goto unlock;
8537 }
8538 not_running:
8539 if (!mddev->sync_thread) {
8540 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8541 wake_up(&resync_wait);
8542 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
8543 &mddev->recovery))
8544 if (mddev->sysfs_action)
8545 sysfs_notify_dirent_safe(mddev->sysfs_action);
8546 }
8547 unlock:
8548 wake_up(&mddev->sb_wait);
8549 mddev_unlock(mddev);
8550 }
8551}
8552EXPORT_SYMBOL(md_check_recovery);
8553
8554void md_reap_sync_thread(struct mddev *mddev)
8555{
8556 struct md_rdev *rdev;
8557
8558 /* resync has finished, collect result */
8559 md_unregister_thread(&mddev->sync_thread);
8560 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8561 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8562 /* success... */
8563 /* activate any spares */
8564 if (mddev->pers->spare_active(mddev)) {
8565 sysfs_notify(&mddev->kobj, NULL,
8566 "degraded");
8567 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
8568 }
8569 }
8570 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8571 mddev->pers->finish_reshape)
8572 mddev->pers->finish_reshape(mddev);
8573
8574 /* If array is no-longer degraded, then any saved_raid_disk
8575 * information must be scrapped.
8576 */
8577 if (!mddev->degraded)
8578 rdev_for_each(rdev, mddev)
8579 rdev->saved_raid_disk = -1;
8580
8581 md_update_sb(mddev, 1);
8582
8583 /* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
8584 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set */
8585 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
8586 md_cluster_ops->resync_finish(mddev);
8587 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
8588 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
8589 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8590 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8591 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8592 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8593 wake_up(&resync_wait);
8594 /* flag recovery needed just to double check */
8595 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8596 sysfs_notify_dirent_safe(mddev->sysfs_action);
8597 md_new_event(mddev);
8598 if (mddev->event_work.func)
8599 queue_work(md_misc_wq, &mddev->event_work);
8600}
8601EXPORT_SYMBOL(md_reap_sync_thread);
8602
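/* Wait (up to five seconds) for a blocked rdev to become unblocked,
 * then drop the pending reference the caller took on it.
 */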
8603void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8604{
8605 sysfs_notify_dirent_safe(rdev->sysfs_state);
8606 wait_event_timeout(rdev->blocked_wait,
8607 !test_bit(Blocked, &rdev->flags) &&
8608 !test_bit(BlockedBadBlocks, &rdev->flags),
8609 msecs_to_jiffies(5000));
8610 rdev_dec_pending(rdev, mddev);
8611}
8612EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8613
8614void md_finish_reshape(struct mddev *mddev)
8615{
8616 /* called by personality module when reshape completes. */
8617 struct md_rdev *rdev;
8618
8619 rdev_for_each(rdev, mddev) {
8620 if (rdev->data_offset > rdev->new_data_offset)
8621 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8622 else
8623 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8624 rdev->data_offset = rdev->new_data_offset;
8625 }
8626}
8627EXPORT_SYMBOL(md_finish_reshape);
8628
8629
8630 /* Bad block management.
8631 * Returns 1 on success, 0 on failure */
8632int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8633 int is_new)
8634{
8635 struct mddev *mddev = rdev->mddev;
8636 int rv;
8637 if (is_new)
8638 s += rdev->new_data_offset;
8639 else
8640 s += rdev->data_offset;
8641 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
8642 if (rv == 0) {
8643 /* Make sure they get written out promptly */
8644 if (test_bit(ExternalBbl, &rdev->flags))
8645 sysfs_notify(&rdev->kobj, NULL,
8646 "unacknowledged_bad_blocks");
8647 sysfs_notify_dirent_safe(rdev->sysfs_state);
8648 set_mask_bits(&mddev->sb_flags, 0,
8649 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
8650 md_wakeup_thread(rdev->mddev->thread);
8651 return 1;
8652 } else
8653 return 0;
8654}
8655EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8656
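/* Remove a range from the bad-block list, e.g. after it has been
 * successfully rewritten; the offset translation mirrors
 * rdev_set_badblocks() above.
 */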
8657int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8658 int is_new)
8659{
8660 int rv;
8661 if (is_new)
8662 s += rdev->new_data_offset;
8663 else
8664 s += rdev->data_offset;
8665 rv = badblocks_clear(&rdev->badblocks, s, sectors);
8666 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
8667 sysfs_notify(&rdev->kobj, NULL, "bad_blocks");
8668 return rv;
8669}
8670EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8671
8672static int md_notify_reboot(struct notifier_block *this,
8673 unsigned long code, void *x)
8674{
8675 struct list_head *tmp;
8676 struct mddev *mddev;
8677 int need_delay = 0;
8678
8679 for_each_mddev(mddev, tmp) {
8680 if (mddev_trylock(mddev)) {
8681 if (mddev->pers)
8682 __md_stop_writes(mddev);
8683 if (mddev->persistent)
8684 mddev->safemode = 2;
8685 mddev_unlock(mddev);
8686 }
8687 need_delay = 1;
8688 }
8689 /*
8690 * certain more exotic SCSI devices are known to be
8691 * volatile wrt too early system reboots. While the
8692 * right place to handle this issue is the given
8693 * driver, we do want to have a safe RAID driver ...
8694 */
8695 if (need_delay)
8696 mdelay(1000);
8697
8698 return NOTIFY_DONE;
8699}
8700
8701static struct notifier_block md_notifier = {
8702 .notifier_call = md_notify_reboot,
8703 .next = NULL,
8704 .priority = INT_MAX,
8705};
8706
8707static void md_geninit(void)
8708{
8709 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8710
8711 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8712}
8713
8714static int __init md_init(void)
8715{
8716 int ret = -ENOMEM;
8717
8718 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8719 if (!md_wq)
8720 goto err_wq;
8721
8722 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8723 if (!md_misc_wq)
8724 goto err_misc_wq;
8725
8726 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8727 goto err_md;
8728
8729 if ((ret = register_blkdev(0, "mdp")) < 0)
8730 goto err_mdp;
8731 mdp_major = ret;
8732
8733 blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
8734 md_probe, NULL, NULL);
8735 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8736 md_probe, NULL, NULL);
8737
8738 register_reboot_notifier(&md_notifier);
8739 raid_table_header = register_sysctl_table(raid_root_table);
8740
8741 md_geninit();
8742 return 0;
8743
8744err_mdp:
8745 unregister_blkdev(MD_MAJOR, "md");
8746err_md:
8747 destroy_workqueue(md_misc_wq);
8748err_misc_wq:
8749 destroy_workqueue(md_wq);
8750err_wq:
8751 return ret;
8752}
8753
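/* For clustered md: apply any role changes found in a superblock
 * freshly re-read on behalf of another node - activate spares,
 * fail removed devices and pick up a changed disk count.
 */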
8754static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
8755{
8756 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
8757 struct md_rdev *rdev2;
8758 int role, ret;
8759 char b[BDEVNAME_SIZE];
8760
8761 /* Check for change of roles in the active devices */
8762 rdev_for_each(rdev2, mddev) {
8763 if (test_bit(Faulty, &rdev2->flags))
8764 continue;
8765
8766 /* Check if the role changed */
8767 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
8768
8769 if (test_bit(Candidate, &rdev2->flags)) {
8770 if (role == 0xfffe) {
8771 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
8772 md_kick_rdev_from_array(rdev2);
8773 continue;
8774 }
8775 else
8776 clear_bit(Candidate, &rdev2->flags);
8777 }
8778
8779 if (role != rdev2->raid_disk) {
8780 /* got activated */
8781 if (rdev2->raid_disk == -1 && role != 0xffff) {
8782 rdev2->saved_raid_disk = role;
8783 ret = remove_and_add_spares(mddev, rdev2);
8784 pr_info("Activated spare: %s\n",
8785 bdevname(rdev2->bdev,b));
8786 /* wakeup mddev->thread here, so array could
8787 * perform resync with the new activated disk */
8788 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8789 md_wakeup_thread(mddev->thread);
8790
8791 }
8792
8793 /* We just want to do the minimum to mark the disk
8794 * as faulty. The recovery is performed by the
8795 * node that initiated the error.
8796 */
8797 if ((role == 0xfffe) || (role == 0xfffd)) {
8798 md_error(mddev, rdev2);
8799 clear_bit(Blocked, &rdev2->flags);
8800 }
8801 }
8802 }
8803
8804 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
8805 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
8806
8807 /* Finally set the event to be up to date */
8808 mddev->events = le64_to_cpu(sb->events);
8809}
8810
8811static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
8812{
8813 int err;
8814 struct page *swapout = rdev->sb_page;
8815 struct mdp_superblock_1 *sb;
8816
8817 /* Store the sb page of the rdev in the swapout temporary
8818 * variable in case we err in the future
8819 */
8820 rdev->sb_page = NULL;
8821 err = alloc_disk_sb(rdev);
8822 if (err == 0) {
8823 ClearPageUptodate(rdev->sb_page);
8824 rdev->sb_loaded = 0;
8825 err = super_types[mddev->major_version].
8826 load_super(rdev, NULL, mddev->minor_version);
8827 }
8828 if (err < 0) {
8829 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
8830 __func__, __LINE__, rdev->desc_nr, err);
8831 if (rdev->sb_page)
8832 put_page(rdev->sb_page);
8833 rdev->sb_page = swapout;
8834 rdev->sb_loaded = 1;
8835 return err;
8836 }
8837
8838 sb = page_address(rdev->sb_page);
8839
8840 /* Read the offset unconditionally, even if
8841 * MD_FEATURE_RECOVERY_OFFSET is not set
8842 */
8843 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
8844 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
8845
8846 /* The other node finished recovery, call spare_active to set
8847 * device In_sync and update mddev->degraded accordingly.
8848 */
8849 if (rdev->recovery_offset == MaxSector &&
8850 !test_bit(In_sync, &rdev->flags) &&
8851 mddev->pers->spare_active(mddev))
8852 sysfs_notify(&mddev->kobj, NULL, "degraded");
8853
8854 put_page(swapout);
8855 return 0;
8856}
8857
8858void md_reload_sb(struct mddev *mddev, int nr)
8859{
8860 struct md_rdev *rdev;
8861 int err;
8862
8863 /* Find the rdev */
8864 rdev_for_each_rcu(rdev, mddev) {
8865 if (rdev->desc_nr == nr)
8866 break;
8867 }
8868
8869 if (!rdev || rdev->desc_nr != nr) {
8870 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
8871 return;
8872 }
8873
8874 err = read_rdev(mddev, rdev);
8875 if (err < 0)
8876 return;
8877
8878 check_sb_changes(mddev, rdev);
8879
8880
8881 /* Read all rdev's to update recovery_offset */
8882 read_rdev(mddev, rdev);
8883}
8884EXPORT_SYMBOL(md_reload_sb);
8885
8886#ifndef MODULE
8887
8888
8889 /*
8890 * Searches all registered partitions for autorun RAID arrays
8891 * at boot time.
8892 */
8893static DEFINE_MUTEX(detected_devices_mutex);
8894static LIST_HEAD(all_detected_devices);
8895struct detected_devices_node {
8896 struct list_head list;
8897 dev_t dev;
8898};
8899
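/* Called during boot for each partition marked for RAID
 * autodetection; the device is queued for autostart_arrays().
 */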
8900void md_autodetect_dev(dev_t dev)
8901{
8902 struct detected_devices_node *node_detected_dev;
8903
8904 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8905 if (node_detected_dev) {
8906 node_detected_dev->dev = dev;
8907 mutex_lock(&detected_devices_mutex);
8908 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8909 mutex_unlock(&detected_devices_mutex);
8910 }
8911}
8912
8913static void autostart_arrays(int part)
8914{
8915 struct md_rdev *rdev;
8916 struct detected_devices_node *node_detected_dev;
8917 dev_t dev;
8918 int i_scanned, i_passed;
8919
8920 i_scanned = 0;
8921 i_passed = 0;
8922
8923 pr_info("md: Autodetecting RAID arrays.\n");
8924
8925 mutex_lock(&detected_devices_mutex);
8926 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8927 i_scanned++;
8928 node_detected_dev = list_entry(all_detected_devices.next,
8929 struct detected_devices_node, list);
8930 list_del(&node_detected_dev->list);
8931 dev = node_detected_dev->dev;
8932 kfree(node_detected_dev);
8933 mutex_unlock(&detected_devices_mutex);
8934 rdev = md_import_device(dev, 0, 90);
8935 mutex_lock(&detected_devices_mutex);
8936 if (IS_ERR(rdev))
8937 continue;
8938
8939 if (test_bit(Faulty, &rdev->flags))
8940 continue;
8941
8942 set_bit(AutoDetected, &rdev->flags);
8943 list_add(&rdev->same_set, &pending_raid_disks);
8944 i_passed++;
8945 }
8946 mutex_unlock(&detected_devices_mutex);
8947
8948 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
8949
8950 autorun_devices(part);
8951}
8952
8953#endif
8954
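/* Module unload: tear everything down in the reverse order of
 * md_init().
 */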
8955static __exit void md_exit(void)
8956{
8957 struct mddev *mddev;
8958 struct list_head *tmp;
8959 int delay = 1;
8960
8961 blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
8962 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8963
8964 unregister_blkdev(MD_MAJOR,"md");
8965 unregister_blkdev(mdp_major, "mdp");
8966 unregister_reboot_notifier(&md_notifier);
8967 unregister_sysctl_table(raid_table_header);
8968
8969 /* We cannot unload the modules while some process is
8970 * waiting for us in select() or poll() - wake them up
8971 */
8972 md_unloading = 1;
8973 while (waitqueue_active(&md_event_waiters)) {
8974 /* not safe to leave yet */
8975 wake_up(&md_event_waiters);
8976 msleep(delay);
8977 delay += delay;
8978 }
8979 remove_proc_entry("mdstat", NULL);
8980
8981 for_each_mddev(mddev, tmp) {
8982 export_array(mddev);
8983 mddev->hold_active = 0;
8984 }
8985 destroy_workqueue(md_misc_wq);
8986 destroy_workqueue(md_wq);
8987}
8988
8989subsys_initcall(md_init);
8990module_exit(md_exit)
8991
8992static int get_ro(char *buffer, struct kernel_param *kp)
8993{
8994 return sprintf(buffer, "%d", start_readonly);
8995}
8996static int set_ro(const char *val, struct kernel_param *kp)
8997{
8998 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
8999}
9000
9001module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9002module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9003module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9004
9005MODULE_LICENSE("GPL");
9006MODULE_DESCRIPTION("MD RAID framework");
9007MODULE_ALIAS("md");
9008MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9009