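// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * md.c : Multiple Devices driver for Linux
 */
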
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/badblocks.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <linux/delay.h>
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/detect.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

#include <trace/events/block.h>
#include "md.h"
#include "md-bitmap.h"
#include "md-cluster.h"

#ifndef MODULE
static void autostart_arrays(int part);
#endif
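
/* pers_list is a list of registered personalities protected by pers_lock.
 * pers_lock does extra service to protect accesses to mddev->thread
 * when the mutex cannot be held.
 */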
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
static struct workqueue_struct *md_rdev_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
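
/*
 * Default number of read corrections we'll attempt on an rdev
 * before ejecting it from the array.  We divide the read error
 * count by 2 for every hour elapsed between read errors.
 */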
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/* Default safemode delay: 200 msec */
#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
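
/*
 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
 * is 1000 KB/sec, so the extra system load does not show up that much.
 * Increase it if you want to have more guaranteed speed.  Note that
 * the RAID driver will use the maximum available bandwidth if the IO
 * subsystem is idle.  There is also an 'absolute maximum' reconstruction
 * speed limit - in case reconstruction slows down your system despite
 * idle IO detection.
 *
 * You can change it via /proc/sys/dev/raid/speed_limit_min and _max,
 * or via /sys/block/mdX/md/sync_speed_{min,max}.
 */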
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
{
	return mddev->sync_speed_min ?
		mddev->sync_speed_min : sysctl_speed_limit_min;
}

static inline int speed_max(struct mddev *mddev)
{
	return mddev->sync_speed_max ?
		mddev->sync_speed_max : sysctl_speed_limit_max;
}

static void rdev_uninit_serial(struct md_rdev *rdev)
{
	if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
		return;

	kvfree(rdev->serial);
	rdev->serial = NULL;
}

static void rdevs_uninit_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		rdev_uninit_serial(rdev);
}

static int rdev_init_serial(struct md_rdev *rdev)
{
	/* serial_nums equals with BARRIER_BUCKETS_NR */
	int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
	struct serial_in_rdev *serial = NULL;

	if (test_bit(CollisionCheck, &rdev->flags))
		return 0;

	serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
			  GFP_KERNEL);
	if (!serial)
		return -ENOMEM;

	for (i = 0; i < serial_nums; i++) {
		struct serial_in_rdev *serial_tmp = &serial[i];

		spin_lock_init(&serial_tmp->serial_lock);
		serial_tmp->serial_rb = RB_ROOT_CACHED;
		init_waitqueue_head(&serial_tmp->serial_io_wait);
	}

	rdev->serial = serial;
	set_bit(CollisionCheck, &rdev->flags);

	return 0;
}

static int rdevs_init_serial(struct mddev *mddev)
{
	struct md_rdev *rdev;
	int ret = 0;

	rdev_for_each(rdev, mddev) {
		ret = rdev_init_serial(rdev);
		if (ret)
			break;
	}

	/* Free all resources if failing to init serial for any rdev */
	if (ret && !mddev->serial_info_pool)
		rdevs_uninit_serial(mddev);

	return ret;
}
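
/*
 * rdev needs serialized I/O handling if it meets the conditions:
 * 1. it is a multi-queue device flagged with writemostly.
 * 2. the write-behind mode is enabled.
 */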
static int rdev_need_serial(struct md_rdev *rdev)
{
	return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
		rdev->bdev->bd_queue->nr_hw_queues != 1 &&
		test_bit(WriteMostly, &rdev->flags));
}
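
/*
 * Init resource for rdev(s), then create serial_info_pool if:
 * 1. rdev is the first device which returns true from rdev_need_serial.
 * 2. rdev is NULL, meaning we want to enable serialization for all rdevs.
 */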
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			      bool is_suspend)
{
	int ret = 0;

	if (rdev && !rdev_need_serial(rdev) &&
	    !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (!is_suspend)
		mddev_suspend(mddev);

	if (!rdev)
		ret = rdevs_init_serial(mddev);
	else
		ret = rdev_init_serial(rdev);
	if (ret)
		goto abort;

	if (mddev->serial_info_pool == NULL) {
		/*
		 * already in memalloc noio context by
		 * mddev_suspend()
		 */
		mddev->serial_info_pool =
			mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
		if (!mddev->serial_info_pool) {
			rdevs_uninit_serial(mddev);
			pr_err("can't alloc memory pool for serialization\n");
		}
	}

abort:
	if (!is_suspend)
		mddev_resume(mddev);
}
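
/*
 * Free resource from rdev(s), and destroy serial_info_pool under conditions:
 * 1. rdev is the last device flagged with CollisionCheck.
 * 2. when bitmap is destroyed while policy is not enabled.
 * 3. for disable policy, the pool is destroyed only when no rdev needs it.
 */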
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
			       bool is_suspend)
{
	if (rdev && !test_bit(CollisionCheck, &rdev->flags))
		return;

	if (mddev->serial_info_pool) {
		struct md_rdev *temp;
		int num = 0; /* used to track if other rdevs need the pool */

		if (!is_suspend)
			mddev_suspend(mddev);
		rdev_for_each(temp, mddev) {
			if (!rdev) {
				if (!mddev->serialize_policy ||
				    !rdev_need_serial(temp))
					rdev_uninit_serial(temp);
				else
					num++;
			} else if (temp != rdev &&
				   test_bit(CollisionCheck, &temp->flags))
				num++;
		}

		if (rdev)
			rdev_uninit_serial(rdev);

		if (num)
			pr_info("The mempool could be used by other devices\n");
		else {
			mempool_destroy(mddev->serial_info_pool);
			mddev->serial_info_pool = NULL;
		}
		if (!is_suspend)
			mddev_resume(mddev);
	}
}

static struct ctl_table_header *raid_table_header;

static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }
};

static const struct block_device_operations md_fops;

static int start_readonly;
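
/*
 * The original mechanism for creating an md device is to create
 * a device node in /dev and to open it.  This causes races with device-close.
 * The preferred method is to write to the "new_array" module parameter.
 * This can avoid races.
 * Setting create_on_open to false disables the original mechanism
 * so all the races disappear.
 */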
static bool create_on_open = true;

struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
			    struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->bio_set))
		return bio_alloc(gfp_mask, nr_iovecs);

	return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);

static struct bio *md_bio_alloc_sync(struct mddev *mddev)
{
	if (!mddev || !bioset_initialized(&mddev->sync_set))
		return bio_alloc(GFP_NOIO, 1);

	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
}
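
/*
 * We have a system wide 'event count' that is incremented
 * on any 'interesting' event, and readers of /proc/mdstat
 * can use 'poll' or 'select' to find out when the event
 * count increases.
 *
 * Events are:
 *  start array, stop array, error, add device, remove device,
 *  start build, activate spare
 */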
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);
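
/*
 * Enables to iterate over all existing md arrays
 * all_mddevs_lock protects this list.
 */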
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
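
/*
 * iterates through all used mddevs in the system.
 * We take care to grab the all_mddevs_lock whenever navigating
 * the list, and to always hold a refcount when unlocked.
 * Any code which breaks out of this loop while owning
 * a reference to the current mddev must mddev_put it.
 */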
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
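
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.
 * We hold a refcount over the call to ->make_request.  By the time that
 * call has finished, the bio has been linked into some internal structure
 * and so is visible to ->quiesce(), so we don't need the refcount any more.
 */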
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
	if (mddev->suspended)
		return true;
	if (bio_data_dir(bio) != WRITE)
		return false;
	if (mddev->suspend_lo >= mddev->suspend_hi)
		return false;
	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
		return false;
	if (bio_end_sector(bio) < mddev->suspend_lo)
		return false;
	return true;
}

void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);

static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
	const int rw = bio_data_dir(bio);
	const int sgrp = op_stat_group(bio_op(bio));
	struct mddev *mddev = bio->bi_disk->private_data;
	unsigned int sectors;

	if (mddev == NULL || mddev->pers == NULL) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	blk_queue_split(q, &bio);

	if (mddev->ro == 1 && unlikely(rw == WRITE)) {
		if (bio_sectors(bio) != 0)
			bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/*
	 * save the sectors now since our bio can
	 * go away inside make_request
	 */
	sectors = bio_sectors(bio);
	/* bio could be mergeable after passing to underlayer */
	bio->bi_opf &= ~REQ_NOMERGE;

	md_handle_request(mddev, bio);

	part_stat_lock();
	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
	part_stat_unlock();

	return BLK_QC_T_NONE;
}
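
/* mddev_suspend makes sure no new requests are submitted
 * to the device, and that any requests that have been submitted
 * are completely handled.
 * Once mddev_detach() is called and completes, the module will be
 * completely unused.
 */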
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (mddev->suspended++)
		return;
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);
	/* restrict memory reclaim I/O while the raid array is suspended */
	mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);

void mddev_resume(struct mddev *mddev)
{
	/* entered the memalloc scope from mddev_suspend() */
	memalloc_noio_restore(mddev->noio_flag);
	lockdep_assert_held(&mddev->reconfig_mutex);
	if (--mddev->suspended)
		return;
	wake_up(&mddev->sb_wait);
	mddev->pers->quiesce(mddev, 0);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	md_wakeup_thread(mddev->sync_thread);	/* possibly kick off a reshape */
}
EXPORT_SYMBOL_GPL(mddev_resume);

int mddev_congested(struct mddev *mddev, int bits)
{
	struct md_personality *pers = mddev->pers;
	int ret = 0;

	rcu_read_lock();
	if (mddev->suspended)
		ret = 1;
	else if (pers && pers->congested)
		ret = pers->congested(mddev, bits);
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(mddev_congested);
static int md_congested(void *data, int bits)
{
	struct mddev *mddev = data;
	return mddev_congested(mddev, bits);
}
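
/*
 * Generic flush handling for md
 */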
static void md_end_flush(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&mddev->flush_pending)) {
		/* The pre-request flush has finished */
		queue_work(md_wq, &mddev->flush_work);
	}
	bio_put(bio);
}

static void md_submit_flush_data(struct work_struct *ws);

static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* Take two references, one is dropped
			 * when request finishes, one after
			 * we reclaim rcu_read_lock
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}

static void md_submit_flush_data(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct bio *bio = mddev->flush_bio;

	/*
	 * must reset flush_bio before calling into md_handle_request to avoid a
	 * deadlock, because other bios passed md_handle_request suspend check
	 * could wait for this and below md_handle_request could wait for those
	 * bios because of suspend check
	 */
	spin_lock_irq(&mddev->lock);
	mddev->prev_flush_start = mddev->start_flush;
	mddev->flush_bio = NULL;
	spin_unlock_irq(&mddev->lock);
	wake_up(&mddev->sb_wait);

	if (bio->bi_iter.bi_size == 0) {
		/* an empty barrier - all done */
		bio_endio(bio);
	} else {
		bio->bi_opf &= ~REQ_PREFLUSH;
		md_handle_request(mddev, bio);
	}
}
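
/*
 * Manages the consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or is
 * handled here; returns false when the caller must still submit the data
 * part of the bio, with REQ_PREFLUSH already cleared.
 */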
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t req_start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);
	/* flush requests wait until ongoing flush completes,
	 * hence coalescing all the pending requests.
	 */
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_before(req_start, mddev->prev_flush_start),
			    mddev->lock);
	/* new request after previous flush is completed */
	if (ktime_after(req_start, mddev->prev_flush_start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		/* flush was performed for some other bio while we waited. */
		if (bio->bi_iter.bi_size == 0)
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			return false;
		}
	}
	return true;
}
EXPORT_SYMBOL(md_flush_request);

static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}

static void mddev_delayed_delete(struct work_struct *ws);

static void mddev_put(struct mddev *mddev)
{
	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
		return;
	if (!mddev->raid_disks && list_empty(&mddev->disks) &&
	    mddev->ctime == 0 && !mddev->hold_active) {
		/* Array is not configured at all, and not held active,
		 * so destroy it */
		list_del_init(&mddev->all_mddevs);

		/*
		 * Call queue_work inside the spinlock so that
		 * flush_workqueue() after mddev_find will succeed in waiting
		 * for the work to be done.
		 */
		INIT_WORK(&mddev->del_work, mddev_delayed_delete);
		queue_work(md_misc_wq, &mddev->del_work);
	}
	spin_unlock(&all_mddevs_lock);
}

static void md_safemode_timeout(struct timer_list *t);

void mddev_init(struct mddev *mddev)
{
	kobject_init(&mddev->kobj, &md_ktype);
	mutex_init(&mddev->open_mutex);
	mutex_init(&mddev->reconfig_mutex);
	mutex_init(&mddev->bitmap_info.mutex);
	INIT_LIST_HEAD(&mddev->disks);
	INIT_LIST_HEAD(&mddev->all_mddevs);
	timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
	atomic_set(&mddev->active, 1);
	atomic_set(&mddev->openers, 0);
	atomic_set(&mddev->active_io, 0);
	spin_lock_init(&mddev->lock);
	atomic_set(&mddev->flush_pending, 0);
	init_waitqueue_head(&mddev->sb_wait);
	init_waitqueue_head(&mddev->recovery_wait);
	mddev->reshape_position = MaxSector;
	mddev->reshape_backwards = 0;
	mddev->last_sync_action = "none";
	mddev->resync_min = 0;
	mddev->resync_max = MaxSector;
	mddev->level = LEVEL_NONE;
}
EXPORT_SYMBOL_GPL(mddev_init);

static struct mddev *mddev_find_locked(dev_t unit)
{
	struct mddev *mddev;

	list_for_each_entry(mddev, &all_mddevs, all_mddevs)
		if (mddev->unit == unit)
			return mddev;

	return NULL;
}

/* find an unused unit number */
static dev_t mddev_alloc_unit(void)
{
	static int next_minor = 512;
	int start = next_minor;
	bool is_free = false;
	dev_t dev = 0;

	while (!is_free) {
		dev = MKDEV(MD_MAJOR, next_minor);
		next_minor++;
		if (next_minor > MINORMASK)
			next_minor = 0;
		if (next_minor == start)
			return 0;	/* Oh dear, all in use. */
		is_free = !mddev_find_locked(dev);
	}

	return dev;
}

static struct mddev *mddev_find(dev_t unit)
{
	struct mddev *mddev;

	if (MAJOR(unit) != MD_MAJOR)
		unit &= ~((1 << MdpMinorShift) - 1);

	spin_lock(&all_mddevs_lock);
	mddev = mddev_find_locked(unit);
	if (mddev)
		mddev_get(mddev);
	spin_unlock(&all_mddevs_lock);

	return mddev;
}

static struct mddev *mddev_alloc(dev_t unit)
{
	struct mddev *new;
	int error;

	if (unit && MAJOR(unit) != MD_MAJOR)
		unit &= ~((1 << MdpMinorShift) - 1);

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);
	mddev_init(new);

	spin_lock(&all_mddevs_lock);
	if (unit) {
		error = -EEXIST;
		if (mddev_find_locked(unit))
			goto out_free_new;
		new->unit = unit;
		if (MAJOR(unit) == MD_MAJOR)
			new->md_minor = MINOR(unit);
		else
			new->md_minor = MINOR(unit) >> MdpMinorShift;
		new->hold_active = UNTIL_IOCTL;
	} else {
		error = -ENODEV;
		new->unit = mddev_alloc_unit();
		if (!new->unit)
			goto out_free_new;
		new->md_minor = MINOR(new->unit);
		new->hold_active = UNTIL_STOP;
	}

	list_add(&new->all_mddevs, &all_mddevs);
	spin_unlock(&all_mddevs_lock);
	return new;
out_free_new:
	spin_unlock(&all_mddevs_lock);
	kfree(new);
	return ERR_PTR(error);
}

static struct attribute_group md_redundancy_group;

void mddev_unlock(struct mddev *mddev)
{
	if (mddev->to_remove) {
		/* These cannot be removed under reconfig_mutex as
		 * an access to the files will try to take reconfig_mutex
		 * while holding the file unremovable, which leads to
		 * a deadlock.
		 * So hold sysfs_active set while the removal is happening,
		 * and anything else which might set ->to_remove or may
		 * otherwise change the sysfs namespace will fail with
		 * -EBUSY if sysfs_active is still set.
		 * We set sysfs_active under reconfig_mutex and elsewhere
		 * test it under the same mutex to ensure its correct value
		 * is seen.
		 */
		struct attribute_group *to_remove = mddev->to_remove;
		mddev->to_remove = NULL;
		mddev->sysfs_active = 1;
		mutex_unlock(&mddev->reconfig_mutex);

		if (mddev->kobj.sd) {
			if (to_remove != &md_redundancy_group)
				sysfs_remove_group(&mddev->kobj, to_remove);
			if (mddev->pers == NULL ||
			    mddev->pers->sync_request == NULL) {
				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
				if (mddev->sysfs_action)
					sysfs_put(mddev->sysfs_action);
				if (mddev->sysfs_completed)
					sysfs_put(mddev->sysfs_completed);
				if (mddev->sysfs_degraded)
					sysfs_put(mddev->sysfs_degraded);
				mddev->sysfs_action = NULL;
				mddev->sysfs_completed = NULL;
				mddev->sysfs_degraded = NULL;
			}
		}
		mddev->sysfs_active = 0;
	} else
		mutex_unlock(&mddev->reconfig_mutex);

	/* As we've dropped the mutex we need a spinlock to
	 * make sure the thread doesn't disappear
	 */
	spin_lock(&pers_lock);
	md_wakeup_thread(mddev->thread);
	wake_up(&mddev->sb_wait);
	spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);

struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->desc_nr == nr)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);

static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}

struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
	struct md_rdev *rdev;

	rdev_for_each_rcu(rdev, mddev)
		if (rdev->bdev->bd_dev == dev)
			return rdev;

	return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

static struct md_personality *find_pers(int level, char *clevel)
{
	struct md_personality *pers;
	list_for_each_entry(pers, &pers_list, list) {
		if (level != LEVEL_NONE && pers->level == level)
			return pers;
		if (strcmp(pers->name, clevel) == 0)
			return pers;
	}
	return NULL;
}

/* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
	return MD_NEW_SIZE_SECTORS(num_sectors);
}

static int alloc_disk_sb(struct md_rdev *rdev)
{
	rdev->sb_page = alloc_page(GFP_KERNEL);
	if (!rdev->sb_page)
		return -ENOMEM;
	return 0;
}

void md_rdev_clear(struct md_rdev *rdev)
{
	if (rdev->sb_page) {
		put_page(rdev->sb_page);
		rdev->sb_loaded = 0;
		rdev->sb_page = NULL;
		rdev->sb_start = 0;
		rdev->sectors = 0;
	}
	if (rdev->bb_page) {
		put_page(rdev->bb_page);
		rdev->bb_page = NULL;
	}
	badblocks_exit(&rdev->badblocks);
}
EXPORT_SYMBOL_GPL(md_rdev_clear);

static void super_written(struct bio *bio)
{
	struct md_rdev *rdev = bio->bi_private;
	struct mddev *mddev = rdev->mddev;

	if (bio->bi_status) {
		pr_err("md: %s gets error=%d\n", __func__,
		       blk_status_to_errno(bio->bi_status));
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags)
		    && (bio->bi_opf & MD_FAILFAST)) {
			set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
			set_bit(LastDev, &rdev->flags);
		}
	} else
		clear_bit(LastDev, &rdev->flags);

	if (atomic_dec_and_test(&mddev->pending_writes))
		wake_up(&mddev->sb_wait);
	rdev_dec_pending(rdev, mddev);
	bio_put(bio);
}

void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
		    sector_t sector, int size, struct page *page)
{
	/* write first size bytes of page to sector of rdev
	 * Increment mddev->pending_writes before returning
	 * and decrement it on completion, waking up sb_wait
	 * if zero is reached.
	 * If an error occurred, call md_error
	 */
	struct bio *bio;
	int ff = 0;

	if (!page)
		return;

	if (test_bit(Faulty, &rdev->flags))
		return;

	bio = md_bio_alloc_sync(mddev);

	atomic_inc(&rdev->nr_pending);

	bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, size, 0);
	bio->bi_private = rdev;
	bio->bi_end_io = super_written;

	if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
	    test_bit(FailFast, &rdev->flags) &&
	    !test_bit(LastDev, &rdev->flags))
		ff = MD_FAILFAST;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;

	atomic_inc(&mddev->pending_writes);
	submit_bio(bio);
}

int md_super_wait(struct mddev *mddev)
{
	/* wait for all superblock writes that were scheduled to complete */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes) == 0);
	if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
		return -EAGAIN;
	return 0;
}

int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
		 struct page *page, int op, int op_flags, bool metadata_op)
{
	struct bio *bio = md_bio_alloc_sync(rdev->mddev);
	int ret;

	if (metadata_op && rdev->meta_bdev)
		bio_set_dev(bio, rdev->meta_bdev);
	else
		bio_set_dev(bio, rdev->bdev);
	bio_set_op_attrs(bio, op, op_flags);
	if (metadata_op)
		bio->bi_iter.bi_sector = sector + rdev->sb_start;
	else if (rdev->mddev->reshape_position != MaxSector &&
		 (rdev->mddev->reshape_backwards ==
		  (sector >= rdev->mddev->reshape_position)))
		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
	else
		bio->bi_iter.bi_sector = sector + rdev->data_offset;
	bio_add_page(bio, page, size, 0);

	submit_bio_wait(bio);

	ret = !bio->bi_status;
	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL_GPL(sync_page_io);

static int read_disk_sb(struct md_rdev *rdev, int size)
{
	char b[BDEVNAME_SIZE];

	if (rdev->sb_loaded)
		return 0;

	if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
		goto fail;
	rdev->sb_loaded = 1;
	return 0;

fail:
	pr_err("md: disabled device %s, could not read superblock.\n",
	       bdevname(rdev->bdev, b));
	return -EINVAL;
}

static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	return	sb1->set_uuid0 == sb2->set_uuid0 &&
		sb1->set_uuid1 == sb2->set_uuid1 &&
		sb1->set_uuid2 == sb2->set_uuid2 &&
		sb1->set_uuid3 == sb2->set_uuid3;
}

static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
	int ret;
	mdp_super_t *tmp1, *tmp2;

	tmp1 = kmalloc(sizeof(*tmp1), GFP_KERNEL);
	tmp2 = kmalloc(sizeof(*tmp2), GFP_KERNEL);

	if (!tmp1 || !tmp2) {
		ret = 0;
		goto abort;
	}

	*tmp1 = *sb1;
	*tmp2 = *sb2;

	/*
	 * nr_disks is not constant
	 */
	tmp1->nr_disks = 0;
	tmp2->nr_disks = 0;

	ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
	kfree(tmp1);
	kfree(tmp2);
	return ret;
}

static u32 md_csum_fold(u32 csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

static unsigned int calc_sb_csum(mdp_super_t *sb)
{
	u64 newcsum = 0;
	u32 *sb32 = (u32*)sb;
	int i;
	unsigned int disk_csum, csum;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;

	for (i = 0; i < MD_SB_BYTES/4 ; i++)
		newcsum += sb32[i];
	csum = (newcsum & 0xffffffff) + (newcsum>>32);

#ifdef CONFIG_ALPHA
	/* This used to use csum_partial, which was wrong for several
	 * reasons including that different results are returned on
	 * different architectures.  It isn't critical that we get exactly
	 * the same return value as before (we always csum_fold before
	 * using it), but it is important that we never try
	 * to absorb a byte-swapped value.  So...
	 */
	sb->sb_csum = md_csum_fold(disk_csum);
#else
	sb->sb_csum = disk_csum;
#endif
	return csum;
}
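
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a common interface for them all, and an array of
 * different handlers.
 * We rely on user-space to write the initial superblock, and support
 * adding/removing/modifying it later.
 * Interface methods are:
 *   load_super:       read the superblock from a device and check it
 *                     against a reference device, if one is given.
 *   validate_super:   set up mddev/rdev state from a loaded superblock.
 *   sync_super:       write current mddev state back into the rdev's
 *                     in-memory superblock.
 *   rdev_size_change: adjust the recorded device size, if possible.
 *   allow_new_offset: check whether the data can move to a new offset.
 */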
struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
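
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that do not
 * support bitmaps. It prints an error message and returns non-zero if mddev
 * has a bitmap. Otherwise, it returns 0.
 */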
int md_check_no_bitmap(struct mddev *mddev)
{
	if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
		return 0;
	pr_warn("%s: bitmaps are not supported for %s\n",
		mdname(mddev), mddev->pers->name);
	return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);
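
/*
 * load_super for 0.90.0
 */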
static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	mdp_super_t *sb;
	int ret;
	bool spare_disk = true;

	/*
	 * Calculate the position of the superblock (512byte sectors),
	 * it's at the end of the disk.
	 *
	 * It also happens to be a multiple of 4Kb.
	 */
	rdev->sb_start = calc_dev_sboffset(rdev);

	ret = read_disk_sb(rdev, MD_SB_BYTES);
	if (ret)
		return ret;

	ret = -EINVAL;

	bdevname(rdev->bdev, b);
	sb = page_address(rdev->sb_page);

	if (sb->md_magic != MD_SB_MAGIC) {
		pr_warn("md: invalid raid superblock magic on %s\n", b);
		goto abort;
	}

	if (sb->major_version != 0 ||
	    sb->minor_version < 90 ||
	    sb->minor_version > 91) {
		pr_warn("Bad version number %d.%d on %s\n",
			sb->major_version, sb->minor_version, b);
		goto abort;
	}

	if (sb->raid_disks <= 0)
		goto abort;

	if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
		pr_warn("md: invalid superblock checksum on %s\n", b);
		goto abort;
	}

	rdev->preferred_minor = sb->md_minor;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_size = MD_SB_BYTES;
	rdev->badblocks.shift = -1;

	if (sb->level == LEVEL_MULTIPATH)
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = sb->this_disk.number;

	/* not spare disk, or LEVEL_MULTIPATH */
	if (sb->level == LEVEL_MULTIPATH ||
	    (rdev->desc_nr >= 0 &&
	     rdev->desc_nr < MD_SB_DISKS &&
	     sb->disks[rdev->desc_nr].state &
	     ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
		spare_disk = false;

	if (!refdev) {
		if (!spare_disk)
			ret = 1;
		else
			ret = 0;
	} else {
		__u64 ev1, ev2;
		mdp_super_t *refsb = page_address(refdev->sb_page);
		if (!md_uuid_equal(refsb, sb)) {
			pr_warn("md: %s has different UUID to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		if (!md_sb_equal(refsb, sb)) {
			pr_warn("md: %s has same UUID but different superblock to %s\n",
				b, bdevname(refdev->bdev, b2));
			goto abort;
		}
		ev1 = md_event(sb);
		ev2 = md_event(refsb);

		if (!spare_disk && ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	rdev->sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * (not needed for Linear and RAID0 as metadata doesn't
	 * record this size)
	 */
	if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
		rdev->sectors = (sector_t)(2ULL << 32) - 2;

	if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
		/* "this cannot possibly happen" ... */
		ret = -EINVAL;

 abort:
	return ret;
}
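
/*
 * validate_super for 0.90.0
 */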
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* bitmap can use 60 K after the 4K superblocks */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}
		if (mddev->level == 0)
			mddev->layout = -1;

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			if (sb->events_hi == sb->cp_events_hi &&
			    sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12, &sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except
		 * for spares (which don't need an event count) */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* if adding to array with a bitmap, then we can accept an
		 * older device ... but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}
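
/*
 * sync_super for 0.90.0
 */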
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	int next_spare = mddev->raid_disks;

	/* make rdev->sb match mddev data..
	 *
	 * 1/ zero out disks
	 * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare);
	 * 3/ any empty disks < next_spare become removed
	 *
	 * disks[0] gets initialised to REMOVED because
	 * we cannot be sure from other fields if it has
	 * been initialised or not.
	 */
	int i;
	int active = 0, working = 0, failed = 0, spare = 0, nr_disks = 0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words  = 0; /* ignored */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12, 4);

	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync) {
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}
	/* now set the "removed" and "faulty" bits on any missing devices */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}
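
/*
 * rdev_size_change for 0.90.0
 */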
static unsigned long long
super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->mddev->bitmap_info.offset)
		return 0; /* can't move bitmap */
	rdev->sb_start = calc_dev_sboffset(rdev);
	if (!num_sectors || num_sectors > rdev->sb_start)
		num_sectors = rdev->sb_start;
	/* Limit to 4TB as metadata cannot record more than that.
	 * 4TB == 2^32 KB, or 2*2^32 sectors.
	 */
	if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
		num_sectors = (sector_t)(2ULL << 32) - 2;
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	/* non-zero offset changes not possible with v0.90 */
	return new_offset == 0;
}
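
/*
 * version 1 superblock
 */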
static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
{
	__le32 disk_csum;
	u32 csum;
	unsigned long long newcsum;
	int size = 256 + le32_to_cpu(sb->max_dev)*2;
	__le32 *isuper = (__le32*)sb;

	disk_csum = sb->sb_csum;
	sb->sb_csum = 0;
	newcsum = 0;
	for (; size >= 4; size -= 4)
		newcsum += le32_to_cpu(*isuper++);

	if (size == 2)
		newcsum += le16_to_cpu(*(__le16*) isuper);

	csum = (newcsum & 0xffffffff) + (newcsum >> 32);
	sb->sb_csum = disk_csum;
	return cpu_to_le32(csum);
}

static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;
	bool spare_disk = true;

	/*
	 * Calculate the position of the superblock in 512byte sectors.
	 * It is always aligned to a 4K boundary and
	 * depending on minor_version, it can be:
	 * 0: At least 8K, but less than 12K, from end of device
	 * 1: At start of device
	 * 2: 4K from start of device.
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* superblock is rarely larger than 1K, but it can be larger,
	 * and it is safe to read 4k, so we do that
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev, b));
		return -EINVAL;
	}
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* Some padding is non-zero, might be a new feature */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/* need to load the bad block list.
		 * Currently we limit it to one page.
		 */
		s32 offset;
		sector_t bb_sector;
		__le64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (__le64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
	    sb->level != 0)
		return -EINVAL;

	/* not spare disk, or LEVEL_MULTIPATH */
	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
	    (rdev->desc_nr >= 0 &&
	     rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
	     (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
	      le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
		spare_disk = false;

	if (!refdev) {
		if (!spare_disk)
			ret = 1;
		else
			ret = 0;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev, b),
				bdevname(refdev->bdev, b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (!spare_disk && ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* Default location for bitmap is 1K after superblock
		 * using 3K - total of 4K
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/* Metadata doesn't record how much space is available.
			 * For 1.0, we assume we can use up to the superblock
			 * if before, else to 4K beyond superblock.
			 * For others, assume no change is possible.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		if (mddev->level == 0 &&
		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
			mddev->layout = -1;

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
		 * older device, but not too old.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
		case MD_DISK_ROLE_FAULTY: /* faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL: /* journal device */
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal device without journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else {
				/*
				 * If the array is FROZEN, then the device can't
				 * be in_sync with rest of array.
				 */
				if (!test_bit(MD_RECOVERY_FROZEN,
					      &mddev->recovery))
					set_bit(In_sync, &rdev->flags);
			}
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH are always insync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}

static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;

	/* make rdev->sb match mddev and rdev data. */
	sb = page_address(rdev->sb_page);

	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* Nothing to do for bad blocks*/ ;
	else if (sb->bblog_offset == 0)
		/* Cannot record bad blocks on this device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		__le64 *bbp = (__le64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

retry:
			seq = read_seqbegin(&bb->lock);

			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	for (i = 0; i < max_dev; i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}

static sector_t super_1_choose_bm_space(sector_t dev_size)
{
	sector_t bm_space;

	/* if the device is bigger than 8Gig, save 64k for bitmap
	 * usage, if bigger than 200Gig, save 128k
	 */
	if (dev_size < 64*2)
		bm_space = 0;
	else if (dev_size - 64*2 >= 200*1024*1024*2)
		bm_space = 128*2;
	else if (dev_size - 4*2 > 8*1024*1024*2)
		bm_space = 64*2;
	else
		bm_space = 4*2;
	return bm_space;
}

static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
	struct mdp_superblock_1 *sb;
	sector_t max_sectors;
	if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
		return 0; /* component must fit device */
	if (rdev->data_offset != rdev->new_data_offset)
		return 0; /* too confusing */
	if (rdev->sb_start < rdev->data_offset) {
		/* minor versions 1 and 2; superblock before data */
		max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
		max_sectors -= rdev->data_offset;
		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	} else if (rdev->mddev->bitmap_info.offset) {
		/* minor version 0 with bitmap we can't move */
		return 0;
	} else {
		/* minor version 0; superblock after data */
		sector_t sb_start, bm_space;
		sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;

		/* 8K is for superblock */
		sb_start = dev_size - 8*2;
		sb_start &= ~(sector_t)(4*2 - 1);

		bm_space = super_1_choose_bm_space(dev_size);

		/* Space that can be used to store data needs to decrease
		 * superblock bitmap space and bad block space(4K)
		 */
		max_sectors = sb_start - bm_space - 4*2;

		if (!num_sectors || num_sectors > max_sectors)
			num_sectors = max_sectors;
	}
	sb = page_address(rdev->sb_page);
	sb->data_size = cpu_to_le64(num_sectors);
	sb->super_offset = cpu_to_le64(rdev->sb_start);
	sb->sb_csum = calc_sb_1_csum(sb);
	do {
		md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
			       rdev->sb_page);
	} while (md_super_wait(rdev->mddev) < 0);
	return num_sectors;
}

static int
super_1_allow_new_offset(struct md_rdev *rdev,
			 unsigned long long new_offset)
{
	/* All necessary checks on new >= old have been done */
	struct bitmap *bitmap;
	if (new_offset >= rdev->data_offset)
		return 1;

	/* with 1.0 metadata, there is no metadata to tread on
	 * so we can always move back */
	if (rdev->mddev->minor_version == 0)
		return 1;

	/* otherwise we must be sure not to step on any metadata:
	 * the new data offset must leave room for the superblock
	 * plus 36K of slack, and must stay clear of the bitmap
	 * and the bad-block list.
	 */
	if (rdev->sb_start + (32+4)*2 > new_offset)
		return 0;
	bitmap = rdev->mddev->bitmap;
	if (bitmap && !rdev->mddev->bitmap_info.file &&
	    rdev->sb_start + rdev->mddev->bitmap_info.offset +
	    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
		return 0;
	if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
		return 0;

	return 1;
}
2303
2304static struct super_type super_types[] = {
2305 [0] = {
2306 .name = "0.90.0",
2307 .owner = THIS_MODULE,
2308 .load_super = super_90_load,
2309 .validate_super = super_90_validate,
2310 .sync_super = super_90_sync,
2311 .rdev_size_change = super_90_rdev_size_change,
2312 .allow_new_offset = super_90_allow_new_offset,
2313 },
2314 [1] = {
2315 .name = "md-1",
2316 .owner = THIS_MODULE,
2317 .load_super = super_1_load,
2318 .validate_super = super_1_validate,
2319 .sync_super = super_1_sync,
2320 .rdev_size_change = super_1_rdev_size_change,
2321 .allow_new_offset = super_1_allow_new_offset,
2322 },
2323};
2324
2325static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2326{
2327 if (mddev->sync_super) {
2328 mddev->sync_super(mddev, rdev);
2329 return;
2330 }
2331
2332 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2333
2334 super_types[mddev->major_version].sync_super(mddev, rdev);
2335}
2336
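/*
 * Check whether two arrays share a component device: returns 1 if any
 * active member of mddev1 sits on the same gendisk as a member of mddev2
 * (Faulty and Journal devices and unassigned slots are ignored).
 */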
2337static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2338{
2339 struct md_rdev *rdev, *rdev2;
2340
2341 rcu_read_lock();
2342 rdev_for_each_rcu(rdev, mddev1) {
2343 if (test_bit(Faulty, &rdev->flags) ||
2344 test_bit(Journal, &rdev->flags) ||
2345 rdev->raid_disk == -1)
2346 continue;
2347 rdev_for_each_rcu(rdev2, mddev2) {
2348 if (test_bit(Faulty, &rdev2->flags) ||
2349 test_bit(Journal, &rdev2->flags) ||
2350 rdev2->raid_disk == -1)
2351 continue;
2352 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2353 rcu_read_unlock();
2354 return 1;
2355 }
2356 }
2357 }
2358 rcu_read_unlock();
2359 return 0;
2360}
2361
2362static LIST_HEAD(pending_raid_disks);
2363
/*
 * Try to register a data integrity profile for an mddev.
 *
 * This succeeds only when every in-sync component device exposes an
 * identical integrity profile; the common profile is then registered
 * for the md gendisk.
 */
2371int md_integrity_register(struct mddev *mddev)
2372{
2373 struct md_rdev *rdev, *reference = NULL;
2374
2375 if (list_empty(&mddev->disks))
2376 return 0;
2377 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2378 return 0;
2379 rdev_for_each(rdev, mddev) {
		/* skip spares and non-functional disks */
2381 if (test_bit(Faulty, &rdev->flags))
2382 continue;
2383 if (rdev->raid_disk < 0)
2384 continue;
2385 if (!reference) {
			/* Use the first rdev as the reference */
2387 reference = rdev;
2388 continue;
2389 }
		/* does this rdev's profile match the reference profile? */
2391 if (blk_integrity_compare(reference->bdev->bd_disk,
2392 rdev->bdev->bd_disk) < 0)
2393 return -EINVAL;
2394 }
2395 if (!reference || !bdev_get_integrity(reference->bdev))
2396 return 0;

	/*
	 * All component devices are integrity capable and have matching
	 * profiles, register the common profile for the md device.
	 */
2401 blk_integrity_register(mddev->gendisk,
2402 bdev_get_integrity(reference->bdev));
2403
2404 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2405 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
2406 pr_err("md: failed to create integrity pool for %s\n",
2407 mdname(mddev));
2408 return -EINVAL;
2409 }
2410 return 0;
2411}
2412EXPORT_SYMBOL(md_integrity_register);
2413
/*
 * Attempt to add an rdev, but only if it is consistent with the current
 * integrity profile.
 */
2418int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2419{
2420 struct blk_integrity *bi_mddev;
2421 char name[BDEVNAME_SIZE];
2422
2423 if (!mddev->gendisk)
2424 return 0;
2425
2426 bi_mddev = blk_get_integrity(mddev->gendisk);
2427
2428 if (!bi_mddev)
2429 return 0;
2430
2431 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2432 pr_err("%s: incompatible integrity profile for %s\n",
2433 mdname(mddev), bdevname(rdev->bdev, name));
2434 return -ENXIO;
2435 }
2436
2437 return 0;
2438}
2439EXPORT_SYMBOL(md_integrity_add_rdev);
2440
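/* Return true if the rdev's data device or metadata device is read-only. */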
2441static bool rdev_read_only(struct md_rdev *rdev)
2442{
2443 return bdev_read_only(rdev->bdev) ||
2444 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2445}
2446
2447static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2448{
2449 char b[BDEVNAME_SIZE];
2450 int err;
2451
	/* prevent duplicates */
2453 if (find_rdev(mddev, rdev->bdev->bd_dev))
2454 return -EEXIST;
2455
2456 if (rdev_read_only(rdev) && mddev->pers)
2457 return -EROFS;
2458
	/* make sure rdev->sectors exceeds mddev->dev_sectors */
2460 if (!test_bit(Journal, &rdev->flags) &&
2461 rdev->sectors &&
2462 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2463 if (mddev->pers) {
			/* Cannot change size, so fail
			 * If mddev->level <= 0, then we don't care
			 * about aligning sizes (e.g. linear)
			 */
2468 if (mddev->level > 0)
2469 return -ENOSPC;
2470 } else
2471 mddev->dev_sectors = rdev->sectors;
2472 }
2473
	/*
	 * Verify rdev->desc_nr is unique.
	 * If it is -1, assign a free number, else
	 * check that the number is not in use.
	 */
2478 rcu_read_lock();
2479 if (rdev->desc_nr < 0) {
2480 int choice = 0;
2481 if (mddev->pers)
2482 choice = mddev->raid_disks;
2483 while (md_find_rdev_nr_rcu(mddev, choice))
2484 choice++;
2485 rdev->desc_nr = choice;
2486 } else {
2487 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2488 rcu_read_unlock();
2489 return -EBUSY;
2490 }
2491 }
2492 rcu_read_unlock();
2493 if (!test_bit(Journal, &rdev->flags) &&
2494 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2495 pr_warn("md: %s: array is limited to %d devices\n",
2496 mdname(mddev), mddev->max_disks);
2497 return -EBUSY;
2498 }
2499 bdevname(rdev->bdev,b);
2500 strreplace(b, '/', '!');
2501
2502 rdev->mddev = mddev;
2503 pr_debug("md: bind<%s>\n", b);
2504
2505 if (mddev->raid_disks)
2506 mddev_create_serial_pool(mddev, rdev, false);
2507
2508 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2509 goto fail;
2510
	/* failure here is OK */
2512 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2513 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2514 rdev->sysfs_unack_badblocks =
2515 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2516 rdev->sysfs_badblocks =
2517 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2518
2519 list_add_rcu(&rdev->same_set, &mddev->disks);
2520 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2521
	/* May as well allow recovery to be retried once */
2523 mddev->recovery_disabled++;
2524
2525 return 0;
2526
2527 fail:
2528 pr_warn("md: failed to register dev-%s for %s\n",
2529 b, mdname(mddev));
2530 return err;
2531}
2532
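/*
 * Final rdev kobject teardown, run from md_rdev_misc_wq so it happens
 * outside the context that queued it; see unbind_rdev_from_array().
 */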
2533static void rdev_delayed_delete(struct work_struct *ws)
2534{
2535 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2536 kobject_del(&rdev->kobj);
2537 kobject_put(&rdev->kobj);
2538}
2539
2540static void unbind_rdev_from_array(struct md_rdev *rdev)
2541{
2542 char b[BDEVNAME_SIZE];
2543
2544 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2545 list_del_rcu(&rdev->same_set);
2546 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2547 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2548 rdev->mddev = NULL;
2549 sysfs_remove_link(&rdev->kobj, "block");
2550 sysfs_put(rdev->sysfs_state);
2551 sysfs_put(rdev->sysfs_unack_badblocks);
2552 sysfs_put(rdev->sysfs_badblocks);
2553 rdev->sysfs_state = NULL;
2554 rdev->sysfs_unack_badblocks = NULL;
2555 rdev->sysfs_badblocks = NULL;
2556 rdev->badblocks.count = 0;
	/* We need to delay this, otherwise we can deadlock when
	 * writing 'remove' to "dev/state".  We also need
	 * to delay it due to rcu usage.
	 */
2561 synchronize_rcu();
2562 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2563 kobject_get(&rdev->kobj);
2564 queue_work(md_rdev_misc_wq, &rdev->del_work);
2565}
2566
/*
 * prevent the device from being mounted, repartitioned or
 * otherwise reused by a RAID array (or any other kernel
 * subsystem), by bd_claiming the device.
 */
2572static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2573{
2574 int err = 0;
2575 struct block_device *bdev;
2576
2577 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2578 shared ? (struct md_rdev *)lock_rdev : rdev);
2579 if (IS_ERR(bdev)) {
2580 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2581 MAJOR(dev), MINOR(dev));
2582 return PTR_ERR(bdev);
2583 }
2584 rdev->bdev = bdev;
2585 return err;
2586}
2587
2588static void unlock_rdev(struct md_rdev *rdev)
2589{
2590 struct block_device *bdev = rdev->bdev;
2591 rdev->bdev = NULL;
2592 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2593}
2594
2595void md_autodetect_dev(dev_t dev);
2596
2597static void export_rdev(struct md_rdev *rdev)
2598{
2599 char b[BDEVNAME_SIZE];
2600
2601 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2602 md_rdev_clear(rdev);
2603#ifndef MODULE
2604 if (test_bit(AutoDetected, &rdev->flags))
2605 md_autodetect_dev(rdev->bdev->bd_dev);
2606#endif
2607 unlock_rdev(rdev);
2608 kobject_put(&rdev->kobj);
2609}
2610
2611void md_kick_rdev_from_array(struct md_rdev *rdev)
2612{
2613 unbind_rdev_from_array(rdev);
2614 export_rdev(rdev);
2615}
2616EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2617
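/* Kick every remaining rdev out of the array and reset its geometry. */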
2618static void export_array(struct mddev *mddev)
2619{
2620 struct md_rdev *rdev;
2621
2622 while (!list_empty(&mddev->disks)) {
2623 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2624 same_set);
2625 md_kick_rdev_from_array(rdev);
2626 }
2627 mddev->raid_disks = 0;
2628 mddev->major_version = 0;
2629}
2630
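/*
 * Try to mark the array clean.  Called with ->lock held; the lock may be
 * dropped and retaken while ->writes_pending is switched to atomic mode.
 * Returns the resulting ->in_sync value.
 */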
2631static bool set_in_sync(struct mddev *mddev)
2632{
2633 lockdep_assert_held(&mddev->lock);
2634 if (!mddev->in_sync) {
2635 mddev->sync_checkers++;
2636 spin_unlock(&mddev->lock);
2637 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2638 spin_lock(&mddev->lock);
2639 if (!mddev->in_sync &&
2640 percpu_ref_is_zero(&mddev->writes_pending)) {
2641 mddev->in_sync = 1;
			/*
			 * Ensure ->in_sync is visible before we clear
			 * ->sync_checkers.
			 */
2646 smp_mb();
2647 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2648 sysfs_notify_dirent_safe(mddev->sysfs_state);
2649 }
2650 if (--mddev->sync_checkers == 0)
2651 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2652 }
2653 if (mddev->safemode == 1)
2654 mddev->safemode = 0;
2655 return mddev->in_sync;
2656}
2657
2658static void sync_sbs(struct mddev *mddev, int nospares)
2659{
	/* Update each superblock (in memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2666 struct md_rdev *rdev;
2667 rdev_for_each(rdev, mddev) {
2668 if (rdev->sb_events == mddev->events ||
2669 (nospares &&
2670 rdev->raid_disk < 0 &&
2671 rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
2673 rdev->sb_loaded = 2;
2674 } else {
2675 sync_super(mddev, rdev);
2676 rdev->sb_loaded = 1;
2677 }
2678 }
2679}
2680
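/*
 * Check whether the on-disk superblock image is stale: returns true if a
 * device role has changed (activation or failure) or the recorded array
 * geometry no longer matches the in-memory state.
 */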
2681static bool does_sb_need_changing(struct mddev *mddev)
2682{
2683 struct md_rdev *rdev;
2684 struct mdp_superblock_1 *sb;
2685 int role;
2686
	/* Find a good rdev */
2688 rdev_for_each(rdev, mddev)
2689 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2690 break;
2691
	/* No good device found. */
2693 if (!rdev)
2694 return false;
2695
2696 sb = page_address(rdev->sb_page);
2697
2698 rdev_for_each(rdev, mddev) {
2699 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
2701 if (role == 0xffff && rdev->raid_disk >=0 &&
2702 !test_bit(Faulty, &rdev->flags))
2703 return true;
		/* Device turned faulty? */
2705 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2706 return true;
2707 }
2708
	/* Check if any mddev parameters have changed */
2710 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2711 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2712 (mddev->layout != le32_to_cpu(sb->layout)) ||
2713 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2714 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2715 return true;
2716
2717 return false;
2718}
2719
2720void md_update_sb(struct mddev *mddev, int force_change)
2721{
2722 struct md_rdev *rdev;
2723 int sync_req;
2724 int nospares = 0;
2725 int any_badblocks_changed = 0;
2726 int ret = -1;
2727
2728 if (mddev->ro) {
2729 if (force_change)
2730 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2731 return;
2732 }
2733
2734repeat:
2735 if (mddev_is_clustered(mddev)) {
2736 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2737 force_change = 1;
2738 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2739 nospares = 1;
2740 ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has someone else already updated the superblock? */
2742 if (!does_sb_need_changing(mddev)) {
2743 if (ret == 0)
2744 md_cluster_ops->metadata_update_cancel(mddev);
2745 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2746 BIT(MD_SB_CHANGE_DEVS) |
2747 BIT(MD_SB_CHANGE_CLEAN));
2748 return;
2749 }
2750 }
2751
	/*
	 * First make sure individual recovery_offsets are correct.
	 * curr_resync_completed can only be used during recovery.
	 * During reshape/resync it might use array-addresses rather
	 * than device addresses.
	 */
2758 rdev_for_each(rdev, mddev) {
2759 if (rdev->raid_disk >= 0 &&
2760 mddev->delta_disks >= 0 &&
2761 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2762 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2763 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2764 !test_bit(Journal, &rdev->flags) &&
2765 !test_bit(In_sync, &rdev->flags) &&
2766 mddev->curr_resync_completed > rdev->recovery_offset)
2767 rdev->recovery_offset = mddev->curr_resync_completed;
2768
2769 }
2770 if (!mddev->persistent) {
2771 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2772 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2773 if (!mddev->external) {
2774 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2775 rdev_for_each(rdev, mddev) {
2776 if (rdev->badblocks.changed) {
2777 rdev->badblocks.changed = 0;
2778 ack_all_badblocks(&rdev->badblocks);
2779 md_error(mddev, rdev);
2780 }
2781 clear_bit(Blocked, &rdev->flags);
2782 clear_bit(BlockedBadBlocks, &rdev->flags);
2783 wake_up(&rdev->blocked_wait);
2784 }
2785 }
2786 wake_up(&mddev->sb_wait);
2787 return;
2788 }
2789
2790 spin_lock(&mddev->lock);
2791
2792 mddev->utime = ktime_get_real_seconds();
2793
2794 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2795 force_change = 1;
2796 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
2801 nospares = 1;
2802 if (force_change)
2803 nospares = 0;
2804 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event_count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Pointless because if there are any spares to skip,
		 * then a recovery will happen and soon that array won't
		 * be degraded any more and the spare can go back to sleep then.
		 */
2814 nospares = 0;
2815
2816 sync_req = mddev->in_sync;

	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
2820 if (nospares
2821 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2822 && mddev->can_decrease_events
2823 && mddev->events != 1) {
2824 mddev->events--;
2825 mddev->can_decrease_events = 0;
2826 } else {
		/* otherwise we have to go forward */
		mddev->events++;
2829 mddev->can_decrease_events = nospares;
2830 }
2831
	/*
	 * This 64-bit counter should never wrap.
	 * Either we are in around ~1 trillion A.C., assuming
	 * 1 reboot per second, or we have a bug...
	 */
2837 WARN_ON(mddev->events == 0);
2838
2839 rdev_for_each(rdev, mddev) {
2840 if (rdev->badblocks.changed)
2841 any_badblocks_changed++;
2842 if (test_bit(Faulty, &rdev->flags))
2843 set_bit(FaultRecorded, &rdev->flags);
2844 }
2845
2846 sync_sbs(mddev, nospares);
2847 spin_unlock(&mddev->lock);
2848
2849 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2850 mdname(mddev), mddev->in_sync);
2851
2852 if (mddev->queue)
2853 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2854rewrite:
2855 md_bitmap_update_sb(mddev->bitmap);
2856 rdev_for_each(rdev, mddev) {
2857 char b[BDEVNAME_SIZE];
2858
2859 if (rdev->sb_loaded != 1)
2860 continue;
2861
2862 if (!test_bit(Faulty, &rdev->flags)) {
2863 md_super_write(mddev,rdev,
2864 rdev->sb_start, rdev->sb_size,
2865 rdev->sb_page);
2866 pr_debug("md: (write) %s's sb offset: %llu\n",
2867 bdevname(rdev->bdev, b),
2868 (unsigned long long)rdev->sb_start);
2869 rdev->sb_events = mddev->events;
2870 if (rdev->badblocks.size) {
2871 md_super_write(mddev, rdev,
2872 rdev->badblocks.sector,
2873 rdev->badblocks.size << 9,
2874 rdev->bb_page);
2875 rdev->badblocks.size = 0;
2876 }
2877
2878 } else
2879 pr_debug("md: %s (skipping faulty)\n",
2880 bdevname(rdev->bdev, b));
2881
2882 if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
2884 break;
2885 }
2886 if (md_super_wait(mddev) < 0)
2887 goto rewrite;
	/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */

2890 if (mddev_is_clustered(mddev) && ret == 0)
2891 md_cluster_ops->metadata_update_finish(mddev);
2892
2893 if (mddev->in_sync != sync_req ||
2894 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2895 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* have to write it out again */
2897 goto repeat;
2898 wake_up(&mddev->sb_wait);
2899 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2900 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2901
2902 rdev_for_each(rdev, mddev) {
2903 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2904 clear_bit(Blocked, &rdev->flags);
2905
2906 if (any_badblocks_changed)
2907 ack_all_badblocks(&rdev->badblocks);
2908 clear_bit(BlockedBadBlocks, &rdev->flags);
2909 wake_up(&rdev->blocked_wait);
2910 }
2911}
2912EXPORT_SYMBOL(md_update_sb);
2913
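/* Hot-add an rdev that is already bound to the array; see bind_rdev_to_array(). */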
2914static int add_bound_rdev(struct md_rdev *rdev)
2915{
2916 struct mddev *mddev = rdev->mddev;
2917 int err = 0;
2918 bool add_journal = test_bit(Journal, &rdev->flags);
2919
2920 if (!mddev->pers->hot_remove_disk || add_journal) {
		/* If there is hot_add_disk but no hot_remove_disk
		 * then added disks are for geometry changes,
		 * and should be added immediately.
		 */
2925 super_types[mddev->major_version].
2926 validate_super(mddev, rdev);
2927 if (add_journal)
2928 mddev_suspend(mddev);
2929 err = mddev->pers->hot_add_disk(mddev, rdev);
2930 if (add_journal)
2931 mddev_resume(mddev);
2932 if (err) {
2933 md_kick_rdev_from_array(rdev);
2934 return err;
2935 }
2936 }
2937 sysfs_notify_dirent_safe(rdev->sysfs_state);
2938
2939 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2940 if (mddev->degraded)
2941 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2942 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2943 md_new_event(mddev);
2944 md_wakeup_thread(mddev->thread);
2945 return 0;
2946}
2947

/* words written to sysfs files may, or may not, be \n terminated.
 * We want to accept either.  For this we use cmd_match.
 */
2951static int cmd_match(const char *cmd, const char *str)
2952{
	/*
	 * See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline.
	 */
2957 while (*cmd && *str && *cmd == *str) {
2958 cmd++;
2959 str++;
2960 }
2961 if (*cmd == '\n')
2962 cmd++;
2963 if (*str || *cmd)
2964 return 0;
2965 return 1;
2966}
2967
2968struct rdev_sysfs_entry {
2969 struct attribute attr;
2970 ssize_t (*show)(struct md_rdev *, char *);
2971 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2972};
2973
2974static ssize_t
2975state_show(struct md_rdev *rdev, char *page)
2976{
2977 char *sep = ",";
2978 size_t len = 0;
2979 unsigned long flags = READ_ONCE(rdev->flags);
2980
2981 if (test_bit(Faulty, &flags) ||
2982 (!test_bit(ExternalBbl, &flags) &&
2983 rdev->badblocks.unacked_exist))
2984 len += sprintf(page+len, "faulty%s", sep);
2985 if (test_bit(In_sync, &flags))
2986 len += sprintf(page+len, "in_sync%s", sep);
2987 if (test_bit(Journal, &flags))
2988 len += sprintf(page+len, "journal%s", sep);
2989 if (test_bit(WriteMostly, &flags))
2990 len += sprintf(page+len, "write_mostly%s", sep);
2991 if (test_bit(Blocked, &flags) ||
2992 (rdev->badblocks.unacked_exist
2993 && !test_bit(Faulty, &flags)))
2994 len += sprintf(page+len, "blocked%s", sep);
2995 if (!test_bit(Faulty, &flags) &&
2996 !test_bit(Journal, &flags) &&
2997 !test_bit(In_sync, &flags))
2998 len += sprintf(page+len, "spare%s", sep);
2999 if (test_bit(WriteErrorSeen, &flags))
3000 len += sprintf(page+len, "write_error%s", sep);
3001 if (test_bit(WantReplacement, &flags))
3002 len += sprintf(page+len, "want_replacement%s", sep);
3003 if (test_bit(Replacement, &flags))
3004 len += sprintf(page+len, "replacement%s", sep);
3005 if (test_bit(ExternalBbl, &flags))
3006 len += sprintf(page+len, "external_bbl%s", sep);
3007 if (test_bit(FailFast, &flags))
3008 len += sprintf(page+len, "failfast%s", sep);
3009
3010 if (len)
3011 len -= strlen(sep);
3012
3013 return len+sprintf(page+len, "\n");
3014}
3015
3016static ssize_t
3017state_store(struct md_rdev *rdev, const char *buf, size_t len)
3018{
	/* can write
	 *  faulty  - simulates an error on the device
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flags
	 *  -blocked - clears the Blocked and possibly simulates an error
	 *  insync - sets Insync providing device isn't active
	 *  -insync - clear Insync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 *  {,-}failfast - set/clear FailFast
	 */
3033 int err = -EINVAL;
3034 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
3035 md_error(rdev->mddev, rdev);
3036 if (test_bit(Faulty, &rdev->flags))
3037 err = 0;
3038 else
3039 err = -EBUSY;
3040 } else if (cmd_match(buf, "remove")) {
3041 if (rdev->mddev->pers) {
3042 clear_bit(Blocked, &rdev->flags);
3043 remove_and_add_spares(rdev->mddev, rdev);
3044 }
3045 if (rdev->raid_disk >= 0)
3046 err = -EBUSY;
3047 else {
3048 struct mddev *mddev = rdev->mddev;
3049 err = 0;
3050 if (mddev_is_clustered(mddev))
3051 err = md_cluster_ops->remove_disk(mddev, rdev);
3052
3053 if (err == 0) {
3054 md_kick_rdev_from_array(rdev);
3055 if (mddev->pers) {
3056 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3057 md_wakeup_thread(mddev->thread);
3058 }
3059 md_new_event(mddev);
3060 }
3061 }
3062 } else if (cmd_match(buf, "writemostly")) {
3063 set_bit(WriteMostly, &rdev->flags);
3064 mddev_create_serial_pool(rdev->mddev, rdev, false);
3065 err = 0;
3066 } else if (cmd_match(buf, "-writemostly")) {
3067 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3068 clear_bit(WriteMostly, &rdev->flags);
3069 err = 0;
3070 } else if (cmd_match(buf, "blocked")) {
3071 set_bit(Blocked, &rdev->flags);
3072 err = 0;
3073 } else if (cmd_match(buf, "-blocked")) {
3074 if (!test_bit(Faulty, &rdev->flags) &&
3075 !test_bit(ExternalBbl, &rdev->flags) &&
3076 rdev->badblocks.unacked_exist) {
			/* metadata handler doesn't understand badblocks,
			 * so we need to fail the device
			 */
3080 md_error(rdev->mddev, rdev);
3081 }
3082 clear_bit(Blocked, &rdev->flags);
3083 clear_bit(BlockedBadBlocks, &rdev->flags);
3084 wake_up(&rdev->blocked_wait);
3085 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3086 md_wakeup_thread(rdev->mddev->thread);
3087
3088 err = 0;
3089 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3090 set_bit(In_sync, &rdev->flags);
3091 err = 0;
3092 } else if (cmd_match(buf, "failfast")) {
3093 set_bit(FailFast, &rdev->flags);
3094 err = 0;
3095 } else if (cmd_match(buf, "-failfast")) {
3096 clear_bit(FailFast, &rdev->flags);
3097 err = 0;
3098 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3099 !test_bit(Journal, &rdev->flags)) {
3100 if (rdev->mddev->pers == NULL) {
3101 clear_bit(In_sync, &rdev->flags);
3102 rdev->saved_raid_disk = rdev->raid_disk;
3103 rdev->raid_disk = -1;
3104 err = 0;
3105 }
3106 } else if (cmd_match(buf, "write_error")) {
3107 set_bit(WriteErrorSeen, &rdev->flags);
3108 err = 0;
3109 } else if (cmd_match(buf, "-write_error")) {
3110 clear_bit(WriteErrorSeen, &rdev->flags);
3111 err = 0;
3112 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
3117 if (rdev->raid_disk >= 0 &&
3118 !test_bit(Journal, &rdev->flags) &&
3119 !test_bit(Replacement, &rdev->flags))
3120 set_bit(WantReplacement, &rdev->flags);
3121 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3122 md_wakeup_thread(rdev->mddev->thread);
3123 err = 0;
3124 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacement starts it is too late though.
		 */
3128 err = 0;
3129 clear_bit(WantReplacement, &rdev->flags);
3130 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, or by assigning 'slot'.
		 */
3135 if (rdev->mddev->pers)
3136 err = -EBUSY;
3137 else {
3138 set_bit(Replacement, &rdev->flags);
3139 err = 0;
3140 }
3141 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
3143 if (rdev->mddev->pers)
3144 err = -EBUSY;
3145 else {
3146 clear_bit(Replacement, &rdev->flags);
3147 err = 0;
3148 }
3149 } else if (cmd_match(buf, "re-add")) {
3150 if (!rdev->mddev->pers)
3151 err = -EINVAL;
3152 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3153 rdev->saved_raid_disk >= 0) {
			/* clear_bit is performed _after_ all the devices
			 * have their local Faulty bit cleared. If any writes
			 * happen in the meantime in the local node, they
			 * will land in the local bitmap, which will be synced
			 * by this node eventually
			 */
3160 if (!mddev_is_clustered(rdev->mddev) ||
3161 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3162 clear_bit(Faulty, &rdev->flags);
3163 err = add_bound_rdev(rdev);
3164 }
3165 } else
3166 err = -EBUSY;
3167 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3168 set_bit(ExternalBbl, &rdev->flags);
3169 rdev->badblocks.shift = 0;
3170 err = 0;
3171 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3172 clear_bit(ExternalBbl, &rdev->flags);
3173 err = 0;
3174 }
3175 if (!err)
3176 sysfs_notify_dirent_safe(rdev->sysfs_state);
3177 return err ? err : len;
3178}
3179static struct rdev_sysfs_entry rdev_state =
3180__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3181
3182static ssize_t
3183errors_show(struct md_rdev *rdev, char *page)
3184{
3185 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3186}
3187
3188static ssize_t
3189errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3190{
3191 unsigned int n;
3192 int rv;
3193
3194 rv = kstrtouint(buf, 10, &n);
3195 if (rv < 0)
3196 return rv;
3197 atomic_set(&rdev->corrected_errors, n);
3198 return len;
3199}
3200static struct rdev_sysfs_entry rdev_errors =
3201__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3202
3203static ssize_t
3204slot_show(struct md_rdev *rdev, char *page)
3205{
3206 if (test_bit(Journal, &rdev->flags))
3207 return sprintf(page, "journal\n");
3208 else if (rdev->raid_disk < 0)
3209 return sprintf(page, "none\n");
3210 else
3211 return sprintf(page, "%d\n", rdev->raid_disk);
3212}
3213
3214static ssize_t
3215slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3216{
3217 int slot;
3218 int err;
3219
3220 if (test_bit(Journal, &rdev->flags))
3221 return -EBUSY;
3222 if (strncmp(buf, "none", 4)==0)
3223 slot = -1;
3224 else {
3225 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3226 if (err < 0)
3227 return err;
3228 }
3229 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
3237 if (rdev->raid_disk == -1)
3238 return -EEXIST;
		/* personality does all needed checks */
3240 if (rdev->mddev->pers->hot_remove_disk == NULL)
3241 return -EINVAL;
3242 clear_bit(Blocked, &rdev->flags);
3243 remove_and_add_spares(rdev->mddev, rdev);
3244 if (rdev->raid_disk >= 0)
3245 return -EBUSY;
3246 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3247 md_wakeup_thread(rdev->mddev->thread);
3248 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
3252 int err;
3253
3254 if (rdev->raid_disk != -1)
3255 return -EBUSY;
3256
3257 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3258 return -EBUSY;
3259
3260 if (rdev->mddev->pers->hot_add_disk == NULL)
3261 return -EINVAL;
3262
3263 if (slot >= rdev->mddev->raid_disks &&
3264 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3265 return -ENOSPC;
3266
3267 rdev->raid_disk = slot;
3268 if (test_bit(In_sync, &rdev->flags))
3269 rdev->saved_raid_disk = slot;
3270 else
3271 rdev->saved_raid_disk = -1;
3272 clear_bit(In_sync, &rdev->flags);
3273 clear_bit(Bitmap_sync, &rdev->flags);
3274 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3275 if (err) {
3276 rdev->raid_disk = -1;
3277 return err;
3278 } else
3279 sysfs_notify_dirent_safe(rdev->sysfs_state);
3280 ;
3281 sysfs_link_rdev(rdev->mddev, rdev);
		/* don't wakeup anyone, leave that to userspace. */
3283 } else {
3284 if (slot >= rdev->mddev->raid_disks &&
3285 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3286 return -ENOSPC;
3287 rdev->raid_disk = slot;
		/* assume it is working */
3289 clear_bit(Faulty, &rdev->flags);
3290 clear_bit(WriteMostly, &rdev->flags);
3291 set_bit(In_sync, &rdev->flags);
3292 sysfs_notify_dirent_safe(rdev->sysfs_state);
3293 }
3294 return len;
3295}
3296
3297static struct rdev_sysfs_entry rdev_slot =
3298__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3299
3300static ssize_t
3301offset_show(struct md_rdev *rdev, char *page)
3302{
3303 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3304}
3305
3306static ssize_t
3307offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3308{
3309 unsigned long long offset;
3310 if (kstrtoull(buf, 10, &offset) < 0)
3311 return -EINVAL;
3312 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3313 return -EBUSY;
3314 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
3317 return -EBUSY;
3318 rdev->data_offset = offset;
3319 rdev->new_data_offset = offset;
3320 return len;
3321}
3322
3323static struct rdev_sysfs_entry rdev_offset =
3324__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3325
3326static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3327{
3328 return sprintf(page, "%llu\n",
3329 (unsigned long long)rdev->new_data_offset);
3330}
3331
3332static ssize_t new_offset_store(struct md_rdev *rdev,
3333 const char *buf, size_t len)
3334{
3335 unsigned long long new_offset;
3336 struct mddev *mddev = rdev->mddev;
3337
3338 if (kstrtoull(buf, 10, &new_offset) < 0)
3339 return -EINVAL;
3340
3341 if (mddev->sync_thread ||
3342 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3343 return -EBUSY;
3344 if (new_offset == rdev->data_offset)
		/* reset is always permitted */
		;
3347 else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev_sectors */
3349 if (new_offset - rdev->data_offset
3350 + mddev->dev_sectors > rdev->sectors)
3351 return -E2BIG;
3352 }
	/* Metadata worries about other space details. */

	/* decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
3358 if (new_offset < rdev->data_offset &&
3359 mddev->reshape_backwards)
3360 return -EINVAL;
3361
	/* Increasing the offset is inconsistent with a forwards
	 * reshape.  reshape_direction should be set to
	 * 'backwards' first.
	 */
3365 if (new_offset > rdev->data_offset &&
3366 !mddev->reshape_backwards)
3367 return -EINVAL;
3368
3369 if (mddev->pers && mddev->persistent &&
3370 !super_types[mddev->major_version]
3371 .allow_new_offset(rdev, new_offset))
3372 return -E2BIG;
3373 rdev->new_data_offset = new_offset;
3374 if (new_offset > rdev->data_offset)
3375 mddev->reshape_backwards = 1;
3376 else if (new_offset < rdev->data_offset)
3377 mddev->reshape_backwards = 0;
3378
3379 return len;
3380}
3381static struct rdev_sysfs_entry rdev_new_offset =
3382__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3383
3384static ssize_t
3385rdev_size_show(struct md_rdev *rdev, char *page)
3386{
3387 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3388}
3389
3390static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3391{
	/* check if two start/length pairs overlap */
3393 if (s1+l1 <= s2)
3394 return 0;
3395 if (s2+l2 <= s1)
3396 return 0;
3397 return 1;
3398}
3399
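/*
 * Parse a block count (1K units) from sysfs and convert it to sectors,
 * rejecting values whose doubling would overflow.
 */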
3400static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3401{
3402 unsigned long long blocks;
3403 sector_t new;
3404
3405 if (kstrtoull(buf, 10, &blocks) < 0)
3406 return -EINVAL;
3407
3408 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL; /* sector conversion overflow */
3410
3411 new = blocks * 2;
3412 if (new != blocks * 2)
		return -EINVAL; /* unsigned long long to sector_t overflow */
3414
3415 *sectors = new;
3416 return 0;
3417}
3418
3419static ssize_t
3420rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3421{
3422 struct mddev *my_mddev = rdev->mddev;
3423 sector_t oldsectors = rdev->sectors;
3424 sector_t sectors;
3425
3426 if (test_bit(Journal, &rdev->flags))
3427 return -EBUSY;
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
3429 return -EINVAL;
3430 if (rdev->data_offset != rdev->new_data_offset)
3431 return -EINVAL;
3432 if (my_mddev->pers && rdev->raid_disk >= 0) {
3433 if (my_mddev->persistent) {
3434 sectors = super_types[my_mddev->major_version].
3435 rdev_size_change(rdev, sectors);
3436 if (!sectors)
3437 return -EBUSY;
3438 } else if (!sectors)
3439 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3440 rdev->data_offset;
3441 if (!my_mddev->pers->resize)
		/* Cannot change size for RAID0 or Linear etc */
3443 return -EINVAL;
3444 }
3445 if (sectors < my_mddev->dev_sectors)
3446 return -EINVAL;
3447
3448 rdev->sectors = sectors;
3449 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that all other rdevs with the same
		 * ->bdev do not overlap.  'rcu' is sufficient to walk
		 * the rdev lists safely.
		 * This check does not provide a hard guarantee, it
		 * just helps avoid dangerous mistakes.
		 */
3456 struct mddev *mddev;
3457 int overlap = 0;
3458 struct list_head *tmp;
3459
3460 rcu_read_lock();
3461 for_each_mddev(mddev, tmp) {
3462 struct md_rdev *rdev2;
3463
3464 rdev_for_each(rdev2, mddev)
3465 if (rdev->bdev == rdev2->bdev &&
3466 rdev != rdev2 &&
3467 overlaps(rdev->data_offset, rdev->sectors,
3468 rdev2->data_offset,
3469 rdev2->sectors)) {
3470 overlap = 1;
3471 break;
3472 }
3473 if (overlap) {
3474 mddev_put(mddev);
3475 break;
3476 }
3477 }
3478 rcu_read_unlock();
3479 if (overlap) {
			/* Someone else could have slipped in a size
			 * change here, but doing so is just silly.
			 * We put oldsectors back because we *know* it is
			 * safe, and trust user-space not to race with
			 * itself.
			 */
3486 rdev->sectors = oldsectors;
3487 return -EBUSY;
3488 }
3489 }
3490 return len;
3491}
3492
3493static struct rdev_sysfs_entry rdev_size =
3494__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3495
3496static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3497{
3498 unsigned long long recovery_start = rdev->recovery_offset;
3499
3500 if (test_bit(In_sync, &rdev->flags) ||
3501 recovery_start == MaxSector)
3502 return sprintf(page, "none\n");
3503
3504 return sprintf(page, "%llu\n", recovery_start);
3505}
3506
3507static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3508{
3509 unsigned long long recovery_start;
3510
3511 if (cmd_match(buf, "none"))
3512 recovery_start = MaxSector;
3513 else if (kstrtoull(buf, 10, &recovery_start))
3514 return -EINVAL;
3515
3516 if (rdev->mddev->pers &&
3517 rdev->raid_disk >= 0)
3518 return -EBUSY;
3519
3520 rdev->recovery_offset = recovery_start;
3521 if (recovery_start == MaxSector)
3522 set_bit(In_sync, &rdev->flags);
3523 else
3524 clear_bit(In_sync, &rdev->flags);
3525 return len;
3526}
3527
3528static struct rdev_sysfs_entry rdev_recovery_start =
3529__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3530
/* sysfs access to bad-blocks list.
 * We present two files.
 * 'bad_blocks' lists sector numbers and lengths of ranges that
 *    are recorded as bad.  The list is truncated to fit within
 *    the one-page limit of sysfs.
 *    Writing "sector length" to this file adds an acknowledged
 *    bad block list.
 * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
 *    been acknowledged.  Writing to this file adds bad blocks
 *    without acknowledging them.  This is largely for testing.
 */
3542static ssize_t bb_show(struct md_rdev *rdev, char *page)
3543{
3544 return badblocks_show(&rdev->badblocks, page, 0);
3545}
3546static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3547{
3548 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3549
3550 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3551 wake_up(&rdev->blocked_wait);
3552 return rv;
3553}
3554static struct rdev_sysfs_entry rdev_bad_blocks =
3555__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3556
3557static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3558{
3559 return badblocks_show(&rdev->badblocks, page, 1);
3560}
3561static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3562{
3563 return badblocks_store(&rdev->badblocks, page, len, 1);
3564}
3565static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3566__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3567
3568static ssize_t
3569ppl_sector_show(struct md_rdev *rdev, char *page)
3570{
3571 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3572}
3573
3574static ssize_t
3575ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3576{
3577 unsigned long long sector;
3578
	if (kstrtoull(buf, 10, &sector) < 0)
3580 return -EINVAL;
3581 if (sector != (sector_t)sector)
3582 return -EINVAL;
3583
3584 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3585 rdev->raid_disk >= 0)
3586 return -EBUSY;
3587
3588 if (rdev->mddev->persistent) {
3589 if (rdev->mddev->major_version == 0)
3590 return -EINVAL;
3591 if ((sector > rdev->sb_start &&
3592 sector - rdev->sb_start > S16_MAX) ||
3593 (sector < rdev->sb_start &&
3594 rdev->sb_start - sector > -S16_MIN))
3595 return -EINVAL;
3596 rdev->ppl.offset = sector - rdev->sb_start;
3597 } else if (!rdev->mddev->external) {
3598 return -EBUSY;
3599 }
3600 rdev->ppl.sector = sector;
3601 return len;
3602}
3603
3604static struct rdev_sysfs_entry rdev_ppl_sector =
3605__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3606
3607static ssize_t
3608ppl_size_show(struct md_rdev *rdev, char *page)
3609{
3610 return sprintf(page, "%u\n", rdev->ppl.size);
3611}
3612
3613static ssize_t
3614ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3615{
3616 unsigned int size;
3617
3618 if (kstrtouint(buf, 10, &size) < 0)
3619 return -EINVAL;
3620
3621 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3622 rdev->raid_disk >= 0)
3623 return -EBUSY;
3624
3625 if (rdev->mddev->persistent) {
3626 if (rdev->mddev->major_version == 0)
3627 return -EINVAL;
3628 if (size > U16_MAX)
3629 return -EINVAL;
3630 } else if (!rdev->mddev->external) {
3631 return -EBUSY;
3632 }
3633 rdev->ppl.size = size;
3634 return len;
3635}
3636
3637static struct rdev_sysfs_entry rdev_ppl_size =
3638__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3639
3640static struct attribute *rdev_default_attrs[] = {
3641 &rdev_state.attr,
3642 &rdev_errors.attr,
3643 &rdev_slot.attr,
3644 &rdev_offset.attr,
3645 &rdev_new_offset.attr,
3646 &rdev_size.attr,
3647 &rdev_recovery_start.attr,
3648 &rdev_bad_blocks.attr,
3649 &rdev_unack_bad_blocks.attr,
3650 &rdev_ppl_sector.attr,
3651 &rdev_ppl_size.attr,
3652 NULL,
3653};
3654static ssize_t
3655rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3656{
3657 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3658 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3659
3660 if (!entry->show)
3661 return -EIO;
3662 if (!rdev->mddev)
3663 return -ENODEV;
3664 return entry->show(rdev, page);
3665}
3666
3667static ssize_t
3668rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3669 const char *page, size_t length)
3670{
3671 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3672 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3673 ssize_t rv;
3674 struct mddev *mddev = rdev->mddev;
3675
3676 if (!entry->store)
3677 return -EIO;
3678 if (!capable(CAP_SYS_ADMIN))
3679 return -EACCES;
3680 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3681 if (!rv) {
3682 if (rdev->mddev == NULL)
3683 rv = -ENODEV;
3684 else
3685 rv = entry->store(rdev, page, length);
3686 mddev_unlock(mddev);
3687 }
3688 return rv;
3689}
3690
3691static void rdev_free(struct kobject *ko)
3692{
3693 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3694 kfree(rdev);
3695}
3696static const struct sysfs_ops rdev_sysfs_ops = {
3697 .show = rdev_attr_show,
3698 .store = rdev_attr_store,
3699};
3700static struct kobj_type rdev_ktype = {
3701 .release = rdev_free,
3702 .sysfs_ops = &rdev_sysfs_ops,
3703 .default_attrs = rdev_default_attrs,
3704};
3705
3706int md_rdev_init(struct md_rdev *rdev)
3707{
3708 rdev->desc_nr = -1;
3709 rdev->saved_raid_disk = -1;
3710 rdev->raid_disk = -1;
3711 rdev->flags = 0;
3712 rdev->data_offset = 0;
3713 rdev->new_data_offset = 0;
3714 rdev->sb_events = 0;
3715 rdev->last_read_error = 0;
3716 rdev->sb_loaded = 0;
3717 rdev->bb_page = NULL;
3718 atomic_set(&rdev->nr_pending, 0);
3719 atomic_set(&rdev->read_errors, 0);
3720 atomic_set(&rdev->corrected_errors, 0);
3721
3722 INIT_LIST_HEAD(&rdev->same_set);
3723 init_waitqueue_head(&rdev->blocked_wait);
3724
	/* Add space to store bad block list.
	 * This reserves the space even on arrays where it cannot
	 * be used - I wonder if that matters
	 */
3729 return badblocks_init(&rdev->badblocks, 0);
3730}
3731EXPORT_SYMBOL_GPL(md_rdev_init);
3732
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock.
 *
 * Mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
3742static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3743{
3744 char b[BDEVNAME_SIZE];
3745 int err;
3746 struct md_rdev *rdev;
3747 sector_t size;
3748
3749 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3750 if (!rdev)
3751 return ERR_PTR(-ENOMEM);
3752
3753 err = md_rdev_init(rdev);
3754 if (err)
3755 goto abort_free;
3756 err = alloc_disk_sb(rdev);
3757 if (err)
3758 goto abort_free;
3759
3760 err = lock_rdev(rdev, newdev, super_format == -2);
3761 if (err)
3762 goto abort_free;
3763
3764 kobject_init(&rdev->kobj, &rdev_ktype);
3765
3766 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3767 if (!size) {
3768 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3769 bdevname(rdev->bdev,b));
3770 err = -EINVAL;
3771 goto abort_free;
3772 }
3773
3774 if (super_format >= 0) {
3775 err = super_types[super_format].
3776 load_super(rdev, NULL, super_minor);
3777 if (err == -EINVAL) {
3778 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3779 bdevname(rdev->bdev,b),
3780 super_format, super_minor);
3781 goto abort_free;
3782 }
3783 if (err < 0) {
3784 pr_warn("md: could not read %s's sb, not importing!\n",
3785 bdevname(rdev->bdev,b));
3786 goto abort_free;
3787 }
3788 }
3789
3790 return rdev;
3791
3792abort_free:
3793 if (rdev->bdev)
3794 unlock_rdev(rdev);
3795 md_rdev_clear(rdev);
3796 kfree(rdev);
3797 return ERR_PTR(err);
3798}
3799
/*
 * Check a full RAID array for plausibility.
 */
3804static int analyze_sbs(struct mddev *mddev)
3805{
3806 int i;
3807 struct md_rdev *rdev, *freshest, *tmp;
3808 char b[BDEVNAME_SIZE];
3809
3810 freshest = NULL;
3811 rdev_for_each_safe(rdev, tmp, mddev)
3812 switch (super_types[mddev->major_version].
3813 load_super(rdev, freshest, mddev->minor_version)) {
3814 case 1:
3815 freshest = rdev;
3816 break;
3817 case 0:
3818 break;
3819 default:
3820 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3821 bdevname(rdev->bdev,b));
3822 md_kick_rdev_from_array(rdev);
3823 }

	/* Cannot find a valid fresh disk */
3826 if (!freshest) {
3827 pr_warn("md: cannot find a valid disk\n");
3828 return -EINVAL;
3829 }
3830
3831 super_types[mddev->major_version].
3832 validate_super(mddev, freshest);
3833
3834 i = 0;
3835 rdev_for_each_safe(rdev, tmp, mddev) {
3836 if (mddev->max_disks &&
3837 (rdev->desc_nr >= mddev->max_disks ||
3838 i > mddev->max_disks)) {
3839 pr_warn("md: %s: %s: only %d devices permitted\n",
3840 mdname(mddev), bdevname(rdev->bdev, b),
3841 mddev->max_disks);
3842 md_kick_rdev_from_array(rdev);
3843 continue;
3844 }
3845 if (rdev != freshest) {
3846 if (super_types[mddev->major_version].
3847 validate_super(mddev, rdev)) {
3848 pr_warn("md: kicking non-fresh %s from array!\n",
3849 bdevname(rdev->bdev,b));
3850 md_kick_rdev_from_array(rdev);
3851 continue;
3852 }
3853 }
3854 if (mddev->level == LEVEL_MULTIPATH) {
3855 rdev->desc_nr = i++;
3856 rdev->raid_disk = rdev->desc_nr;
3857 set_bit(In_sync, &rdev->flags);
3858 } else if (rdev->raid_disk >=
3859 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3860 !test_bit(Journal, &rdev->flags)) {
3861 rdev->raid_disk = -1;
3862 clear_bit(In_sync, &rdev->flags);
3863 }
3864 }
3865
3866 return 0;
3867}
3868
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale'.
 * All without any floating-point arithmetic.
 */
3879int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3880{
3881 unsigned long result = 0;
3882 long decimals = -1;
3883 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3884 if (*cp == '.')
3885 decimals = 0;
3886 else if (decimals < scale) {
3887 unsigned int value;
3888 value = *cp - '0';
3889 result = result * 10 + value;
3890 if (decimals >= 0)
3891 decimals++;
3892 }
3893 cp++;
3894 }
3895 if (*cp == '\n')
3896 cp++;
3897 if (*cp)
3898 return -EINVAL;
3899 if (decimals < 0)
3900 decimals = 0;
3901 while (decimals < scale) {
3902 result *= 10;
		decimals++;
3904 }
3905 *res = result;
3906 return 0;
3907}
3908
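/*
 * safe_mode_delay reads and writes the delay, in seconds with millisecond
 * resolution, before an idle array is marked 'clean', e.g. (illustrative
 * path) "echo 0.200 > /sys/block/md0/md/safe_mode_delay".
 */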
3909static ssize_t
3910safe_delay_show(struct mddev *mddev, char *page)
3911{
3912 int msec = (mddev->safemode_delay*1000)/HZ;
3913 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3914}
3915static ssize_t
3916safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3917{
3918 unsigned long msec;
3919
3920 if (mddev_is_clustered(mddev)) {
3921 pr_warn("md: Safemode is disabled for clustered mode\n");
3922 return -EINVAL;
3923 }
3924
3925 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3926 return -EINVAL;
3927 if (msec == 0)
3928 mddev->safemode_delay = 0;
3929 else {
3930 unsigned long old_delay = mddev->safemode_delay;
3931 unsigned long new_delay = (msec*HZ)/1000;
3932
3933 if (new_delay == 0)
3934 new_delay = 1;
3935 mddev->safemode_delay = new_delay;
3936 if (new_delay < old_delay || old_delay == 0)
3937 mod_timer(&mddev->safemode_timer, jiffies+1);
3938 }
3939 return len;
3940}
3941static struct md_sysfs_entry md_safe_delay =
__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR, safe_delay_show, safe_delay_store);
3943
3944static ssize_t
3945level_show(struct mddev *mddev, char *page)
3946{
3947 struct md_personality *p;
3948 int ret;
3949 spin_lock(&mddev->lock);
3950 p = mddev->pers;
3951 if (p)
3952 ret = sprintf(page, "%s\n", p->name);
3953 else if (mddev->clevel[0])
3954 ret = sprintf(page, "%s\n", mddev->clevel);
3955 else if (mddev->level != LEVEL_NONE)
3956 ret = sprintf(page, "%d\n", mddev->level);
3957 else
3958 ret = 0;
3959 spin_unlock(&mddev->lock);
3960 return ret;
3961}
3962
3963static ssize_t
3964level_store(struct mddev *mddev, const char *buf, size_t len)
3965{
3966 char clevel[16];
3967 ssize_t rv;
3968 size_t slen = len;
3969 struct md_personality *pers, *oldpers;
3970 long level;
3971 void *priv, *oldpriv;
3972 struct md_rdev *rdev;
3973
3974 if (slen == 0 || slen >= sizeof(clevel))
3975 return -EINVAL;
3976
3977 rv = mddev_lock(mddev);
3978 if (rv)
3979 return rv;
3980
3981 if (mddev->pers == NULL) {
3982 strncpy(mddev->clevel, buf, slen);
3983 if (mddev->clevel[slen-1] == '\n')
3984 slen--;
3985 mddev->clevel[slen] = 0;
3986 mddev->level = LEVEL_NONE;
3987 rv = len;
3988 goto out_unlock;
3989 }
3990 rv = -EROFS;
3991 if (mddev->ro)
3992 goto out_unlock;
3993
	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
4000 rv = -EBUSY;
4001 if (mddev->sync_thread ||
4002 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4003 mddev->reshape_position != MaxSector ||
4004 mddev->sysfs_active)
4005 goto out_unlock;
4006
4007 rv = -EINVAL;
4008 if (!mddev->pers->quiesce) {
4009 pr_warn("md: %s: %s does not support online personality change\n",
4010 mdname(mddev), mddev->pers->name);
4011 goto out_unlock;
4012 }
4013
	/* Now find the new personality */
4015 strncpy(clevel, buf, slen);
4016 if (clevel[slen-1] == '\n')
4017 slen--;
4018 clevel[slen] = 0;
4019 if (kstrtol(clevel, 10, &level))
4020 level = LEVEL_NONE;
4021
4022 if (request_module("md-%s", clevel) != 0)
4023 request_module("md-level-%s", clevel);
4024 spin_lock(&pers_lock);
4025 pers = find_pers(level, clevel);
4026 if (!pers || !try_module_get(pers->owner)) {
4027 spin_unlock(&pers_lock);
4028 pr_warn("md: personality %s not loaded\n", clevel);
4029 rv = -EINVAL;
4030 goto out_unlock;
4031 }
4032 spin_unlock(&pers_lock);
4033
4034 if (pers == mddev->pers) {
		/* Nothing to do! */
4036 module_put(pers->owner);
4037 rv = len;
4038 goto out_unlock;
4039 }
4040 if (!pers->takeover) {
4041 module_put(pers->owner);
4042 pr_warn("md: %s: %s does not support personality takeover\n",
4043 mdname(mddev), clevel);
4044 rv = -EINVAL;
4045 goto out_unlock;
4046 }
4047
4048 rdev_for_each(rdev, mddev)
4049 rdev->new_raid_disk = rdev->raid_disk;
4050
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
4054 priv = pers->takeover(mddev);
4055 if (IS_ERR(priv)) {
4056 mddev->new_level = mddev->level;
4057 mddev->new_layout = mddev->layout;
4058 mddev->new_chunk_sectors = mddev->chunk_sectors;
4059 mddev->raid_disks -= mddev->delta_disks;
4060 mddev->delta_disks = 0;
4061 mddev->reshape_backwards = 0;
4062 module_put(pers->owner);
4063 pr_warn("md: %s: %s would not accept array\n",
4064 mdname(mddev), clevel);
4065 rv = PTR_ERR(priv);
4066 goto out_unlock;
4067 }
4068
	/* Looks like we have a winner */
4070 mddev_suspend(mddev);
4071 mddev_detach(mddev);
4072
4073 spin_lock(&mddev->lock);
4074 oldpers = mddev->pers;
4075 oldpriv = mddev->private;
4076 mddev->pers = pers;
4077 mddev->private = priv;
4078 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4079 mddev->level = mddev->new_level;
4080 mddev->layout = mddev->new_layout;
4081 mddev->chunk_sectors = mddev->new_chunk_sectors;
4082 mddev->delta_disks = 0;
4083 mddev->reshape_backwards = 0;
4084 mddev->degraded = 0;
4085 spin_unlock(&mddev->lock);
4086
4087 if (oldpers->sync_request == NULL &&
4088 mddev->external) {
		/* We are converting from a no-redundancy array to a
		 * redundancy-capable array with externally managed
		 * metadata: mark the array dirty and disable safemode,
		 * so the external metadata handler is responsible for
		 * marking it clean.
		 */
4096 mddev->in_sync = 0;
4097 mddev->safemode_delay = 0;
4098 mddev->safemode = 0;
4099 }
4100
4101 oldpers->free(mddev, oldpriv);
4102
4103 if (oldpers->sync_request == NULL &&
4104 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
4106 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4107 pr_warn("md: cannot register extra attributes for %s\n",
4108 mdname(mddev));
4109 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4110 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4111 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4112 }
4113 if (oldpers->sync_request != NULL &&
4114 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
4116 if (mddev->to_remove == NULL)
4117 mddev->to_remove = &md_redundancy_group;
4118 }
4119
4120 module_put(oldpers->owner);
4121
4122 rdev_for_each(rdev, mddev) {
4123 if (rdev->raid_disk < 0)
4124 continue;
4125 if (rdev->new_raid_disk >= mddev->raid_disks)
4126 rdev->new_raid_disk = -1;
4127 if (rdev->new_raid_disk == rdev->raid_disk)
4128 continue;
4129 sysfs_unlink_rdev(mddev, rdev);
4130 }
4131 rdev_for_each(rdev, mddev) {
4132 if (rdev->raid_disk < 0)
4133 continue;
4134 if (rdev->new_raid_disk == rdev->raid_disk)
4135 continue;
4136 rdev->raid_disk = rdev->new_raid_disk;
4137 if (rdev->raid_disk < 0)
4138 clear_bit(In_sync, &rdev->flags);
4139 else {
4140 if (sysfs_link_rdev(mddev, rdev))
4141 pr_warn("md: cannot register rd%d for %s after level change\n",
4142 rdev->raid_disk, mdname(mddev));
4143 }
4144 }
4145
4146 if (pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
4150 mddev->in_sync = 1;
4151 del_timer_sync(&mddev->safemode_timer);
4152 }
4153 blk_set_stacking_limits(&mddev->queue->limits);
4154 pers->run(mddev);
4155 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4156 mddev_resume(mddev);
4157 if (!mddev->thread)
4158 md_update_sb(mddev, 1);
4159 sysfs_notify_dirent_safe(mddev->sysfs_level);
4160 md_new_event(mddev);
4161 rv = len;
4162out_unlock:
4163 mddev_unlock(mddev);
4164 return rv;
4165}
4166
4167static struct md_sysfs_entry md_level =
4168__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4169
4170static ssize_t
4171layout_show(struct mddev *mddev, char *page)
4172{
	/* just a number, not meaningful for all levels */
4174 if (mddev->reshape_position != MaxSector &&
4175 mddev->layout != mddev->new_layout)
4176 return sprintf(page, "%d (%d)\n",
4177 mddev->new_layout, mddev->layout);
4178 return sprintf(page, "%d\n", mddev->layout);
4179}
4180
4181static ssize_t
4182layout_store(struct mddev *mddev, const char *buf, size_t len)
4183{
4184 unsigned int n;
4185 int err;
4186
4187 err = kstrtouint(buf, 10, &n);
4188 if (err < 0)
4189 return err;
4190 err = mddev_lock(mddev);
4191 if (err)
4192 return err;
4193
4194 if (mddev->pers) {
4195 if (mddev->pers->check_reshape == NULL)
4196 err = -EBUSY;
4197 else if (mddev->ro)
4198 err = -EROFS;
4199 else {
4200 mddev->new_layout = n;
4201 err = mddev->pers->check_reshape(mddev);
4202 if (err)
4203 mddev->new_layout = mddev->layout;
4204 }
4205 } else {
4206 mddev->new_layout = n;
4207 if (mddev->reshape_position == MaxSector)
4208 mddev->layout = n;
4209 }
4210 mddev_unlock(mddev);
4211 return err ?: len;
4212}
4213static struct md_sysfs_entry md_layout =
4214__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4215
4216static ssize_t
4217raid_disks_show(struct mddev *mddev, char *page)
4218{
4219 if (mddev->raid_disks == 0)
4220 return 0;
4221 if (mddev->reshape_position != MaxSector &&
4222 mddev->delta_disks != 0)
4223 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4224 mddev->raid_disks - mddev->delta_disks);
4225 return sprintf(page, "%d\n", mddev->raid_disks);
4226}
4227
4228static int update_raid_disks(struct mddev *mddev, int raid_disks);
4229
4230static ssize_t
4231raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4232{
4233 unsigned int n;
4234 int err;
4235
4236 err = kstrtouint(buf, 10, &n);
4237 if (err < 0)
4238 return err;
4239
4240 err = mddev_lock(mddev);
4241 if (err)
4242 return err;
4243 if (mddev->pers)
4244 err = update_raid_disks(mddev, n);
4245 else if (mddev->reshape_position != MaxSector) {
4246 struct md_rdev *rdev;
4247 int olddisks = mddev->raid_disks - mddev->delta_disks;
4248
4249 err = -EINVAL;
4250 rdev_for_each(rdev, mddev) {
4251 if (olddisks < n &&
4252 rdev->data_offset < rdev->new_data_offset)
4253 goto out_unlock;
4254 if (olddisks > n &&
4255 rdev->data_offset > rdev->new_data_offset)
4256 goto out_unlock;
4257 }
4258 err = 0;
4259 mddev->delta_disks = n - olddisks;
4260 mddev->raid_disks = n;
4261 mddev->reshape_backwards = (mddev->delta_disks < 0);
4262 } else
4263 mddev->raid_disks = n;
4264out_unlock:
4265 mddev_unlock(mddev);
4266 return err ? err : len;
4267}
4268static struct md_sysfs_entry md_raid_disks =
4269__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4270
4271static ssize_t
4272uuid_show(struct mddev *mddev, char *page)
4273{
4274 return sprintf(page, "%pU\n", mddev->uuid);
4275}
4276static struct md_sysfs_entry md_uuid =
4277__ATTR(uuid, S_IRUGO, uuid_show, NULL);
4278
4279static ssize_t
4280chunk_size_show(struct mddev *mddev, char *page)
4281{
4282 if (mddev->reshape_position != MaxSector &&
4283 mddev->chunk_sectors != mddev->new_chunk_sectors)
4284 return sprintf(page, "%d (%d)\n",
4285 mddev->new_chunk_sectors << 9,
4286 mddev->chunk_sectors << 9);
4287 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4288}
4289
4290static ssize_t
4291chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4292{
4293 unsigned long n;
4294 int err;
4295
4296 err = kstrtoul(buf, 10, &n);
4297 if (err < 0)
4298 return err;
4299
4300 err = mddev_lock(mddev);
4301 if (err)
4302 return err;
4303 if (mddev->pers) {
4304 if (mddev->pers->check_reshape == NULL)
4305 err = -EBUSY;
4306 else if (mddev->ro)
4307 err = -EROFS;
4308 else {
4309 mddev->new_chunk_sectors = n >> 9;
4310 err = mddev->pers->check_reshape(mddev);
4311 if (err)
4312 mddev->new_chunk_sectors = mddev->chunk_sectors;
4313 }
4314 } else {
4315 mddev->new_chunk_sectors = n >> 9;
4316 if (mddev->reshape_position == MaxSector)
4317 mddev->chunk_sectors = n >> 9;
4318 }
4319 mddev_unlock(mddev);
4320 return err ?: len;
4321}
4322static struct md_sysfs_entry md_chunk_size =
4323__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4324
4325static ssize_t
4326resync_start_show(struct mddev *mddev, char *page)
4327{
4328 if (mddev->recovery_cp == MaxSector)
4329 return sprintf(page, "none\n");
4330 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4331}
4332
4333static ssize_t
4334resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4335{
4336 unsigned long long n;
4337 int err;
4338
4339 if (cmd_match(buf, "none"))
4340 n = MaxSector;
4341 else {
4342 err = kstrtoull(buf, 10, &n);
4343 if (err < 0)
4344 return err;
4345 if (n != (sector_t)n)
4346 return -EINVAL;
4347 }
4348
4349 err = mddev_lock(mddev);
4350 if (err)
4351 return err;
4352 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4353 err = -EBUSY;
4354
4355 if (!err) {
4356 mddev->recovery_cp = n;
4357 if (mddev->pers)
4358 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4359 }
4360 mddev_unlock(mddev);
4361 return err ?: len;
4362}
4363static struct md_sysfs_entry md_resync_start =
4364__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4365 resync_start_show, resync_start_store);
4366
/*
 * The array state can be:
 *
 *  clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 *  inactive
 *     May have some settings, but array is not active
 *        all IO results in error
 *     When written, doesn't tear down array, but just stops it
 *     (teardown is done with STOP_ARRAY ioctl)
 *  suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 *  readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 *  read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 *  clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 *  active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 *  write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 *  active-idle
 *     like active, but no writes have been seen for a while (100msec).
 *
 *  broken
 *     Array is failed. It's useful because mounted arrays aren't stopped
 *     when a member fails, so this state will at least alert the user
 *     that something is wrong.
 */
4408enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4409 write_pending, active_idle, broken, bad_word};
4410static char *array_states[] = {
4411 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4412 "write-pending", "active-idle", "broken", NULL };
4413
4414static int match_word(const char *word, char **list)
4415{
4416 int n;
4417 for (n=0; list[n]; n++)
4418 if (cmd_match(word, list[n]))
4419 break;
4420 return n;
4421}
4422
4423static ssize_t
4424array_state_show(struct mddev *mddev, char *page)
4425{
4426 enum array_state st = inactive;
4427
4428 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4429 switch(mddev->ro) {
4430 case 1:
4431 st = readonly;
4432 break;
4433 case 2:
4434 st = read_auto;
4435 break;
4436 case 0:
4437 spin_lock(&mddev->lock);
4438 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4439 st = write_pending;
4440 else if (mddev->in_sync)
4441 st = clean;
4442 else if (mddev->safemode)
4443 st = active_idle;
4444 else
4445 st = active;
4446 spin_unlock(&mddev->lock);
4447 }
4448
4449 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4450 st = broken;
4451 } else {
4452 if (list_empty(&mddev->disks) &&
4453 mddev->raid_disks == 0 &&
4454 mddev->dev_sectors == 0)
4455 st = clear;
4456 else
4457 st = inactive;
4458 }
4459 return sprintf(page, "%s\n", array_states[st]);
4460}
4461
4462static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4463static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4464static int do_md_run(struct mddev *mddev);
4465static int restart_array(struct mddev *mddev);
4466
4467static ssize_t
4468array_state_store(struct mddev *mddev, const char *buf, size_t len)
4469{
4470 int err = 0;
4471 enum array_state st = match_word(buf, array_states);
4472
4473 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/* don't take reconfig_mutex when toggling between
		 * clean and active
		 */
4477 spin_lock(&mddev->lock);
4478 if (st == active) {
4479 restart_array(mddev);
4480 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4481 md_wakeup_thread(mddev->thread);
4482 wake_up(&mddev->sb_wait);
4483 } else {
4484 restart_array(mddev);
4485 if (!set_in_sync(mddev))
4486 err = -EBUSY;
4487 }
4488 if (!err)
4489 sysfs_notify_dirent_safe(mddev->sysfs_state);
4490 spin_unlock(&mddev->lock);
4491 return err ?: len;
4492 }
4493 err = mddev_lock(mddev);
4494 if (err)
4495 return err;
4496 err = -EINVAL;
4497 switch(st) {
4498 case bad_word:
4499 break;
4500 case clear:
		/* stopping an active array */
4502 err = do_md_stop(mddev, 0, NULL);
4503 break;
4504 case inactive:
		/* stopping an active array */
4506 if (mddev->pers)
4507 err = do_md_stop(mddev, 2, NULL);
4508 else
			err = 0; /* already inactive */
4510 break;
4511 case suspended:
4512 break;
4513 case readonly:
4514 if (mddev->pers)
4515 err = md_set_readonly(mddev, NULL);
4516 else {
4517 mddev->ro = 1;
4518 set_disk_ro(mddev->gendisk, 1);
4519 err = do_md_run(mddev);
4520 }
4521 break;
4522 case read_auto:
4523 if (mddev->pers) {
4524 if (mddev->ro == 0)
4525 err = md_set_readonly(mddev, NULL);
4526 else if (mddev->ro == 1)
4527 err = restart_array(mddev);
4528 if (err == 0) {
4529 mddev->ro = 2;
4530 set_disk_ro(mddev->gendisk, 0);
4531 }
4532 } else {
4533 mddev->ro = 2;
4534 err = do_md_run(mddev);
4535 }
4536 break;
4537 case clean:
4538 if (mddev->pers) {
4539 err = restart_array(mddev);
4540 if (err)
4541 break;
4542 spin_lock(&mddev->lock);
4543 if (!set_in_sync(mddev))
4544 err = -EBUSY;
4545 spin_unlock(&mddev->lock);
4546 } else
4547 err = -EINVAL;
4548 break;
4549 case active:
4550 if (mddev->pers) {
4551 err = restart_array(mddev);
4552 if (err)
4553 break;
4554 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4555 wake_up(&mddev->sb_wait);
4556 err = 0;
4557 } else {
4558 mddev->ro = 0;
4559 set_disk_ro(mddev->gendisk, 0);
4560 err = do_md_run(mddev);
4561 }
4562 break;
4563 case write_pending:
4564 case active_idle:
4565 case broken:
		/* these cannot be set directly */
4567 break;
4568 }
4569
4570 if (!err) {
4571 if (mddev->hold_active == UNTIL_IOCTL)
4572 mddev->hold_active = 0;
4573 sysfs_notify_dirent_safe(mddev->sysfs_state);
4574 }
4575 mddev_unlock(mddev);
4576 return err ?: len;
4577}
4578static struct md_sysfs_entry md_array_state =
4579__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
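/*
 * Illustrative sysfs usage for array_state (not part of the original
 * source; "md0" is a hypothetical array):
 *
 *   cat /sys/block/md0/md/array_state            # e.g. "clean" or "active"
 *   echo readonly > /sys/block/md0/md/array_state
 *   echo inactive > /sys/block/md0/md/array_state  # stop, keep settings
 */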
4580
4581static ssize_t
max_corrected_read_errors_show(struct mddev *mddev, char *page)
{
4583 return sprintf(page, "%d\n",
4584 atomic_read(&mddev->max_corr_read_errors));
4585}
4586
4587static ssize_t
4588max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4589{
4590 unsigned int n;
4591 int rv;
4592
4593 rv = kstrtouint(buf, 10, &n);
4594 if (rv < 0)
4595 return rv;
4596 atomic_set(&mddev->max_corr_read_errors, n);
4597 return len;
4598}
4599
4600static struct md_sysfs_entry max_corr_read_errors =
4601__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4602 max_corrected_read_errors_store);
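/*
 * Illustrative usage (hypothetical md0): the value is a plain decimal
 * count, used by the RAID personalities' read-error handlers as the
 * number of corrected read errors tolerated on a device before md
 * gives up on it:
 *
 *   echo 50 > /sys/block/md0/md/max_read_errors
 */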
4603
4604static ssize_t
4605null_show(struct mddev *mddev, char *page)
4606{
4607 return -EINVAL;
4608}
4609
/* need to ensure rdev_delayed_delete() has completed */
4611static void flush_rdev_wq(struct mddev *mddev)
4612{
4613 struct md_rdev *rdev;
4614
4615 rcu_read_lock();
4616 rdev_for_each_rcu(rdev, mddev)
4617 if (work_pending(&rdev->del_work)) {
4618 flush_workqueue(md_rdev_misc_wq);
4619 break;
4620 }
4621 rcu_read_unlock();
4622}
4623
4624static ssize_t
4625new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4626{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, only checking done is that in bind_rdev_to_array,
	 * which mainly checks size.
	 */
4634 char *e;
4635 int major = simple_strtoul(buf, &e, 10);
4636 int minor;
4637 dev_t dev;
4638 struct md_rdev *rdev;
4639 int err;
4640
4641 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4642 return -EINVAL;
4643 minor = simple_strtoul(e+1, &e, 10);
4644 if (*e && *e != '\n')
4645 return -EINVAL;
4646 dev = MKDEV(major, minor);
4647 if (major != MAJOR(dev) ||
4648 minor != MINOR(dev))
4649 return -EOVERFLOW;
4650
4651 flush_rdev_wq(mddev);
4652 err = mddev_lock(mddev);
4653 if (err)
4654 return err;
4655 if (mddev->persistent) {
4656 rdev = md_import_device(dev, mddev->major_version,
4657 mddev->minor_version);
4658 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4659 struct md_rdev *rdev0
4660 = list_entry(mddev->disks.next,
4661 struct md_rdev, same_set);
4662 err = super_types[mddev->major_version]
4663 .load_super(rdev, rdev0, mddev->minor_version);
4664 if (err < 0)
4665 goto out;
4666 }
4667 } else if (mddev->external)
4668 rdev = md_import_device(dev, -2, -1);
4669 else
4670 rdev = md_import_device(dev, -1, -1);
4671
4672 if (IS_ERR(rdev)) {
4673 mddev_unlock(mddev);
4674 return PTR_ERR(rdev);
4675 }
4676 err = bind_rdev_to_array(rdev, mddev);
4677 out:
4678 if (err)
4679 export_rdev(rdev);
4680 mddev_unlock(mddev);
4681 if (!err)
4682 md_new_event(mddev);
4683 return err ? err : len;
4684}
4685
4686static struct md_sysfs_entry md_new_device =
4687__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
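/*
 * Illustrative usage (devices are hypothetical): new_dev takes
 * "major:minor" of a block device to attach to the array, e.g. 8:32
 * (/dev/sdc on a typical system):
 *
 *   echo 8:32 > /sys/block/md0/md/new_dev
 */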
4688
4689static ssize_t
4690bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4691{
4692 char *end;
4693 unsigned long chunk, end_chunk;
4694 int err;
4695
4696 err = mddev_lock(mddev);
4697 if (err)
4698 return err;
4699 if (!mddev->bitmap)
4700 goto out;
	/* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
4702 while (*buf) {
4703 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4704 if (buf == end) break;
4705 if (*end == '-') {
4706 buf = end + 1;
4707 end_chunk = simple_strtoul(buf, &end, 0);
4708 if (buf == end) break;
4709 }
4710 if (*end && !isspace(*end)) break;
4711 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4712 buf = skip_spaces(end);
4713 }
4714 md_bitmap_unplug(mddev->bitmap);
4715out:
4716 mddev_unlock(mddev);
4717 return len;
4718}
4719
4720static struct md_sysfs_entry md_bitmap =
4721__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
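/*
 * Illustrative usage (hypothetical md0): bitmap_set_bits accepts
 * whitespace-separated chunk numbers or first-last ranges, matching the
 * parser above, and marks those bitmap chunks dirty:
 *
 *   echo "0-127 512" > /sys/block/md0/md/bitmap_set_bits
 */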
4722
4723static ssize_t
4724size_show(struct mddev *mddev, char *page)
4725{
4726 return sprintf(page, "%llu\n",
4727 (unsigned long long)mddev->dev_sectors / 2);
4728}
4729
4730static int update_size(struct mddev *mddev, sector_t num_sectors);
4731
4732static ssize_t
4733size_store(struct mddev *mddev, const char *buf, size_t len)
4734{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
4739 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4741
4742 if (err < 0)
4743 return err;
4744 err = mddev_lock(mddev);
4745 if (err)
4746 return err;
4747 if (mddev->pers) {
4748 err = update_size(mddev, sectors);
4749 if (err == 0)
4750 md_update_sb(mddev, 1);
4751 } else {
4752 if (mddev->dev_sectors == 0 ||
4753 mddev->dev_sectors > sectors)
4754 mddev->dev_sectors = sectors;
4755 else
4756 err = -ENOSPC;
4757 }
4758 mddev_unlock(mddev);
4759 return err ? err : len;
4760}
4761
4762static struct md_sysfs_entry md_size =
4763__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
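/*
 * Illustrative usage (hypothetical md0): component_size is read and
 * written in 1K blocks (dev_sectors / 2), so shrinking each member
 * device to 1 GiB would be:
 *
 *   echo 1048576 > /sys/block/md0/md/component_size
 */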
4764
/* Metadata version.
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4771static ssize_t
4772metadata_show(struct mddev *mddev, char *page)
4773{
4774 if (mddev->persistent)
4775 return sprintf(page, "%d.%d\n",
4776 mddev->major_version, mddev->minor_version);
4777 else if (mddev->external)
4778 return sprintf(page, "external:%s\n", mddev->metadata_type);
4779 else
4780 return sprintf(page, "none\n");
4781}
4782
4783static ssize_t
4784metadata_store(struct mddev *mddev, const char *buf, size_t len)
4785{
4786 int major, minor;
4787 char *e;
4788 int err;
4789
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array.
	 */
4794 err = mddev_lock(mddev);
4795 if (err)
4796 return err;
4797 err = -EBUSY;
4798 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4799 ;
4800 else if (!list_empty(&mddev->disks))
4801 goto out_unlock;
4802
4803 err = 0;
4804 if (cmd_match(buf, "none")) {
4805 mddev->persistent = 0;
4806 mddev->external = 0;
4807 mddev->major_version = 0;
4808 mddev->minor_version = 90;
4809 goto out_unlock;
4810 }
4811 if (strncmp(buf, "external:", 9) == 0) {
4812 size_t namelen = len-9;
4813 if (namelen >= sizeof(mddev->metadata_type))
4814 namelen = sizeof(mddev->metadata_type)-1;
4815 strncpy(mddev->metadata_type, buf+9, namelen);
4816 mddev->metadata_type[namelen] = 0;
4817 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4818 mddev->metadata_type[--namelen] = 0;
4819 mddev->persistent = 0;
4820 mddev->external = 1;
4821 mddev->major_version = 0;
4822 mddev->minor_version = 90;
4823 goto out_unlock;
4824 }
4825 major = simple_strtoul(buf, &e, 10);
4826 err = -EINVAL;
4827 if (e==buf || *e != '.')
4828 goto out_unlock;
4829 buf = e+1;
4830 minor = simple_strtoul(buf, &e, 10);
4831 if (e==buf || (*e && *e != '\n') )
4832 goto out_unlock;
4833 err = -ENOENT;
4834 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4835 goto out_unlock;
4836 mddev->major_version = major;
4837 mddev->minor_version = minor;
4838 mddev->persistent = 1;
4839 mddev->external = 0;
4840 err = 0;
4841out_unlock:
4842 mddev_unlock(mddev);
4843 return err ?: len;
4844}
4845
4846static struct md_sysfs_entry md_metadata =
4847__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
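/*
 * Illustrative usage (hypothetical md0; the external name is an
 * example), matching the formats parsed above:
 *
 *   echo 1.2 > /sys/block/md0/md/metadata_version          # internal N.M
 *   echo external:imsm > /sys/block/md0/md/metadata_version
 *   echo none > /sys/block/md0/md/metadata_version
 */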
4848
4849static ssize_t
4850action_show(struct mddev *mddev, char *page)
4851{
4852 char *type = "idle";
4853 unsigned long recovery = mddev->recovery;
4854 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4855 type = "frozen";
4856 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4857 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4858 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4859 type = "reshape";
4860 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4861 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4862 type = "resync";
4863 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4864 type = "check";
4865 else
4866 type = "repair";
4867 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4868 type = "recover";
4869 else if (mddev->reshape_position != MaxSector)
4870 type = "reshape";
4871 }
4872 return sprintf(page, "%s\n", type);
4873}
4874
4875static ssize_t
4876action_store(struct mddev *mddev, const char *page, size_t len)
4877{
4878 if (!mddev->pers || !mddev->pers->sync_request)
4879 return -EINVAL;
4880
4881
4882 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4883 if (cmd_match(page, "frozen"))
4884 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4885 else
4886 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4887 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4888 mddev_lock(mddev) == 0) {
4889 if (work_pending(&mddev->del_work))
4890 flush_workqueue(md_misc_wq);
4891 if (mddev->sync_thread) {
4892 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4893 md_reap_sync_thread(mddev);
4894 }
4895 mddev_unlock(mddev);
4896 }
4897 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4898 return -EBUSY;
4899 else if (cmd_match(page, "resync"))
4900 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4901 else if (cmd_match(page, "recover")) {
4902 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4903 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4904 } else if (cmd_match(page, "reshape")) {
4905 int err;
4906 if (mddev->pers->start_reshape == NULL)
4907 return -EINVAL;
4908 err = mddev_lock(mddev);
4909 if (!err) {
4910 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4911 err = -EBUSY;
4912 else {
4913 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4914 err = mddev->pers->start_reshape(mddev);
4915 }
4916 mddev_unlock(mddev);
4917 }
4918 if (err)
4919 return err;
4920 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4921 } else {
4922 if (cmd_match(page, "check"))
4923 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4924 else if (!cmd_match(page, "repair"))
4925 return -EINVAL;
4926 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4927 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4928 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4929 }
4930 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4934 mddev->ro = 0;
4935 md_wakeup_thread(mddev->sync_thread);
4936 }
4937 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4938 md_wakeup_thread(mddev->thread);
4939 sysfs_notify_dirent_safe(mddev->sysfs_action);
4940 return len;
4941}
4942
4943static struct md_sysfs_entry md_scan_mode =
4944__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
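/*
 * Illustrative usage (hypothetical md0): sync_action accepts the words
 * handled in action_store() above:
 *
 *   echo check  > /sys/block/md0/md/sync_action   # read-only scrub
 *   echo repair > /sys/block/md0/md/sync_action   # scrub and rewrite
 *   echo idle   > /sys/block/md0/md/sync_action   # interrupt it again
 */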
4945
4946static ssize_t
4947last_sync_action_show(struct mddev *mddev, char *page)
4948{
4949 return sprintf(page, "%s\n", mddev->last_sync_action);
4950}
4951
4952static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4953
4954static ssize_t
4955mismatch_cnt_show(struct mddev *mddev, char *page)
4956{
4957 return sprintf(page, "%llu\n",
4958 (unsigned long long)
4959 atomic64_read(&mddev->resync_mismatches));
4960}
4961
4962static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4963
4964static ssize_t
4965sync_min_show(struct mddev *mddev, char *page)
4966{
4967 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4968 mddev->sync_speed_min ? "local": "system");
4969}
4970
4971static ssize_t
4972sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4973{
4974 unsigned int min;
4975 int rv;
4976
4977 if (strncmp(buf, "system", 6)==0) {
4978 min = 0;
4979 } else {
4980 rv = kstrtouint(buf, 10, &min);
4981 if (rv < 0)
4982 return rv;
4983 if (min == 0)
4984 return -EINVAL;
4985 }
4986 mddev->sync_speed_min = min;
4987 return len;
4988}
4989
4990static struct md_sysfs_entry md_sync_min =
4991__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4992
4993static ssize_t
4994sync_max_show(struct mddev *mddev, char *page)
4995{
4996 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4997 mddev->sync_speed_max ? "local": "system");
4998}
4999
5000static ssize_t
5001sync_max_store(struct mddev *mddev, const char *buf, size_t len)
5002{
5003 unsigned int max;
5004 int rv;
5005
5006 if (strncmp(buf, "system", 6)==0) {
5007 max = 0;
5008 } else {
5009 rv = kstrtouint(buf, 10, &max);
5010 if (rv < 0)
5011 return rv;
5012 if (max == 0)
5013 return -EINVAL;
5014 }
5015 mddev->sync_speed_max = max;
5016 return len;
5017}
5018
5019static struct md_sysfs_entry md_sync_max =
5020__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
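/*
 * Illustrative usage (hypothetical md0): the speed limits are in
 * KiB/sec per device; writing "system" reverts to the module-wide
 * sysctl defaults (sysctl_speed_limit_min/max):
 *
 *   echo 50000  > /sys/block/md0/md/sync_speed_max
 *   echo system > /sys/block/md0/md/sync_speed_min
 */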
5021
5022static ssize_t
5023degraded_show(struct mddev *mddev, char *page)
5024{
5025 return sprintf(page, "%d\n", mddev->degraded);
5026}
5027static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
5028
5029static ssize_t
5030sync_force_parallel_show(struct mddev *mddev, char *page)
5031{
5032 return sprintf(page, "%d\n", mddev->parallel_resync);
5033}
5034
5035static ssize_t
5036sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
5037{
5038 long n;
5039
5040 if (kstrtol(buf, 10, &n))
5041 return -EINVAL;
5042
5043 if (n != 0 && n != 1)
5044 return -EINVAL;
5045
5046 mddev->parallel_resync = n;
5047
5048 if (mddev->sync_thread)
5049 wake_up(&resync_wait);
5050
5051 return len;
5052}
5053
5054
5055static struct md_sysfs_entry md_sync_force_parallel =
5056__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
5057 sync_force_parallel_show, sync_force_parallel_store);
5058
5059static ssize_t
5060sync_speed_show(struct mddev *mddev, char *page)
5061{
5062 unsigned long resync, dt, db;
5063 if (mddev->curr_resync == 0)
5064 return sprintf(page, "none\n");
5065 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5066 dt = (jiffies - mddev->resync_mark) / HZ;
5067 if (!dt) dt++;
5068 db = resync - mddev->resync_mark_cnt;
5069 return sprintf(page, "%lu\n", db/dt/2);
5070}
5071
5072static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
5073
5074static ssize_t
5075sync_completed_show(struct mddev *mddev, char *page)
5076{
5077 unsigned long long max_sectors, resync;
5078
5079 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5080 return sprintf(page, "none\n");
5081
5082 if (mddev->curr_resync == 1 ||
5083 mddev->curr_resync == 2)
5084 return sprintf(page, "delayed\n");
5085
5086 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5087 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5088 max_sectors = mddev->resync_max_sectors;
5089 else
5090 max_sectors = mddev->dev_sectors;
5091
5092 resync = mddev->curr_resync_completed;
5093 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5094}
5095
5096static struct md_sysfs_entry md_sync_completed =
5097 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5098
5099static ssize_t
5100min_sync_show(struct mddev *mddev, char *page)
5101{
5102 return sprintf(page, "%llu\n",
5103 (unsigned long long)mddev->resync_min);
5104}
5105static ssize_t
5106min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5107{
5108 unsigned long long min;
5109 int err;
5110
5111 if (kstrtoull(buf, 10, &min))
5112 return -EINVAL;
5113
5114 spin_lock(&mddev->lock);
5115 err = -EINVAL;
5116 if (min > mddev->resync_max)
5117 goto out_unlock;
5118
5119 err = -EBUSY;
5120 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5121 goto out_unlock;
5122
	/* Round down to multiple of 4K for safety */
5124 mddev->resync_min = round_down(min, 8);
5125 err = 0;
5126
5127out_unlock:
5128 spin_unlock(&mddev->lock);
5129 return err ?: len;
5130}
5131
5132static struct md_sysfs_entry md_min_sync =
5133__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5134
5135static ssize_t
5136max_sync_show(struct mddev *mddev, char *page)
5137{
5138 if (mddev->resync_max == MaxSector)
5139 return sprintf(page, "max\n");
5140 else
5141 return sprintf(page, "%llu\n",
5142 (unsigned long long)mddev->resync_max);
5143}
5144static ssize_t
5145max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5146{
5147 int err;
5148 spin_lock(&mddev->lock);
5149 if (strncmp(buf, "max", 3) == 0)
5150 mddev->resync_max = MaxSector;
5151 else {
5152 unsigned long long max;
5153 int chunk;
5154
5155 err = -EINVAL;
5156 if (kstrtoull(buf, 10, &max))
5157 goto out_unlock;
5158 if (max < mddev->resync_min)
5159 goto out_unlock;
5160
5161 err = -EBUSY;
5162 if (max < mddev->resync_max &&
5163 mddev->ro == 0 &&
5164 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5165 goto out_unlock;
5166
		/* Must be a multiple of chunk_size */
5168 chunk = mddev->chunk_sectors;
5169 if (chunk) {
5170 sector_t temp = max;
5171
5172 err = -EINVAL;
5173 if (sector_div(temp, chunk))
5174 goto out_unlock;
5175 }
5176 mddev->resync_max = max;
5177 }
5178 wake_up(&mddev->recovery_wait);
5179 err = 0;
5180out_unlock:
5181 spin_unlock(&mddev->lock);
5182 return err ?: len;
5183}
5184
5185static struct md_sysfs_entry md_max_sync =
5186__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
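/*
 * Illustrative usage (hypothetical md0): sync_min/sync_max bound the
 * region, in sectors, that a resync may work on; this is how userspace
 * can scrub just a slice of the array:
 *
 *   echo 0       > /sys/block/md0/md/sync_min
 *   echo 2097152 > /sys/block/md0/md/sync_max   # first 1 GiB only
 *   echo max     > /sys/block/md0/md/sync_max   # remove the cap
 */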
5187
5188static ssize_t
5189suspend_lo_show(struct mddev *mddev, char *page)
5190{
5191 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5192}
5193
5194static ssize_t
5195suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5196{
5197 unsigned long long new;
5198 int err;
5199
5200 err = kstrtoull(buf, 10, &new);
5201 if (err < 0)
5202 return err;
5203 if (new != (sector_t)new)
5204 return -EINVAL;
5205
5206 err = mddev_lock(mddev);
5207 if (err)
5208 return err;
5209 err = -EINVAL;
5210 if (mddev->pers == NULL ||
5211 mddev->pers->quiesce == NULL)
5212 goto unlock;
5213 mddev_suspend(mddev);
5214 mddev->suspend_lo = new;
5215 mddev_resume(mddev);
5216
5217 err = 0;
5218unlock:
5219 mddev_unlock(mddev);
5220 return err ?: len;
5221}
5222static struct md_sysfs_entry md_suspend_lo =
5223__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5224
5225static ssize_t
5226suspend_hi_show(struct mddev *mddev, char *page)
5227{
5228 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5229}
5230
5231static ssize_t
5232suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5233{
5234 unsigned long long new;
5235 int err;
5236
5237 err = kstrtoull(buf, 10, &new);
5238 if (err < 0)
5239 return err;
5240 if (new != (sector_t)new)
5241 return -EINVAL;
5242
5243 err = mddev_lock(mddev);
5244 if (err)
5245 return err;
5246 err = -EINVAL;
5247 if (mddev->pers == NULL)
5248 goto unlock;
5249
5250 mddev_suspend(mddev);
5251 mddev->suspend_hi = new;
5252 mddev_resume(mddev);
5253
5254 err = 0;
5255unlock:
5256 mddev_unlock(mddev);
5257 return err ?: len;
5258}
5259static struct md_sysfs_entry md_suspend_hi =
5260__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
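/*
 * Illustrative usage (hypothetical md0): suspend_lo/suspend_hi give a
 * sector range; I/O falling inside it is held off while the range is
 * suspended (used by reshape helpers):
 *
 *   echo 0      > /sys/block/md0/md/suspend_lo
 *   echo 262144 > /sys/block/md0/md/suspend_hi
 */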
5261
5262static ssize_t
5263reshape_position_show(struct mddev *mddev, char *page)
5264{
5265 if (mddev->reshape_position != MaxSector)
5266 return sprintf(page, "%llu\n",
5267 (unsigned long long)mddev->reshape_position);
5268 strcpy(page, "none\n");
5269 return 5;
5270}
5271
5272static ssize_t
5273reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5274{
5275 struct md_rdev *rdev;
5276 unsigned long long new;
5277 int err;
5278
5279 err = kstrtoull(buf, 10, &new);
5280 if (err < 0)
5281 return err;
5282 if (new != (sector_t)new)
5283 return -EINVAL;
5284 err = mddev_lock(mddev);
5285 if (err)
5286 return err;
5287 err = -EBUSY;
5288 if (mddev->pers)
5289 goto unlock;
5290 mddev->reshape_position = new;
5291 mddev->delta_disks = 0;
5292 mddev->reshape_backwards = 0;
5293 mddev->new_level = mddev->level;
5294 mddev->new_layout = mddev->layout;
5295 mddev->new_chunk_sectors = mddev->chunk_sectors;
5296 rdev_for_each(rdev, mddev)
5297 rdev->new_data_offset = rdev->data_offset;
5298 err = 0;
5299unlock:
5300 mddev_unlock(mddev);
5301 return err ?: len;
5302}
5303
5304static struct md_sysfs_entry md_reshape_position =
5305__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5306 reshape_position_store);
5307
5308static ssize_t
5309reshape_direction_show(struct mddev *mddev, char *page)
5310{
5311 return sprintf(page, "%s\n",
5312 mddev->reshape_backwards ? "backwards" : "forwards");
5313}
5314
5315static ssize_t
5316reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5317{
5318 int backwards = 0;
5319 int err;
5320
5321 if (cmd_match(buf, "forwards"))
5322 backwards = 0;
5323 else if (cmd_match(buf, "backwards"))
5324 backwards = 1;
5325 else
5326 return -EINVAL;
5327 if (mddev->reshape_backwards == backwards)
5328 return len;
5329
5330 err = mddev_lock(mddev);
5331 if (err)
5332 return err;
5333
5334 if (mddev->delta_disks)
5335 err = -EBUSY;
5336 else if (mddev->persistent &&
5337 mddev->major_version == 0)
5338 err = -EINVAL;
5339 else
5340 mddev->reshape_backwards = backwards;
5341 mddev_unlock(mddev);
5342 return err ?: len;
5343}
5344
5345static struct md_sysfs_entry md_reshape_direction =
5346__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5347 reshape_direction_store);
5348
5349static ssize_t
5350array_size_show(struct mddev *mddev, char *page)
5351{
5352 if (mddev->external_size)
5353 return sprintf(page, "%llu\n",
5354 (unsigned long long)mddev->array_sectors/2);
5355 else
5356 return sprintf(page, "default\n");
5357}
5358
5359static ssize_t
5360array_size_store(struct mddev *mddev, const char *buf, size_t len)
5361{
5362 sector_t sectors;
5363 int err;
5364
5365 err = mddev_lock(mddev);
5366 if (err)
5367 return err;
5368
	/* cluster raid doesn't support change array_sectors */
5370 if (mddev_is_clustered(mddev)) {
5371 mddev_unlock(mddev);
5372 return -EINVAL;
5373 }
5374
5375 if (strncmp(buf, "default", 7) == 0) {
5376 if (mddev->pers)
5377 sectors = mddev->pers->size(mddev, 0, 0);
5378 else
5379 sectors = mddev->array_sectors;
5380
5381 mddev->external_size = 0;
5382 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
5384 err = -EINVAL;
5385 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5386 err = -E2BIG;
5387 else
5388 mddev->external_size = 1;
5389 }
5390
5391 if (!err) {
5392 mddev->array_sectors = sectors;
5393 if (mddev->pers) {
5394 set_capacity(mddev->gendisk, mddev->array_sectors);
5395 revalidate_disk_size(mddev->gendisk, true);
5396 }
5397 }
5398 mddev_unlock(mddev);
5399 return err ?: len;
5400}
5401
5402static struct md_sysfs_entry md_array_size =
5403__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5404 array_size_store);
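/*
 * Illustrative usage (hypothetical md0): array_size is either "default"
 * (let the personality compute it) or an explicit size in 1K blocks, no
 * larger than what the personality would provide:
 *
 *   echo default > /sys/block/md0/md/array_size
 *   echo 524288  > /sys/block/md0/md/array_size   # clamp to 512 MiB
 */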
5405
5406static ssize_t
5407consistency_policy_show(struct mddev *mddev, char *page)
5408{
5409 int ret;
5410
5411 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5412 ret = sprintf(page, "journal\n");
5413 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5414 ret = sprintf(page, "ppl\n");
5415 } else if (mddev->bitmap) {
5416 ret = sprintf(page, "bitmap\n");
5417 } else if (mddev->pers) {
5418 if (mddev->pers->sync_request)
5419 ret = sprintf(page, "resync\n");
5420 else
5421 ret = sprintf(page, "none\n");
5422 } else {
5423 ret = sprintf(page, "unknown\n");
5424 }
5425
5426 return ret;
5427}
5428
5429static ssize_t
5430consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5431{
5432 int err = 0;
5433
5434 if (mddev->pers) {
5435 if (mddev->pers->change_consistency_policy)
5436 err = mddev->pers->change_consistency_policy(mddev, buf);
5437 else
5438 err = -EBUSY;
5439 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5440 set_bit(MD_HAS_PPL, &mddev->flags);
5441 } else {
5442 err = -EINVAL;
5443 }
5444
5445 return err ? err : len;
5446}
5447
5448static struct md_sysfs_entry md_consistency_policy =
5449__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5450 consistency_policy_store);
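/*
 * Illustrative usage (hypothetical md0): reading reports how the array
 * keeps itself consistent (journal/ppl/bitmap/resync/none); writing is
 * only honoured when the personality can switch policy, or to preset
 * "ppl" on an external-metadata array:
 *
 *   cat /sys/block/md0/md/consistency_policy      # e.g. "bitmap"
 *   echo ppl > /sys/block/md0/md/consistency_policy
 */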
5451
5452static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5453{
5454 return sprintf(page, "%d\n", mddev->fail_last_dev);
5455}

/*
 * Setting fail_last_dev to true to allow last device to be forcibly removed
 * from RAID1/RAID10.
 */
5461static ssize_t
5462fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5463{
5464 int ret;
5465 bool value;
5466
5467 ret = kstrtobool(buf, &value);
5468 if (ret)
5469 return ret;
5470
5471 if (value != mddev->fail_last_dev)
5472 mddev->fail_last_dev = value;
5473
5474 return len;
5475}
5476static struct md_sysfs_entry md_fail_last_dev =
5477__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5478 fail_last_dev_store);
5479
5480static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5481{
5482 if (mddev->pers == NULL || (mddev->pers->level != 1))
5483 return sprintf(page, "n/a\n");
5484 else
5485 return sprintf(page, "%d\n", mddev->serialize_policy);
5486}

/*
 * Setting serialize_policy to true to enforce write IO is not reordered
 * for raid1.
 */
5492static ssize_t
5493serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5494{
5495 int err;
5496 bool value;
5497
5498 err = kstrtobool(buf, &value);
5499 if (err)
5500 return err;
5501
5502 if (value == mddev->serialize_policy)
5503 return len;
5504
5505 err = mddev_lock(mddev);
5506 if (err)
5507 return err;
5508 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5509 pr_err("md: serialize_policy is only effective for raid1\n");
5510 err = -EINVAL;
5511 goto unlock;
5512 }
5513
5514 mddev_suspend(mddev);
5515 if (value)
5516 mddev_create_serial_pool(mddev, NULL, true);
5517 else
5518 mddev_destroy_serial_pool(mddev, NULL, true);
5519 mddev->serialize_policy = value;
5520 mddev_resume(mddev);
5521unlock:
5522 mddev_unlock(mddev);
5523 return err ?: len;
5524}
5525
5526static struct md_sysfs_entry md_serialize_policy =
5527__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5528 serialize_policy_store);
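/*
 * Illustrative usage (hypothetical md0, raid1 only): serialize_policy
 * takes a boolean and toggles the serial pool that orders overlapping
 * writes:
 *
 *   echo 1 > /sys/block/md0/md/serialize_policy
 */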
5529
5530
5531static struct attribute *md_default_attrs[] = {
5532 &md_level.attr,
5533 &md_layout.attr,
5534 &md_raid_disks.attr,
5535 &md_uuid.attr,
5536 &md_chunk_size.attr,
5537 &md_size.attr,
5538 &md_resync_start.attr,
5539 &md_metadata.attr,
5540 &md_new_device.attr,
5541 &md_safe_delay.attr,
5542 &md_array_state.attr,
5543 &md_reshape_position.attr,
5544 &md_reshape_direction.attr,
5545 &md_array_size.attr,
5546 &max_corr_read_errors.attr,
5547 &md_consistency_policy.attr,
5548 &md_fail_last_dev.attr,
5549 &md_serialize_policy.attr,
5550 NULL,
5551};
5552
5553static struct attribute *md_redundancy_attrs[] = {
5554 &md_scan_mode.attr,
5555 &md_last_scan_mode.attr,
5556 &md_mismatches.attr,
5557 &md_sync_min.attr,
5558 &md_sync_max.attr,
5559 &md_sync_speed.attr,
5560 &md_sync_force_parallel.attr,
5561 &md_sync_completed.attr,
5562 &md_min_sync.attr,
5563 &md_max_sync.attr,
5564 &md_suspend_lo.attr,
5565 &md_suspend_hi.attr,
5566 &md_bitmap.attr,
5567 &md_degraded.attr,
5568 NULL,
5569};
5570static struct attribute_group md_redundancy_group = {
5571 .name = NULL,
5572 .attrs = md_redundancy_attrs,
5573};
5574
5575static ssize_t
5576md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5577{
5578 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5579 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5580 ssize_t rv;
5581
5582 if (!entry->show)
5583 return -EIO;
5584 spin_lock(&all_mddevs_lock);
5585 if (list_empty(&mddev->all_mddevs)) {
5586 spin_unlock(&all_mddevs_lock);
5587 return -EBUSY;
5588 }
5589 mddev_get(mddev);
5590 spin_unlock(&all_mddevs_lock);
5591
5592 rv = entry->show(mddev, page);
5593 mddev_put(mddev);
5594 return rv;
5595}
5596
5597static ssize_t
5598md_attr_store(struct kobject *kobj, struct attribute *attr,
5599 const char *page, size_t length)
5600{
5601 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5602 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5603 ssize_t rv;
5604
5605 if (!entry->store)
5606 return -EIO;
5607 if (!capable(CAP_SYS_ADMIN))
5608 return -EACCES;
5609 spin_lock(&all_mddevs_lock);
5610 if (list_empty(&mddev->all_mddevs)) {
5611 spin_unlock(&all_mddevs_lock);
5612 return -EBUSY;
5613 }
5614 mddev_get(mddev);
5615 spin_unlock(&all_mddevs_lock);
5616 rv = entry->store(mddev, page, length);
5617 mddev_put(mddev);
5618 return rv;
5619}
5620
5621static void md_free(struct kobject *ko)
5622{
5623 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5624
5625 if (mddev->sysfs_state)
5626 sysfs_put(mddev->sysfs_state);
5627 if (mddev->sysfs_level)
5628 sysfs_put(mddev->sysfs_level);
5629
5630 if (mddev->gendisk)
5631 del_gendisk(mddev->gendisk);
5632 if (mddev->queue)
5633 blk_cleanup_queue(mddev->queue);
5634 if (mddev->gendisk)
5635 put_disk(mddev->gendisk);
5636 percpu_ref_exit(&mddev->writes_pending);
5637
5638 bioset_exit(&mddev->bio_set);
5639 bioset_exit(&mddev->sync_set);
5640 kfree(mddev);
5641}
5642
5643static const struct sysfs_ops md_sysfs_ops = {
5644 .show = md_attr_show,
5645 .store = md_attr_store,
5646};
5647static struct kobj_type md_ktype = {
5648 .release = md_free,
5649 .sysfs_ops = &md_sysfs_ops,
5650 .default_attrs = md_default_attrs,
5651};
5652
5653int mdp_major = 0;
5654
5655static void mddev_delayed_delete(struct work_struct *ws)
5656{
5657 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5658
5659 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5660 kobject_del(&mddev->kobj);
5661 kobject_put(&mddev->kobj);
5662}
5663
5664static void no_op(struct percpu_ref *r) {}
5665
5666int mddev_init_writes_pending(struct mddev *mddev)
5667{
5668 if (mddev->writes_pending.percpu_count_ptr)
5669 return 0;
5670 if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0)
5671 return -ENOMEM;
5672
5673 percpu_ref_put(&mddev->writes_pending);
5674 return 0;
5675}
5676EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5677
5678static int md_alloc(dev_t dev, char *name)
5679{
	/*
	 * If dev is zero, name is the name of a device to allocate with
	 * an arbitrary minor number.  It will be "md_???".
	 * If dev is non-zero it must be a device number with a MAJOR of
	 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
	 * the device is being created by opening a node in /dev.
	 * If "name" is not NULL, the device is being created by
	 * writing to /sys/module/md_mod/parameters/new_array.
	 */
5689 static DEFINE_MUTEX(disks_mutex);
5690 struct mddev *mddev;
5691 struct gendisk *disk;
5692 int partitioned;
5693 int shift;
5694 int unit;
	int error;
5696
	/*
	 * Wait for any previous instance of this device to be completely
	 * removed (mddev_delayed_delete).
	 */
5701 flush_workqueue(md_misc_wq);
5702
5703 mutex_lock(&disks_mutex);
5704 mddev = mddev_alloc(dev);
5705 if (IS_ERR(mddev)) {
5706 mutex_unlock(&disks_mutex);
5707 return PTR_ERR(mddev);
5708 }
5709
5710 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5711 shift = partitioned ? MdpMinorShift : 0;
5712 unit = MINOR(mddev->unit) >> shift;
5713
5714 if (name && !dev) {
		/* Need to ensure that 'name' is not a duplicate.
		 */
5717 struct mddev *mddev2;
5718 spin_lock(&all_mddevs_lock);
5719
5720 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5721 if (mddev2->gendisk &&
5722 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5723 spin_unlock(&all_mddevs_lock);
5724 error = -EEXIST;
5725 goto abort;
5726 }
5727 spin_unlock(&all_mddevs_lock);
5728 }
5729 if (name && dev)
		/*
		 * Creating /dev/md/... instead of /dev/mdX
		 */
5733 mddev->hold_active = UNTIL_STOP;
5734
5735 error = -ENOMEM;
5736 mddev->queue = blk_alloc_queue_rh(md_make_request, NUMA_NO_NODE);
5737 if (!mddev->queue)
5738 goto abort;
5739
5740 blk_set_stacking_limits(&mddev->queue->limits);
5741
5742 disk = alloc_disk(1 << shift);
5743 if (!disk) {
5744 blk_cleanup_queue(mddev->queue);
5745 mddev->queue = NULL;
5746 goto abort;
5747 }
5748 disk->major = MAJOR(mddev->unit);
5749 disk->first_minor = unit << shift;
5750 if (name)
5751 strcpy(disk->disk_name, name);
5752 else if (partitioned)
5753 sprintf(disk->disk_name, "md_d%d", unit);
5754 else
5755 sprintf(disk->disk_name, "md%d", unit);
5756 disk->fops = &md_fops;
5757 disk->private_data = mddev;
5758 disk->queue = mddev->queue;
5759 blk_queue_write_cache(mddev->queue, true, true);

	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
5764 disk->flags |= GENHD_FL_EXT_DEVT;
5765 mddev->gendisk = disk;

	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
5769 mutex_lock(&mddev->open_mutex);
5770 add_disk(disk);
5771
5772 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5773 if (error) {
		/* This isn't possible, but as kobject_add is marked
		 * __must_check, we must do something with the result
		 */
5777 pr_debug("md: cannot register %s/md - name in use\n",
5778 disk->disk_name);
5779 error = 0;
5780 }
5781 if (mddev->kobj.sd &&
5782 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5783 pr_debug("pointless warning\n");
5784 mutex_unlock(&mddev->open_mutex);
5785 abort:
5786 mutex_unlock(&disks_mutex);
5787 if (!error && mddev->kobj.sd) {
5788 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5789 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5790 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5791 }
5792 mddev_put(mddev);
5793 return error;
5794}
5795
5796static void md_probe(dev_t dev)
5797{
5798 if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
5799 return;
5800 if (create_on_open)
5801 md_alloc(dev, NULL);
5802}
5803
5804static int add_named_array(const char *val, const struct kernel_param *kp)
5805{
	/*
	 * val must be "md_*" or "mdNNN".
	 * For "md_*" we allocate an array with a large free minor number, and
	 * set the name to val.  val must not already be an active name.
	 * For "mdNNN" we allocate an array with the minor number NNN
	 * which must not already be in use.
	 */
5813 int len = strlen(val);
5814 char buf[DISK_NAME_LEN];
5815 unsigned long devnum;
5816
5817 while (len && val[len-1] == '\n')
5818 len--;
5819 if (len >= DISK_NAME_LEN)
5820 return -E2BIG;
5821 strlcpy(buf, val, len+1);
5822 if (strncmp(buf, "md_", 3) == 0)
5823 return md_alloc(0, buf);
5824 if (strncmp(buf, "md", 2) == 0 &&
5825 isdigit(buf[2]) &&
5826 kstrtoul(buf+2, 10, &devnum) == 0 &&
5827 devnum <= MINORMASK)
5828 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5829
5830 return -EINVAL;
5831}
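/*
 * Illustrative usage of the new_array module parameter (array names are
 * hypothetical): this is how userspace asks for a named or numbered
 * array node before first open:
 *
 *   echo md_home > /sys/module/md_mod/parameters/new_array  # /dev/md_home
 *   echo md127   > /sys/module/md_mod/parameters/new_array  # /dev/md127
 */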
5832
5833static void md_safemode_timeout(struct timer_list *t)
5834{
5835 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5836
5837 mddev->safemode = 1;
5838 if (mddev->external)
5839 sysfs_notify_dirent_safe(mddev->sysfs_state);
5840
5841 md_wakeup_thread(mddev->thread);
5842}
5843
5844static int start_dirty_degraded;
5845
5846int md_run(struct mddev *mddev)
5847{
5848 int err;
5849 struct md_rdev *rdev;
5850 struct md_personality *pers;
5851
5852 if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
5854 return -EINVAL;
5855
5856 if (mddev->pers)
5857 return -EBUSY;
5858
5859 if (mddev->sysfs_active)
5860 return -EBUSY;
5861
	/*
	 * Analyze all RAID superblock(s)
	 */
5865 if (!mddev->raid_disks) {
5866 if (!mddev->persistent)
5867 return -EINVAL;
5868 err = analyze_sbs(mddev);
5869 if (err)
5870 return -EINVAL;
5871 }
5872
5873 if (mddev->level != LEVEL_NONE)
5874 request_module("md-level-%d", mddev->level);
5875 else if (mddev->clevel[0])
5876 request_module("md-%s", mddev->clevel);
5877
	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
5883 mddev->has_superblocks = false;
5884 rdev_for_each(rdev, mddev) {
5885 if (test_bit(Faulty, &rdev->flags))
5886 continue;
5887 sync_blockdev(rdev->bdev);
5888 invalidate_bdev(rdev->bdev);
5889 if (mddev->ro != 1 && rdev_read_only(rdev)) {
5890 mddev->ro = 1;
5891 if (mddev->gendisk)
5892 set_disk_ro(mddev->gendisk, 1);
5893 }
5894
5895 if (rdev->sb_page)
5896 mddev->has_superblocks = true;
5897
		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
5902 if (rdev->meta_bdev) {
			/* Nothing to check */;
5904 } else if (rdev->data_offset < rdev->sb_start) {
5905 if (mddev->dev_sectors &&
5906 rdev->data_offset + mddev->dev_sectors
5907 > rdev->sb_start) {
5908 pr_warn("md: %s: data overlaps metadata\n",
5909 mdname(mddev));
5910 return -EINVAL;
5911 }
5912 } else {
5913 if (rdev->sb_start + rdev->sb_size/512
5914 > rdev->data_offset) {
5915 pr_warn("md: %s: metadata overlaps data\n",
5916 mdname(mddev));
5917 return -EINVAL;
5918 }
5919 }
5920 sysfs_notify_dirent_safe(rdev->sysfs_state);
5921 }
5922
5923 if (!bioset_initialized(&mddev->bio_set)) {
5924 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5925 if (err)
5926 return err;
5927 }
5928 if (!bioset_initialized(&mddev->sync_set)) {
5929 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5930 if (err)
5931 return err;
5932 }
5933
5934 spin_lock(&pers_lock);
5935 pers = find_pers(mddev->level, mddev->clevel);
5936 if (!pers || !try_module_get(pers->owner)) {
5937 spin_unlock(&pers_lock);
5938 if (mddev->level != LEVEL_NONE)
5939 pr_warn("md: personality for level %d is not loaded!\n",
5940 mddev->level);
5941 else
5942 pr_warn("md: personality for level %s is not loaded!\n",
5943 mddev->clevel);
5944 err = -EINVAL;
5945 goto abort;
5946 }
5947 spin_unlock(&pers_lock);
5948 if (mddev->level != pers->level) {
5949 mddev->level = pers->level;
5950 mddev->new_level = pers->level;
5951 }
5952 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5953
5954 if (mddev->reshape_position != MaxSector &&
5955 pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
5957 module_put(pers->owner);
5958 err = -EINVAL;
5959 goto abort;
5960 }
5961
5962 if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
5966 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5967 struct md_rdev *rdev2;
5968 int warned = 0;
5969
5970 rdev_for_each(rdev, mddev)
5971 rdev_for_each(rdev2, mddev) {
5972 if (rdev < rdev2 &&
5973 rdev->bdev->bd_disk ==
5974 rdev2->bdev->bd_disk) {
5975 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5976 mdname(mddev),
5977 bdevname(rdev->bdev,b),
5978 bdevname(rdev2->bdev,b2));
5979 warned = 1;
5980 }
5981 }
5982
5983 if (warned)
5984 pr_warn("True protection against single-disk failure might be compromised.\n");
5985 }
5986
5987 mddev->recovery = 0;
5988
5989 mddev->resync_max_sectors = mddev->dev_sectors;
5990
5991 mddev->ok_start_degraded = start_dirty_degraded;
5992
5993 if (start_readonly && mddev->ro == 0)
5994 mddev->ro = 2;
5995
5996 err = pers->run(mddev);
5997 if (err)
5998 pr_warn("md: pers->run() failed ...\n");
5999 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
6000 WARN_ONCE(!mddev->external_size,
6001 "%s: default size too small, but 'external_size' not in effect?\n",
6002 __func__);
6003 pr_warn("md: invalid array_size %llu > default size %llu\n",
6004 (unsigned long long)mddev->array_sectors / 2,
6005 (unsigned long long)pers->size(mddev, 0, 0) / 2);
6006 err = -EINVAL;
6007 }
6008 if (err == 0 && pers->sync_request &&
6009 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
6010 struct bitmap *bitmap;
6011
6012 bitmap = md_bitmap_create(mddev, -1);
6013 if (IS_ERR(bitmap)) {
6014 err = PTR_ERR(bitmap);
6015 pr_warn("%s: failed to create bitmap (%d)\n",
6016 mdname(mddev), err);
6017 } else
6018 mddev->bitmap = bitmap;
6019
6020 }
6021 if (err)
6022 goto bitmap_abort;
6023
6024 if (mddev->bitmap_info.max_write_behind > 0) {
6025 bool create_pool = false;
6026
6027 rdev_for_each(rdev, mddev) {
6028 if (test_bit(WriteMostly, &rdev->flags) &&
6029 rdev_init_serial(rdev))
6030 create_pool = true;
6031 }
6032 if (create_pool && mddev->serial_info_pool == NULL) {
6033 mddev->serial_info_pool =
6034 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
6035 sizeof(struct serial_info));
6036 if (!mddev->serial_info_pool) {
6037 err = -ENOMEM;
6038 goto bitmap_abort;
6039 }
6040 }
6041 }
6042
6043 if (mddev->queue) {
6044 bool nonrot = true;
6045
6046 rdev_for_each(rdev, mddev) {
6047 if (rdev->raid_disk >= 0 &&
6048 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
6049 nonrot = false;
6050 break;
6051 }
6052 }
6053 if (mddev->degraded)
6054 nonrot = false;
6055 if (nonrot)
6056 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
6057 else
6058 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
6059 mddev->queue->backing_dev_info->congested_data = mddev;
6060 mddev->queue->backing_dev_info->congested_fn = md_congested;
6061 }
6062 if (pers->sync_request) {
6063 if (mddev->kobj.sd &&
6064 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6065 pr_warn("md: cannot register extra attributes for %s\n",
6066 mdname(mddev));
6067 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6068 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6069 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6070 } else if (mddev->ro == 2)
6071 mddev->ro = 0;
6072
6073 atomic_set(&mddev->max_corr_read_errors,
6074 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
6075 mddev->safemode = 0;
6076 if (mddev_is_clustered(mddev))
6077 mddev->safemode_delay = 0;
6078 else
6079 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6080 mddev->in_sync = 1;
6081 smp_wmb();
6082 spin_lock(&mddev->lock);
6083 mddev->pers = pers;
6084 spin_unlock(&mddev->lock);
6085 rdev_for_each(rdev, mddev)
6086 if (rdev->raid_disk >= 0)
6087 sysfs_link_rdev(mddev, rdev);
6088
6089 if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
6093 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6094 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6095
6096 if (mddev->sb_flags)
6097 md_update_sb(mddev, 0);
6098
6099 md_new_event(mddev);
6100 return 0;
6101
6102bitmap_abort:
6103 mddev_detach(mddev);
6104 if (mddev->private)
6105 pers->free(mddev, mddev->private);
6106 mddev->private = NULL;
6107 module_put(pers->owner);
6108 md_bitmap_destroy(mddev);
6109abort:
6110 bioset_exit(&mddev->bio_set);
6111 bioset_exit(&mddev->sync_set);
6112 return err;
6113}
6114EXPORT_SYMBOL_GPL(md_run);
6115
6116static int do_md_run(struct mddev *mddev)
6117{
6118 int err;
6119
6120 set_bit(MD_NOT_READY, &mddev->flags);
6121 err = md_run(mddev);
6122 if (err)
6123 goto out;
6124 err = md_bitmap_load(mddev);
6125 if (err) {
6126 md_bitmap_destroy(mddev);
6127 goto out;
6128 }
6129
6130 if (mddev_is_clustered(mddev))
6131 md_allow_write(mddev);
6132
	/* run start up tasks that require md_thread */
6134 md_start(mddev);
6135
6136 md_wakeup_thread(mddev->thread);
6137 md_wakeup_thread(mddev->sync_thread);
6138
6139 set_capacity(mddev->gendisk, mddev->array_sectors);
6140 revalidate_disk_size(mddev->gendisk, true);
6141 clear_bit(MD_NOT_READY, &mddev->flags);
6142 mddev->changed = 1;
6143 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6144 sysfs_notify_dirent_safe(mddev->sysfs_state);
6145 sysfs_notify_dirent_safe(mddev->sysfs_action);
6146 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6147out:
6148 clear_bit(MD_NOT_READY, &mddev->flags);
6149 return err;
6150}
6151
6152int md_start(struct mddev *mddev)
6153{
6154 int ret = 0;
6155
6156 if (mddev->pers->start) {
6157 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6158 md_wakeup_thread(mddev->thread);
6159 ret = mddev->pers->start(mddev);
6160 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6161 md_wakeup_thread(mddev->sync_thread);
6162 }
6163 return ret;
6164}
6165EXPORT_SYMBOL_GPL(md_start);
6166
6167static int restart_array(struct mddev *mddev)
6168{
6169 struct gendisk *disk = mddev->gendisk;
6170 struct md_rdev *rdev;
6171 bool has_journal = false;
6172 bool has_readonly = false;
6173
	/* Complain if it has no devices */
6175 if (list_empty(&mddev->disks))
6176 return -ENXIO;
6177 if (!mddev->pers)
6178 return -EINVAL;
6179 if (!mddev->ro)
6180 return -EBUSY;
6181
6182 rcu_read_lock();
6183 rdev_for_each_rcu(rdev, mddev) {
6184 if (test_bit(Journal, &rdev->flags) &&
6185 !test_bit(Faulty, &rdev->flags))
6186 has_journal = true;
6187 if (bdev_read_only(rdev->bdev))
6188 has_readonly = true;
6189 }
6190 rcu_read_unlock();
6191 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
		/* Don't restart rw with journal missing/faulty */
6193 return -EINVAL;
6194 if (has_readonly)
6195 return -EROFS;
6196
6197 mddev->safemode = 0;
6198 mddev->ro = 0;
6199 set_disk_ro(disk, 0);
6200 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6201
6202 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6203 md_wakeup_thread(mddev->thread);
6204 md_wakeup_thread(mddev->sync_thread);
6205 sysfs_notify_dirent_safe(mddev->sysfs_state);
6206 return 0;
6207}
6208
6209static void md_clean(struct mddev *mddev)
6210{
6211 mddev->array_sectors = 0;
6212 mddev->external_size = 0;
6213 mddev->dev_sectors = 0;
6214 mddev->raid_disks = 0;
6215 mddev->recovery_cp = 0;
6216 mddev->resync_min = 0;
6217 mddev->resync_max = MaxSector;
6218 mddev->reshape_position = MaxSector;
6219 mddev->external = 0;
6220 mddev->persistent = 0;
6221 mddev->level = LEVEL_NONE;
6222 mddev->clevel[0] = 0;
6223 mddev->flags = 0;
6224 mddev->sb_flags = 0;
6225 mddev->ro = 0;
6226 mddev->metadata_type[0] = 0;
6227 mddev->chunk_sectors = 0;
6228 mddev->ctime = mddev->utime = 0;
6229 mddev->layout = 0;
6230 mddev->max_disks = 0;
6231 mddev->events = 0;
6232 mddev->can_decrease_events = 0;
6233 mddev->delta_disks = 0;
6234 mddev->reshape_backwards = 0;
6235 mddev->new_level = LEVEL_NONE;
6236 mddev->new_layout = 0;
6237 mddev->new_chunk_sectors = 0;
6238 mddev->curr_resync = 0;
6239 atomic64_set(&mddev->resync_mismatches, 0);
6240 mddev->suspend_lo = mddev->suspend_hi = 0;
6241 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6242 mddev->recovery = 0;
6243 mddev->in_sync = 0;
6244 mddev->changed = 0;
6245 mddev->degraded = 0;
6246 mddev->safemode = 0;
6247 mddev->private = NULL;
6248 mddev->cluster_info = NULL;
6249 mddev->bitmap_info.offset = 0;
6250 mddev->bitmap_info.default_offset = 0;
6251 mddev->bitmap_info.default_space = 0;
6252 mddev->bitmap_info.chunksize = 0;
6253 mddev->bitmap_info.daemon_sleep = 0;
6254 mddev->bitmap_info.max_write_behind = 0;
6255 mddev->bitmap_info.nodes = 0;
6256}
6257
6258static void __md_stop_writes(struct mddev *mddev)
6259{
6260 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6261 if (work_pending(&mddev->del_work))
6262 flush_workqueue(md_misc_wq);
6263 if (mddev->sync_thread) {
6264 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6265 md_reap_sync_thread(mddev);
6266 }
6267
6268 del_timer_sync(&mddev->safemode_timer);
6269
6270 if (mddev->pers && mddev->pers->quiesce) {
6271 mddev->pers->quiesce(mddev, 1);
6272 mddev->pers->quiesce(mddev, 0);
6273 }
6274 md_bitmap_flush(mddev);
6275
6276 if (mddev->ro == 0 &&
6277 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6278 mddev->sb_flags)) {
		/* mark array as shutdown cleanly */
6280 if (!mddev_is_clustered(mddev))
6281 mddev->in_sync = 1;
6282 md_update_sb(mddev, 1);
6283 }
6284
6285 mddev->serialize_policy = 0;
6286 mddev_destroy_serial_pool(mddev, NULL, true);
6287}
6288
6289void md_stop_writes(struct mddev *mddev)
6290{
6291 mddev_lock_nointr(mddev);
6292 __md_stop_writes(mddev);
6293 mddev_unlock(mddev);
6294}
6295EXPORT_SYMBOL_GPL(md_stop_writes);
6296
6297static void mddev_detach(struct mddev *mddev)
6298{
6299 md_bitmap_wait_behind_writes(mddev);
6300 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6301 mddev->pers->quiesce(mddev, 1);
6302 mddev->pers->quiesce(mddev, 0);
6303 }
6304 md_unregister_thread(&mddev->thread);
6305 if (mddev->queue)
6306 blk_sync_queue(mddev->queue);
6307}
6308
6309static void __md_stop(struct mddev *mddev)
6310{
6311 struct md_personality *pers = mddev->pers;
6312 md_bitmap_destroy(mddev);
6313 mddev_detach(mddev);
6314
6315 if (mddev->event_work.func)
6316 flush_workqueue(md_misc_wq);
6317 spin_lock(&mddev->lock);
6318 mddev->pers = NULL;
6319 spin_unlock(&mddev->lock);
6320 pers->free(mddev, mddev->private);
6321 mddev->private = NULL;
6322 if (pers->sync_request && mddev->to_remove == NULL)
6323 mddev->to_remove = &md_redundancy_group;
6324 module_put(pers->owner);
6325 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6326}
6327
6328void md_stop(struct mddev *mddev)
6329{
	/* stop the array and free any attached data structures.
	 * This is called from dm-raid
	 */
6333 __md_stop(mddev);
6334 bioset_exit(&mddev->bio_set);
6335 bioset_exit(&mddev->sync_set);
6336}
6337
6338EXPORT_SYMBOL_GPL(md_stop);
6339
6340static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6341{
6342 int err = 0;
6343 int did_freeze = 0;
6344
6345 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6346 did_freeze = 1;
6347 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6348 md_wakeup_thread(mddev->thread);
6349 }
6350 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6351 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6352 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6355 wake_up_process(mddev->sync_thread->tsk);
6356
6357 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6358 return -EBUSY;
6359 mddev_unlock(mddev);
6360 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6361 &mddev->recovery));
6362 wait_event(mddev->sb_wait,
6363 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6364 mddev_lock_nointr(mddev);
6365
6366 mutex_lock(&mddev->open_mutex);
6367 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6368 mddev->sync_thread ||
6369 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6370 pr_warn("md: %s still in use.\n",mdname(mddev));
6371 if (did_freeze) {
6372 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6373 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6374 md_wakeup_thread(mddev->thread);
6375 }
6376 err = -EBUSY;
6377 goto out;
6378 }
6379 if (mddev->pers) {
6380 __md_stop_writes(mddev);
6381
6382 err = -ENXIO;
6383 if (mddev->ro==1)
6384 goto out;
6385 mddev->ro = 1;
6386 set_disk_ro(mddev->gendisk, 1);
6387 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6388 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6389 md_wakeup_thread(mddev->thread);
6390 sysfs_notify_dirent_safe(mddev->sysfs_state);
6391 err = 0;
6392 }
6393out:
6394 mutex_unlock(&mddev->open_mutex);
6395 return err;
6396}
6397
/* mode:
 *   0 - completely stop and dis-assemble array
 *   2 - stop but do not disassemble array
 */
6402static int do_md_stop(struct mddev *mddev, int mode,
6403 struct block_device *bdev)
6404{
6405 struct gendisk *disk = mddev->gendisk;
6406 struct md_rdev *rdev;
6407 int did_freeze = 0;
6408
6409 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6410 did_freeze = 1;
6411 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6412 md_wakeup_thread(mddev->thread);
6413 }
6414 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6415 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6416 if (mddev->sync_thread)
		/* Thread might be blocked waiting for metadata update
		 * which will now never happen */
6419 wake_up_process(mddev->sync_thread->tsk);
6420
6421 mddev_unlock(mddev);
6422 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6423 !test_bit(MD_RECOVERY_RUNNING,
6424 &mddev->recovery)));
6425 mddev_lock_nointr(mddev);
6426
6427 mutex_lock(&mddev->open_mutex);
6428 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6429 mddev->sysfs_active ||
6430 mddev->sync_thread ||
6431 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6432 pr_warn("md: %s still in use.\n",mdname(mddev));
6433 mutex_unlock(&mddev->open_mutex);
6434 if (did_freeze) {
6435 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6436 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6437 md_wakeup_thread(mddev->thread);
6438 }
6439 return -EBUSY;
6440 }
6441 if (mddev->pers) {
6442 if (mddev->ro)
6443 set_disk_ro(disk, 0);
6444
6445 __md_stop_writes(mddev);
6446 __md_stop(mddev);
6447 mddev->queue->backing_dev_info->congested_fn = NULL;
6448
		/* tell userspace to handle 'inactive' */
6450 sysfs_notify_dirent_safe(mddev->sysfs_state);
6451
6452 rdev_for_each(rdev, mddev)
6453 if (rdev->raid_disk >= 0)
6454 sysfs_unlink_rdev(mddev, rdev);
6455
6456 set_capacity(disk, 0);
6457 mutex_unlock(&mddev->open_mutex);
6458 mddev->changed = 1;
6459 revalidate_disk_size(disk, true);
6460
6461 if (mddev->ro)
6462 mddev->ro = 0;
6463 } else
6464 mutex_unlock(&mddev->open_mutex);
6465
	/*
	 * Free resources if final stop
	 */
6468 if (mode == 0) {
6469 pr_info("md: %s stopped.\n", mdname(mddev));
6470
6471 if (mddev->bitmap_info.file) {
6472 struct file *f = mddev->bitmap_info.file;
6473 spin_lock(&mddev->lock);
6474 mddev->bitmap_info.file = NULL;
6475 spin_unlock(&mddev->lock);
6476 fput(f);
6477 }
6478 mddev->bitmap_info.offset = 0;
6479
6480 export_array(mddev);
6481
6482 md_clean(mddev);
6483 if (mddev->hold_active == UNTIL_STOP)
6484 mddev->hold_active = 0;
6485 }
6486 md_new_event(mddev);
6487 sysfs_notify_dirent_safe(mddev->sysfs_state);
6488 return 0;
6489}
6490
6491#ifndef MODULE
6492static void autorun_array(struct mddev *mddev)
6493{
6494 struct md_rdev *rdev;
6495 int err;
6496
6497 if (list_empty(&mddev->disks))
6498 return;
6499
6500 pr_info("md: running: ");
6501
6502 rdev_for_each(rdev, mddev) {
6503 char b[BDEVNAME_SIZE];
6504 pr_cont("<%s>", bdevname(rdev->bdev,b));
6505 }
6506 pr_cont("\n");
6507
6508 err = do_md_run(mddev);
6509 if (err) {
6510 pr_warn("md: do_md_run() returned %d\n", err);
6511 do_md_stop(mddev, 0, NULL);
6512 }
6513}
6514
/*
 * lets try to run arrays based on all disks that have arrived
 * until now. (those are in pending_raid_disks)
 *
 * the method: pick the first pending disk, collect all disks with
 * the same UUID, remove all from the pending list and put them into
 * the 'same_array' list. Then order this list based on superblock
 * update time (freshest comes first), kick out 'old' disks and
 * compare superblocks. If everything's fine then run it.
 *
 * If "unit" is allocated, then bump its reference count
 */
6527static void autorun_devices(int part)
6528{
6529 struct md_rdev *rdev0, *rdev, *tmp;
6530 struct mddev *mddev;
6531 char b[BDEVNAME_SIZE];
6532
6533 pr_info("md: autorun ...\n");
6534 while (!list_empty(&pending_raid_disks)) {
6535 int unit;
6536 dev_t dev;
6537 LIST_HEAD(candidates);
6538 rdev0 = list_entry(pending_raid_disks.next,
6539 struct md_rdev, same_set);
6540
6541 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6542 INIT_LIST_HEAD(&candidates);
6543 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6544 if (super_90_load(rdev, rdev0, 0) >= 0) {
6545 pr_debug("md: adding %s ...\n",
6546 bdevname(rdev->bdev,b));
6547 list_move(&rdev->same_set, &candidates);
6548 }
		/*
		 * now we have a set of devices, with all of them having
		 * mostly sane superblocks. It's time to allocate the
		 * mddev.
		 */
6554 if (part) {
6555 dev = MKDEV(mdp_major,
6556 rdev0->preferred_minor << MdpMinorShift);
6557 unit = MINOR(dev) >> MdpMinorShift;
6558 } else {
6559 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6560 unit = MINOR(dev);
6561 }
6562 if (rdev0->preferred_minor != unit) {
6563 pr_warn("md: unit number in %s is bad: %d\n",
6564 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6565 break;
6566 }
6567
6568 md_probe(dev);
6569 mddev = mddev_find(dev);
6570 if (!mddev)
6571 break;
6572
6573 if (mddev_lock(mddev))
6574 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6575 else if (mddev->raid_disks || mddev->major_version
6576 || !list_empty(&mddev->disks)) {
6577 pr_warn("md: %s already running, cannot run %s\n",
6578 mdname(mddev), bdevname(rdev0->bdev,b));
6579 mddev_unlock(mddev);
6580 } else {
6581 pr_debug("md: created %s\n", mdname(mddev));
6582 mddev->persistent = 1;
6583 rdev_for_each_list(rdev, tmp, &candidates) {
6584 list_del_init(&rdev->same_set);
6585 if (bind_rdev_to_array(rdev, mddev))
6586 export_rdev(rdev);
6587 }
6588 autorun_array(mddev);
6589 mddev_unlock(mddev);
6590 }
		/* on success, candidates will be empty, on error
		 * it won't...
		 */
6594 rdev_for_each_list(rdev, tmp, &candidates) {
6595 list_del_init(&rdev->same_set);
6596 export_rdev(rdev);
6597 }
6598 mddev_put(mddev);
6599 }
6600 pr_info("md: ... autorun DONE.\n");
6601}
6602#endif
6603
6604static int get_version(void __user *arg)
6605{
6606 mdu_version_t ver;
6607
6608 ver.major = MD_MAJOR_VERSION;
6609 ver.minor = MD_MINOR_VERSION;
6610 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6611
6612 if (copy_to_user(arg, &ver, sizeof(ver)))
6613 return -EFAULT;
6614
6615 return 0;
6616}
6617
6618static int get_array_info(struct mddev *mddev, void __user *arg)
6619{
6620 mdu_array_info_t info;
6621 int nr,working,insync,failed,spare;
6622 struct md_rdev *rdev;
6623
6624 nr = working = insync = failed = spare = 0;
6625 rcu_read_lock();
6626 rdev_for_each_rcu(rdev, mddev) {
6627 nr++;
6628 if (test_bit(Faulty, &rdev->flags))
6629 failed++;
6630 else {
6631 working++;
6632 if (test_bit(In_sync, &rdev->flags))
6633 insync++;
6634 else if (test_bit(Journal, &rdev->flags))
				/* TODO: add journal count to md_u.h */
6636 ;
6637 else
6638 spare++;
6639 }
6640 }
6641 rcu_read_unlock();
6642
6643 info.major_version = mddev->major_version;
6644 info.minor_version = mddev->minor_version;
6645 info.patch_version = MD_PATCHLEVEL_VERSION;
6646 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6647 info.level = mddev->level;
6648 info.size = mddev->dev_sectors / 2;
6649 if (info.size != mddev->dev_sectors / 2)
6650 info.size = -1;
6651 info.nr_disks = nr;
6652 info.raid_disks = mddev->raid_disks;
6653 info.md_minor = mddev->md_minor;
6654 info.not_persistent= !mddev->persistent;
6655
6656 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6657 info.state = 0;
6658 if (mddev->in_sync)
6659 info.state = (1<<MD_SB_CLEAN);
6660 if (mddev->bitmap && mddev->bitmap_info.offset)
6661 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6662 if (mddev_is_clustered(mddev))
6663 info.state |= (1<<MD_SB_CLUSTERED);
6664 info.active_disks = insync;
6665 info.working_disks = working;
6666 info.failed_disks = failed;
6667 info.spare_disks = spare;
6668
6669 info.layout = mddev->layout;
6670 info.chunk_size = mddev->chunk_sectors << 9;
6671
6672 if (copy_to_user(arg, &info, sizeof(info)))
6673 return -EFAULT;
6674
6675 return 0;
6676}
6677
6678static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6679{
6680 mdu_bitmap_file_t *file = NULL;
6681 char *ptr;
6682 int err;
6683
6684 file = kzalloc(sizeof(*file), GFP_NOIO);
6685 if (!file)
6686 return -ENOMEM;
6687
6688 err = 0;
6689 spin_lock(&mddev->lock);
6690
6691 if (mddev->bitmap_info.file) {
6692 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6693 sizeof(file->pathname));
6694 if (IS_ERR(ptr))
6695 err = PTR_ERR(ptr);
6696 else
6697 memmove(file->pathname, ptr,
6698 sizeof(file->pathname)-(ptr-file->pathname));
6699 }
6700 spin_unlock(&mddev->lock);
6701
6702 if (err == 0 &&
6703 copy_to_user(arg, file, sizeof(*file)))
6704 err = -EFAULT;
6705
6706 kfree(file);
6707 return err;
6708}
6709
6710static int get_disk_info(struct mddev *mddev, void __user * arg)
6711{
6712 mdu_disk_info_t info;
6713 struct md_rdev *rdev;
6714
6715 if (copy_from_user(&info, arg, sizeof(info)))
6716 return -EFAULT;
6717
6718 rcu_read_lock();
6719 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6720 if (rdev) {
6721 info.major = MAJOR(rdev->bdev->bd_dev);
6722 info.minor = MINOR(rdev->bdev->bd_dev);
6723 info.raid_disk = rdev->raid_disk;
6724 info.state = 0;
6725 if (test_bit(Faulty, &rdev->flags))
6726 info.state |= (1<<MD_DISK_FAULTY);
6727 else if (test_bit(In_sync, &rdev->flags)) {
6728 info.state |= (1<<MD_DISK_ACTIVE);
6729 info.state |= (1<<MD_DISK_SYNC);
6730 }
6731 if (test_bit(Journal, &rdev->flags))
6732 info.state |= (1<<MD_DISK_JOURNAL);
6733 if (test_bit(WriteMostly, &rdev->flags))
6734 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6735 if (test_bit(FailFast, &rdev->flags))
6736 info.state |= (1<<MD_DISK_FAILFAST);
6737 } else {
6738 info.major = info.minor = 0;
6739 info.raid_disk = -1;
6740 info.state = (1<<MD_DISK_REMOVED);
6741 }
6742 rcu_read_unlock();
6743
6744 if (copy_to_user(arg, &info, sizeof(info)))
6745 return -EFAULT;
6746
6747 return 0;
6748}
6749
6750static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
6751{
6752 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6753 struct md_rdev *rdev;
6754 dev_t dev = MKDEV(info->major,info->minor);
6755
6756 if (mddev_is_clustered(mddev) &&
6757 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6758 pr_warn("%s: Cannot add to clustered mddev.\n",
6759 mdname(mddev));
6760 return -EINVAL;
6761 }
6762
6763 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6764 return -EOVERFLOW;
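	/* MAJOR()/MINOR() of the freshly built dev_t only disagree with
	 * info->major/minor when the values didn't fit, hence -EOVERFLOW */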
6765
6766 if (!mddev->raid_disks) {
6767 int err;
		/* expecting a device which has a superblock */
6769 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6770 if (IS_ERR(rdev)) {
6771 pr_warn("md: md_import_device returned %ld\n",
6772 PTR_ERR(rdev));
6773 return PTR_ERR(rdev);
6774 }
6775 if (!list_empty(&mddev->disks)) {
6776 struct md_rdev *rdev0
6777 = list_entry(mddev->disks.next,
6778 struct md_rdev, same_set);
6779 err = super_types[mddev->major_version]
6780 .load_super(rdev, rdev0, mddev->minor_version);
6781 if (err < 0) {
6782 pr_warn("md: %s has different UUID to %s\n",
6783 bdevname(rdev->bdev,b),
6784 bdevname(rdev0->bdev,b2));
6785 export_rdev(rdev);
6786 return -EINVAL;
6787 }
6788 }
6789 err = bind_rdev_to_array(rdev, mddev);
6790 if (err)
6791 export_rdev(rdev);
6792 return err;
6793 }
6794
	/*
	 * add_new_disk can be used once the array is assembled
	 * to add "hot spares".  They must already have a
	 * superblock written.
	 */
6800 if (mddev->pers) {
6801 int err;
6802 if (!mddev->pers->hot_add_disk) {
6803 pr_warn("%s: personality does not support diskops!\n",
6804 mdname(mddev));
6805 return -EINVAL;
6806 }
6807 if (mddev->persistent)
6808 rdev = md_import_device(dev, mddev->major_version,
6809 mddev->minor_version);
6810 else
6811 rdev = md_import_device(dev, -1, -1);
6812 if (IS_ERR(rdev)) {
6813 pr_warn("md: md_import_device returned %ld\n",
6814 PTR_ERR(rdev));
6815 return PTR_ERR(rdev);
6816 }
6817
6818 if (!mddev->persistent) {
6819 if (info->state & (1<<MD_DISK_SYNC) &&
6820 info->raid_disk < mddev->raid_disks) {
6821 rdev->raid_disk = info->raid_disk;
6822 set_bit(In_sync, &rdev->flags);
6823 clear_bit(Bitmap_sync, &rdev->flags);
6824 } else
6825 rdev->raid_disk = -1;
6826 rdev->saved_raid_disk = rdev->raid_disk;
6827 } else
6828 super_types[mddev->major_version].
6829 validate_super(mddev, rdev);
6830 if ((info->state & (1<<MD_DISK_SYNC)) &&
6831 rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
6835 export_rdev(rdev);
6836 return -EINVAL;
6837 }
6838
6839 clear_bit(In_sync, &rdev->flags);
6840 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6841 set_bit(WriteMostly, &rdev->flags);
6842 else
6843 clear_bit(WriteMostly, &rdev->flags);
6844 if (info->state & (1<<MD_DISK_FAILFAST))
6845 set_bit(FailFast, &rdev->flags);
6846 else
6847 clear_bit(FailFast, &rdev->flags);
6848
6849 if (info->state & (1<<MD_DISK_JOURNAL)) {
6850 struct md_rdev *rdev2;
6851 bool has_journal = false;
6852
			/* make sure no existing journal disk */
6854 rdev_for_each(rdev2, mddev) {
6855 if (test_bit(Journal, &rdev2->flags)) {
6856 has_journal = true;
6857 break;
6858 }
6859 }
6860 if (has_journal || mddev->bitmap) {
6861 export_rdev(rdev);
6862 return -EBUSY;
6863 }
6864 set_bit(Journal, &rdev->flags);
6865 }
6866
		/*
		 * check whether the device shows up in other nodes
		 */
6869 if (mddev_is_clustered(mddev)) {
6870 if (info->state & (1 << MD_DISK_CANDIDATE))
6871 set_bit(Candidate, &rdev->flags);
6872 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
				/* --add initiated by this node */
6874 err = md_cluster_ops->add_new_disk(mddev, rdev);
6875 if (err) {
6876 export_rdev(rdev);
6877 return err;
6878 }
6879 }
6880 }
6881
6882 rdev->raid_disk = -1;
6883 err = bind_rdev_to_array(rdev, mddev);
6884
6885 if (err)
6886 export_rdev(rdev);
6887
6888 if (mddev_is_clustered(mddev)) {
6889 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6890 if (!err) {
6891 err = md_cluster_ops->new_disk_ack(mddev,
6892 err == 0);
6893 if (err)
6894 md_kick_rdev_from_array(rdev);
6895 }
6896 } else {
6897 if (err)
6898 md_cluster_ops->add_new_disk_cancel(mddev);
6899 else
6900 err = add_bound_rdev(rdev);
6901 }
6902
6903 } else if (!err)
6904 err = add_bound_rdev(rdev);
6905
6906 return err;
6907 }
6908
	/* otherwise, add_new_disk is only allowed
	 * for major_version==0 superblocks
	 */
6912 if (mddev->major_version != 0) {
6913 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6914 return -EINVAL;
6915 }
6916
6917 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6918 int err;
6919 rdev = md_import_device(dev, -1, 0);
6920 if (IS_ERR(rdev)) {
6921 pr_warn("md: error, md_import_device() returned %ld\n",
6922 PTR_ERR(rdev));
6923 return PTR_ERR(rdev);
6924 }
6925 rdev->desc_nr = info->number;
6926 if (info->raid_disk < mddev->raid_disks)
6927 rdev->raid_disk = info->raid_disk;
6928 else
6929 rdev->raid_disk = -1;
6930
6931 if (rdev->raid_disk < mddev->raid_disks)
6932 if (info->state & (1<<MD_DISK_SYNC))
6933 set_bit(In_sync, &rdev->flags);
6934
6935 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6936 set_bit(WriteMostly, &rdev->flags);
6937 if (info->state & (1<<MD_DISK_FAILFAST))
6938 set_bit(FailFast, &rdev->flags);
6939
6940 if (!mddev->persistent) {
6941 pr_debug("md: nonpersistent superblock ...\n");
6942 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6943 } else
6944 rdev->sb_start = calc_dev_sboffset(rdev);
6945 rdev->sectors = rdev->sb_start;
6946
6947 err = bind_rdev_to_array(rdev, mddev);
6948 if (err) {
6949 export_rdev(rdev);
6950 return err;
6951 }
6952 }
6953
6954 return 0;
6955}
6956
6957static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6958{
6959 char b[BDEVNAME_SIZE];
6960 struct md_rdev *rdev;
6961
6962 if (!mddev->pers)
6963 return -ENODEV;
6964
6965 rdev = find_rdev(mddev, dev);
6966 if (!rdev)
6967 return -ENXIO;
6968
6969 if (rdev->raid_disk < 0)
6970 goto kick_rdev;
6971
6972 clear_bit(Blocked, &rdev->flags);
6973 remove_and_add_spares(mddev, rdev);
6974
6975 if (rdev->raid_disk >= 0)
6976 goto busy;
6977
6978kick_rdev:
6979 if (mddev_is_clustered(mddev)) {
6980 if (md_cluster_ops->remove_disk(mddev, rdev))
6981 goto busy;
6982 }
6983
6984 md_kick_rdev_from_array(rdev);
6985 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6986 if (mddev->thread)
6987 md_wakeup_thread(mddev->thread);
6988 else
6989 md_update_sb(mddev, 1);
6990 md_new_event(mddev);
6991
6992 return 0;
6993busy:
6994 pr_debug("md: cannot remove active disk %s from %s ...\n",
6995 bdevname(rdev->bdev,b), mdname(mddev));
6996 return -EBUSY;
6997}
6998
6999static int hot_add_disk(struct mddev *mddev, dev_t dev)
7000{
7001 char b[BDEVNAME_SIZE];
7002 int err;
7003 struct md_rdev *rdev;
7004
7005 if (!mddev->pers)
7006 return -ENODEV;
7007
7008 if (mddev->major_version != 0) {
7009 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
7010 mdname(mddev));
7011 return -EINVAL;
7012 }
7013 if (!mddev->pers->hot_add_disk) {
7014 pr_warn("%s: personality does not support diskops!\n",
7015 mdname(mddev));
7016 return -EINVAL;
7017 }
7018
7019 rdev = md_import_device(dev, -1, 0);
7020 if (IS_ERR(rdev)) {
7021 pr_warn("md: error, md_import_device() returned %ld\n",
7022 PTR_ERR(rdev));
7023 return -EINVAL;
7024 }
7025
7026 if (mddev->persistent)
7027 rdev->sb_start = calc_dev_sboffset(rdev);
7028 else
7029 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
7030
7031 rdev->sectors = rdev->sb_start;
7032
7033 if (test_bit(Faulty, &rdev->flags)) {
7034 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
7035 bdevname(rdev->bdev,b), mdname(mddev));
7036 err = -EINVAL;
7037 goto abort_export;
7038 }
7039
7040 clear_bit(In_sync, &rdev->flags);
7041 rdev->desc_nr = -1;
7042 rdev->saved_raid_disk = -1;
7043 err = bind_rdev_to_array(rdev, mddev);
7044 if (err)
7045 goto abort_export;
7046
	/*
	 * The rest should better be atomic, we can have disk failures
	 * noticed in interrupt contexts ...
	 */
7052 rdev->raid_disk = -1;
7053
7054 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7055 if (!mddev->thread)
7056 md_update_sb(mddev, 1);
7057
	/*
	 * Kick recovery, maybe this spare has to be added to the
	 * array immediately.
	 */
7061 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7062 md_wakeup_thread(mddev->thread);
7063 md_new_event(mddev);
7064 return 0;
7065
7066abort_export:
7067 export_rdev(rdev);
7068 return err;
7069}
7070
7071static int set_bitmap_file(struct mddev *mddev, int fd)
7072{
7073 int err = 0;
7074
7075 if (mddev->pers) {
7076 if (!mddev->pers->quiesce || !mddev->thread)
7077 return -EBUSY;
7078 if (mddev->recovery || mddev->sync_thread)
7079 return -EBUSY;
		/* we should be able to change the bitmap.. */
7081 }
7082
7083 if (fd >= 0) {
7084 struct inode *inode;
7085 struct file *f;
7086
7087 if (mddev->bitmap || mddev->bitmap_info.file)
			return -EEXIST; /* cannot add when bitmap is present */
7089 f = fget(fd);
7090
7091 if (f == NULL) {
7092 pr_warn("%s: error: failed to get bitmap file\n",
7093 mdname(mddev));
7094 return -EBADF;
7095 }
7096
7097 inode = f->f_mapping->host;
7098 if (!S_ISREG(inode->i_mode)) {
7099 pr_warn("%s: error: bitmap file must be a regular file\n",
7100 mdname(mddev));
7101 err = -EBADF;
7102 } else if (!(f->f_mode & FMODE_WRITE)) {
7103 pr_warn("%s: error: bitmap file must open for write\n",
7104 mdname(mddev));
7105 err = -EBADF;
7106 } else if (atomic_read(&inode->i_writecount) != 1) {
7107 pr_warn("%s: error: bitmap file is already in use\n",
7108 mdname(mddev));
7109 err = -EBUSY;
7110 }
7111 if (err) {
7112 fput(f);
7113 return err;
7114 }
7115 mddev->bitmap_info.file = f;
7116 mddev->bitmap_info.offset = 0;
7117 } else if (mddev->bitmap == NULL)
		return -ENOENT; /* cannot remove what isn't there */
7119 err = 0;
7120 if (mddev->pers) {
7121 if (fd >= 0) {
7122 struct bitmap *bitmap;
7123
7124 bitmap = md_bitmap_create(mddev, -1);
7125 mddev_suspend(mddev);
7126 if (!IS_ERR(bitmap)) {
7127 mddev->bitmap = bitmap;
7128 err = md_bitmap_load(mddev);
7129 } else
7130 err = PTR_ERR(bitmap);
7131 if (err) {
7132 md_bitmap_destroy(mddev);
7133 fd = -1;
7134 }
7135 mddev_resume(mddev);
7136 } else if (fd < 0) {
7137 mddev_suspend(mddev);
7138 md_bitmap_destroy(mddev);
7139 mddev_resume(mddev);
7140 }
7141 }
7142 if (fd < 0) {
7143 struct file *f = mddev->bitmap_info.file;
7144 if (f) {
7145 spin_lock(&mddev->lock);
7146 mddev->bitmap_info.file = NULL;
7147 spin_unlock(&mddev->lock);
7148 fput(f);
7149 }
7150 }
7151
7152 return err;
7153}
7154
/*
 * set_array_info is used two different ways.
 * The original usage is when creating a new array.
 * In this usage, raid_disks is > 0 and it together with
 *  level, size, not_persistent, layout, chunksize determine the
 *  shape of the array.  This will always create an array with
 *  a type-0.90.0 superblock.
 * The newer usage is when assembling an array.
 *  In this case raid_disks will be 0, and the major_version field
 *  is used to determine which style super-blocks are to be found
 *  on the devices.  The minor and patch _version numbers are also
 *  kept in case the super_block handler wishes to interpret them.
 */
7168static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
7169{
7170
7171 if (info->raid_disks == 0) {
		/* just setting version number for superblock loading */
7173 if (info->major_version < 0 ||
7174 info->major_version >= ARRAY_SIZE(super_types) ||
7175 super_types[info->major_version].name == NULL) {
			/* maybe try to auto-load a module? */
7177 pr_warn("md: superblock version %d not known\n",
7178 info->major_version);
7179 return -EINVAL;
7180 }
7181 mddev->major_version = info->major_version;
7182 mddev->minor_version = info->minor_version;
7183 mddev->patch_version = info->patch_version;
7184 mddev->persistent = !info->not_persistent;
		/* ensure mddev_put doesn't delete this now that there
		 * is some minimal configuration.
		 */
7188 mddev->ctime = ktime_get_real_seconds();
7189 return 0;
7190 }
7191 mddev->major_version = MD_MAJOR_VERSION;
7192 mddev->minor_version = MD_MINOR_VERSION;
7193 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7194 mddev->ctime = ktime_get_real_seconds();
7195
7196 mddev->level = info->level;
7197 mddev->clevel[0] = 0;
7198 mddev->dev_sectors = 2 * (sector_t)info->size;
7199 mddev->raid_disks = info->raid_disks;
	/* don't set md_minor, it is determined by which /dev/md* was
	 * opened
	 */
7203 if (info->state & (1<<MD_SB_CLEAN))
7204 mddev->recovery_cp = MaxSector;
7205 else
7206 mddev->recovery_cp = 0;
7207 mddev->persistent = ! info->not_persistent;
7208 mddev->external = 0;
7209
7210 mddev->layout = info->layout;
7211 if (mddev->level == 0)
		/* Cannot trust RAID0 layout info here */
7213 mddev->layout = -1;
7214 mddev->chunk_sectors = info->chunk_size >> 9;
7215
7216 if (mddev->persistent) {
7217 mddev->max_disks = MD_SB_DISKS;
7218 mddev->flags = 0;
7219 mddev->sb_flags = 0;
7220 }
7221 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7222
7223 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7224 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7225 mddev->bitmap_info.offset = 0;
7226
7227 mddev->reshape_position = MaxSector;
7228
	/*
	 * Generate a 128 bit UUID
	 */
7232 get_random_bytes(mddev->uuid, 16);
7233
7234 mddev->new_level = mddev->level;
7235 mddev->new_chunk_sectors = mddev->chunk_sectors;
7236 mddev->new_layout = mddev->layout;
7237 mddev->delta_disks = 0;
7238 mddev->reshape_backwards = 0;
7239
7240 return 0;
7241}
7242
7243void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7244{
7245 lockdep_assert_held(&mddev->reconfig_mutex);
7246
7247 if (mddev->external_size)
7248 return;
7249
7250 mddev->array_sectors = array_sectors;
7251}
7252EXPORT_SYMBOL(md_set_array_sectors);
7253
7254static int update_size(struct mddev *mddev, sector_t num_sectors)
7255{
7256 struct md_rdev *rdev;
7257 int rv;
7258 int fit = (num_sectors == 0);
7259 sector_t old_dev_sectors = mddev->dev_sectors;
7260
7261 if (mddev->pers->resize == NULL)
7262 return -EINVAL;
7263
	/* The "num_sectors" is the number of sectors of each device that
	 * is used.  This can only make sense for arrays with redundancy.
	 * linear and raid0 always use whatever space is available. We can only
	 * consider changing this number if no resync or reconstruction is
	 * happening, and if the new size is acceptable. It must fit before the
	 * sb_start or, if that is <data_offset, it must fit before the size
	 * of each device.  If num_sectors is zero, we find the largest size
	 * that fits.
	 */
7272 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7273 mddev->sync_thread)
7274 return -EBUSY;
7275 if (mddev->ro)
7276 return -EROFS;
7277
7278 rdev_for_each(rdev, mddev) {
7279 sector_t avail = rdev->sectors;
7280
7281 if (fit && (num_sectors == 0 || num_sectors > avail))
7282 num_sectors = avail;
7283 if (avail < num_sectors)
7284 return -ENOSPC;
7285 }
7286 rv = mddev->pers->resize(mddev, num_sectors);
7287 if (!rv) {
7288 if (mddev_is_clustered(mddev))
7289 md_cluster_ops->update_size(mddev, old_dev_sectors);
7290 else if (mddev->queue) {
7291 set_capacity(mddev->gendisk, mddev->array_sectors);
7292 revalidate_disk_size(mddev->gendisk, true);
7293 }
7294 }
7295 return rv;
7296}
7297
7298static int update_raid_disks(struct mddev *mddev, int raid_disks)
7299{
7300 int rv;
7301 struct md_rdev *rdev;
7302
7303 if (mddev->pers->check_reshape == NULL)
7304 return -EINVAL;
7305 if (mddev->ro)
7306 return -EROFS;
7307 if (raid_disks <= 0 ||
7308 (mddev->max_disks && raid_disks >= mddev->max_disks))
7309 return -EINVAL;
7310 if (mddev->sync_thread ||
7311 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7312 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7313 mddev->reshape_position != MaxSector)
7314 return -EBUSY;
7315
7316 rdev_for_each(rdev, mddev) {
7317 if (mddev->raid_disks < raid_disks &&
7318 rdev->data_offset < rdev->new_data_offset)
7319 return -EINVAL;
7320 if (mddev->raid_disks > raid_disks &&
7321 rdev->data_offset > rdev->new_data_offset)
7322 return -EINVAL;
7323 }
7324
7325 mddev->delta_disks = raid_disks - mddev->raid_disks;
7326 if (mddev->delta_disks < 0)
7327 mddev->reshape_backwards = 1;
7328 else if (mddev->delta_disks > 0)
7329 mddev->reshape_backwards = 0;
7330
7331 rv = mddev->pers->check_reshape(mddev);
7332 if (rv < 0) {
7333 mddev->delta_disks = 0;
7334 mddev->reshape_backwards = 0;
7335 }
7336 return rv;
7337}
7338
/*
 * update_array_info is used to change the configuration of an
 * on-line array.
 * The version, ctime, level, size, raid_disks, layout, chunk_size
 * fields in the info are checked against the array.
 * Any differences that cannot be handled will cause an error.
 * Normally, only one change can be managed at a time.
 */
7347static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7348{
7349 int rv = 0;
7350 int cnt = 0;
7351 int state = 0;
7352
	/* calculate expected state, ignoring low bits */
7354 if (mddev->bitmap && mddev->bitmap_info.offset)
7355 state |= (1 << MD_SB_BITMAP_PRESENT);
7356
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
/*	    mddev->patch_version != info->patch_version || */
	    mddev->ctime != info->ctime ||
	    mddev->level != info->level ||
/*	    mddev->layout != info->layout || */
	    mddev->persistent != !info->not_persistent ||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
	    ((state ^ info->state) & 0xfffffe00)
		)
		return -EINVAL;

	/* Check there is only one change */
7370 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7371 cnt++;
7372 if (mddev->raid_disks != info->raid_disks)
7373 cnt++;
7374 if (mddev->layout != info->layout)
7375 cnt++;
7376 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7377 cnt++;
7378 if (cnt == 0)
7379 return 0;
7380 if (cnt > 1)
7381 return -EINVAL;
7382
7383 if (mddev->layout != info->layout) {
		/* Change layout
		 * we don't need to do anything at the md level, the
		 * personality will take care of it all.
		 */
7388 if (mddev->pers->check_reshape == NULL)
7389 return -EINVAL;
7390 else {
7391 mddev->new_layout = info->layout;
7392 rv = mddev->pers->check_reshape(mddev);
7393 if (rv)
7394 mddev->new_layout = mddev->layout;
7395 return rv;
7396 }
7397 }
7398 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7399 rv = update_size(mddev, (sector_t)info->size * 2);
7400
7401 if (mddev->raid_disks != info->raid_disks)
7402 rv = update_raid_disks(mddev, info->raid_disks);
7403
7404 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7405 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7406 rv = -EINVAL;
7407 goto err;
7408 }
7409 if (mddev->recovery || mddev->sync_thread) {
7410 rv = -EBUSY;
7411 goto err;
7412 }
7413 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7414 struct bitmap *bitmap;
7415
7416 if (mddev->bitmap) {
7417 rv = -EEXIST;
7418 goto err;
7419 }
7420 if (mddev->bitmap_info.default_offset == 0) {
7421 rv = -EINVAL;
7422 goto err;
7423 }
7424 mddev->bitmap_info.offset =
7425 mddev->bitmap_info.default_offset;
7426 mddev->bitmap_info.space =
7427 mddev->bitmap_info.default_space;
7428 bitmap = md_bitmap_create(mddev, -1);
7429 mddev_suspend(mddev);
7430 if (!IS_ERR(bitmap)) {
7431 mddev->bitmap = bitmap;
7432 rv = md_bitmap_load(mddev);
7433 } else
7434 rv = PTR_ERR(bitmap);
7435 if (rv)
7436 md_bitmap_destroy(mddev);
7437 mddev_resume(mddev);
7438 } else {
			/* remove the bitmap */
7440 if (!mddev->bitmap) {
7441 rv = -ENOENT;
7442 goto err;
7443 }
7444 if (mddev->bitmap->storage.file) {
7445 rv = -EINVAL;
7446 goto err;
7447 }
7448 if (mddev->bitmap_info.nodes) {
				/* hold PW on all the bitmap lock */
7450 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7451 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7452 rv = -EPERM;
7453 md_cluster_ops->unlock_all_bitmaps(mddev);
7454 goto err;
7455 }
7456
7457 mddev->bitmap_info.nodes = 0;
7458 md_cluster_ops->leave(mddev);
7459 module_put(md_cluster_mod);
7460 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7461 }
7462 mddev_suspend(mddev);
7463 md_bitmap_destroy(mddev);
7464 mddev_resume(mddev);
7465 mddev->bitmap_info.offset = 0;
7466 }
7467 }
7468 md_update_sb(mddev, 1);
7469 return rv;
7470err:
7471 return rv;
7472}
7473
7474static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7475{
7476 struct md_rdev *rdev;
7477 int err = 0;
7478
7479 if (mddev->pers == NULL)
7480 return -ENODEV;
7481
7482 rcu_read_lock();
7483 rdev = md_find_rdev_rcu(mddev, dev);
7484 if (!rdev)
7485 err = -ENODEV;
7486 else {
7487 md_error(mddev, rdev);
7488 if (!test_bit(Faulty, &rdev->flags))
7489 err = -EBUSY;
7490 }
7491 rcu_read_unlock();
7492 return err;
7493}
7494
/*
 * We have a problem here : there is no easy way to give a CHS
 * virtual geometry. We currently pretend that we have a 2 heads
 * 4 sectors (with a BIG number of cylinders...). This drives
 * dosfs just mad... ;-)
 */
7501static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7502{
7503 struct mddev *mddev = bdev->bd_disk->private_data;
7504
7505 geo->heads = 2;
7506 geo->sectors = 4;
7507 geo->cylinders = mddev->array_sectors / 8;
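	/* 2 heads * 4 sectors per track = 8 sectors per cylinder */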
7508 return 0;
7509}
7510
7511static inline bool md_ioctl_valid(unsigned int cmd)
7512{
7513 switch (cmd) {
7514 case ADD_NEW_DISK:
7515 case GET_ARRAY_INFO:
7516 case GET_BITMAP_FILE:
7517 case GET_DISK_INFO:
7518 case HOT_ADD_DISK:
7519 case HOT_REMOVE_DISK:
7520 case RAID_AUTORUN:
7521 case RAID_VERSION:
7522 case RESTART_ARRAY_RW:
7523 case RUN_ARRAY:
7524 case SET_ARRAY_INFO:
7525 case SET_BITMAP_FILE:
7526 case SET_DISK_FAULTY:
7527 case STOP_ARRAY:
7528 case STOP_ARRAY_RO:
7529 case CLUSTERED_DISK_NACK:
7530 return true;
7531 default:
7532 return false;
7533 }
7534}
7535
7536static int md_ioctl(struct block_device *bdev, fmode_t mode,
7537 unsigned int cmd, unsigned long arg)
7538{
7539 int err = 0;
7540 void __user *argp = (void __user *)arg;
7541 struct mddev *mddev = NULL;
7542 bool did_set_md_closing = false;
7543
7544 if (!md_ioctl_valid(cmd))
7545 return -ENOTTY;
7546
7547 switch (cmd) {
7548 case RAID_VERSION:
7549 case GET_ARRAY_INFO:
7550 case GET_DISK_INFO:
7551 break;
7552 default:
7553 if (!capable(CAP_SYS_ADMIN))
7554 return -EACCES;
7555 }
7556
	/*
	 * Commands dealing with the RAID driver but not any
	 * particular array:
	 */
7561 switch (cmd) {
7562 case RAID_VERSION:
7563 err = get_version(argp);
7564 goto out;
7565
7566#ifndef MODULE
7567 case RAID_AUTORUN:
7568 err = 0;
7569 autostart_arrays(arg);
7570 goto out;
7571#endif
7572 default:;
7573 }
7574
	/*
	 * Commands creating/starting a new array:
	 */
7578
7579 mddev = bdev->bd_disk->private_data;
7580
7581 if (!mddev) {
7582 BUG();
7583 goto out;
7584 }
7585
	/* Some actions do not require the mutex */
7587 switch (cmd) {
7588 case GET_ARRAY_INFO:
7589 if (!mddev->raid_disks && !mddev->external)
7590 err = -ENODEV;
7591 else
7592 err = get_array_info(mddev, argp);
7593 goto out;
7594
7595 case GET_DISK_INFO:
7596 if (!mddev->raid_disks && !mddev->external)
7597 err = -ENODEV;
7598 else
7599 err = get_disk_info(mddev, argp);
7600 goto out;
7601
7602 case SET_DISK_FAULTY:
7603 err = set_disk_faulty(mddev, new_decode_dev(arg));
7604 goto out;
7605
7606 case GET_BITMAP_FILE:
7607 err = get_bitmap_file(mddev, argp);
7608 goto out;
7609
7610 }
7611
7612 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7613 flush_rdev_wq(mddev);
7614
7615 if (cmd == HOT_REMOVE_DISK)
		/* need to ensure recovery thread has run */
7617 wait_event_interruptible_timeout(mddev->sb_wait,
7618 !test_bit(MD_RECOVERY_NEEDED,
7619 &mddev->recovery),
7620 msecs_to_jiffies(5000));
7621 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
		/* Need to flush page cache, and ensure no-one else opens
		 * and writes
		 */
7625 mutex_lock(&mddev->open_mutex);
7626 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7627 mutex_unlock(&mddev->open_mutex);
7628 err = -EBUSY;
7629 goto out;
7630 }
7631 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7632 mutex_unlock(&mddev->open_mutex);
7633 err = -EBUSY;
7634 goto out;
7635 }
7636 did_set_md_closing = true;
7637 mutex_unlock(&mddev->open_mutex);
7638 sync_blockdev(bdev);
7639 }
7640 err = mddev_lock(mddev);
7641 if (err) {
7642 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7643 err, cmd);
7644 goto out;
7645 }
7646
7647 if (cmd == SET_ARRAY_INFO) {
7648 mdu_array_info_t info;
7649 if (!arg)
7650 memset(&info, 0, sizeof(info));
7651 else if (copy_from_user(&info, argp, sizeof(info))) {
7652 err = -EFAULT;
7653 goto unlock;
7654 }
7655 if (mddev->pers) {
7656 err = update_array_info(mddev, &info);
7657 if (err) {
7658 pr_warn("md: couldn't update array info. %d\n", err);
7659 goto unlock;
7660 }
7661 goto unlock;
7662 }
7663 if (!list_empty(&mddev->disks)) {
7664 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7665 err = -EBUSY;
7666 goto unlock;
7667 }
7668 if (mddev->raid_disks) {
7669 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7670 err = -EBUSY;
7671 goto unlock;
7672 }
7673 err = set_array_info(mddev, &info);
7674 if (err) {
7675 pr_warn("md: couldn't set array info. %d\n", err);
7676 goto unlock;
7677 }
7678 goto unlock;
7679 }
7680
	/*
	 * Commands querying/configuring an existing array:
	 */
	/* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */
7686 if ((!mddev->raid_disks && !mddev->external)
7687 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7688 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7689 && cmd != GET_BITMAP_FILE) {
7690 err = -ENODEV;
7691 goto unlock;
7692 }
7693
	/*
	 * Commands even a read-only array can execute:
	 */
7697 switch (cmd) {
7698 case RESTART_ARRAY_RW:
7699 err = restart_array(mddev);
7700 goto unlock;
7701
7702 case STOP_ARRAY:
7703 err = do_md_stop(mddev, 0, bdev);
7704 goto unlock;
7705
7706 case STOP_ARRAY_RO:
7707 err = md_set_readonly(mddev, bdev);
7708 goto unlock;
7709
7710 case HOT_REMOVE_DISK:
7711 err = hot_remove_disk(mddev, new_decode_dev(arg));
7712 goto unlock;
7713
7714 case ADD_NEW_DISK:
		/* We can support ADD_NEW_DISK on read-only arrays
		 * only if we are re-adding a preexisting device.
		 * So require mddev->pers and MD_DISK_SYNC.
		 */
7719 if (mddev->pers) {
7720 mdu_disk_info_t info;
7721 if (copy_from_user(&info, argp, sizeof(info)))
7722 err = -EFAULT;
7723 else if (!(info.state & (1<<MD_DISK_SYNC)))
				/* Need to clear read-only for this */
7725 break;
7726 else
7727 err = add_new_disk(mddev, &info);
7728 goto unlock;
7729 }
7730 break;
7731 }
7732
	/*
	 * The remaining ioctls need write access to the array, so if it
	 * is marked auto-read-only it is switched to read-write here.
	 */
7737 if (mddev->ro && mddev->pers) {
7738 if (mddev->ro == 2) {
7739 mddev->ro = 0;
7740 sysfs_notify_dirent_safe(mddev->sysfs_state);
7741 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			/* mddev_unlock will wake thread */
			/* If a device failed while we were read-only, we
			 * need to make sure the metadata is updated now.
			 */
7746 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7747 mddev_unlock(mddev);
7748 wait_event(mddev->sb_wait,
7749 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7750 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7751 mddev_lock_nointr(mddev);
7752 }
7753 } else {
7754 err = -EROFS;
7755 goto unlock;
7756 }
7757 }
7758
7759 switch (cmd) {
7760 case ADD_NEW_DISK:
7761 {
7762 mdu_disk_info_t info;
7763 if (copy_from_user(&info, argp, sizeof(info)))
7764 err = -EFAULT;
7765 else
7766 err = add_new_disk(mddev, &info);
7767 goto unlock;
7768 }
7769
7770 case CLUSTERED_DISK_NACK:
7771 if (mddev_is_clustered(mddev))
7772 md_cluster_ops->new_disk_ack(mddev, false);
7773 else
7774 err = -EINVAL;
7775 goto unlock;
7776
7777 case HOT_ADD_DISK:
7778 err = hot_add_disk(mddev, new_decode_dev(arg));
7779 goto unlock;
7780
7781 case RUN_ARRAY:
7782 err = do_md_run(mddev);
7783 goto unlock;
7784
7785 case SET_BITMAP_FILE:
7786 err = set_bitmap_file(mddev, (int)arg);
7787 goto unlock;
7788
7789 default:
7790 err = -EINVAL;
7791 goto unlock;
7792 }
7793
7794unlock:
7795 if (mddev->hold_active == UNTIL_IOCTL &&
7796 err != -EINVAL)
7797 mddev->hold_active = 0;
7798 mddev_unlock(mddev);
7799out:
	if (did_set_md_closing)
7801 clear_bit(MD_CLOSING, &mddev->flags);
7802 return err;
7803}
7804#ifdef CONFIG_COMPAT
7805static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7806 unsigned int cmd, unsigned long arg)
7807{
7808 switch (cmd) {
7809 case HOT_REMOVE_DISK:
7810 case HOT_ADD_DISK:
7811 case SET_DISK_FAULTY:
7812 case SET_BITMAP_FILE:
		/* These take in integer arg, do not convert */
7814 break;
7815 default:
7816 arg = (unsigned long)compat_ptr(arg);
7817 break;
7818 }
7819
7820 return md_ioctl(bdev, mode, cmd, arg);
7821}
7822#endif
7823
7824static int md_set_read_only(struct block_device *bdev, bool ro)
7825{
7826 struct mddev *mddev = bdev->bd_disk->private_data;
7827 int err;
7828
7829 err = mddev_lock(mddev);
7830 if (err)
7831 return err;
7832
7833 if (!mddev->raid_disks && !mddev->external) {
7834 err = -ENODEV;
7835 goto out_unlock;
7836 }
7837
	/*
	 * Transitioning to read-auto need only happen for arrays that call
	 * md_write_start and which are not ready for writes yet.
	 */
7842 if (!ro && mddev->ro == 1 && mddev->pers) {
7843 err = restart_array(mddev);
7844 if (err)
7845 goto out_unlock;
7846 mddev->ro = 2;
7847 }
7848
7849out_unlock:
7850 mddev_unlock(mddev);
7851 return err;
7852}
7853
7854static int md_open(struct block_device *bdev, fmode_t mode)
7855{
	/*
	 * Succeed if we can lock the mddev, which confirms that
	 * it isn't being stopped right now.
	 */
7860 struct mddev *mddev = mddev_find(bdev->bd_dev);
7861 int err;
7862
7863 if (!mddev)
7864 return -ENODEV;
7865
7866 if (mddev->gendisk != bdev->bd_disk) {
		/* we are racing with mddev_put which is discarding this
		 * bd_disk.
		 */
7870 mddev_put(mddev);
		/* Wait until bdev->bd_disk is definitely gone */
7872 if (work_pending(&mddev->del_work))
7873 flush_workqueue(md_misc_wq);
7874 return -EBUSY;
7875 }
7876 BUG_ON(mddev != bdev->bd_disk->private_data);
7877
7878 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7879 goto out;
7880
7881 if (test_bit(MD_CLOSING, &mddev->flags)) {
7882 mutex_unlock(&mddev->open_mutex);
7883 err = -ENODEV;
7884 goto out;
7885 }
7886
7887 err = 0;
7888 atomic_inc(&mddev->openers);
7889 mutex_unlock(&mddev->open_mutex);
7890
7891 bdev_check_media_change(bdev);
7892 out:
7893 if (err)
7894 mddev_put(mddev);
7895 return err;
7896}
7897
7898static void md_release(struct gendisk *disk, fmode_t mode)
7899{
7900 struct mddev *mddev = disk->private_data;
7901
7902 BUG_ON(!mddev);
7903 atomic_dec(&mddev->openers);
7904 mddev_put(mddev);
7905}
7906
7907static int md_media_changed(struct gendisk *disk)
7908{
7909 struct mddev *mddev = disk->private_data;
7910
7911 return mddev->changed;
7912}
7913
7914static int md_revalidate(struct gendisk *disk)
7915{
7916 struct mddev *mddev = disk->private_data;
7917
7918 mddev->changed = 0;
7919 return 0;
7920}
7921static const struct block_device_operations md_fops =
7922{
7923 .owner = THIS_MODULE,
7924 .open = md_open,
7925 .release = md_release,
7926 .ioctl = md_ioctl,
7927#ifdef CONFIG_COMPAT
7928 .compat_ioctl = md_compat_ioctl,
7929#endif
7930 .getgeo = md_getgeo,
7931 .media_changed = md_media_changed,
7932 .revalidate_disk= md_revalidate,
7933 .set_read_only = md_set_read_only,
7934};
7935
7936static int md_thread(void *arg)
7937{
7938 struct md_thread *thread = arg;
7939
	/*
	 * md_thread is a 'system-thread', its priority should be very
	 * high. We avoid resource deadlocks individually in each
	 * raid personality. (RAID5 does preallocation) We also use RR and
	 * the very same RT priority as kswapd, thus we will never get
	 * into a priority inversion deadlock.
	 *
	 * we definitely have to have equal or higher priority than
	 * bdflush, otherwise bdflush will deadlock if there are too
	 * many dirty RAID5 blocks.
	 */

7952 allow_signal(SIGKILL);
7953 while (!kthread_should_stop()) {
7954
		/* We need to wait INTERRUPTIBLE so that
		 * we don't add to the load-average.
		 * That means we need to be sure no signals are
		 * pending
		 */
7960 if (signal_pending(current))
7961 flush_signals(current);
7962
7963 wait_event_interruptible_timeout
7964 (thread->wqueue,
7965 test_bit(THREAD_WAKEUP, &thread->flags)
7966 || kthread_should_stop() || kthread_should_park(),
7967 thread->timeout);
7968
7969 clear_bit(THREAD_WAKEUP, &thread->flags);
7970 if (kthread_should_park())
7971 kthread_parkme();
7972 if (!kthread_should_stop())
7973 thread->run(thread);
7974 }
7975
7976 return 0;
7977}
7978
7979void md_wakeup_thread(struct md_thread *thread)
7980{
7981 if (thread) {
7982 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7983 set_bit(THREAD_WAKEUP, &thread->flags);
7984 wake_up(&thread->wqueue);
7985 }
7986}
7987EXPORT_SYMBOL(md_wakeup_thread);
7988
7989struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7990 struct mddev *mddev, const char *name)
7991{
7992 struct md_thread *thread;
7993
7994 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7995 if (!thread)
7996 return NULL;
7997
7998 init_waitqueue_head(&thread->wqueue);
7999
8000 thread->run = run;
8001 thread->mddev = mddev;
8002 thread->timeout = MAX_SCHEDULE_TIMEOUT;
8003 thread->tsk = kthread_run(md_thread, thread,
8004 "%s_%s",
8005 mdname(thread->mddev),
8006 name);
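	/* the resulting task shows up as e.g. "md0_resync" in ps */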
8007 if (IS_ERR(thread->tsk)) {
8008 kfree(thread);
8009 return NULL;
8010 }
8011 return thread;
8012}
8013EXPORT_SYMBOL(md_register_thread);
8014
8015void md_unregister_thread(struct md_thread **threadp)
8016{
8017 struct md_thread *thread = *threadp;
8018 if (!thread)
8019 return;
8020 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
	/* Locking ensures that mddev_unlock does not wake_up a
	 * non-existent thread
	 */
8024 spin_lock(&pers_lock);
8025 *threadp = NULL;
8026 spin_unlock(&pers_lock);
8027
8028 kthread_stop(thread->tsk);
8029 kfree(thread);
8030}
8031EXPORT_SYMBOL(md_unregister_thread);
8032
8033void md_error(struct mddev *mddev, struct md_rdev *rdev)
8034{
8035 if (!rdev || test_bit(Faulty, &rdev->flags))
8036 return;
8037
8038 if (!mddev->pers || !mddev->pers->error_handler)
8039 return;
8040 mddev->pers->error_handler(mddev,rdev);
8041 if (mddev->degraded)
8042 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
8043 sysfs_notify_dirent_safe(rdev->sysfs_state);
8044 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8045 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8046 md_wakeup_thread(mddev->thread);
8047 if (mddev->event_work.func)
8048 queue_work(md_misc_wq, &mddev->event_work);
8049 md_new_event(mddev);
8050}
8051EXPORT_SYMBOL(md_error);
8052
8053
/* seq_file implementation /proc/mdstat */
8055static void status_unused(struct seq_file *seq)
8056{
8057 int i = 0;
8058 struct md_rdev *rdev;
8059
8060 seq_printf(seq, "unused devices: ");
8061
8062 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
8063 char b[BDEVNAME_SIZE];
8064 i++;
8065 seq_printf(seq, "%s ",
8066 bdevname(rdev->bdev,b));
8067 }
8068 if (!i)
8069 seq_printf(seq, "<none>");
8070
8071 seq_printf(seq, "\n");
8072}
8073
8074static int status_resync(struct seq_file *seq, struct mddev *mddev)
8075{
8076 sector_t max_sectors, resync, res;
8077 unsigned long dt, db = 0;
8078 sector_t rt, curr_mark_cnt, resync_mark_cnt;
8079 int scale, recovery_active;
8080 unsigned int per_milli;
8081
8082 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8083 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8084 max_sectors = mddev->resync_max_sectors;
8085 else
8086 max_sectors = mddev->dev_sectors;
8087
8088 resync = mddev->curr_resync;
8089 if (resync <= 3) {
8090 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Still cleaning up */
8092 resync = max_sectors;
8093 } else if (resync > max_sectors)
8094 resync = max_sectors;
8095 else
8096 resync -= atomic_read(&mddev->recovery_active);
8097
8098 if (resync == 0) {
8099 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8100 struct md_rdev *rdev;
8101
8102 rdev_for_each(rdev, mddev)
8103 if (rdev->raid_disk >= 0 &&
8104 !test_bit(Faulty, &rdev->flags) &&
8105 rdev->recovery_offset != MaxSector &&
8106 rdev->recovery_offset) {
8107 seq_printf(seq, "\trecover=REMOTE");
8108 return 1;
8109 }
8110 if (mddev->reshape_position != MaxSector)
8111 seq_printf(seq, "\treshape=REMOTE");
8112 else
8113 seq_printf(seq, "\tresync=REMOTE");
8114 return 1;
8115 }
8116 if (mddev->recovery_cp < MaxSector) {
8117 seq_printf(seq, "\tresync=PENDING");
8118 return 1;
8119 }
8120 return 0;
8121 }
8122 if (resync < 3) {
8123 seq_printf(seq, "\tresync=DELAYED");
8124 return 1;
8125 }
8126
8127 WARN_ON(max_sectors == 0);
8128
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10.
	 */
8133 scale = 10;
8134 if (sizeof(sector_t) > sizeof(unsigned long)) {
8135 while ( max_sectors/2 > (1ULL<<(scale+32)))
8136 scale++;
8137 }
8138 res = (resync>>scale)*1000;
8139 sector_div(res, (u32)((max_sectors>>scale)+1));
8140
8141 per_milli = res;
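	/* For illustration, with hypothetical numbers: at 1000 of 2000
	 * (scaled) sectors done, res = (1000*1000)/2001 ~= 499, so
	 * per_milli ~= 499 and the output below reads "49.9%". */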
8142 {
8143 int i, x = per_milli/50, y = 20-x;
8144 seq_printf(seq, "[");
8145 for (i = 0; i < x; i++)
8146 seq_printf(seq, "=");
8147 seq_printf(seq, ">");
8148 for (i = 0; i < y; i++)
8149 seq_printf(seq, ".");
8150 seq_printf(seq, "] ");
8151 }
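	/* e.g. at per_milli == 499 this renders "[=========>...........]" */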
8152 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8153 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8154 "reshape" :
8155 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8156 "check" :
8157 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8158 "resync" : "recovery"))),
8159 per_milli/10, per_milli % 10,
8160 (unsigned long long) resync/2,
8161 (unsigned long long) max_sectors/2);
8162
	/*
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 *
	 * rt is a sector_t, which is always 64bit now. We are keeping
	 * the original algorithm, but it is not really necessary.
	 *
	 * Original algorithm:
	 *   So we divide before multiply in case it is 32bit and close
	 *   to the limit.
	 *   We scale the divisor (db) by 32 to avoid losing precision
	 *   near a 'resync-speed' of zero.
	 *
	 *   db /= 32;
	 *   if (db == 0)
	 *     db = 1;
	 *   rt = max_sectors - resync;
	 *   sector_div(rt, db);
	 *   rt *= dt;
	 *   rt >>= 5;
	 */
8180 dt = ((jiffies - mddev->resync_mark) / HZ);
8181 if (!dt) dt++;
8182
8183 curr_mark_cnt = mddev->curr_mark_cnt;
8184 recovery_active = atomic_read(&mddev->recovery_active);
8185 resync_mark_cnt = mddev->resync_mark_cnt;
8186
8187 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8188 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8189
8190 rt = max_sectors - resync;
8191 rt = div64_u64(rt, db/32+1);
8192 rt *= dt;
8193 rt >>= 5;
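	/* net effect: rt ~= remaining_sectors * dt / db, i.e. seconds left
	 * at the current rate; printed below in minutes and tenths */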
8194
8195 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8196 ((unsigned long)rt % 60)/6);
8197
8198 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8199 return 1;
8200}
8201
8202static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8203{
8204 struct list_head *tmp;
8205 loff_t l = *pos;
8206 struct mddev *mddev;
8207
8208 if (l == 0x10000) {
8209 ++*pos;
8210 return (void *)2;
8211 }
8212 if (l > 0x10000)
8213 return NULL;
8214 if (!l--)
		/* header */
8216 return (void*)1;
8217
8218 spin_lock(&all_mddevs_lock);
8219 list_for_each(tmp,&all_mddevs)
8220 if (!l--) {
8221 mddev = list_entry(tmp, struct mddev, all_mddevs);
8222 mddev_get(mddev);
8223 spin_unlock(&all_mddevs_lock);
8224 return mddev;
8225 }
8226 spin_unlock(&all_mddevs_lock);
8227 if (!l--)
8228 return (void*)2;
8229 return NULL;
8230}
8231
8232static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8233{
8234 struct list_head *tmp;
8235 struct mddev *next_mddev, *mddev = v;
8236
8237 ++*pos;
8238 if (v == (void*)2)
8239 return NULL;
8240
8241 spin_lock(&all_mddevs_lock);
8242 if (v == (void*)1)
8243 tmp = all_mddevs.next;
8244 else
8245 tmp = mddev->all_mddevs.next;
8246 if (tmp != &all_mddevs)
8247 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8248 else {
8249 next_mddev = (void*)2;
8250 *pos = 0x10000;
8251 }
8252 spin_unlock(&all_mddevs_lock);
8253
8254 if (v != (void*)1)
8255 mddev_put(mddev);
8256 return next_mddev;
8257
8258}
8259
8260static void md_seq_stop(struct seq_file *seq, void *v)
8261{
8262 struct mddev *mddev = v;
8263
8264 if (mddev && v != (void*)1 && v != (void*)2)
8265 mddev_put(mddev);
8266}
8267
8268static int md_seq_show(struct seq_file *seq, void *v)
8269{
8270 struct mddev *mddev = v;
8271 sector_t sectors;
8272 struct md_rdev *rdev;
8273
8274 if (v == (void*)1) {
8275 struct md_personality *pers;
8276 seq_printf(seq, "Personalities : ");
8277 spin_lock(&pers_lock);
8278 list_for_each_entry(pers, &pers_list, list)
8279 seq_printf(seq, "[%s] ", pers->name);
8280
8281 spin_unlock(&pers_lock);
8282 seq_printf(seq, "\n");
8283 seq->poll_event = atomic_read(&md_event_count);
8284 return 0;
8285 }
8286 if (v == (void*)2) {
8287 status_unused(seq);
8288 return 0;
8289 }
8290
8291 spin_lock(&mddev->lock);
8292 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8293 seq_printf(seq, "%s : %sactive", mdname(mddev),
8294 mddev->pers ? "" : "in");
8295 if (mddev->pers) {
8296 if (mddev->ro==1)
8297 seq_printf(seq, " (read-only)");
8298 if (mddev->ro==2)
8299 seq_printf(seq, " (auto-read-only)");
8300 seq_printf(seq, " %s", mddev->pers->name);
8301 }
8302
8303 sectors = 0;
8304 rcu_read_lock();
8305 rdev_for_each_rcu(rdev, mddev) {
8306 char b[BDEVNAME_SIZE];
8307 seq_printf(seq, " %s[%d]",
8308 bdevname(rdev->bdev,b), rdev->desc_nr);
8309 if (test_bit(WriteMostly, &rdev->flags))
8310 seq_printf(seq, "(W)");
8311 if (test_bit(Journal, &rdev->flags))
8312 seq_printf(seq, "(J)");
8313 if (test_bit(Faulty, &rdev->flags)) {
8314 seq_printf(seq, "(F)");
8315 continue;
8316 }
8317 if (rdev->raid_disk < 0)
8318 seq_printf(seq, "(S)");
8319 if (test_bit(Replacement, &rdev->flags))
8320 seq_printf(seq, "(R)");
8321 sectors += rdev->sectors;
8322 }
8323 rcu_read_unlock();
8324
8325 if (!list_empty(&mddev->disks)) {
8326 if (mddev->pers)
8327 seq_printf(seq, "\n %llu blocks",
8328 (unsigned long long)
8329 mddev->array_sectors / 2);
8330 else
8331 seq_printf(seq, "\n %llu blocks",
8332 (unsigned long long)sectors / 2);
8333 }
8334 if (mddev->persistent) {
8335 if (mddev->major_version != 0 ||
8336 mddev->minor_version != 90) {
8337 seq_printf(seq," super %d.%d",
8338 mddev->major_version,
8339 mddev->minor_version);
8340 }
8341 } else if (mddev->external)
8342 seq_printf(seq, " super external:%s",
8343 mddev->metadata_type);
8344 else
8345 seq_printf(seq, " super non-persistent");
8346
8347 if (mddev->pers) {
8348 mddev->pers->status(seq, mddev);
8349 seq_printf(seq, "\n ");
8350 if (mddev->pers->sync_request) {
8351 if (status_resync(seq, mddev))
8352 seq_printf(seq, "\n ");
8353 }
8354 } else
8355 seq_printf(seq, "\n ");
8356
8357 md_bitmap_status(seq, mddev->bitmap);
8358
8359 seq_printf(seq, "\n");
8360 }
8361 spin_unlock(&mddev->lock);
8362
8363 return 0;
8364}
8365
8366static const struct seq_operations md_seq_ops = {
8367 .start = md_seq_start,
8368 .next = md_seq_next,
8369 .stop = md_seq_stop,
8370 .show = md_seq_show,
8371};
8372
8373static int md_seq_open(struct inode *inode, struct file *file)
8374{
8375 struct seq_file *seq;
8376 int error;
8377
8378 error = seq_open(file, &md_seq_ops);
8379 if (error)
8380 return error;
8381
8382 seq = file->private_data;
8383 seq->poll_event = atomic_read(&md_event_count);
8384 return error;
8385}
8386
8387static int md_unloading;
8388static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8389{
8390 struct seq_file *seq = filp->private_data;
8391 __poll_t mask;
8392
8393 if (md_unloading)
8394 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8395 poll_wait(filp, &md_event_waiters, wait);
8396
	/* always allow read */
8398 mask = EPOLLIN | EPOLLRDNORM;
8399
8400 if (seq->poll_event != atomic_read(&md_event_count))
8401 mask |= EPOLLERR | EPOLLPRI;
8402 return mask;
8403}
8404
8405static const struct file_operations md_seq_fops = {
8406 .owner = THIS_MODULE,
8407 .open = md_seq_open,
8408 .read = seq_read,
8409 .llseek = seq_lseek,
8410 .release = seq_release,
8411 .poll = mdstat_poll,
8412};
8413
8414int register_md_personality(struct md_personality *p)
8415{
8416 pr_debug("md: %s personality registered for level %d\n",
8417 p->name, p->level);
8418 spin_lock(&pers_lock);
8419 list_add_tail(&p->list, &pers_list);
8420 spin_unlock(&pers_lock);
8421 return 0;
8422}
8423EXPORT_SYMBOL(register_md_personality);
8424
8425int unregister_md_personality(struct md_personality *p)
8426{
8427 pr_debug("md: %s personality unregistered\n", p->name);
8428 spin_lock(&pers_lock);
8429 list_del_init(&p->list);
8430 spin_unlock(&pers_lock);
8431 return 0;
8432}
8433EXPORT_SYMBOL(unregister_md_personality);
8434
8435int register_md_cluster_operations(struct md_cluster_operations *ops,
8436 struct module *module)
8437{
8438 int ret = 0;
8439 spin_lock(&pers_lock);
8440 if (md_cluster_ops != NULL)
8441 ret = -EALREADY;
8442 else {
8443 md_cluster_ops = ops;
8444 md_cluster_mod = module;
8445 }
8446 spin_unlock(&pers_lock);
8447 return ret;
8448}
8449EXPORT_SYMBOL(register_md_cluster_operations);
8450
8451int unregister_md_cluster_operations(void)
8452{
8453 spin_lock(&pers_lock);
8454 md_cluster_ops = NULL;
8455 spin_unlock(&pers_lock);
8456 return 0;
8457}
8458EXPORT_SYMBOL(unregister_md_cluster_operations);
8459
8460int md_setup_cluster(struct mddev *mddev, int nodes)
8461{
8462 int ret;
8463 if (!md_cluster_ops)
8464 request_module("md-cluster");
8465 spin_lock(&pers_lock);
	/* ensure module won't be unloaded */
8467 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_warn("can't find md-cluster module or get its reference.\n");
8469 spin_unlock(&pers_lock);
8470 return -ENOENT;
8471 }
8472 spin_unlock(&pers_lock);
8473
8474 ret = md_cluster_ops->join(mddev, nodes);
8475 if (!ret)
8476 mddev->safemode_delay = 0;
8477 return ret;
8478}
8479
8480void md_cluster_stop(struct mddev *mddev)
8481{
8482 if (!md_cluster_ops)
8483 return;
8484 md_cluster_ops->leave(mddev);
8485 module_put(md_cluster_mod);
8486}
8487
8488static int is_mddev_idle(struct mddev *mddev, int init)
8489{
8490 struct md_rdev *rdev;
8491 int idle;
8492 int curr_events;
8493
8494 idle = 1;
8495 rcu_read_lock();
8496 rdev_for_each_rcu(rdev, mddev) {
8497 struct gendisk *disk = rdev->bdev->bd_disk;
8498 curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
8499 atomic_read(&disk->sync_io);
		/* sync IO will cause sync_io to increase before the disk_stats
		 * as sync_io is counted when a request starts, and
		 * disk_stats is counted when it completes.
		 * So resync activity will cause curr_events to be smaller than
		 * when there was no such activity.
		 * non-sync IO will cause disk_stat to increase without
		 * increasing sync_io so curr_events will (eventually)
		 * be larger than it was before.  Once it becomes
		 * substantially larger, the test below will cause
		 * the array to appear non-idle, and resync will slow
		 * down.
		 * If there is a lot of outstanding resync activity when
		 * we set last_event to curr_events, then all that activity
		 * completing might cause the array to appear non-idle
		 * and resync will be slowed down even though there might
		 * not have been non-resync activity.  This will only
		 * happen once though.  'last_events' will soon reflect
		 * the state where there is little or no outstanding
		 * resync requests, and further resync activity will
		 * always make curr_events less than last_events.
		 *
		 */
8522 if (init || curr_events - rdev->last_events > 64) {
8523 rdev->last_events = curr_events;
8524 idle = 0;
8525 }
8526 }
8527 rcu_read_unlock();
8528 return idle;
8529}
8530
8531void md_done_sync(struct mddev *mddev, int blocks, int ok)
8532{
	/* another "blocks" (512byte) blocks have been synced */
8534 atomic_sub(blocks, &mddev->recovery_active);
8535 wake_up(&mddev->recovery_wait);
8536 if (!ok) {
8537 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8538 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8539 md_wakeup_thread(mddev->thread);
		// stop recovery, signal do_sync ....
8541 }
8542}
8543EXPORT_SYMBOL(md_done_sync);
8544
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, schedule a superblock update
 * and wait for it to complete.
 * A return value of 'false' means that the write wouldn't start.
 */
8552bool md_write_start(struct mddev *mddev, struct bio *bi)
8553{
8554 int did_change = 0;
8555
8556 if (bio_data_dir(bi) != WRITE)
8557 return true;
8558
8559 BUG_ON(mddev->ro == 1);
8560 if (mddev->ro == 2) {
		/* need to switch to read/write */
8562 mddev->ro = 0;
8563 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8564 md_wakeup_thread(mddev->thread);
8565 md_wakeup_thread(mddev->sync_thread);
8566 did_change = 1;
8567 }
8568 rcu_read_lock();
8569 percpu_ref_get(&mddev->writes_pending);
	smp_mb(); /* Matches smp_mb in set_in_sync() */
8571 if (mddev->safemode == 1)
8572 mddev->safemode = 0;
	/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
8574 if (mddev->in_sync || mddev->sync_checkers) {
8575 spin_lock(&mddev->lock);
8576 if (mddev->in_sync) {
8577 mddev->in_sync = 0;
8578 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8579 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8580 md_wakeup_thread(mddev->thread);
8581 did_change = 1;
8582 }
8583 spin_unlock(&mddev->lock);
8584 }
8585 rcu_read_unlock();
8586 if (did_change)
8587 sysfs_notify_dirent_safe(mddev->sysfs_state);
8588 if (!mddev->has_superblocks)
8589 return true;
8590 wait_event(mddev->sb_wait,
8591 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8592 mddev->suspended);
8593 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8594 percpu_ref_put(&mddev->writes_pending);
8595 return false;
8596 }
8597 return true;
8598}
8599EXPORT_SYMBOL(md_write_start);
8600
/* md_write_inc can only be called when md_write_start() has
 * already been called at least once for the current request.
 * It increments the counter and is useful when a single request
 * is split into several parts.  Each part causes an increment and
 * so needs a matching md_write_end().
 * Unlike md_write_start(), it is safe to call md_write_inc() inside
 * a spinlocked region.
 */
8609void md_write_inc(struct mddev *mddev, struct bio *bi)
8610{
8611 if (bio_data_dir(bi) != WRITE)
8612 return;
8613 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8614 percpu_ref_get(&mddev->writes_pending);
8615}
8616EXPORT_SYMBOL(md_write_inc);
8617
8618void md_write_end(struct mddev *mddev)
8619{
8620 percpu_ref_put(&mddev->writes_pending);
8621
8622 if (mddev->safemode == 2)
8623 md_wakeup_thread(mddev->thread);
8624 else if (mddev->safemode_delay)
		/* The roundup() ensures this only performs locking
		 * once every ->safemode_delay jiffies
		 */
8628 mod_timer(&mddev->safemode_timer,
8629 roundup(jiffies, mddev->safemode_delay) +
8630 mddev->safemode_delay);
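		/* e.g. with safemode_delay == msecs_to_jiffies(200), back-to-back
		 * writes all compute the same expiry, so the timer is effectively
		 * re-armed at most once per 200ms window */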
8631}
8632
8633EXPORT_SYMBOL(md_write_end);
8634
/* This is used by raid0 and raid10 */
8636void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
8637 struct bio *bio, sector_t start, sector_t size)
8638{
8639 struct bio *discard_bio = NULL;
8640
8641 if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
8642 &discard_bio) || !discard_bio)
8643 return;
8644
8645 bio_chain(discard_bio, bio);
8646 bio_clone_blkg_association(discard_bio, bio);
8647 if (mddev->gendisk)
8648 trace_block_bio_remap(discard_bio->bi_disk->queue,
8649 discard_bio, disk_devt(mddev->gendisk),
8650 bio->bi_iter.bi_sector);
8651 generic_make_request(discard_bio);
8652}
8653EXPORT_SYMBOL_GPL(md_submit_discard_bio);
8654
/* md_allow_write(mddev)
 * Calling this ensures that the array is marked 'active' so that writes
 * may proceed without blocking.  It is important to call this before
 * attempting a GFP_KERNEL allocation while holding the mddev lock.
 * Must be called with mddev_lock held.
 */
8661void md_allow_write(struct mddev *mddev)
8662{
8663 if (!mddev->pers)
8664 return;
8665 if (mddev->ro)
8666 return;
8667 if (!mddev->pers->sync_request)
8668 return;
8669
8670 spin_lock(&mddev->lock);
8671 if (mddev->in_sync) {
8672 mddev->in_sync = 0;
8673 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8674 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8675 if (mddev->safemode_delay &&
8676 mddev->safemode == 0)
8677 mddev->safemode = 1;
8678 spin_unlock(&mddev->lock);
8679 md_update_sb(mddev, 0);
8680 sysfs_notify_dirent_safe(mddev->sysfs_state);
		/* wait for the dirty state to be recorded in the metadata */
8682 wait_event(mddev->sb_wait,
8683 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8684 } else
8685 spin_unlock(&mddev->lock);
8686}
8687EXPORT_SYMBOL_GPL(md_allow_write);
8688
8689#define SYNC_MARKS 10
8690#define SYNC_MARK_STEP (3*HZ)
8691#define UPDATE_FREQUENCY (5*60*HZ)
8692void md_do_sync(struct md_thread *thread)
8693{
8694 struct mddev *mddev = thread->mddev;
8695 struct mddev *mddev2;
8696 unsigned int currspeed = 0, window;
8697 sector_t max_sectors,j, io_sectors, recovery_done;
8698 unsigned long mark[SYNC_MARKS];
8699 unsigned long update_time;
8700 sector_t mark_cnt[SYNC_MARKS];
8701 int last_mark,m;
8702 struct list_head *tmp;
8703 sector_t last_check;
8704 int skipped = 0;
8705 struct md_rdev *rdev;
8706 char *desc, *action = NULL;
8707 struct blk_plug plug;
8708 int ret;
8709
	/* just in case thread restarts... */
8711 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8712 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8713 return;
8714 if (mddev->ro) {
8715 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8716 return;
8717 }
8718
8719 if (mddev_is_clustered(mddev)) {
8720 ret = md_cluster_ops->resync_start(mddev);
8721 if (ret)
8722 goto skip;
8723
8724 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8725 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8726 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8727 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8728 && ((unsigned long long)mddev->curr_resync_completed
8729 < (unsigned long long)mddev->resync_max_sectors))
8730 goto skip;
8731 }
8732
8733 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8734 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8735 desc = "data-check";
8736 action = "check";
8737 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8738 desc = "requested-resync";
8739 action = "repair";
8740 } else
8741 desc = "resync";
8742 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8743 desc = "reshape";
8744 else
8745 desc = "recovery";
8746
8747 mddev->last_sync_action = action ?: desc;
8748
	/* we overload curr_resync somewhat here.
	 * 0 == not engaged in resync at all
	 * 2 == checking that there is no conflict with another sync
	 * 1 == like 2, but have yielded to allow conflicting resync to
	 *		commence
	 * other == active in resync - this many blocks
	 *
	 * Before starting a resync we must have set curr_resync to
	 * 2, and then checked that every "conflicting" array has curr_resync
	 * less than ours.  When we find one that is the same or higher
	 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
	 * to 1 if we choose to yield (based arbitrarily on address of mddev
	 * structure).  This will mean we have to start checking from the
	 * beginning again.
	 */
8765 do {
8766 int mddev2_minor = -1;
8767 mddev->curr_resync = 2;
8768
8769 try_again:
8770 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8771 goto skip;
8772 for_each_mddev(mddev2, tmp) {
8773 if (mddev2 == mddev)
8774 continue;
8775 if (!mddev->parallel_resync
8776 && mddev2->curr_resync
8777 && match_mddev_units(mddev, mddev2)) {
8778 DEFINE_WAIT(wq);
8779 if (mddev < mddev2 && mddev->curr_resync == 2) {
				/* arbitrarily yield */
8781 mddev->curr_resync = 1;
8782 wake_up(&resync_wait);
8783 }
8784 if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here, we can wait the next
				 * time 'round when curr_resync == 2
				 */
8788 continue;
8789
			/* We need to wait 'interruptible' so as not to
			 * contribute to the load average, and not to
			 * be caught by 'softlockup'
			 */
8793 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8794 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8795 mddev2->curr_resync >= mddev->curr_resync) {
8796 if (mddev2_minor != mddev2->md_minor) {
8797 mddev2_minor = mddev2->md_minor;
8798 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8799 desc, mdname(mddev),
8800 mdname(mddev2));
8801 }
8802 mddev_put(mddev2);
8803 if (signal_pending(current))
8804 flush_signals(current);
8805 schedule();
8806 finish_wait(&resync_wait, &wq);
8807 goto try_again;
8808 }
8809 finish_wait(&resync_wait, &wq);
8810 }
8811 }
8812 } while (mddev->curr_resync < 2);
8813
8814 j = 0;
8815 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to physical size, but can be virtual size
		 */
8819 max_sectors = mddev->resync_max_sectors;
8820 atomic64_set(&mddev->resync_mismatches, 0);
		/* we don't use the checkpoint if there's a bitmap */
8822 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8823 j = mddev->resync_min;
8824 else if (!mddev->bitmap)
8825 j = mddev->recovery_cp;
8826
8827 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8828 max_sectors = mddev->resync_max_sectors;
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restarting the reshape
		 * from the very beginning
		 */
8834 if (mddev_is_clustered(mddev) &&
8835 mddev->reshape_position != MaxSector)
8836 j = mddev->reshape_position;
8837 } else {
		/* recovery follows the physical size of devices */
8839 max_sectors = mddev->dev_sectors;
8840 j = MaxSector;
8841 rcu_read_lock();
8842 rdev_for_each_rcu(rdev, mddev)
8843 if (rdev->raid_disk >= 0 &&
8844 !test_bit(Journal, &rdev->flags) &&
8845 !test_bit(Faulty, &rdev->flags) &&
8846 !test_bit(In_sync, &rdev->flags) &&
8847 rdev->recovery_offset < j)
8848 j = rdev->recovery_offset;
8849 rcu_read_unlock();
8850
		/* If there is a bitmap, we need to make sure all
		 * writes that started before we added a spare
		 * complete before we start doing a recovery.
		 * Otherwise the write might complete and (via
		 * bitmap_endwrite) set a bit in the bitmap after the
		 * recovery has checked that bit and skipped that
		 * region.
		 */
8859 if (mddev->bitmap) {
8860 mddev->pers->quiesce(mddev, 1);
8861 mddev->pers->quiesce(mddev, 0);
8862 }
8863 }
8864
8865 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8866 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8867 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8868 speed_max(mddev), desc);
8869
8870 is_mddev_idle(mddev, 1);
8871
8872 io_sectors = 0;
8873 for (m = 0; m < SYNC_MARKS; m++) {
8874 mark[m] = jiffies;
8875 mark_cnt[m] = io_sectors;
8876 }
8877 last_mark = 0;
8878 mddev->resync_mark = mark[last_mark];
8879 mddev->resync_mark_cnt = mark_cnt[last_mark];
8880
	/*
	 * Tune reconstruction:
	 */
8884 window = 32 * (PAGE_SIZE / 512);
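	/* with 4K pages this is 32 * 8 = 256 sectors, i.e. a 128k window */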
8885 pr_debug("md: using %dk window, over a total of %lluk.\n",
8886 window/2, (unsigned long long)max_sectors/2);
8887
8888 atomic_set(&mddev->recovery_active, 0);
8889 last_check = 0;
8890
8891 if (j>2) {
8892 pr_debug("md: resuming %s of %s from checkpoint.\n",
8893 desc, mdname(mddev));
8894 mddev->curr_resync = j;
8895 } else
8896 mddev->curr_resync = 3;
8897 mddev->curr_resync_completed = j;
8898 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8899 md_new_event(mddev);
8900 update_time = jiffies;
8901
8902 blk_start_plug(&plug);
8903 while (j < max_sectors) {
8904 sector_t sectors;
8905
8906 skipped = 0;
8907
8908 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8909 ((mddev->curr_resync > mddev->curr_resync_completed &&
8910 (mddev->curr_resync - mddev->curr_resync_completed)
8911 > (max_sectors >> 4)) ||
8912 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8913 (j - mddev->curr_resync_completed)*2
8914 >= mddev->resync_max - mddev->curr_resync_completed ||
8915 mddev->curr_resync_completed > mddev->resync_max
8916 )) {
			/* time to update curr_resync_completed */
8918 wait_event(mddev->recovery_wait,
8919 atomic_read(&mddev->recovery_active) == 0);
8920 mddev->curr_resync_completed = j;
8921 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8922 j > mddev->recovery_cp)
8923 mddev->recovery_cp = j;
8924 update_time = jiffies;
8925 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8926 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8927 }
8928
8929 while (j >= mddev->resync_max &&
8930 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
			 */
			flush_signals(current); /* just in case */
8936 wait_event_interruptible(mddev->recovery_wait,
8937 mddev->resync_max > j
8938 || test_bit(MD_RECOVERY_INTR,
8939 &mddev->recovery));
8940 }
8941
8942 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8943 break;
8944
8945 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8946 if (sectors == 0) {
8947 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8948 break;
8949 }
8950
8951 if (!skipped) {
8952 io_sectors += sectors;
8953 atomic_add(sectors, &mddev->recovery_active);
8954 }
8955
8956 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8957 break;
8958
8959 j += sectors;
8960 if (j > max_sectors)
			/* when skipping, extra large numbers can be returned. */
8962 j = max_sectors;
8963 if (j > 2)
8964 mddev->curr_resync = j;
8965 mddev->curr_mark_cnt = io_sectors;
8966 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
8970 md_new_event(mddev);
8971
8972 if (last_check + window > io_sectors || j == max_sectors)
8973 continue;
8974
8975 last_check = io_sectors;
8976 repeat:
8977 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks */
8979 int next = (last_mark+1) % SYNC_MARKS;
8980
8981 mddev->resync_mark = mark[next];
8982 mddev->resync_mark_cnt = mark_cnt[next];
8983 mark[next] = jiffies;
8984 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8985 last_mark = next;
8986 }
8987
8988 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8989 break;
8990
		/*
		 * this loop exits only when either we are slower than
		 * the 'hard' speed limit, or the system was IO-idle for
		 * a jiffy.
		 * the system might be non-idle CPU-wise, but we only care
		 * about not overloading the IO subsystem. (things like an
		 * e2fsck being done on the RAID array should execute fast)
		 */
8999 cond_resched();
9000
9001 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
9002 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
9003 /((jiffies-mddev->resync_mark)/HZ +1) +1;
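		/* sectors/2 gives KiB; divided by elapsed seconds this is
		 * the recovery rate in KiB/sec, checked against the
		 * speed_min()/speed_max() limits below */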
9004
9005 if (currspeed > speed_min(mddev)) {
9006 if (currspeed > speed_max(mddev)) {
9007 msleep(500);
9008 goto repeat;
9009 }
9010 if (!is_mddev_idle(mddev, 0)) {
				/*
				 * Give other IO more of a chance.
				 * The faster the devices, the less we wait.
				 */
9015 wait_event(mddev->recovery_wait,
9016 !atomic_read(&mddev->recovery_active));
9017 }
9018 }
9019 }
	pr_info("md: %s: %s %s.\n", mdname(mddev), desc,
9021 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
9022 ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
9026 blk_finish_plug(&plug);
9027 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
9028
9029 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9030 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9031 mddev->curr_resync > 3) {
9032 mddev->curr_resync_completed = mddev->curr_resync;
9033 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9034 }
9035 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9036
9037 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9038 mddev->curr_resync > 3) {
9039 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9040 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9041 if (mddev->curr_resync >= mddev->recovery_cp) {
9042 pr_debug("md: checkpointing %s of %s.\n",
9043 desc, mdname(mddev));
9044 if (test_bit(MD_RECOVERY_ERROR,
9045 &mddev->recovery))
9046 mddev->recovery_cp =
9047 mddev->curr_resync_completed;
9048 else
9049 mddev->recovery_cp =
9050 mddev->curr_resync;
9051 }
9052 } else
9053 mddev->recovery_cp = MaxSector;
9054 } else {
9055 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9056 mddev->curr_resync = MaxSector;
9057 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9058 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9059 rcu_read_lock();
9060 rdev_for_each_rcu(rdev, mddev)
9061 if (rdev->raid_disk >= 0 &&
9062 mddev->delta_disks >= 0 &&
9063 !test_bit(Journal, &rdev->flags) &&
9064 !test_bit(Faulty, &rdev->flags) &&
9065 !test_bit(In_sync, &rdev->flags) &&
9066 rdev->recovery_offset < mddev->curr_resync)
9067 rdev->recovery_offset = mddev->curr_resync;
9068 rcu_read_unlock();
9069 }
9070 }
9071 }
9072 skip:
	/* set CHANGE_PENDING here since maybe another update is needed,
	 * so other nodes are informed. It should be harmless for normal
	 * raid */
9076 set_mask_bits(&mddev->sb_flags, 0,
9077 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
9078
9079 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9080 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9081 mddev->delta_disks > 0 &&
9082 mddev->pers->finish_reshape &&
9083 mddev->pers->size &&
9084 mddev->queue) {
9085 mddev_lock_nointr(mddev);
9086 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9087 mddev_unlock(mddev);
9088 if (!mddev_is_clustered(mddev)) {
9089 set_capacity(mddev->gendisk, mddev->array_sectors);
9090 revalidate_disk_size(mddev->gendisk, true);
9091 }
9092 }
9093
9094 spin_lock(&mddev->lock);
9095 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
9097 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9098 mddev->resync_min = 0;
9099 mddev->resync_max = MaxSector;
9100 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9101 mddev->resync_min = mddev->curr_resync_completed;
9102 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9103 mddev->curr_resync = 0;
9104 spin_unlock(&mddev->lock);
9105
9106 wake_up(&resync_wait);
9107 md_wakeup_thread(mddev->thread);
9108 return;
9109}
9110EXPORT_SYMBOL_GPL(md_do_sync);
9111
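/*
 * remove_and_add_spares - hot-remove failed devices and queue up spares.
 * @mddev: the array to operate on
 * @this: if non-NULL, only consider this one device
 *
 * Returns the number of spares that are candidates for recovery.
 */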
static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this)
{
	struct md_rdev *rdev;
	int spares = 0;
	int removed = 0;
	bool remove_some = false;

	if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		/* Mustn't remove devices when resync thread is running */
		return 0;

	rdev_for_each(rdev, mddev) {
		if ((this == NULL || rdev == this) &&
		    rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    test_bit(Faulty, &rdev->flags) &&
		    atomic_read(&rdev->nr_pending) == 0) {
			/* Faulty non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented,
			 * never get Faulty cleared, and never get Blocked set.
			 * So we can synchronize_rcu now rather than once
			 * per device.
			 */
			remove_some = true;
			set_bit(RemoveSynchronized, &rdev->flags);
		}
	}

	if (remove_some)
		synchronize_rcu();
	rdev_for_each(rdev, mddev) {
		if ((this == NULL || rdev == this) &&
		    rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
		      (!test_bit(In_sync, &rdev->flags) &&
		       !test_bit(Journal, &rdev->flags))) &&
		     atomic_read(&rdev->nr_pending) == 0)) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev) == 0) {
				sysfs_unlink_rdev(mddev, rdev);
				rdev->saved_raid_disk = rdev->raid_disk;
				rdev->raid_disk = -1;
				removed++;
			}
		}
		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
			clear_bit(RemoveSynchronized, &rdev->flags);
	}

	if (removed && mddev->kobj.sd)
		sysfs_notify_dirent_safe(mddev->sysfs_degraded);

	if (this && removed)
		goto no_add;

	rdev_for_each(rdev, mddev) {
		if (this && this != rdev)
			continue;
		if (test_bit(Candidate, &rdev->flags))
			continue;
		if (rdev->raid_disk >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Journal, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags))
			spares++;
		if (rdev->raid_disk >= 0)
			continue;
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(Journal, &rdev->flags)) {
			if (mddev->ro &&
			    !(rdev->saved_raid_disk >= 0 &&
			      !test_bit(Bitmap_sync, &rdev->flags)))
				continue;

			rdev->recovery_offset = 0;
		}
		if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
			/* failure here is OK */
			sysfs_link_rdev(mddev, rdev);
			if (!test_bit(Journal, &rdev->flags))
				spares++;
			md_new_event(mddev);
			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		}
	}
no_add:
	if (removed)
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
	return spares;
}

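/*
 * md_start_sync - work handler that spawns the per-array sync thread.
 * Queued from md_check_recovery() once it has decided that a resync,
 * recovery or reshape should run.
 */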
static void md_start_sync(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, del_work);

	mddev->sync_thread = md_register_thread(md_do_sync,
						mddev,
						"resync");
	if (!mddev->sync_thread) {
		pr_warn("%s: could not start resync thread...\n",
			mdname(mddev));
		/* leave the spares where they are, it shouldn't hurt */
		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
		clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		wake_up(&resync_wait);
		if (test_and_clear_bit(MD_RECOVERY_RECOVER,
				       &mddev->recovery))
			if (mddev->sysfs_action)
				sysfs_notify_dirent_safe(mddev->sysfs_action);
	} else
		md_wakeup_thread(mddev->sync_thread);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
}

/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block update.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
 * "->recovery" and create a thread at ->sync_thread.
 * When the thread finishes it sets MD_RECOVERY_DONE
 * and wakeups up this thread which will reap the thread and finish up.
 * This thread also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it.
 *  2/ If a recovery thread is running, don't do anything else.
 *  3/ If recovery has finished, clean up, possibly marking spares active.
 *  4/ If there is no recovery thread running, start one, removing failed
 *     devices and adding spares first if appropriate.
 */
void md_check_recovery(struct mddev *mddev)
{
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
		smp_mb__after_atomic();
		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
			md_update_sb(mddev, 0);
		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
		wake_up(&mddev->sb_wait);
	}

	if (mddev->suspended)
		return;

	if (mddev->bitmap)
		md_bitmap_daemon_work(mddev);

	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
			pr_debug("md: %s in immediate safe mode\n",
				 mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return;
	if (!((mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) ||
	      test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
	      test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
	      (mddev->external == 0 && mddev->safemode == 1) ||
	      (mddev->safemode == 2 &&
	       !mddev->in_sync && mddev->recovery_cp == MaxSector)))
		return;

	if (mddev_trylock(mddev)) {
		int spares = 0;
		bool try_set_sync = mddev->safemode != 0;

		if (!mddev->external && mddev->safemode == 1)
			mddev->safemode = 0;

		if (mddev->ro) {
			struct md_rdev *rdev;
			if (!mddev->external && mddev->in_sync)
				/* 'Blocked' flag not needed as failed devices
				 * will be recorded if array switched to read/write.
				 * Leaving it set will prevent the device
				 * from being removed.
				 */
				rdev_for_each(rdev, mddev)
					clear_bit(Blocked, &rdev->flags);
			/* On a read-only array we can:
			 * - remove failed devices
			 * - add already-in_sync devices if the array itself
			 *   is in-sync.
			 * As we only add devices that are already in-sync,
			 * we can activate the spares immediately.
			 */
			remove_and_add_spares(mddev, NULL);
			/* There is no thread, but we need to call
			 * ->spare_active and clear saved_raid_disk
			 */
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			md_reap_sync_thread(mddev);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			goto unlock;
		}

		if (mddev_is_clustered(mddev)) {
			struct md_rdev *rdev, *tmp;
			/* kick the device if another node issued a
			 * remove disk.
			 */
			rdev_for_each_safe(rdev, tmp, mddev) {
				if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
				    rdev->raid_disk < 0)
					md_kick_rdev_from_array(rdev);
			}
		}

		if (try_set_sync && !mddev->external && !mddev->in_sync) {
			spin_lock(&mddev->lock);
			set_in_sync(mddev);
			spin_unlock(&mddev->lock);
		}

		if (mddev->sb_flags)
			md_update_sb(mddev, 0);

		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			md_reap_sync_thread(mddev);
			goto unlock;
		}
		/* Set RUNNING before clearing NEEDED to avoid
		 * any transients in the value of "sync_action".
		 */
		mddev->curr_resync_completed = 0;
		spin_lock(&mddev->lock);
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		spin_unlock(&mddev->lock);
		/* Clear some bits that might be left over from a
		 * previous sync attempt.
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto not_running;
		/* no recovery is running.
		 * remove any failed drives, then
		 * add spares if possible.
		 * Spares are also removed and re-added, to allow
		 * the personality to fail the re-add.
		 */

		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Cannot proceed */
				goto not_running;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* nothing to be done ... */
			goto not_running;

		if (mddev->pers->sync_request) {
			if (spares) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written.
				 */
				md_bitmap_write_all(mddev->bitmap);
			}
			INIT_WORK(&mddev->del_work, md_start_sync);
			queue_work(md_misc_wq, &mddev->del_work);
			goto unlock;
		}
	not_running:
		if (!mddev->sync_thread) {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			wake_up(&resync_wait);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}
	unlock:
		wake_up(&mddev->sb_wait);
		mddev_unlock(mddev);
	}
}
EXPORT_SYMBOL(md_check_recovery);

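/*
 * md_reap_sync_thread - collect the results of a finished sync thread.
 * Called with reconfig_mutex held once MD_RECOVERY_DONE is set: it
 * unregisters the thread, activates any spares that came into sync,
 * writes out the superblock and clears the recovery state bits.
 */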
void md_reap_sync_thread(struct mddev *mddev)
{
	struct md_rdev *rdev;
	sector_t old_dev_sectors = mddev->dev_sectors;
	bool is_reshaped = false;

	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
	    mddev->degraded != mddev->raid_disks) {
		/* success...*/
		/* activate any spares */
		if (mddev->pers->spare_active(mddev)) {
			sysfs_notify_dirent_safe(mddev->sysfs_degraded);
			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		}
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    mddev->pers->finish_reshape) {
		mddev->pers->finish_reshape(mddev);
		if (mddev_is_clustered(mddev))
			is_reshaped = true;
	}

	/* If array is degraded, then skipping spares, which were
	 * not checked in spare_active.
	 */
	if (!mddev->degraded)
		rdev_for_each(rdev, mddev)
			rdev->saved_raid_disk = -1;

	md_update_sb(mddev, 1);
	/* MD_SB_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update size across cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped &&
	    !test_bit(MD_CLOSING, &mddev->flags))
		md_cluster_ops->update_size(mddev, old_dev_sectors);
	wake_up(&resync_wait);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	md_new_event(mddev);
	if (mddev->event_work.func)
		queue_work(md_misc_wq, &mddev->event_work);
}
EXPORT_SYMBOL(md_reap_sync_thread);

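/*
 * md_wait_for_blocked_rdev - wait (with a 5 second timeout) for a
 * Blocked device to be unblocked.  The caller must hold a reference
 * via rdev->nr_pending, which is dropped here.
 */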
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	wait_event_timeout(rdev->blocked_wait,
			   !test_bit(Blocked, &rdev->flags) &&
			   !test_bit(BlockedBadBlocks, &rdev->flags),
			   msecs_to_jiffies(5000));
	rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);

void md_finish_reshape(struct mddev *mddev)
{
	/* called by personality module when reshape completes. */
	struct md_rdev *rdev;

	rdev_for_each(rdev, mddev) {
		if (rdev->data_offset > rdev->new_data_offset)
			rdev->sectors += rdev->data_offset - rdev->new_data_offset;
		else
			rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
		rdev->data_offset = rdev->new_data_offset;
	}
}
EXPORT_SYMBOL(md_finish_reshape);

/* Bad block management */

/* Returns 1 on success, 0 on failure */
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		       int is_new)
{
	struct mddev *mddev = rdev->mddev;
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
	if (rv == 0) {
		/* Make sure they get written out promptly */
		if (test_bit(ExternalBbl, &rdev->flags))
			sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(rdev->mddev->thread);
		return 1;
	} else
		return 0;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);

int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			 int is_new)
{
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
	else
		s += rdev->data_offset;
	rv = badblocks_clear(&rdev->badblocks, s, sectors);
	if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
		sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
	return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);

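/*
 * Reboot notifier: quiesce writes on all running arrays so their
 * superblocks are marked clean before the machine goes down.
 */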
static int md_notify_reboot(struct notifier_block *this,
			    unsigned long code, void *x)
{
	struct list_head *tmp;
	struct mddev *mddev;
	int need_delay = 0;

	for_each_mddev(mddev, tmp) {
		if (mddev_trylock(mddev)) {
			if (mddev->pers)
				__md_stop_writes(mddev);
			if (mddev->persistent)
				mddev->safemode = 2;
			mddev_unlock(mddev);
		}
		need_delay = 1;
	}

	/*
	 * certain more exotic SCSI devices are known to be
	 * volatile wrt too early system reboots. While the
	 * right place to handle this issue is the given
	 * driver, we do want to have a safe RAID driver ...
	 */
	if (need_delay)
		mdelay(1000);

	return NOTIFY_DONE;
}

static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX, /* before any real devices */
};

static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}

static int __init md_init(void)
{
	int ret = -ENOMEM;

	md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
	if (!md_wq)
		goto err_wq;

	md_misc_wq = alloc_workqueue("md_misc", 0, 0);
	if (!md_misc_wq)
		goto err_misc_wq;

	md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
	if (!md_rdev_misc_wq)
		goto err_rdev_misc_wq;

	ret = __register_blkdev(MD_MAJOR, "md", md_probe);
	if (ret < 0)
		goto err_md;

	ret = __register_blkdev(0, "mdp", md_probe);
	if (ret < 0)
		goto err_mdp;
	mdp_major = ret;

	register_reboot_notifier(&md_notifier);
	raid_table_header = register_sysctl_table(raid_root_table);

	md_geninit();
	return 0;

err_mdp:
	unregister_blkdev(MD_MAJOR, "md");
err_md:
	destroy_workqueue(md_rdev_misc_wq);
err_rdev_misc_wq:
	destroy_workqueue(md_misc_wq);
err_misc_wq:
	destroy_workqueue(md_wq);
err_wq:
	return ret;
}

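/*
 * check_sb_changes - apply superblock changes made by another cluster
 * node.  Compares the freshly re-read superblock on @rdev with the
 * local view of the array and resizes, activates spares, fails devices
 * or updates the reshape position accordingly.
 */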
static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	struct md_rdev *rdev2, *tmp;
	int role, ret;
	char b[BDEVNAME_SIZE];

	/*
	 * If size is changed in another node then we need to
	 * do resize as well.
	 */
	if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
		ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
		if (ret)
			pr_info("md-cluster: resize failed\n");
		else
			md_bitmap_update_sb(mddev->bitmap);
	}

	/* Check for change of roles in the active devices */
	rdev_for_each_safe(rdev2, tmp, mddev) {
		if (test_bit(Faulty, &rdev2->flags))
			continue;

		/* Check if the roles changed */
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
				pr_info("md: Removing Candidate device %s because add failed\n",
					bdevname(rdev2->bdev, b));
				md_kick_rdev_from_array(rdev2);
				continue;
			}
			else
				clear_bit(Candidate, &rdev2->flags);
		}

		if (role != rdev2->raid_disk) {
			/*
			 * got activated except reshape is happening.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
					bdevname(rdev2->bdev, b));
				/* wakeup mddev->thread here, so array could
				 * perform resync with the new activated disk */
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
			/* device faulty
			 * We just want to do the minimum to mark the disk
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
		}
	}

	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
		ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
		if (ret)
			pr_warn("md: updating array disks failed. %d\n", ret);
	}

	/*
	 * Since mddev->delta_disks has already updated in update_raid_disks,
	 * so it is time to check reshape.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * reshape is happening in the remote node, we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = le64_to_cpu(sb->reshape_position);
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* reshape is just done in another node. */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}

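/*
 * read_rdev - re-read the superblock of one member device, keeping the
 * old superblock page around so it can be restored if the read fails.
 */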
static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	int err;
	struct page *swapout = rdev->sb_page;
	struct mdp_superblock_1 *sb;

	/* Store the sb page of the rdev in the swapout temporary
	 * variable in case we err in the future
	 */
	rdev->sb_page = NULL;
	err = alloc_disk_sb(rdev);
	if (err == 0) {
		ClearPageUptodate(rdev->sb_page);
		rdev->sb_loaded = 0;
		err = super_types[mddev->major_version].
			load_super(rdev, NULL, mddev->minor_version);
	}
	if (err < 0) {
		pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
			__func__, __LINE__, rdev->desc_nr, err);
		if (rdev->sb_page)
			put_page(rdev->sb_page);
		rdev->sb_page = swapout;
		rdev->sb_loaded = 1;
		return err;
	}

	sb = page_address(rdev->sb_page);
	/* Read the offset unconditionally, just in case it got changed
	 * in another node.
	 */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
		rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);

	/* The other node finished recovery, call spare_active to set
	 * device In_sync and mddev->degraded
	 */
	if (rdev->recovery_offset == MaxSector &&
	    !test_bit(In_sync, &rdev->flags) &&
	    mddev->pers->spare_active(mddev))
		sysfs_notify_dirent_safe(mddev->sysfs_degraded);

	put_page(swapout);
	return 0;
}

void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
			break;
	}

	if (!rdev || rdev->desc_nr != nr) {
		pr_warn("%s: %d Could not find rdev with nr %d\n",
			__func__, __LINE__, nr);
		return;
	}

	err = read_rdev(mddev, rdev);
	if (err < 0)
		return;

	check_sb_changes(mddev, rdev);

	/* Read all rdev's to update recovery_offset */
	rdev_for_each_rcu(rdev, mddev) {
		if (!test_bit(Faulty, &rdev->flags))
			read_rdev(mddev, rdev);
	}
}
EXPORT_SYMBOL(md_reload_sb);

#ifndef MODULE

/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
	dev_t dev;
};

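/*
 * md_autodetect_dev - remember a device found during boot-time partition
 * scanning so that autostart_arrays() can try to assemble it later.
 */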
void md_autodetect_dev(dev_t dev)
{
	struct detected_devices_node *node_detected_dev;

	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	}
}

static void autostart_arrays(int part)
{
	struct md_rdev *rdev;
	struct detected_devices_node *node_detected_dev;
	dev_t dev;
	int i_scanned, i_passed;

	i_scanned = 0;
	i_passed = 0;

	pr_info("md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
					       struct detected_devices_node, list);
		list_del(&node_detected_dev->list);
		dev = node_detected_dev->dev;
		kfree(node_detected_dev);
		mutex_unlock(&detected_devices_mutex);
		rdev = md_import_device(dev, 0, 90);
		mutex_lock(&detected_devices_mutex);
		if (IS_ERR(rdev))
			continue;

		if (test_bit(Faulty, &rdev->flags))
			continue;

		set_bit(AutoDetected, &rdev->flags);
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);

	autorun_devices(part);
}

#endif /* !MODULE */

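/*
 * Module unload: tear down roughly in the reverse order of md_init(),
 * waking any waiters polling /proc/mdstat so the module refcount can
 * drop to zero.
 */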
static __exit void md_exit(void)
{
	struct mddev *mddev;
	struct list_head *tmp;
	int delay = 1;

	unregister_blkdev(MD_MAJOR, "md");
	unregister_blkdev(mdp_major, "mdp");
	unregister_reboot_notifier(&md_notifier);
	unregister_sysctl_table(raid_table_header);

	/* We cannot unload the module while some process is
	 * waiting for us in select() or poll() - wake them up
	 */
	md_unloading = 1;
	while (waitqueue_active(&md_event_waiters)) {
		/* not safe to leave yet */
		wake_up(&md_event_waiters);
		msleep(delay);
		delay += delay;
	}
	remove_proc_entry("mdstat", NULL);

	for_each_mddev(mddev, tmp) {
		export_array(mddev);
		mddev->ctime = 0;
		mddev->hold_active = 0;
		/*
		 * for_each_mddev() will call mddev_put() at the end of each
		 * iteration.  As the mddev is now fully clear, this will
		 * schedule the mddev for destruction by a workqueue, and the
		 * destroy_workqueue() below will wait for that to complete.
		 */
	}
	destroy_workqueue(md_rdev_misc_wq);
	destroy_workqueue(md_misc_wq);
	destroy_workqueue(md_wq);
}

subsys_initcall(md_init);
module_exit(md_exit)

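/*
 * The "start_ro" module parameter controls whether newly assembled
 * arrays start in auto-read-only mode until the first write arrives.
 */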
static int get_ro(char *buffer, const struct kernel_param *kp)
{
	return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
	return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}

module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);