1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40#include <linux/sched/mm.h>
41#include <linux/sched/signal.h>
42#include <linux/kthread.h>
43#include <linux/blkdev.h>
44#include <linux/badblocks.h>
45#include <linux/sysctl.h>
46#include <linux/seq_file.h>
47#include <linux/fs.h>
48#include <linux/poll.h>
49#include <linux/ctype.h>
50#include <linux/string.h>
51#include <linux/hdreg.h>
52#include <linux/proc_fs.h>
53#include <linux/random.h>
54#include <linux/module.h>
55#include <linux/reboot.h>
56#include <linux/file.h>
57#include <linux/compat.h>
58#include <linux/delay.h>
59#include <linux/raid/md_p.h>
60#include <linux/raid/md_u.h>
61#include <linux/raid/detect.h>
62#include <linux/slab.h>
63#include <linux/percpu-refcount.h>
64#include <linux/part_stat.h>
65
66#include <trace/events/block.h>
67#include "md.h"
68#include "md-bitmap.h"
69#include "md-cluster.h"
70
71
72
73
74
75
/* List of struct md_personality entries; walked by find_pers(). */
static LIST_HEAD(pers_list);
/* Spinlock; mddev_unlock() holds it while waking the array's md thread. */
static DEFINE_SPINLOCK(pers_lock);

/* kobject type passed to kobject_init() for every mddev in mddev_init(). */
static struct kobj_type md_ktype;

/* Cluster operations hook, exported so other modules can reference it. */
struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
/* NOTE(review): presumably the module that provides md_cluster_ops - confirm at its users. */
static struct module *md_cluster_mod;

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
/* Workqueue on which mddev->flush_work is queued by the flush code. */
static struct workqueue_struct *md_wq;
/* Workqueue on which mddev_put() queues mddev->del_work. */
static struct workqueue_struct *md_misc_wq;
static struct workqueue_struct *md_rdev_misc_wq;

/* Forward declarations of helpers defined outside this part of the file. */
static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
93
94
95
96
97
98
/*
 * Default cap on corrected read errors.
 * NOTE(review): the consumer of this value is not visible here - confirm
 * its exact meaning at the point of use.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20

/*
 * Default safemode delay in jiffies: 200 * HZ / 1000 (i.e. 200 ms worth of
 * jiffies, using integer division) plus one extra jiffy.
 */
#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * System-wide resync speed limits.  They are exposed through raid_table
 * as "speed_limit_min" / "speed_limit_max" and act as the fallback in
 * speed_min() / speed_max() when an array has no limit of its own.
 * NOTE(review): units are not visible in this file section - confirm.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
117static inline int speed_min(struct mddev *mddev)
118{
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
121}
122
123static inline int speed_max(struct mddev *mddev)
124{
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
127}
128
129static void rdev_uninit_serial(struct md_rdev *rdev)
130{
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
132 return;
133
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
136}
137
138static void rdevs_uninit_serial(struct mddev *mddev)
139{
140 struct md_rdev *rdev;
141
142 rdev_for_each(rdev, mddev)
143 rdev_uninit_serial(rdev);
144}
145
146static int rdev_init_serial(struct md_rdev *rdev)
147{
148
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
150 struct serial_in_rdev *serial = NULL;
151
152 if (test_bit(CollisionCheck, &rdev->flags))
153 return 0;
154
155 serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
156 GFP_KERNEL);
157 if (!serial)
158 return -ENOMEM;
159
160 for (i = 0; i < serial_nums; i++) {
161 struct serial_in_rdev *serial_tmp = &serial[i];
162
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
166 }
167
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
170
171 return 0;
172}
173
174static int rdevs_init_serial(struct mddev *mddev)
175{
176 struct md_rdev *rdev;
177 int ret = 0;
178
179 rdev_for_each(rdev, mddev) {
180 ret = rdev_init_serial(rdev);
181 if (ret)
182 break;
183 }
184
185
186 if (ret && !mddev->serial_info_pool)
187 rdevs_uninit_serial(mddev);
188
189 return ret;
190}
191
192
193
194
195
196
197static int rdev_need_serial(struct md_rdev *rdev)
198{
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
202}
203
204
205
206
207
208
209void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
210 bool is_suspend)
211{
212 int ret = 0;
213
214 if (rdev && !rdev_need_serial(rdev) &&
215 !test_bit(CollisionCheck, &rdev->flags))
216 return;
217
218 if (!is_suspend)
219 mddev_suspend(mddev);
220
221 if (!rdev)
222 ret = rdevs_init_serial(mddev);
223 else
224 ret = rdev_init_serial(rdev);
225 if (ret)
226 goto abort;
227
228 if (mddev->serial_info_pool == NULL) {
229
230
231
232
233 mddev->serial_info_pool =
234 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
235 sizeof(struct serial_info));
236 if (!mddev->serial_info_pool) {
237 rdevs_uninit_serial(mddev);
238 pr_err("can't alloc memory pool for serialization\n");
239 }
240 }
241
242abort:
243 if (!is_suspend)
244 mddev_resume(mddev);
245}
246
247
248
249
250
251
252
253void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
254 bool is_suspend)
255{
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
257 return;
258
259 if (mddev->serial_info_pool) {
260 struct md_rdev *temp;
261 int num = 0;
262
263 if (!is_suspend)
264 mddev_suspend(mddev);
265 rdev_for_each(temp, mddev) {
266 if (!rdev) {
267 if (!mddev->serialize_policy ||
268 !rdev_need_serial(temp))
269 rdev_uninit_serial(temp);
270 else
271 num++;
272 } else if (temp != rdev &&
273 test_bit(CollisionCheck, &temp->flags))
274 num++;
275 }
276
277 if (rdev)
278 rdev_uninit_serial(rdev);
279
280 if (num)
281 pr_info("The mempool could be used by other devices\n");
282 else {
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
285 }
286 if (!is_suspend)
287 mddev_resume(mddev);
288 }
289}
290
/* NOTE(review): presumably the handle returned when the tables below are registered - confirm. */
static struct ctl_table_header *raid_table_header;

/*
 * Leaf sysctl entries: "speed_limit_min" and "speed_limit_max" map directly
 * onto the sysctl_speed_limit_* integers and are handled by proc_dointvec.
 * Both are world-readable and writable by the owner only.
 */
static struct ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* empty terminating entry */
};
310
/* The "raid" directory entry; its children are the entries of raid_table. */
static struct ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }	/* empty terminating entry */
};
320
/*
 * The "dev" directory entry (mode 0555); it nests raid_dir_table, so the
 * leaf entries end up under dev/raid/ in the sysctl hierarchy.
 */
static struct ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{ }	/* empty terminating entry */
};
330
/* NOTE(review): presumably makes arrays start read-only when non-zero; not used in this part of the file - confirm. */
static int start_readonly;
332
333
334
335
336
337
338
339
340
/* NOTE(review): presumably controls whether opening an md device node creates the array; defaults to true - confirm at its users. */
static bool create_on_open = true;
342
343
344
345
346
347
348
349
350
351
352
/* Wait queue woken and counter bumped on every md_new_event() call. */
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;

/*
 * md_new_event - record that an md event happened.
 * @mddev: not used by this implementation
 *
 * Increments the global md_event_count and wakes everyone sleeping on
 * md_event_waiters.
 */
void md_new_event(struct mddev *mddev)
{
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);
361
362
363
364
365
/*
 * Global list of mddev structures (linked through mddev->all_mddevs).
 * all_mddevs_lock is taken around lookups, insertion and removal in
 * mddev_find(), mddev_alloc() and mddev_put().
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
368
369
370
371
372
373
374
375
/*
 * for_each_mddev - iterate over every mddev on the all_mddevs list.
 * @_mddev: struct mddev * loop cursor
 * @_tmp:   struct list_head * scratch cursor
 *
 * How the three for-clauses cooperate:
 *  - init:      take all_mddevs_lock, point _tmp at the first entry and
 *               clear _mddev;
 *  - condition: while still holding the lock, take a reference (mddev_get)
 *               on the entry _tmp points at (unless it is the list head),
 *               drop the lock, release the reference on the previous
 *               _mddev (mddev_put), then make the new entry current;
 *  - step:      re-take the lock and advance _tmp.
 *
 * Consequently the loop body runs without all_mddevs_lock held but with a
 * reference on the current _mddev.  A body that leaves the loop early
 * (break/return/goto) still owns that reference and has to mddev_put() it.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
390
391
392
393
394
395
396
397
398static bool is_suspended(struct mddev *mddev, struct bio *bio)
399{
400 if (mddev->suspended)
401 return true;
402 if (bio_data_dir(bio) != WRITE)
403 return false;
404 if (mddev->suspend_lo >= mddev->suspend_hi)
405 return false;
406 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
407 return false;
408 if (bio_end_sector(bio) < mddev->suspend_lo)
409 return false;
410 return true;
411}
412
/*
 * md_handle_request - pass @bio to the personality of @mddev, honouring
 * suspension.
 *
 * The suspension check and the active_io increment both happen inside one
 * RCU read-side section.  While the bio is suspended the task sleeps
 * uninterruptibly on mddev->sb_wait, dropping the RCU read lock around
 * schedule().
 *
 * If the personality's make_request() returns false, the active_io count
 * is dropped again, sb_wait waiters are woken and the whole sequence is
 * retried from the suspension check.
 */
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
	rcu_read_lock();
	if (is_suspended(mddev, bio)) {
		DEFINE_WAIT(__wait);
		for (;;) {
			prepare_to_wait(&mddev->sb_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			/* re-check after queueing ourselves on sb_wait */
			if (!is_suspended(mddev, bio))
				break;
			rcu_read_unlock();
			schedule();
			rcu_read_lock();
		}
		finish_wait(&mddev->sb_wait, &__wait);
	}
	/* counted as in-flight; mddev_suspend() waits for this to reach 0 */
	atomic_inc(&mddev->active_io);
	rcu_read_unlock();

	if (!mddev->pers->make_request(mddev, bio)) {
		/* not accepted: undo the accounting and start over */
		atomic_dec(&mddev->active_io);
		wake_up(&mddev->sb_wait);
		goto check_suspended;
	}

	/* last in-flight request during a suspend: wake the suspender */
	if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
		wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL(md_handle_request);
443
444static blk_qc_t md_submit_bio(struct bio *bio)
445{
446 const int rw = bio_data_dir(bio);
447 struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
448
449 if (mddev == NULL || mddev->pers == NULL) {
450 bio_io_error(bio);
451 return BLK_QC_T_NONE;
452 }
453
454 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
455 bio_io_error(bio);
456 return BLK_QC_T_NONE;
457 }
458
459 blk_queue_split(&bio);
460
461 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
462 if (bio_sectors(bio) != 0)
463 bio->bi_status = BLK_STS_IOERR;
464 bio_endio(bio);
465 return BLK_QC_T_NONE;
466 }
467
468
469 bio->bi_opf &= ~REQ_NOMERGE;
470
471 md_handle_request(mddev, bio);
472
473 return BLK_QC_T_NONE;
474}
475
476
477
478
479
480
481
/*
 * mddev_suspend - block new I/O to @mddev and wait for in-flight I/O.
 *
 * Must be called with mddev->reconfig_mutex held and not from the array's
 * own md thread (a warning is emitted otherwise).  Calls nest: only the
 * first call performs the actual suspend, later calls just bump
 * mddev->suspended.  Each call is undone by mddev_resume().
 */
void mddev_suspend(struct mddev *mddev)
{
	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
	lockdep_assert_held(&mddev->reconfig_mutex);
	/* already suspended: just count the nesting level */
	if (mddev->suspended++)
		return;
	/*
	 * md_handle_request() tests ->suspended and increments active_io
	 * inside an RCU read-side section, so once this returns every
	 * request has either seen the suspension or is counted in active_io.
	 */
	synchronize_rcu();
	wake_up(&mddev->sb_wait);
	/* MD_ALLOW_SB_UPDATE is set only while draining and quiescing */
	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
	smp_mb__after_atomic();
	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
	mddev->pers->quiesce(mddev, 1);
	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
	/* wait until no superblock update is in progress any more */
	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));

	del_timer_sync(&mddev->safemode_timer);

	/* enter a noio allocation scope; the flag is kept for mddev_resume() */
	mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);
502
503void mddev_resume(struct mddev *mddev)
504{
505
506 memalloc_noio_restore(mddev->noio_flag);
507 lockdep_assert_held(&mddev->reconfig_mutex);
508 if (--mddev->suspended)
509 return;
510 wake_up(&mddev->sb_wait);
511 mddev->pers->quiesce(mddev, 0);
512
513 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
514 md_wakeup_thread(mddev->thread);
515 md_wakeup_thread(mddev->sync_thread);
516}
517EXPORT_SYMBOL_GPL(mddev_resume);
518
519
520
521
522
523static void md_end_flush(struct bio *bio)
524{
525 struct md_rdev *rdev = bio->bi_private;
526 struct mddev *mddev = rdev->mddev;
527
528 rdev_dec_pending(rdev, mddev);
529
530 if (atomic_dec_and_test(&mddev->flush_pending)) {
531
532 queue_work(md_wq, &mddev->flush_work);
533 }
534 bio_put(bio);
535}
536
537static void md_submit_flush_data(struct work_struct *ws);
538
/*
 * Work function: send an empty PREFLUSH write to every usable member
 * device of the array that owns @ws.
 *
 * flush_work is re-initialised to md_submit_flush_data() first, so the
 * next time it is queued (by md_end_flush() or at the end of this
 * function) the second stage runs.  flush_pending starts at 1 so that it
 * cannot reach zero while bios are still being submitted; that initial
 * count is dropped at the end.
 */
static void submit_flushes(struct work_struct *ws)
{
	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
	struct md_rdev *rdev;

	/* timestamp later copied to prev_flush_start by md_submit_flush_data() */
	mddev->start_flush = ktime_get_boottime();
	INIT_WORK(&mddev->flush_work, md_submit_flush_data);
	atomic_set(&mddev->flush_pending, 1);
	rcu_read_lock();
	rdev_for_each_rcu(rdev, mddev)
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/*
			 * Two references on the device: one is dropped by
			 * md_end_flush() when the bio completes, the other
			 * below once the RCU read lock has been re-taken.
			 */
			struct bio *bi;
			atomic_inc(&rdev->nr_pending);
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
			bi->bi_end_io = md_end_flush;
			bi->bi_private = rdev;
			bio_set_dev(bi, rdev->bdev);
			bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
			atomic_inc(&mddev->flush_pending);
			submit_bio(bi);
			rcu_read_lock();
			rdev_dec_pending(rdev, mddev);
		}
	rcu_read_unlock();
	/* drop the initial count; queue stage two if all flushes are done */
	if (atomic_dec_and_test(&mddev->flush_pending))
		queue_work(md_wq, &mddev->flush_work);
}
573
574static void md_submit_flush_data(struct work_struct *ws)
575{
576 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
577 struct bio *bio = mddev->flush_bio;
578
579
580
581
582
583
584
585 spin_lock_irq(&mddev->lock);
586 mddev->prev_flush_start = mddev->start_flush;
587 mddev->flush_bio = NULL;
588 spin_unlock_irq(&mddev->lock);
589 wake_up(&mddev->sb_wait);
590
591 if (bio->bi_iter.bi_size == 0) {
592
593 bio_endio(bio);
594 } else {
595 bio->bi_opf &= ~REQ_PREFLUSH;
596 md_handle_request(mddev, bio);
597 }
598}
599
600
601
602
603
604
605
/*
 * md_flush_request - start (or piggy-back on) a flush of all member devices.
 * @mddev: array to flush
 * @bio:   the PREFLUSH bio that triggered the flush
 *
 * Waits until no flush is in progress (flush_bio is NULL) or a flush that
 * started after this request was issued has been recorded
 * (req_start < prev_flush_start).
 *
 * Return: true when @bio has been taken over (stored as flush_bio and the
 * flush work queued) or completed here; false when a sufficiently recent
 * flush already covers it and @bio still carries data - REQ_PREFLUSH has
 * then been cleared and, presumably, the caller goes on to process the bio
 * itself (confirm at the callers).
 */
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
	ktime_t req_start = ktime_get_boottime();
	spin_lock_irq(&mddev->lock);
	/* sleep (dropping mddev->lock) until one of the conditions holds */
	wait_event_lock_irq(mddev->sb_wait,
			    !mddev->flush_bio ||
			    ktime_before(req_start, mddev->prev_flush_start),
			    mddev->lock);

	/* no completed flush started after this request: start a new one */
	if (ktime_after(req_start, mddev->prev_flush_start)) {
		WARN_ON(mddev->flush_bio);
		mddev->flush_bio = bio;
		bio = NULL;
	}
	spin_unlock_irq(&mddev->lock);

	if (!bio) {
		INIT_WORK(&mddev->flush_work, submit_flushes);
		queue_work(md_wq, &mddev->flush_work);
	} else {
		/* a flush recorded in prev_flush_start already covers this bio */
		if (bio->bi_iter.bi_size == 0)
			/* no data attached: the bio is finished */
			bio_endio(bio);
		else {
			bio->bi_opf &= ~REQ_PREFLUSH;
			return false;
		}
	}
	return true;
}
EXPORT_SYMBOL(md_flush_request);
641
/*
 * Take a reference on @mddev by incrementing mddev->active.
 * Returns @mddev so the call can be chained; paired with mddev_put().
 */
static inline struct mddev *mddev_get(struct mddev *mddev)
{
	atomic_inc(&mddev->active);
	return mddev;
}
647
648static void mddev_delayed_delete(struct work_struct *ws);
649
650static void mddev_put(struct mddev *mddev)
651{
652 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
653 return;
654 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
655 mddev->ctime == 0 && !mddev->hold_active) {
656
657
658 list_del_init(&mddev->all_mddevs);
659
660
661
662
663
664
665 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
666 queue_work(md_misc_wq, &mddev->del_work);
667 }
668 spin_unlock(&all_mddevs_lock);
669}
670
671static void md_safemode_timeout(struct timer_list *t);
672
673void mddev_init(struct mddev *mddev)
674{
675 kobject_init(&mddev->kobj, &md_ktype);
676 mutex_init(&mddev->open_mutex);
677 mutex_init(&mddev->reconfig_mutex);
678 mutex_init(&mddev->bitmap_info.mutex);
679 INIT_LIST_HEAD(&mddev->disks);
680 INIT_LIST_HEAD(&mddev->all_mddevs);
681 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
682 atomic_set(&mddev->active, 1);
683 atomic_set(&mddev->openers, 0);
684 atomic_set(&mddev->active_io, 0);
685 spin_lock_init(&mddev->lock);
686 atomic_set(&mddev->flush_pending, 0);
687 init_waitqueue_head(&mddev->sb_wait);
688 init_waitqueue_head(&mddev->recovery_wait);
689 mddev->reshape_position = MaxSector;
690 mddev->reshape_backwards = 0;
691 mddev->last_sync_action = "none";
692 mddev->resync_min = 0;
693 mddev->resync_max = MaxSector;
694 mddev->level = LEVEL_NONE;
695}
696EXPORT_SYMBOL_GPL(mddev_init);
697
698static struct mddev *mddev_find_locked(dev_t unit)
699{
700 struct mddev *mddev;
701
702 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
703 if (mddev->unit == unit)
704 return mddev;
705
706 return NULL;
707}
708
709
710static dev_t mddev_alloc_unit(void)
711{
712 static int next_minor = 512;
713 int start = next_minor;
714 bool is_free = 0;
715 dev_t dev = 0;
716
717 while (!is_free) {
718 dev = MKDEV(MD_MAJOR, next_minor);
719 next_minor++;
720 if (next_minor > MINORMASK)
721 next_minor = 0;
722 if (next_minor == start)
723 return 0;
724 is_free = !mddev_find_locked(dev);
725 }
726
727 return dev;
728}
729
730static struct mddev *mddev_find(dev_t unit)
731{
732 struct mddev *mddev;
733
734 if (MAJOR(unit) != MD_MAJOR)
735 unit &= ~((1 << MdpMinorShift) - 1);
736
737 spin_lock(&all_mddevs_lock);
738 mddev = mddev_find_locked(unit);
739 if (mddev)
740 mddev_get(mddev);
741 spin_unlock(&all_mddevs_lock);
742
743 return mddev;
744}
745
746static struct mddev *mddev_alloc(dev_t unit)
747{
748 struct mddev *new;
749 int error;
750
751 if (unit && MAJOR(unit) != MD_MAJOR)
752 unit &= ~((1 << MdpMinorShift) - 1);
753
754 new = kzalloc(sizeof(*new), GFP_KERNEL);
755 if (!new)
756 return ERR_PTR(-ENOMEM);
757 mddev_init(new);
758
759 spin_lock(&all_mddevs_lock);
760 if (unit) {
761 error = -EEXIST;
762 if (mddev_find_locked(unit))
763 goto out_free_new;
764 new->unit = unit;
765 if (MAJOR(unit) == MD_MAJOR)
766 new->md_minor = MINOR(unit);
767 else
768 new->md_minor = MINOR(unit) >> MdpMinorShift;
769 new->hold_active = UNTIL_IOCTL;
770 } else {
771 error = -ENODEV;
772 new->unit = mddev_alloc_unit();
773 if (!new->unit)
774 goto out_free_new;
775 new->md_minor = MINOR(new->unit);
776 new->hold_active = UNTIL_STOP;
777 }
778
779 list_add(&new->all_mddevs, &all_mddevs);
780 spin_unlock(&all_mddevs_lock);
781 return new;
782out_free_new:
783 spin_unlock(&all_mddevs_lock);
784 kfree(new);
785 return ERR_PTR(error);
786}
787
788static const struct attribute_group md_redundancy_group;
789
790void mddev_unlock(struct mddev *mddev)
791{
792 if (mddev->to_remove) {
793
794
795
796
797
798
799
800
801
802
803
804
805 const struct attribute_group *to_remove = mddev->to_remove;
806 mddev->to_remove = NULL;
807 mddev->sysfs_active = 1;
808 mutex_unlock(&mddev->reconfig_mutex);
809
810 if (mddev->kobj.sd) {
811 if (to_remove != &md_redundancy_group)
812 sysfs_remove_group(&mddev->kobj, to_remove);
813 if (mddev->pers == NULL ||
814 mddev->pers->sync_request == NULL) {
815 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
816 if (mddev->sysfs_action)
817 sysfs_put(mddev->sysfs_action);
818 if (mddev->sysfs_completed)
819 sysfs_put(mddev->sysfs_completed);
820 if (mddev->sysfs_degraded)
821 sysfs_put(mddev->sysfs_degraded);
822 mddev->sysfs_action = NULL;
823 mddev->sysfs_completed = NULL;
824 mddev->sysfs_degraded = NULL;
825 }
826 }
827 mddev->sysfs_active = 0;
828 } else
829 mutex_unlock(&mddev->reconfig_mutex);
830
831
832
833
834 spin_lock(&pers_lock);
835 md_wakeup_thread(mddev->thread);
836 wake_up(&mddev->sb_wait);
837 spin_unlock(&pers_lock);
838}
839EXPORT_SYMBOL_GPL(mddev_unlock);
840
841struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
842{
843 struct md_rdev *rdev;
844
845 rdev_for_each_rcu(rdev, mddev)
846 if (rdev->desc_nr == nr)
847 return rdev;
848
849 return NULL;
850}
851EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
852
853static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
854{
855 struct md_rdev *rdev;
856
857 rdev_for_each(rdev, mddev)
858 if (rdev->bdev->bd_dev == dev)
859 return rdev;
860
861 return NULL;
862}
863
864struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
865{
866 struct md_rdev *rdev;
867
868 rdev_for_each_rcu(rdev, mddev)
869 if (rdev->bdev->bd_dev == dev)
870 return rdev;
871
872 return NULL;
873}
874EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
875
876static struct md_personality *find_pers(int level, char *clevel)
877{
878 struct md_personality *pers;
879 list_for_each_entry(pers, &pers_list, list) {
880 if (level != LEVEL_NONE && pers->level == level)
881 return pers;
882 if (strcmp(pers->name, clevel)==0)
883 return pers;
884 }
885 return NULL;
886}
887
888
889static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
890{
891 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
892 return MD_NEW_SIZE_SECTORS(num_sectors);
893}
894
895static int alloc_disk_sb(struct md_rdev *rdev)
896{
897 rdev->sb_page = alloc_page(GFP_KERNEL);
898 if (!rdev->sb_page)
899 return -ENOMEM;
900 return 0;
901}
902
903void md_rdev_clear(struct md_rdev *rdev)
904{
905 if (rdev->sb_page) {
906 put_page(rdev->sb_page);
907 rdev->sb_loaded = 0;
908 rdev->sb_page = NULL;
909 rdev->sb_start = 0;
910 rdev->sectors = 0;
911 }
912 if (rdev->bb_page) {
913 put_page(rdev->bb_page);
914 rdev->bb_page = NULL;
915 }
916 badblocks_exit(&rdev->badblocks);
917}
918EXPORT_SYMBOL_GPL(md_rdev_clear);
919
920static void super_written(struct bio *bio)
921{
922 struct md_rdev *rdev = bio->bi_private;
923 struct mddev *mddev = rdev->mddev;
924
925 if (bio->bi_status) {
926 pr_err("md: %s gets error=%d\n", __func__,
927 blk_status_to_errno(bio->bi_status));
928 md_error(mddev, rdev);
929 if (!test_bit(Faulty, &rdev->flags)
930 && (bio->bi_opf & MD_FAILFAST)) {
931 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
932 set_bit(LastDev, &rdev->flags);
933 }
934 } else
935 clear_bit(LastDev, &rdev->flags);
936
937 if (atomic_dec_and_test(&mddev->pending_writes))
938 wake_up(&mddev->sb_wait);
939 rdev_dec_pending(rdev, mddev);
940 bio_put(bio);
941}
942
943void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
944 sector_t sector, int size, struct page *page)
945{
946
947
948
949
950
951
952 struct bio *bio;
953 int ff = 0;
954
955 if (!page)
956 return;
957
958 if (test_bit(Faulty, &rdev->flags))
959 return;
960
961 bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
962
963 atomic_inc(&rdev->nr_pending);
964
965 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
966 bio->bi_iter.bi_sector = sector;
967 bio_add_page(bio, page, size, 0);
968 bio->bi_private = rdev;
969 bio->bi_end_io = super_written;
970
971 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
972 test_bit(FailFast, &rdev->flags) &&
973 !test_bit(LastDev, &rdev->flags))
974 ff = MD_FAILFAST;
975 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
976
977 atomic_inc(&mddev->pending_writes);
978 submit_bio(bio);
979}
980
981int md_super_wait(struct mddev *mddev)
982{
983
984 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
985 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
986 return -EAGAIN;
987 return 0;
988}
989
990int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
991 struct page *page, int op, int op_flags, bool metadata_op)
992{
993 struct bio bio;
994 struct bio_vec bvec;
995
996 bio_init(&bio, &bvec, 1);
997
998 if (metadata_op && rdev->meta_bdev)
999 bio_set_dev(&bio, rdev->meta_bdev);
1000 else
1001 bio_set_dev(&bio, rdev->bdev);
1002 bio.bi_opf = op | op_flags;
1003 if (metadata_op)
1004 bio.bi_iter.bi_sector = sector + rdev->sb_start;
1005 else if (rdev->mddev->reshape_position != MaxSector &&
1006 (rdev->mddev->reshape_backwards ==
1007 (sector >= rdev->mddev->reshape_position)))
1008 bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
1009 else
1010 bio.bi_iter.bi_sector = sector + rdev->data_offset;
1011 bio_add_page(&bio, page, size, 0);
1012
1013 submit_bio_wait(&bio);
1014
1015 return !bio.bi_status;
1016}
1017EXPORT_SYMBOL_GPL(sync_page_io);
1018
1019static int read_disk_sb(struct md_rdev *rdev, int size)
1020{
1021 char b[BDEVNAME_SIZE];
1022
1023 if (rdev->sb_loaded)
1024 return 0;
1025
1026 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1027 goto fail;
1028 rdev->sb_loaded = 1;
1029 return 0;
1030
1031fail:
1032 pr_err("md: disabled device %s, could not read superblock.\n",
1033 bdevname(rdev->bdev,b));
1034 return -EINVAL;
1035}
1036
1037static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1038{
1039 return sb1->set_uuid0 == sb2->set_uuid0 &&
1040 sb1->set_uuid1 == sb2->set_uuid1 &&
1041 sb1->set_uuid2 == sb2->set_uuid2 &&
1042 sb1->set_uuid3 == sb2->set_uuid3;
1043}
1044
1045static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1046{
1047 int ret;
1048 mdp_super_t *tmp1, *tmp2;
1049
1050 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1051 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1052
1053 if (!tmp1 || !tmp2) {
1054 ret = 0;
1055 goto abort;
1056 }
1057
1058 *tmp1 = *sb1;
1059 *tmp2 = *sb2;
1060
1061
1062
1063
1064 tmp1->nr_disks = 0;
1065 tmp2->nr_disks = 0;
1066
1067 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1068abort:
1069 kfree(tmp1);
1070 kfree(tmp2);
1071 return ret;
1072}
1073
1074static u32 md_csum_fold(u32 csum)
1075{
1076 csum = (csum & 0xffff) + (csum >> 16);
1077 return (csum & 0xffff) + (csum >> 16);
1078}
1079
1080static unsigned int calc_sb_csum(mdp_super_t *sb)
1081{
1082 u64 newcsum = 0;
1083 u32 *sb32 = (u32*)sb;
1084 int i;
1085 unsigned int disk_csum, csum;
1086
1087 disk_csum = sb->sb_csum;
1088 sb->sb_csum = 0;
1089
1090 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1091 newcsum += sb32[i];
1092 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1093
1094#ifdef CONFIG_ALPHA
1095
1096
1097
1098
1099
1100
1101
1102
1103 sb->sb_csum = md_csum_fold(disk_csum);
1104#else
1105 sb->sb_csum = disk_csum;
1106#endif
1107 return csum;
1108}
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
/*
 * struct super_type - operations implementing one on-disk superblock format.
 *
 * The super_90_* functions in this file follow these signatures; the
 * descriptions below say only what is visible from them.
 */
struct super_type {
	char		    *name;
	struct module	    *owner;
	/*
	 * Read and sanity-check the superblock of @rdev, optionally comparing
	 * it with @refdev.  super_90_load() returns a negative errno on
	 * failure, 1 when @rdev is a non-spare with no @refdev or with a
	 * higher event count than @refdev, and 0 otherwise.
	 */
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	/*
	 * Apply the loaded superblock of @rdev to @mddev / @rdev state.
	 * super_90_validate() returns 0 or -EINVAL.
	 */
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	/* Build the superblock of @rdev in rdev->sb_page from @mddev state. */
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	/* NOTE(review): presumably handles a size change of @rdev - implementation not visible here. */
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	/* NOTE(review): presumably checks whether @new_offset is acceptable - implementation not visible here. */
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164int md_check_no_bitmap(struct mddev *mddev)
1165{
1166 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1167 return 0;
1168 pr_warn("%s: bitmaps are not supported for %s\n",
1169 mdname(mddev), mddev->pers->name);
1170 return 1;
1171}
1172EXPORT_SYMBOL(md_check_no_bitmap);
1173
1174
1175
1176
1177static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1178{
1179 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1180 mdp_super_t *sb;
1181 int ret;
1182 bool spare_disk = true;
1183
1184
1185
1186
1187
1188
1189
1190 rdev->sb_start = calc_dev_sboffset(rdev);
1191
1192 ret = read_disk_sb(rdev, MD_SB_BYTES);
1193 if (ret)
1194 return ret;
1195
1196 ret = -EINVAL;
1197
1198 bdevname(rdev->bdev, b);
1199 sb = page_address(rdev->sb_page);
1200
1201 if (sb->md_magic != MD_SB_MAGIC) {
1202 pr_warn("md: invalid raid superblock magic on %s\n", b);
1203 goto abort;
1204 }
1205
1206 if (sb->major_version != 0 ||
1207 sb->minor_version < 90 ||
1208 sb->minor_version > 91) {
1209 pr_warn("Bad version number %d.%d on %s\n",
1210 sb->major_version, sb->minor_version, b);
1211 goto abort;
1212 }
1213
1214 if (sb->raid_disks <= 0)
1215 goto abort;
1216
1217 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1218 pr_warn("md: invalid superblock checksum on %s\n", b);
1219 goto abort;
1220 }
1221
1222 rdev->preferred_minor = sb->md_minor;
1223 rdev->data_offset = 0;
1224 rdev->new_data_offset = 0;
1225 rdev->sb_size = MD_SB_BYTES;
1226 rdev->badblocks.shift = -1;
1227
1228 if (sb->level == LEVEL_MULTIPATH)
1229 rdev->desc_nr = -1;
1230 else
1231 rdev->desc_nr = sb->this_disk.number;
1232
1233
1234 if (sb->level == LEVEL_MULTIPATH ||
1235 (rdev->desc_nr >= 0 &&
1236 rdev->desc_nr < MD_SB_DISKS &&
1237 sb->disks[rdev->desc_nr].state &
1238 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1239 spare_disk = false;
1240
1241 if (!refdev) {
1242 if (!spare_disk)
1243 ret = 1;
1244 else
1245 ret = 0;
1246 } else {
1247 __u64 ev1, ev2;
1248 mdp_super_t *refsb = page_address(refdev->sb_page);
1249 if (!md_uuid_equal(refsb, sb)) {
1250 pr_warn("md: %s has different UUID to %s\n",
1251 b, bdevname(refdev->bdev,b2));
1252 goto abort;
1253 }
1254 if (!md_sb_equal(refsb, sb)) {
1255 pr_warn("md: %s has same UUID but different superblock to %s\n",
1256 b, bdevname(refdev->bdev, b2));
1257 goto abort;
1258 }
1259 ev1 = md_event(sb);
1260 ev2 = md_event(refsb);
1261
1262 if (!spare_disk && ev1 > ev2)
1263 ret = 1;
1264 else
1265 ret = 0;
1266 }
1267 rdev->sectors = rdev->sb_start;
1268
1269
1270
1271
1272 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1273 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1274
1275 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1276
1277 ret = -EINVAL;
1278
1279 abort:
1280 return ret;
1281}
1282
1283
1284
1285
/*
 * super_90_validate - apply a loaded 0.90 superblock to @mddev and @rdev.
 *
 * The per-device role (raid_disk) and the Faulty, In_sync, Bitmap_sync and
 * WriteMostly flags are reset first.  Then one of four cases applies:
 *
 *  1. mddev->raid_disks == 0: this is the first device; the array
 *     geometry, timestamps, event count, reshape state, recovery
 *     checkpoint, UUID and bitmap defaults are taken from the superblock.
 *  2. No personality yet: the device is rejected with -EINVAL when it is
 *     marked SYNC or ACTIVE in its own slot and its event count plus one
 *     is still below mddev->events.
 *  3. Running array with a bitmap: a device older than
 *     bitmap->events_cleared is accepted without a role (return 0); one
 *     older than mddev->events gets Bitmap_sync set.
 *  4. Running array without bitmap: a device older than mddev->events is
 *     accepted without a role (return 0).
 *
 * Finally, unless the level is LEVEL_MULTIPATH, the device's role and
 * flags are derived from its descriptor in sb->disks[]; multipath devices
 * are simply marked In_sync.
 *
 * Return: 0 on success, -EINVAL in case 2 above.
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
	__u64 ev1 = md_event(sb);

	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 0;
		mddev->minor_version = sb->minor_version;
		mddev->patch_version = sb->patch_version;
		mddev->external = 0;
		/* chunk_size is shifted right by 9, i.e. divided by 512 */
		mddev->chunk_sectors = sb->chunk_size >> 9;
		mddev->ctime = sb->ctime;
		mddev->utime = sb->utime;
		mddev->level = sb->level;
		mddev->clevel[0] = 0;
		mddev->layout = sb->layout;
		mddev->raid_disks = sb->raid_disks;
		/* dev_sectors is twice sb->size */
		mddev->dev_sectors = ((sector_t)sb->size) * 2;
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/* default bitmap placement: offset MD_SB_BYTES >> 9, space 64*2 minus that */
		mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
		mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
		mddev->reshape_backwards = 0;

		if (mddev->minor_version >= 91) {
			/* minor version 91 or later: reshape state comes from the superblock */
			mddev->reshape_position = sb->reshape_position;
			mddev->delta_disks = sb->delta_disks;
			mddev->new_level = sb->new_level;
			mddev->new_layout = sb->new_layout;
			mddev->new_chunk_sectors = sb->new_chunk >> 9;
			if (mddev->delta_disks < 0)
				mddev->reshape_backwards = 1;
		} else {
			/* no reshape: the "new" geometry equals the current one */
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}
		if (mddev->level == 0)
			mddev->layout = -1;

		if (sb->state & (1<<MD_SB_CLEAN))
			mddev->recovery_cp = MaxSector;
		else {
			/* the stored checkpoint is used only if recorded at the current event count */
			if (sb->events_hi == sb->cp_events_hi &&
				sb->events_lo == sb->cp_events_lo) {
				mddev->recovery_cp = sb->recovery_cp;
			} else
				mddev->recovery_cp = 0;
		}

		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		/* superblock says a bitmap is present and no bitmap file is set */
		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
		}

	} else if (mddev->pers == NULL) {
		/*
		 * No personality yet: require an up-to-date event count
		 * (one behind is tolerated) from devices marked SYNC or
		 * ACTIVE; other devices are not checked.
		 */
		++ev1;
		if (sb->disks[rdev->desc_nr].state & (
			    (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/*
		 * Array with a bitmap: older than events_cleared means no
		 * role is assigned; older than mddev->events only sets
		 * Bitmap_sync.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		if (ev1 < mddev->events)
			/* out of date: raid_disk stays at -1 */
			return 0;
	}

	if (mddev->level != LEVEL_MULTIPATH) {
		desc = sb->disks + rdev->desc_nr;

		if (desc->state & (1<<MD_DISK_FAULTY))
			set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->saved_raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/*
			 * Active but not in sync: for minor version >= 91
			 * the device keeps its slot and recovery_offset is
			 * set to 0.
			 */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		if (desc->state & (1<<MD_DISK_FAILFAST))
			set_bit(FailFast, &rdev->flags);
	} else /* multipath devices are always marked In_sync */
		set_bit(In_sync, &rdev->flags);
	return 0;
}
1411
1412
1413
1414
/*
 * super_90_sync - refill @rdev's in-memory v0.90 superblock from @mddev.
 *
 * The superblock page (rdev->sb_page) is cleared and rebuilt from the
 * array-wide fields of @mddev and from every device on the array's list.
 * Side effects beyond the page itself: rdev->sb_size is set to
 * MD_SB_BYTES, mddev->minor_version is set to 90 or 91, and every member
 * device gets a freshly assigned desc_nr.
 */
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	mdp_super_t *sb;
	struct md_rdev *rdev2;
	/* descriptor slots for non-active devices are handed out from here up */
	int next_spare = mddev->raid_disks;

	int i;
	int active=0, working=0,failed=0,spare=0,nr_disks=0;

	rdev->sb_size = MD_SB_BYTES;

	sb = page_address(rdev->sb_page);

	/* start from an all-zero superblock; every field is rewritten below */
	memset(sb, 0, sizeof(*sb));

	sb->md_magic = MD_SB_MAGIC;
	sb->major_version = mddev->major_version;
	sb->patch_version = mddev->patch_version;
	sb->gvalid_words = 0;
	/* the 16-byte array uuid is stored as four separate 4-byte words */
	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
	memcpy(&sb->set_uuid3, mddev->uuid+12,4);

	/* timestamps are clamped to the range 0..U32_MAX before storing */
	sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
	sb->level = mddev->level;
	/* stored as half the sector count */
	sb->size = mddev->dev_sectors / 2;
	sb->raid_disks = mddev->raid_disks;
	sb->md_minor = mddev->md_minor;
	sb->not_persistent = 0;
	sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
	sb->state = 0;
	/* the 64-bit event counter is split into two 32-bit halves */
	sb->events_hi = (mddev->events>>32);
	sb->events_lo = (u32)mddev->events;

	/* minor version 91 is used only while a reshape position is recorded */
	if (mddev->reshape_position == MaxSector)
		sb->minor_version = 90;
	else {
		sb->minor_version = 91;
		sb->reshape_position = mddev->reshape_position;
		sb->new_level = mddev->new_level;
		sb->delta_disks = mddev->delta_disks;
		sb->new_layout = mddev->new_layout;
		sb->new_chunk = mddev->new_chunk_sectors << 9;
	}
	mddev->minor_version = sb->minor_version;
	if (mddev->in_sync)
	{
		/* record the checkpoint together with the current event count */
		sb->recovery_cp = mddev->recovery_cp;
		sb->cp_events_hi = (mddev->events>>32);
		sb->cp_events_lo = (u32)mddev->events;
		/* the clean bit is set only when recovery_cp is MaxSector */
		if (mddev->recovery_cp == MaxSector)
			sb->state = (1<< MD_SB_CLEAN);
	} else
		sb->recovery_cp = 0;

	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_sectors << 9;

	/* flag a bitmap only when one exists and it is not file-backed */
	if (mddev->bitmap && mddev->bitmap_info.file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	/*
	 * Slot 0 starts out marked removed; it is overwritten in the loop
	 * below if some device ends up with descriptor number 0.
	 */
	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	rdev_for_each(rdev2, mddev) {
		mdp_disk_t *d;
		int desc_nr;
		int is_active = test_bit(In_sync, &rdev2->flags);

		/*
		 * With minor version 91 any device that has a raid_disk slot is
		 * treated as active, even if it is not flagged In_sync.
		 */
		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			is_active = 1;
		/* devices without a slot, or faulty ones, are never active */
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		/* active devices keep their slot number, others get the next free one */
		if (is_active)
			desc_nr = rdev2->raid_disk;
		else
			desc_nr = next_spare++;
		rdev2->desc_nr = desc_nr;
		d = &sb->disks[rdev2->desc_nr];
		nr_disks++;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (is_active)
			d->raid_disk = rdev2->raid_disk;
		else
			d->raid_disk = rdev2->desc_nr;
		/* state: faulty, active (optionally in sync), or spare (0) */
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			working++;
		} else {
			d->state = 0;
			spare++;
			working++;
		}
		if (test_bit(WriteMostly, &rdev2->flags))
			d->state |= (1<<MD_DISK_WRITEMOSTLY);
		if (test_bit(FailFast, &rdev2->flags))
			d->state |= (1<<MD_DISK_FAILFAST);
	}

	/*
	 * Any raid slot still untouched (state and number both zero) has no
	 * device: mark it removed and faulty and count it as failed.
	 */
	for (i=0 ; i < mddev->raid_disks ; i++) {
		mdp_disk_t *d = &sb->disks[i];
		if (d->state == 0 && d->number == 0) {
			d->number = i;
			d->raid_disk = i;
			d->state = (1<<MD_DISK_REMOVED);
			d->state |= (1<<MD_DISK_FAULTY);
			failed++;
		}
	}
	sb->nr_disks = nr_disks;
	sb->active_disks = active;
	sb->working_disks = working;
	sb->failed_disks = failed;
	sb->spare_disks = spare;

	/* this_disk is a copy of @rdev's own descriptor; checksum goes last */
	sb->this_disk = sb->disks[rdev->desc_nr];
	sb->sb_csum = calc_sb_csum(sb);
}
1555
1556
1557
1558
1559static unsigned long long
1560super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1561{
1562 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1563 return 0;
1564 if (rdev->mddev->bitmap_info.offset)
1565 return 0;
1566 rdev->sb_start = calc_dev_sboffset(rdev);
1567 if (!num_sectors || num_sectors > rdev->sb_start)
1568 num_sectors = rdev->sb_start;
1569
1570
1571
1572 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1573 num_sectors = (sector_t)(2ULL << 32) - 2;
1574 do {
1575 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1576 rdev->sb_page);
1577 } while (md_super_wait(rdev->mddev) < 0);
1578 return num_sectors;
1579}
1580
/*
 * super_90_allow_new_offset - may the data offset of @rdev become @new_offset?
 *
 * For v0.90 metadata the only accepted value is 0.  @rdev is not examined.
 * Returns 1 when allowed, 0 otherwise.
 */
static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	return new_offset ? 0 : 1;
}
1587
1588
1589
1590
1591
1592static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1593{
1594 __le32 disk_csum;
1595 u32 csum;
1596 unsigned long long newcsum;
1597 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1598 __le32 *isuper = (__le32*)sb;
1599
1600 disk_csum = sb->sb_csum;
1601 sb->sb_csum = 0;
1602 newcsum = 0;
1603 for (; size >= 4; size -= 4)
1604 newcsum += le32_to_cpu(*isuper++);
1605
1606 if (size == 2)
1607 newcsum += le16_to_cpu(*(__le16*) isuper);
1608
1609 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1610 sb->sb_csum = disk_csum;
1611 return cpu_to_le32(csum);
1612}
1613
/*
 * super_1_load - read and sanity-check the v1.x superblock of @rdev.
 * @rdev:          device to load from
 * @refdev:        device whose already-loaded superblock is the reference,
 *                 or NULL when there is none yet
 * @minor_version: 0, 1 or 2; selects where on the device the superblock is
 *
 * On success fills in @rdev's superblock-derived fields (sb_start,
 * data_offset, new_data_offset, sb_size, desc_nr, bad block list, ppl
 * info, sectors) and returns 1 or 0:
 *   - without @refdev: 1 unless the device looks like a spare;
 *   - with @refdev: 1 when the device is not a spare and its event count
 *     is higher than @refdev's, otherwise 0.
 * Returns a negative errno when the superblock is missing, inconsistent
 * or does not belong with @refdev.
 */
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;
	bool spare_disk = true;

	/*
	 * Work out where the superblock lives (in sectors):
	 *   0: 8*2 sectors before the end of the device, rounded down to a
	 *      multiple of 4*2 sectors
	 *   1: sector 0
	 *   2: sector 8
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/* always read 4096 bytes, the largest size accepted below */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;

	sb = page_address(rdev->sb_page);

	/*
	 * Basic identity checks: magic, major version, a max_dev that fits in
	 * the 4096 bytes read, a self-reported offset that matches where we
	 * looked, and no feature bits outside MD_FEATURE_ALL.
	 */
	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		pr_warn("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		pr_warn("md: data_size too small on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	/*
	 * All padding must be zero: pad0, the first byte of pad3, and (via the
	 * overlapping memcmp) every following pad3 byte equal to its predecessor.
	 */
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	/* new_offset is a signed delta, applied only while a reshape is active */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	/* superblock size, rounded up to the device's logical block size */
	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	/*
	 * For minor versions 1 and 2 neither the current nor the new data
	 * offset may fall inside the superblock.
	 */
	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/*
		 * Load the on-disk bad block log into rdev->badblocks.  The
		 * log may occupy at most one page.
		 */
		s32 offset;
		sector_t bb_sector;
		__le64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, REQ_OP_READ, 0, true))
			return -EIO;
		bbp = (__le64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		/* one 64-bit entry per 8 bytes: low 10 bits length, rest start */
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			/* an all-ones entry terminates the list */
			if (bb + 1 == 0)
				break;
			if (badblocks_set(&rdev->badblocks, sector, count, 1))
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if ((le32_to_cpu(sb->feature_map) &
	    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
		/* ppl.offset is a signed offset relative to the superblock */
		rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
		rdev->ppl.size = le16_to_cpu(sb->ppl.size);
		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
	}

	/* the RAID0 layout feature bit is only valid on level 0 */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
	    sb->level != 0)
		return -EINVAL;

	/*
	 * Not a spare if the array is multipath, or if this device's role
	 * entry is a real slot (< MD_DISK_ROLE_MAX) or the journal.
	 */
	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
		(rdev->desc_nr >= 0 &&
		rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
		spare_disk = false;

	if (!refdev) {
		if (!spare_disk)
			ret = 1;
		else
			ret = 0;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		/* must describe the same array as the reference device */
		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			pr_warn("md: %s has strangely different superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		/* a non-spare with a newer event count wins over the reference */
		if (!spare_disk && ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	/*
	 * Space available for data: from data_offset to the end of the device
	 * for minor versions 1/2, or everything before the superblock for 0.
	 * The recorded data_size must fit in it.
	 */
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}
1800
/*
 * super_1_validate - apply @rdev's loaded v1.x superblock to @mddev/@rdev.
 *
 * Three situations are distinguished:
 *   - mddev->raid_disks == 0: @mddev has not been populated yet, so the
 *     array-wide fields are taken from this superblock;
 *   - mddev->pers == NULL: array fields are set but no personality is
 *     attached; a non-spare device with too old an event count is rejected;
 *   - otherwise: the device is being added to an array that has a
 *     personality; an out-of-date device is accepted but may be left
 *     without a role.
 * After that the device's role and per-device flags are derived from the
 * superblock.
 *
 * Returns 0 on success (including the early "leave raid_disk at -1"
 * returns) or -EINVAL when the superblock is unacceptable.
 */
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	/* start from "no role, no flags"; they are rebuilt further down */
	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(Bitmap_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		mddev->ctime = le64_to_cpu(sb->ctime);
		mddev->utime = le64_to_cpu(sb->utime);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/*
		 * Default bitmap placement, in sectors: 1024 bytes past the
		 * superblock start, using the rest of the first 4096 bytes.
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		/* same bound super_1_load() enforces on max_dev */
		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/*
			 * The superblock does not record how much room the
			 * bitmap has.  For minor version 0 it is derived from
			 * the (signed) offset: up to 8 sectors when the bitmap
			 * follows the superblock, or the distance back to the
			 * superblock when it precedes it.  For other minor
			 * versions the space is left at 0.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			/*
			 * Backwards when shrinking, or when the disk count is
			 * unchanged and the superblock says so explicitly.
			 */
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			/* no reshape: "new" geometry mirrors the current one */
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

		/* level 0 without the layout feature bit: layout is set to -1 */
		if (mddev->level == 0 &&
		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
			mddev->layout = -1;

		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
			set_bit(MD_HAS_JOURNAL, &mddev->flags);

		if (le32_to_cpu(sb->feature_map) &
		    (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
			/* PPL cannot be combined with a bitmap or a journal */
			if (le32_to_cpu(sb->feature_map) &
			    (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
				return -EINVAL;
			/* the two PPL feature bits are mutually exclusive */
			if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
			    (le32_to_cpu(sb->feature_map) &
					    MD_FEATURE_MULTIPLE_PPLS))
				return -EINVAL;
			set_bit(MD_HAS_PPL, &mddev->flags);
		}
	} else if (mddev->pers == NULL) {
		/*
		 * Insist on a good event counter for devices that hold a real
		 * role or the journal; being one event behind is tolerated.
		 * Spares are not checked.
		 */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/*
		 * With a bitmap an older device can be accepted, but not one
		 * older than events_cleared: that one is left without a role.
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
		if (ev1 < mddev->events)
			set_bit(Bitmap_sync, &rdev->flags);
	} else {
		/* out-of-date device and no bitmap: leave raid_disk at -1 */
		if (ev1 < mddev->events)
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		/* a descriptor number outside dev_roles[] means "spare" */
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case MD_DISK_ROLE_SPARE:
			break;
		case MD_DISK_ROLE_FAULTY:
			set_bit(Faulty, &rdev->flags);
			break;
		case MD_DISK_ROLE_JOURNAL:
			if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
				/* journal role without the array-level journal feature */
				pr_warn("md: journal device provided without journal feature, ignoring the device\n");
				return -EINVAL;
			}
			set_bit(Journal, &rdev->flags);
			rdev->journal_tail = le64_to_cpu(sb->journal_tail);
			rdev->raid_disk = 0;
			break;
		default:
			/* any other value is the device's slot in the array */
			rdev->saved_raid_disk = role;
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET)) {
				/* partially recovered: not In_sync */
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
				if (!(le32_to_cpu(sb->feature_map) &
				      MD_FEATURE_RECOVERY_BITMAP))
					rdev->saved_raid_disk = -1;
			} else {
				/* In_sync is withheld while recovery is frozen */
				if (!test_bit(MD_RECOVERY_FROZEN,
					      &mddev->recovery))
					set_bit(In_sync, &rdev->flags);
			}
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (sb->devflags & FailFast1)
			set_bit(FailFast, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else
		/* multipath members are always marked In_sync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}
1973
/*
 * super_1_sync - bring @rdev's in-memory v1.x superblock up to date.
 *
 * Unlike the v0.90 variant the page is not cleared: only the fields set
 * below are rewritten.  feature_map is rebuilt from scratch, the bad
 * block log page is regenerated when the list has changed, dev_roles[] is
 * rebuilt from every device on the array's list, and the checksum is
 * recomputed last.  rdev->sb_size may grow when max_dev grows.
 */
static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;

	sb = page_address(rdev->sb_page);

	/* reset everything that is conditionally filled in further down */
	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	/*
	 * resync_offset: the real checkpoint when in sync, MaxSector when
	 * MD_JOURNAL_CLEAN is set, otherwise 0.
	 */
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
		sb->resync_offset = cpu_to_le64(MaxSector);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);
	/* mirror the FailFast / WriteMostly rdev flags into devflags */
	if (test_bit(FailFast, &rdev->flags))
		sb->devflags |= FailFast1;
	else
		sb->devflags &= ~FailFast1;

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	/* a bitmap that is not file-backed is located via bitmap_offset */
	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	/* a non-journal device with a slot but not In_sync is mid-recovery */
	if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
		if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
	}

	if (test_bit(Journal, &rdev->flags))
		sb->journal_tail = cpu_to_le64(rdev->journal_tail);
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	/* reshape fields are written only while a reshape position is set */
	if (mddev->reshape_position != MaxSector) {
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		/* new_offset stores the difference, not the absolute offset */
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (mddev_is_clustered(mddev))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);

	if (rdev->badblocks.count == 0)
		/* no bad blocks recorded: nothing to do */
		;
	else if (sb->bblog_offset == 0)
		/* bad blocks exist but the superblock has no log area */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		__le64 *bbp = (__le64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

			/*
			 * Regenerate the log page under the badblocks seqlock:
			 * if the list changed while it was being copied, start
			 * over.
			 */
retry:
			seq = read_seqbegin(&bb->lock);

			/* unused entries stay all-ones */
			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				/* on-disk form: start << 10 | length */
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			/* where and how large the log is, for whoever writes it */
			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	/* dev_roles[] must cover the highest descriptor number in use */
	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		int bmask;
		/* grow the role table and the (block-rounded) superblock size */
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	/* default every entry to "spare"; real roles are filled in below */
	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);

	if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
		sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);

	if (test_bit(MD_HAS_PPL, &mddev->flags)) {
		if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
			sb->feature_map |=
			    cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
		else
			sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
		sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
		sb->ppl.size = cpu_to_le16(rdev->ppl.size);
	}

	/*
	 * Role per device, in priority order: faulty, in-sync slot, journal,
	 * slot still recovering, spare.
	 */
	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (test_bit(Journal, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}
2141
/*
 * super_1_choose_bm_space - how many sectors to reserve for a bitmap.
 * @dev_size: device size in sectors
 *
 * Returns, in sectors:
 *   0      for devices smaller than 64*2 sectors,
 *   128*2  when at least 200*1024*1024*2 sectors remain past the first 64*2,
 *   64*2   when more than 8*1024*1024*2 sectors remain past the first 4*2,
 *   4*2    otherwise.
 */
static sector_t super_1_choose_bm_space(sector_t dev_size)
{
	/* the size check comes first so the subtractions below cannot wrap */
	if (dev_size < 64*2)
		return 0;

	if (dev_size - 64*2 >= 200*1024*1024*2)
		return 128*2;

	if (dev_size - 4*2 > 8*1024*1024*2)
		return 64*2;

	return 4*2;
}
2159
2160static unsigned long long
2161super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2162{
2163 struct mdp_superblock_1 *sb;
2164 sector_t max_sectors;
2165 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2166 return 0;
2167 if (rdev->data_offset != rdev->new_data_offset)
2168 return 0;
2169 if (rdev->sb_start < rdev->data_offset) {
2170
2171 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2172 max_sectors -= rdev->data_offset;
2173 if (!num_sectors || num_sectors > max_sectors)
2174 num_sectors = max_sectors;
2175 } else if (rdev->mddev->bitmap_info.offset) {
2176
2177 return 0;
2178 } else {
2179
2180 sector_t sb_start, bm_space;
2181 sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
2182
2183
2184 sb_start = dev_size - 8*2;
2185 sb_start &= ~(sector_t)(4*2 - 1);
2186
2187 bm_space = super_1_choose_bm_space(dev_size);
2188
2189
2190
2191
2192 max_sectors = sb_start - bm_space - 4*2;
2193
2194 if (!num_sectors || num_sectors > max_sectors)
2195 num_sectors = max_sectors;
2196 }
2197 sb = page_address(rdev->sb_page);
2198 sb->data_size = cpu_to_le64(num_sectors);
2199 sb->super_offset = cpu_to_le64(rdev->sb_start);
2200 sb->sb_csum = calc_sb_1_csum(sb);
2201 do {
2202 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2203 rdev->sb_page);
2204 } while (md_super_wait(rdev->mddev) < 0);
2205 return num_sectors;
2206
2207}
2208
2209static int
2210super_1_allow_new_offset(struct md_rdev *rdev,
2211 unsigned long long new_offset)
2212{
2213
2214 struct bitmap *bitmap;
2215 if (new_offset >= rdev->data_offset)
2216 return 1;
2217
2218
2219
2220 if (rdev->mddev->minor_version == 0)
2221 return 1;
2222
2223
2224
2225
2226
2227
2228
2229 if (rdev->sb_start + (32+4)*2 > new_offset)
2230 return 0;
2231 bitmap = rdev->mddev->bitmap;
2232 if (bitmap && !rdev->mddev->bitmap_info.file &&
2233 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2234 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2235 return 0;
2236 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2237 return 0;
2238
2239 return 1;
2240}
2241
/*
 * Table of supported superblock formats.  sync_super() indexes it with
 * mddev->major_version, so entry [0] is the 0.90 handler set and entry
 * [1] the version-1 handler set.
 */
static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};
2262
2263static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2264{
2265 if (mddev->sync_super) {
2266 mddev->sync_super(mddev, rdev);
2267 return;
2268 }
2269
2270 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2271
2272 super_types[mddev->major_version].sync_super(mddev, rdev);
2273}
2274
2275static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2276{
2277 struct md_rdev *rdev, *rdev2;
2278
2279 rcu_read_lock();
2280 rdev_for_each_rcu(rdev, mddev1) {
2281 if (test_bit(Faulty, &rdev->flags) ||
2282 test_bit(Journal, &rdev->flags) ||
2283 rdev->raid_disk == -1)
2284 continue;
2285 rdev_for_each_rcu(rdev2, mddev2) {
2286 if (test_bit(Faulty, &rdev2->flags) ||
2287 test_bit(Journal, &rdev2->flags) ||
2288 rdev2->raid_disk == -1)
2289 continue;
2290 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2291 rcu_read_unlock();
2292 return 1;
2293 }
2294 }
2295 }
2296 rcu_read_unlock();
2297 return 0;
2298}
2299
/*
 * File-local list head.  NOTE(review): its users are outside this part of
 * the file; going by the name it collects devices waiting to be assembled
 * into arrays - confirm against the callers.
 */
static LIST_HEAD(pending_raid_disks);
2301
2302
2303
2304
2305
2306
2307
2308
2309int md_integrity_register(struct mddev *mddev)
2310{
2311 struct md_rdev *rdev, *reference = NULL;
2312
2313 if (list_empty(&mddev->disks))
2314 return 0;
2315 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2316 return 0;
2317 rdev_for_each(rdev, mddev) {
2318
2319 if (test_bit(Faulty, &rdev->flags))
2320 continue;
2321 if (rdev->raid_disk < 0)
2322 continue;
2323 if (!reference) {
2324
2325 reference = rdev;
2326 continue;
2327 }
2328
2329 if (blk_integrity_compare(reference->bdev->bd_disk,
2330 rdev->bdev->bd_disk) < 0)
2331 return -EINVAL;
2332 }
2333 if (!reference || !bdev_get_integrity(reference->bdev))
2334 return 0;
2335
2336
2337
2338
2339 blk_integrity_register(mddev->gendisk,
2340 bdev_get_integrity(reference->bdev));
2341
2342 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2343 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
2344 (mddev->level != 1 && mddev->level != 10 &&
2345 bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
2346
2347
2348
2349
2350
2351
2352 pr_err("md: failed to create integrity pool for %s\n",
2353 mdname(mddev));
2354 return -EINVAL;
2355 }
2356 return 0;
2357}
2358EXPORT_SYMBOL(md_integrity_register);
2359
2360
2361
2362
2363
2364int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2365{
2366 struct blk_integrity *bi_mddev;
2367 char name[BDEVNAME_SIZE];
2368
2369 if (!mddev->gendisk)
2370 return 0;
2371
2372 bi_mddev = blk_get_integrity(mddev->gendisk);
2373
2374 if (!bi_mddev)
2375 return 0;
2376
2377 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2378 pr_err("%s: incompatible integrity profile for %s\n",
2379 mdname(mddev), bdevname(rdev->bdev, name));
2380 return -ENXIO;
2381 }
2382
2383 return 0;
2384}
2385EXPORT_SYMBOL(md_integrity_add_rdev);
2386
2387static bool rdev_read_only(struct md_rdev *rdev)
2388{
2389 return bdev_read_only(rdev->bdev) ||
2390 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2391}
2392
2393static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2394{
2395 char b[BDEVNAME_SIZE];
2396 int err;
2397
2398
2399 if (find_rdev(mddev, rdev->bdev->bd_dev))
2400 return -EEXIST;
2401
2402 if (rdev_read_only(rdev) && mddev->pers)
2403 return -EROFS;
2404
2405
2406 if (!test_bit(Journal, &rdev->flags) &&
2407 rdev->sectors &&
2408 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2409 if (mddev->pers) {
2410
2411
2412
2413
2414 if (mddev->level > 0)
2415 return -ENOSPC;
2416 } else
2417 mddev->dev_sectors = rdev->sectors;
2418 }
2419
2420
2421
2422
2423
2424 rcu_read_lock();
2425 if (rdev->desc_nr < 0) {
2426 int choice = 0;
2427 if (mddev->pers)
2428 choice = mddev->raid_disks;
2429 while (md_find_rdev_nr_rcu(mddev, choice))
2430 choice++;
2431 rdev->desc_nr = choice;
2432 } else {
2433 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2434 rcu_read_unlock();
2435 return -EBUSY;
2436 }
2437 }
2438 rcu_read_unlock();
2439 if (!test_bit(Journal, &rdev->flags) &&
2440 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2441 pr_warn("md: %s: array is limited to %d devices\n",
2442 mdname(mddev), mddev->max_disks);
2443 return -EBUSY;
2444 }
2445 bdevname(rdev->bdev,b);
2446 strreplace(b, '/', '!');
2447
2448 rdev->mddev = mddev;
2449 pr_debug("md: bind<%s>\n", b);
2450
2451 if (mddev->raid_disks)
2452 mddev_create_serial_pool(mddev, rdev, false);
2453
2454 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2455 goto fail;
2456
2457
2458 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2459 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2460 rdev->sysfs_unack_badblocks =
2461 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2462 rdev->sysfs_badblocks =
2463 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2464
2465 list_add_rcu(&rdev->same_set, &mddev->disks);
2466 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2467
2468
2469 mddev->recovery_disabled++;
2470
2471 return 0;
2472
2473 fail:
2474 pr_warn("md: failed to register dev-%s for %s\n",
2475 b, mdname(mddev));
2476 return err;
2477}
2478
2479static void rdev_delayed_delete(struct work_struct *ws)
2480{
2481 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2482 kobject_del(&rdev->kobj);
2483 kobject_put(&rdev->kobj);
2484}
2485
/*
 * unbind_rdev_from_array - detach @rdev from the array it belongs to.
 *
 * Reverses bind_rdev_to_array(): unlinks the holder relationship, removes
 * the device from the array's RCU list, releases the serial pool, drops
 * the sysfs link and dirent references, and clears the bad block count.
 * Removal of the kobject itself is deferred to a work item.
 */
static void unbind_rdev_from_array(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
	/* must run while rdev->mddev is still set */
	mddev_destroy_serial_pool(rdev->mddev, rdev, false);
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	sysfs_put(rdev->sysfs_unack_badblocks);
	sysfs_put(rdev->sysfs_badblocks);
	rdev->sysfs_state = NULL;
	rdev->sysfs_unack_badblocks = NULL;
	rdev->sysfs_badblocks = NULL;
	rdev->badblocks.count = 0;
	/*
	 * Wait for RCU readers of the device list to finish, then hand the
	 * kobject deletion to a work item.  The extra reference taken here is
	 * dropped by rdev_delayed_delete().
	 * NOTE(review): deferral presumably also avoids deleting the kobject
	 * from within one of its own sysfs handlers - confirm.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, rdev_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_rdev_misc_wq, &rdev->del_work);
}
2512
2513
2514
2515
2516
2517
2518static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2519{
2520 int err = 0;
2521 struct block_device *bdev;
2522
2523 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2524 shared ? (struct md_rdev *)lock_rdev : rdev);
2525 if (IS_ERR(bdev)) {
2526 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2527 MAJOR(dev), MINOR(dev));
2528 return PTR_ERR(bdev);
2529 }
2530 rdev->bdev = bdev;
2531 return err;
2532}
2533
2534static void unlock_rdev(struct md_rdev *rdev)
2535{
2536 struct block_device *bdev = rdev->bdev;
2537 rdev->bdev = NULL;
2538 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2539}
2540
/* Prototype for md_autodetect_dev(), called by export_rdev() when not built as a module. */
void md_autodetect_dev(dev_t dev);
2542
/*
 * export_rdev - release an rdev that is not (or no longer) bound to an array.
 *
 * Clears the rdev's state via md_rdev_clear(), closes the underlying
 * block device, and drops a reference on the rdev's kobject.
 */
static void export_rdev(struct md_rdev *rdev)
{
	char b[BDEVNAME_SIZE];

	pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
	md_rdev_clear(rdev);
#ifndef MODULE
	/*
	 * Built-in only: an auto-detected device is handed back to
	 * md_autodetect_dev().  This has to happen before unlock_rdev()
	 * clears rdev->bdev.
	 */
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}
2556
/*
 * md_kick_rdev_from_array - remove @rdev from its array and release it.
 *
 * Combines unbind_rdev_from_array() with export_rdev(); exported for use
 * by other GPL modules.
 */
void md_kick_rdev_from_array(struct md_rdev *rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2563
2564static void export_array(struct mddev *mddev)
2565{
2566 struct md_rdev *rdev;
2567
2568 while (!list_empty(&mddev->disks)) {
2569 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2570 same_set);
2571 md_kick_rdev_from_array(rdev);
2572 }
2573 mddev->raid_disks = 0;
2574 mddev->major_version = 0;
2575}
2576
/*
 * set_in_sync - try to mark the array as in sync.
 *
 * Must be called with mddev->lock held; the lock is dropped and retaken
 * inside.  The array becomes in_sync only when no writes are pending,
 * which is checked after switching the writes_pending percpu ref to
 * atomic mode.  A safemode value of 1 is reset to 0 in all cases.
 *
 * Returns the resulting value of mddev->in_sync.
 */
static bool set_in_sync(struct mddev *mddev)
{
	lockdep_assert_held(&mddev->lock);
	if (!mddev->in_sync) {
		/* count concurrent checkers so only the last one switches back */
		mddev->sync_checkers++;
		/* the lock is released around the synchronous mode switch */
		spin_unlock(&mddev->lock);
		percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
		spin_lock(&mddev->lock);
		/* re-check in_sync: it may have changed while the lock was dropped */
		if (!mddev->in_sync &&
		    percpu_ref_is_zero(&mddev->writes_pending)) {
			mddev->in_sync = 1;
			/*
			 * Full barrier between setting ->in_sync and the
			 * ->sync_checkers update below.
			 */
			smp_mb();
			set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
			sysfs_notify_dirent_safe(mddev->sysfs_state);
		}
		if (--mddev->sync_checkers == 0)
			percpu_ref_switch_to_percpu(&mddev->writes_pending);
	}
	if (mddev->safemode == 1)
		mddev->safemode = 0;
	return mddev->in_sync;
}
2603
2604static void sync_sbs(struct mddev *mddev, int nospares)
2605{
2606
2607
2608
2609
2610
2611
2612 struct md_rdev *rdev;
2613 rdev_for_each(rdev, mddev) {
2614 if (rdev->sb_events == mddev->events ||
2615 (nospares &&
2616 rdev->raid_disk < 0 &&
2617 rdev->sb_events+1 == mddev->events)) {
2618
2619 rdev->sb_loaded = 2;
2620 } else {
2621 sync_super(mddev, rdev);
2622 rdev->sb_loaded = 1;
2623 }
2624 }
2625}
2626
2627static bool does_sb_need_changing(struct mddev *mddev)
2628{
2629 struct md_rdev *rdev;
2630 struct mdp_superblock_1 *sb;
2631 int role;
2632
2633
2634 rdev_for_each(rdev, mddev)
2635 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2636 break;
2637
2638
2639 if (!rdev)
2640 return false;
2641
2642 sb = page_address(rdev->sb_page);
2643
2644 rdev_for_each(rdev, mddev) {
2645 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2646
2647 if (role == 0xffff && rdev->raid_disk >=0 &&
2648 !test_bit(Faulty, &rdev->flags))
2649 return true;
2650
2651 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2652 return true;
2653 }
2654
2655
2656 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2657 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2658 (mddev->layout != le32_to_cpu(sb->layout)) ||
2659 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2660 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2661 return true;
2662
2663 return false;
2664}
2665
/**
 * md_update_sb() - bring the on-disk superblocks up to date.
 * @mddev: array whose metadata should be written.
 * @force_change: non-zero to treat this as a device-level change (spares
 *                are then never skipped).
 *
 * On a read-only array nothing is written; a forced change is only recorded
 * as MD_SB_CHANGE_DEVS.  For arrays without persistent metadata the change
 * flags are cleared and waiters woken without any I/O.  Otherwise the event
 * counter is adjusted, superblocks (and pending bad-block logs) are written,
 * and the whole sequence repeats if more changes were flagged meanwhile.
 */
void md_update_sb(struct mddev *mddev, int force_change)
{
	struct md_rdev *rdev;
	int sync_req;
	int nospares = 0;
	int any_badblocks_changed = 0;
	int ret = -1;

	if (mddev->ro) {
		/* Nothing is written; just remember that a change is due. */
		if (force_change)
			set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		return;
	}

repeat:
	if (mddev_is_clustered(mddev)) {
		if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
			force_change = 1;
		if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
			nospares = 1;
		ret = md_cluster_ops->metadata_update_start(mddev);
		/* If the superblock already matches, cancel and leave. */
		if (!does_sb_need_changing(mddev)) {
			if (ret == 0)
				md_cluster_ops->metadata_update_cancel(mddev);
			bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
							 BIT(MD_SB_CHANGE_DEVS) |
							 BIT(MD_SB_CHANGE_CLEAN));
			return;
		}
	}

	/*
	 * Advance each recovering device's recovery_offset to
	 * curr_resync_completed.  This is only done while a recovery (not a
	 * reshape) is running, for non-journal, not-yet-in-sync devices that
	 * hold a slot, and only ever moves the offset forward.
	 */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk >= 0 &&
		    mddev->delta_disks >= 0 &&
		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    !test_bit(Journal, &rdev->flags) &&
		    !test_bit(In_sync, &rdev->flags) &&
		    mddev->curr_resync_completed > rdev->recovery_offset)
				rdev->recovery_offset = mddev->curr_resync_completed;

	}
	if (!mddev->persistent) {
		/* No persistent metadata: drop the flags, nothing to write. */
		clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
		clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		if (!mddev->external) {
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			rdev_for_each(rdev, mddev) {
				/*
				 * Changed bad blocks cannot be recorded here:
				 * acknowledge them and fail the device.
				 */
				if (rdev->badblocks.changed) {
					rdev->badblocks.changed = 0;
					ack_all_badblocks(&rdev->badblocks);
					md_error(mddev, rdev);
				}
				clear_bit(Blocked, &rdev->flags);
				clear_bit(BlockedBadBlocks, &rdev->flags);
				wake_up(&rdev->blocked_wait);
			}
		}
		wake_up(&mddev->sb_wait);
		return;
	}

	spin_lock(&mddev->lock);

	mddev->utime = ktime_get_real_seconds();

	if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
		force_change = 1;
	if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
		/*
		 * Only a clean<->dirty transition: spares may be left alone
		 * (unless one of the checks below overrides this).
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* Never skip spares while the array is degraded. */
		nospares = 0;

	/* Remember in_sync so a change during the write can be detected. */
	sync_req = mddev->in_sync;

	/*
	 * For a pure clean transition on a fully synced array the event
	 * count may step back by one (at most once in a row, and never to
	 * zero); in every other case it moves forward.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && mddev->can_decrease_events
	    && mddev->events != 1) {
		mddev->events--;
		mddev->can_decrease_events = 0;
	} else {
		/* otherwise go forward; a later step back is allowed only if spares were skipped */
		mddev->events ++;
		mddev->can_decrease_events = nospares;
	}

	/* The event counter is not expected to ever reach zero. */
	WARN_ON(mddev->events == 0);

	rdev_for_each(rdev, mddev) {
		if (rdev->badblocks.changed)
			any_badblocks_changed++;
		if (test_bit(Faulty, &rdev->flags))
			set_bit(FaultRecorded, &rdev->flags);
	}

	sync_sbs(mddev, nospares);
	spin_unlock(&mddev->lock);

	pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
		 mdname(mddev), mddev->in_sync);

	if (mddev->queue)
		blk_add_trace_msg(mddev->queue, "md md_update_sb");
rewrite:
	md_bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, mddev) {
		char b[BDEVNAME_SIZE];

		/* sync_sbs() marks devices to be written with sb_loaded == 1 */
		if (rdev->sb_loaded != 1)
			continue; /* no noise on spare devices */

		if (!test_bit(Faulty, &rdev->flags)) {
			md_super_write(mddev,rdev,
				       rdev->sb_start, rdev->sb_size,
				       rdev->sb_page);
			pr_debug("md: (write) %s's sb offset: %llu\n",
				 bdevname(rdev->bdev, b),
				 (unsigned long long)rdev->sb_start);
			rdev->sb_events = mddev->events;
			/* A non-zero badblocks.size means a bad-block page is pending. */
			if (rdev->badblocks.size) {
				md_super_write(mddev, rdev,
					       rdev->badblocks.sector,
					       rdev->badblocks.size << 9,
					       rdev->bb_page);
				rdev->badblocks.size = 0;
			}

		} else
			pr_debug("md: %s (skipping faulty)\n",
				 bdevname(rdev->bdev, b));

		if (mddev->level == LEVEL_MULTIPATH)
			/* multipath: stop after the first device considered */
			break;
	}
	/* A negative result from the wait means the writes must be redone. */
	if (md_super_wait(mddev) < 0)
		goto rewrite;
	/* (a write failure is expected to be reflected in sb_flags — TODO confirm) */

	if (mddev_is_clustered(mddev) && ret == 0)
		md_cluster_ops->metadata_update_finish(mddev);

	if (mddev->in_sync != sync_req ||
	    !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
			       BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
		/* state changed while writing: write it all out again */
		goto repeat;
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify_dirent_safe(mddev->sysfs_completed);

	rdev_for_each(rdev, mddev) {
		/* The fault has been recorded, so the device can be unblocked. */
		if (test_and_clear_bit(FaultRecorded, &rdev->flags))
			clear_bit(Blocked, &rdev->flags);

		if (any_badblocks_changed)
			ack_all_badblocks(&rdev->badblocks);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
	}
}
EXPORT_SYMBOL(md_update_sb);
2859
2860static int add_bound_rdev(struct md_rdev *rdev)
2861{
2862 struct mddev *mddev = rdev->mddev;
2863 int err = 0;
2864 bool add_journal = test_bit(Journal, &rdev->flags);
2865
2866 if (!mddev->pers->hot_remove_disk || add_journal) {
2867
2868
2869
2870
2871 super_types[mddev->major_version].
2872 validate_super(mddev, rdev);
2873 if (add_journal)
2874 mddev_suspend(mddev);
2875 err = mddev->pers->hot_add_disk(mddev, rdev);
2876 if (add_journal)
2877 mddev_resume(mddev);
2878 if (err) {
2879 md_kick_rdev_from_array(rdev);
2880 return err;
2881 }
2882 }
2883 sysfs_notify_dirent_safe(rdev->sysfs_state);
2884
2885 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2886 if (mddev->degraded)
2887 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2888 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2889 md_new_event(mddev);
2890 md_wakeup_thread(mddev->thread);
2891 return 0;
2892}
2893
2894
2895
2896
/*
 * cmd_match() - test whether @cmd, as written to a sysfs file, equals @str.
 *
 * The two must be identical, except that @cmd may carry exactly one
 * trailing newline.  Returns 1 on a match and 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* Walk the common prefix; equality with a non-NUL *cmd implies a non-NUL *str. */
	for (; *cmd != '\0' && *cmd == *str; cmd++, str++)
		;

	/* Tolerate a single newline at the point where the strings diverge. */
	if (*cmd == '\n')
		cmd++;

	return *cmd == '\0' && *str == '\0';
}
2913
/*
 * One sysfs attribute of an rdev: the generic attribute plus typed
 * show/store callbacks that receive the rdev directly.  Either callback may
 * be NULL; the dispatchers return -EIO in that case.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	/* Format the attribute value into the supplied buffer. */
	ssize_t (*show)(struct md_rdev *, char *);
	/* Parse and apply a value written by userspace (buffer, length). */
	ssize_t (*store)(struct md_rdev *, const char *, size_t);
};
2919
2920static ssize_t
2921state_show(struct md_rdev *rdev, char *page)
2922{
2923 char *sep = ",";
2924 size_t len = 0;
2925 unsigned long flags = READ_ONCE(rdev->flags);
2926
2927 if (test_bit(Faulty, &flags) ||
2928 (!test_bit(ExternalBbl, &flags) &&
2929 rdev->badblocks.unacked_exist))
2930 len += sprintf(page+len, "faulty%s", sep);
2931 if (test_bit(In_sync, &flags))
2932 len += sprintf(page+len, "in_sync%s", sep);
2933 if (test_bit(Journal, &flags))
2934 len += sprintf(page+len, "journal%s", sep);
2935 if (test_bit(WriteMostly, &flags))
2936 len += sprintf(page+len, "write_mostly%s", sep);
2937 if (test_bit(Blocked, &flags) ||
2938 (rdev->badblocks.unacked_exist
2939 && !test_bit(Faulty, &flags)))
2940 len += sprintf(page+len, "blocked%s", sep);
2941 if (!test_bit(Faulty, &flags) &&
2942 !test_bit(Journal, &flags) &&
2943 !test_bit(In_sync, &flags))
2944 len += sprintf(page+len, "spare%s", sep);
2945 if (test_bit(WriteErrorSeen, &flags))
2946 len += sprintf(page+len, "write_error%s", sep);
2947 if (test_bit(WantReplacement, &flags))
2948 len += sprintf(page+len, "want_replacement%s", sep);
2949 if (test_bit(Replacement, &flags))
2950 len += sprintf(page+len, "replacement%s", sep);
2951 if (test_bit(ExternalBbl, &flags))
2952 len += sprintf(page+len, "external_bbl%s", sep);
2953 if (test_bit(FailFast, &flags))
2954 len += sprintf(page+len, "failfast%s", sep);
2955
2956 if (len)
2957 len -= strlen(sep);
2958
2959 return len+sprintf(page+len, "\n");
2960}
2961
/*
 * state_store() - apply a state command written to the rdev's "state" file.
 *
 * Recognised commands (each may carry one trailing newline, see cmd_match):
 *  faulty             - report an error on the device (array must be active)
 *  remove             - disconnect the device from the array
 *  writemostly        - set WriteMostly;    -writemostly clears it
 *  blocked            - set Blocked;        -blocked clears it (and may
 *                       fail the device, see below)
 *  insync             - set In_sync on a device without a slot
 *  -insync            - clear In_sync on a slotted device of an inactive array
 *  failfast           - set FailFast;       -failfast clears it
 *  write_error        - set WriteErrorSeen; -write_error clears it
 *  want_replacement   - request a replacement; -want_replacement cancels
 *  replacement        - mark as replacement; -replacement clears (inactive
 *                       arrays only)
 *  re-add             - bring a previously failed member back
 *  external_bbl       - set ExternalBbl;    -external_bbl clears it
 *                       (externally managed metadata only)
 *
 * Returns @len on success, otherwise a negative errno (-EINVAL for an
 * unrecognised or inapplicable command).
 */
static ssize_t
state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	int err = -EINVAL;
	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		/* md_error() may decline to fail the device; report that as busy. */
		if (test_bit(Faulty, &rdev->flags))
			err = 0;
		else
			err = -EBUSY;
	} else if (cmd_match(buf, "remove")) {
		if (rdev->mddev->pers) {
			/* On an active array, first try to take it out of its slot. */
			clear_bit(Blocked, &rdev->flags);
			remove_and_add_spares(rdev->mddev, rdev);
		}
		if (rdev->raid_disk >= 0)
			err = -EBUSY;
		else {
			struct mddev *mddev = rdev->mddev;
			err = 0;
			if (mddev_is_clustered(mddev))
				err = md_cluster_ops->remove_disk(mddev, rdev);

			if (err == 0) {
				md_kick_rdev_from_array(rdev);
				if (mddev->pers) {
					set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
					md_wakeup_thread(mddev->thread);
				}
				md_new_event(mddev);
			}
		}
	} else if (cmd_match(buf, "writemostly")) {
		set_bit(WriteMostly, &rdev->flags);
		mddev_create_serial_pool(rdev->mddev, rdev, false);
		err = 0;
	} else if (cmd_match(buf, "-writemostly")) {
		mddev_destroy_serial_pool(rdev->mddev, rdev, false);
		clear_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "blocked")) {
		set_bit(Blocked, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-blocked")) {
		if (!test_bit(Faulty, &rdev->flags) &&
		    !test_bit(ExternalBbl, &rdev->flags) &&
		    rdev->badblocks.unacked_exist) {
			/*
			 * Unblocking while bad blocks are still
			 * unacknowledged: the device is failed instead.
			 */
			md_error(rdev->mddev, rdev);
		}
		clear_bit(Blocked, &rdev->flags);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);

		err = 0;
	} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
		set_bit(In_sync, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "failfast")) {
		set_bit(FailFast, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-failfast")) {
		clear_bit(FailFast, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
		   !test_bit(Journal, &rdev->flags)) {
		/* Only honoured while the array is inactive; else stays -EINVAL. */
		if (rdev->mddev->pers == NULL) {
			clear_bit(In_sync, &rdev->flags);
			rdev->saved_raid_disk = rdev->raid_disk;
			rdev->raid_disk = -1;
			err = 0;
		}
	} else if (cmd_match(buf, "write_error")) {
		set_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-write_error")) {
		clear_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "want_replacement")) {
		/*
		 * The flag is only set on a slotted device that is neither a
		 * journal nor itself a replacement; recovery is kicked and
		 * success returned in every case.
		 */
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Journal, &rdev->flags) &&
		    !test_bit(Replacement, &rdev->flags))
			set_bit(WantReplacement, &rdev->flags);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
		err = 0;
	} else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing the request is always allowed. */
		err = 0;
		clear_bit(WantReplacement, &rdev->flags);
	} else if (cmd_match(buf, "replacement")) {
		/* May only be set while the array is inactive. */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			set_bit(Replacement, &rdev->flags);
			err = 0;
		}
	} else if (cmd_match(buf, "-replacement")) {
		/* Likewise, may only be cleared while the array is inactive. */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			clear_bit(Replacement, &rdev->flags);
			err = 0;
		}
	} else if (cmd_match(buf, "re-add")) {
		if (!rdev->mddev->pers)
			err = -EINVAL;
		else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
				rdev->saved_raid_disk >= 0) {
			/*
			 * Only a Faulty device without a slot that remembers
			 * its previous slot qualifies.  On a clustered array
			 * gather_bitmaps() must succeed first; its error is
			 * returned otherwise.
			 */
			if (!mddev_is_clustered(rdev->mddev) ||
			    (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
				clear_bit(Faulty, &rdev->flags);
				err = add_bound_rdev(rdev);
			}
		} else
			err = -EBUSY;
	} else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
		set_bit(ExternalBbl, &rdev->flags);
		rdev->badblocks.shift = 0;
		err = 0;
	} else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
		clear_bit(ExternalBbl, &rdev->flags);
		err = 0;
	}
	/* Notify pollers of the state file after any successful command. */
	if (!err)
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3127
3128static ssize_t
3129errors_show(struct md_rdev *rdev, char *page)
3130{
3131 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3132}
3133
3134static ssize_t
3135errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3136{
3137 unsigned int n;
3138 int rv;
3139
3140 rv = kstrtouint(buf, 10, &n);
3141 if (rv < 0)
3142 return rv;
3143 atomic_set(&rdev->corrected_errors, n);
3144 return len;
3145}
3146static struct rdev_sysfs_entry rdev_errors =
3147__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3148
3149static ssize_t
3150slot_show(struct md_rdev *rdev, char *page)
3151{
3152 if (test_bit(Journal, &rdev->flags))
3153 return sprintf(page, "journal\n");
3154 else if (rdev->raid_disk < 0)
3155 return sprintf(page, "none\n");
3156 else
3157 return sprintf(page, "%d\n", rdev->raid_disk);
3158}
3159
3160static ssize_t
3161slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3162{
3163 int slot;
3164 int err;
3165
3166 if (test_bit(Journal, &rdev->flags))
3167 return -EBUSY;
3168 if (strncmp(buf, "none", 4)==0)
3169 slot = -1;
3170 else {
3171 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3172 if (err < 0)
3173 return err;
3174 }
3175 if (rdev->mddev->pers && slot == -1) {
3176
3177
3178
3179
3180
3181
3182
3183 if (rdev->raid_disk == -1)
3184 return -EEXIST;
3185
3186 if (rdev->mddev->pers->hot_remove_disk == NULL)
3187 return -EINVAL;
3188 clear_bit(Blocked, &rdev->flags);
3189 remove_and_add_spares(rdev->mddev, rdev);
3190 if (rdev->raid_disk >= 0)
3191 return -EBUSY;
3192 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3193 md_wakeup_thread(rdev->mddev->thread);
3194 } else if (rdev->mddev->pers) {
3195
3196
3197
3198 int err;
3199
3200 if (rdev->raid_disk != -1)
3201 return -EBUSY;
3202
3203 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3204 return -EBUSY;
3205
3206 if (rdev->mddev->pers->hot_add_disk == NULL)
3207 return -EINVAL;
3208
3209 if (slot >= rdev->mddev->raid_disks &&
3210 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3211 return -ENOSPC;
3212
3213 rdev->raid_disk = slot;
3214 if (test_bit(In_sync, &rdev->flags))
3215 rdev->saved_raid_disk = slot;
3216 else
3217 rdev->saved_raid_disk = -1;
3218 clear_bit(In_sync, &rdev->flags);
3219 clear_bit(Bitmap_sync, &rdev->flags);
3220 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3221 if (err) {
3222 rdev->raid_disk = -1;
3223 return err;
3224 } else
3225 sysfs_notify_dirent_safe(rdev->sysfs_state);
3226 ;
3227 sysfs_link_rdev(rdev->mddev, rdev);
3228
3229 } else {
3230 if (slot >= rdev->mddev->raid_disks &&
3231 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3232 return -ENOSPC;
3233 rdev->raid_disk = slot;
3234
3235 clear_bit(Faulty, &rdev->flags);
3236 clear_bit(WriteMostly, &rdev->flags);
3237 set_bit(In_sync, &rdev->flags);
3238 sysfs_notify_dirent_safe(rdev->sysfs_state);
3239 }
3240 return len;
3241}
3242
3243static struct rdev_sysfs_entry rdev_slot =
3244__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3245
3246static ssize_t
3247offset_show(struct md_rdev *rdev, char *page)
3248{
3249 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3250}
3251
3252static ssize_t
3253offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3254{
3255 unsigned long long offset;
3256 if (kstrtoull(buf, 10, &offset) < 0)
3257 return -EINVAL;
3258 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3259 return -EBUSY;
3260 if (rdev->sectors && rdev->mddev->external)
3261
3262
3263 return -EBUSY;
3264 rdev->data_offset = offset;
3265 rdev->new_data_offset = offset;
3266 return len;
3267}
3268
3269static struct rdev_sysfs_entry rdev_offset =
3270__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3271
3272static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3273{
3274 return sprintf(page, "%llu\n",
3275 (unsigned long long)rdev->new_data_offset);
3276}
3277
3278static ssize_t new_offset_store(struct md_rdev *rdev,
3279 const char *buf, size_t len)
3280{
3281 unsigned long long new_offset;
3282 struct mddev *mddev = rdev->mddev;
3283
3284 if (kstrtoull(buf, 10, &new_offset) < 0)
3285 return -EINVAL;
3286
3287 if (mddev->sync_thread ||
3288 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3289 return -EBUSY;
3290 if (new_offset == rdev->data_offset)
3291
3292 ;
3293 else if (new_offset > rdev->data_offset) {
3294
3295 if (new_offset - rdev->data_offset
3296 + mddev->dev_sectors > rdev->sectors)
3297 return -E2BIG;
3298 }
3299
3300
3301
3302
3303
3304 if (new_offset < rdev->data_offset &&
3305 mddev->reshape_backwards)
3306 return -EINVAL;
3307
3308
3309
3310
3311 if (new_offset > rdev->data_offset &&
3312 !mddev->reshape_backwards)
3313 return -EINVAL;
3314
3315 if (mddev->pers && mddev->persistent &&
3316 !super_types[mddev->major_version]
3317 .allow_new_offset(rdev, new_offset))
3318 return -E2BIG;
3319 rdev->new_data_offset = new_offset;
3320 if (new_offset > rdev->data_offset)
3321 mddev->reshape_backwards = 1;
3322 else if (new_offset < rdev->data_offset)
3323 mddev->reshape_backwards = 0;
3324
3325 return len;
3326}
3327static struct rdev_sysfs_entry rdev_new_offset =
3328__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3329
3330static ssize_t
3331rdev_size_show(struct md_rdev *rdev, char *page)
3332{
3333 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3334}
3335
3336static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3337{
3338
3339 if (s1+l1 <= s2)
3340 return 0;
3341 if (s2+l2 <= s1)
3342 return 0;
3343 return 1;
3344}
3345
3346static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3347{
3348 unsigned long long blocks;
3349 sector_t new;
3350
3351 if (kstrtoull(buf, 10, &blocks) < 0)
3352 return -EINVAL;
3353
3354 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3355 return -EINVAL;
3356
3357 new = blocks * 2;
3358 if (new != blocks * 2)
3359 return -EINVAL;
3360
3361 *sectors = new;
3362 return 0;
3363}
3364
3365static ssize_t
3366rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3367{
3368 struct mddev *my_mddev = rdev->mddev;
3369 sector_t oldsectors = rdev->sectors;
3370 sector_t sectors;
3371
3372 if (test_bit(Journal, &rdev->flags))
3373 return -EBUSY;
3374 if (strict_blocks_to_sectors(buf, §ors) < 0)
3375 return -EINVAL;
3376 if (rdev->data_offset != rdev->new_data_offset)
3377 return -EINVAL;
3378 if (my_mddev->pers && rdev->raid_disk >= 0) {
3379 if (my_mddev->persistent) {
3380 sectors = super_types[my_mddev->major_version].
3381 rdev_size_change(rdev, sectors);
3382 if (!sectors)
3383 return -EBUSY;
3384 } else if (!sectors)
3385 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3386 rdev->data_offset;
3387 if (!my_mddev->pers->resize)
3388
3389 return -EINVAL;
3390 }
3391 if (sectors < my_mddev->dev_sectors)
3392 return -EINVAL;
3393
3394 rdev->sectors = sectors;
3395 if (sectors > oldsectors && my_mddev->external) {
3396
3397
3398
3399
3400
3401
3402 struct mddev *mddev;
3403 int overlap = 0;
3404 struct list_head *tmp;
3405
3406 rcu_read_lock();
3407 for_each_mddev(mddev, tmp) {
3408 struct md_rdev *rdev2;
3409
3410 rdev_for_each(rdev2, mddev)
3411 if (rdev->bdev == rdev2->bdev &&
3412 rdev != rdev2 &&
3413 overlaps(rdev->data_offset, rdev->sectors,
3414 rdev2->data_offset,
3415 rdev2->sectors)) {
3416 overlap = 1;
3417 break;
3418 }
3419 if (overlap) {
3420 mddev_put(mddev);
3421 break;
3422 }
3423 }
3424 rcu_read_unlock();
3425 if (overlap) {
3426
3427
3428
3429
3430
3431
3432 rdev->sectors = oldsectors;
3433 return -EBUSY;
3434 }
3435 }
3436 return len;
3437}
3438
3439static struct rdev_sysfs_entry rdev_size =
3440__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3441
3442static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3443{
3444 unsigned long long recovery_start = rdev->recovery_offset;
3445
3446 if (test_bit(In_sync, &rdev->flags) ||
3447 recovery_start == MaxSector)
3448 return sprintf(page, "none\n");
3449
3450 return sprintf(page, "%llu\n", recovery_start);
3451}
3452
3453static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3454{
3455 unsigned long long recovery_start;
3456
3457 if (cmd_match(buf, "none"))
3458 recovery_start = MaxSector;
3459 else if (kstrtoull(buf, 10, &recovery_start))
3460 return -EINVAL;
3461
3462 if (rdev->mddev->pers &&
3463 rdev->raid_disk >= 0)
3464 return -EBUSY;
3465
3466 rdev->recovery_offset = recovery_start;
3467 if (recovery_start == MaxSector)
3468 set_bit(In_sync, &rdev->flags);
3469 else
3470 clear_bit(In_sync, &rdev->flags);
3471 return len;
3472}
3473
3474static struct rdev_sysfs_entry rdev_recovery_start =
3475__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488static ssize_t bb_show(struct md_rdev *rdev, char *page)
3489{
3490 return badblocks_show(&rdev->badblocks, page, 0);
3491}
3492static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3493{
3494 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3495
3496 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3497 wake_up(&rdev->blocked_wait);
3498 return rv;
3499}
3500static struct rdev_sysfs_entry rdev_bad_blocks =
3501__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3502
3503static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3504{
3505 return badblocks_show(&rdev->badblocks, page, 1);
3506}
3507static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3508{
3509 return badblocks_store(&rdev->badblocks, page, len, 1);
3510}
3511static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3512__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3513
3514static ssize_t
3515ppl_sector_show(struct md_rdev *rdev, char *page)
3516{
3517 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3518}
3519
3520static ssize_t
3521ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3522{
3523 unsigned long long sector;
3524
3525 if (kstrtoull(buf, 10, §or) < 0)
3526 return -EINVAL;
3527 if (sector != (sector_t)sector)
3528 return -EINVAL;
3529
3530 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3531 rdev->raid_disk >= 0)
3532 return -EBUSY;
3533
3534 if (rdev->mddev->persistent) {
3535 if (rdev->mddev->major_version == 0)
3536 return -EINVAL;
3537 if ((sector > rdev->sb_start &&
3538 sector - rdev->sb_start > S16_MAX) ||
3539 (sector < rdev->sb_start &&
3540 rdev->sb_start - sector > -S16_MIN))
3541 return -EINVAL;
3542 rdev->ppl.offset = sector - rdev->sb_start;
3543 } else if (!rdev->mddev->external) {
3544 return -EBUSY;
3545 }
3546 rdev->ppl.sector = sector;
3547 return len;
3548}
3549
3550static struct rdev_sysfs_entry rdev_ppl_sector =
3551__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3552
3553static ssize_t
3554ppl_size_show(struct md_rdev *rdev, char *page)
3555{
3556 return sprintf(page, "%u\n", rdev->ppl.size);
3557}
3558
3559static ssize_t
3560ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3561{
3562 unsigned int size;
3563
3564 if (kstrtouint(buf, 10, &size) < 0)
3565 return -EINVAL;
3566
3567 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3568 rdev->raid_disk >= 0)
3569 return -EBUSY;
3570
3571 if (rdev->mddev->persistent) {
3572 if (rdev->mddev->major_version == 0)
3573 return -EINVAL;
3574 if (size > U16_MAX)
3575 return -EINVAL;
3576 } else if (!rdev->mddev->external) {
3577 return -EBUSY;
3578 }
3579 rdev->ppl.size = size;
3580 return len;
3581}
3582
3583static struct rdev_sysfs_entry rdev_ppl_size =
3584__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3585
/*
 * NULL-terminated list of the rdev attributes defined above; installed as
 * the default attribute set of rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_new_offset.attr,
	&rdev_size.attr,
	&rdev_recovery_start.attr,
	&rdev_bad_blocks.attr,
	&rdev_unack_bad_blocks.attr,
	&rdev_ppl_sector.attr,
	&rdev_ppl_size.attr,
	NULL,
};
3600static ssize_t
3601rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3602{
3603 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3604 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3605
3606 if (!entry->show)
3607 return -EIO;
3608 if (!rdev->mddev)
3609 return -ENODEV;
3610 return entry->show(rdev, page);
3611}
3612
3613static ssize_t
3614rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3615 const char *page, size_t length)
3616{
3617 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3618 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3619 ssize_t rv;
3620 struct mddev *mddev = rdev->mddev;
3621
3622 if (!entry->store)
3623 return -EIO;
3624 if (!capable(CAP_SYS_ADMIN))
3625 return -EACCES;
3626 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3627 if (!rv) {
3628 if (rdev->mddev == NULL)
3629 rv = -ENODEV;
3630 else
3631 rv = entry->store(rdev, page, length);
3632 mddev_unlock(mddev);
3633 }
3634 return rv;
3635}
3636
3637static void rdev_free(struct kobject *ko)
3638{
3639 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3640 kfree(rdev);
3641}
/* sysfs read/write entry points shared by every rdev attribute. */
static const struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/*
 * kobject type for rdevs: frees the rdev on release and exposes the default
 * attribute set through the dispatchers above.
 */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
3651
3652int md_rdev_init(struct md_rdev *rdev)
3653{
3654 rdev->desc_nr = -1;
3655 rdev->saved_raid_disk = -1;
3656 rdev->raid_disk = -1;
3657 rdev->flags = 0;
3658 rdev->data_offset = 0;
3659 rdev->new_data_offset = 0;
3660 rdev->sb_events = 0;
3661 rdev->last_read_error = 0;
3662 rdev->sb_loaded = 0;
3663 rdev->bb_page = NULL;
3664 atomic_set(&rdev->nr_pending, 0);
3665 atomic_set(&rdev->read_errors, 0);
3666 atomic_set(&rdev->corrected_errors, 0);
3667
3668 INIT_LIST_HEAD(&rdev->same_set);
3669 init_waitqueue_head(&rdev->blocked_wait);
3670
3671
3672
3673
3674
3675 return badblocks_init(&rdev->badblocks, 0);
3676}
3677EXPORT_SYMBOL_GPL(md_rdev_init);
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3689{
3690 char b[BDEVNAME_SIZE];
3691 int err;
3692 struct md_rdev *rdev;
3693 sector_t size;
3694
3695 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3696 if (!rdev)
3697 return ERR_PTR(-ENOMEM);
3698
3699 err = md_rdev_init(rdev);
3700 if (err)
3701 goto abort_free;
3702 err = alloc_disk_sb(rdev);
3703 if (err)
3704 goto abort_free;
3705
3706 err = lock_rdev(rdev, newdev, super_format == -2);
3707 if (err)
3708 goto abort_free;
3709
3710 kobject_init(&rdev->kobj, &rdev_ktype);
3711
3712 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3713 if (!size) {
3714 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3715 bdevname(rdev->bdev,b));
3716 err = -EINVAL;
3717 goto abort_free;
3718 }
3719
3720 if (super_format >= 0) {
3721 err = super_types[super_format].
3722 load_super(rdev, NULL, super_minor);
3723 if (err == -EINVAL) {
3724 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3725 bdevname(rdev->bdev,b),
3726 super_format, super_minor);
3727 goto abort_free;
3728 }
3729 if (err < 0) {
3730 pr_warn("md: could not read %s's sb, not importing!\n",
3731 bdevname(rdev->bdev,b));
3732 goto abort_free;
3733 }
3734 }
3735
3736 return rdev;
3737
3738abort_free:
3739 if (rdev->bdev)
3740 unlock_rdev(rdev);
3741 md_rdev_clear(rdev);
3742 kfree(rdev);
3743 return ERR_PTR(err);
3744}
3745
3746
3747
3748
3749
3750static int analyze_sbs(struct mddev *mddev)
3751{
3752 int i;
3753 struct md_rdev *rdev, *freshest, *tmp;
3754 char b[BDEVNAME_SIZE];
3755
3756 freshest = NULL;
3757 rdev_for_each_safe(rdev, tmp, mddev)
3758 switch (super_types[mddev->major_version].
3759 load_super(rdev, freshest, mddev->minor_version)) {
3760 case 1:
3761 freshest = rdev;
3762 break;
3763 case 0:
3764 break;
3765 default:
3766 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3767 bdevname(rdev->bdev,b));
3768 md_kick_rdev_from_array(rdev);
3769 }
3770
3771
3772 if (!freshest) {
3773 pr_warn("md: cannot find a valid disk\n");
3774 return -EINVAL;
3775 }
3776
3777 super_types[mddev->major_version].
3778 validate_super(mddev, freshest);
3779
3780 i = 0;
3781 rdev_for_each_safe(rdev, tmp, mddev) {
3782 if (mddev->max_disks &&
3783 (rdev->desc_nr >= mddev->max_disks ||
3784 i > mddev->max_disks)) {
3785 pr_warn("md: %s: %s: only %d devices permitted\n",
3786 mdname(mddev), bdevname(rdev->bdev, b),
3787 mddev->max_disks);
3788 md_kick_rdev_from_array(rdev);
3789 continue;
3790 }
3791 if (rdev != freshest) {
3792 if (super_types[mddev->major_version].
3793 validate_super(mddev, rdev)) {
3794 pr_warn("md: kicking non-fresh %s from array!\n",
3795 bdevname(rdev->bdev,b));
3796 md_kick_rdev_from_array(rdev);
3797 continue;
3798 }
3799 }
3800 if (mddev->level == LEVEL_MULTIPATH) {
3801 rdev->desc_nr = i++;
3802 rdev->raid_disk = rdev->desc_nr;
3803 set_bit(In_sync, &rdev->flags);
3804 } else if (rdev->raid_disk >=
3805 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3806 !test_bit(Journal, &rdev->flags)) {
3807 rdev->raid_disk = -1;
3808 clear_bit(In_sync, &rdev->flags);
3809 }
3810 }
3811
3812 return 0;
3813}
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
/*
 * strict_strtoul_scaled() - parse a decimal number with an optional
 * fractional part and scale it by 10^@scale.
 *
 * At most @scale fractional digits contribute to the result; further digits
 * are consumed but ignored.  One trailing newline is allowed; any other
 * trailing character yields -EINVAL.  On success the scaled value is stored
 * in *@res and 0 is returned.  Overflow is not detected.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long acc = 0;
	long decimals = -1;	/* -1 until a '.' has been seen */

	for (; isdigit(*cp) || (*cp == '.' && decimals < 0); cp++) {
		if (*cp == '.') {
			decimals = 0;
			continue;
		}
		/* Already have @scale fractional digits: skip the rest. */
		if (decimals >= scale)
			continue;

		acc = acc * 10 + (unsigned int)(*cp - '0');
		if (decimals >= 0)
			decimals++;
	}

	if (*cp == '\n')
		cp++;
	if (*cp != '\0')
		return -EINVAL;

	if (decimals < 0)
		decimals = 0;
	/* Pad with zeros for the fractional digits that were not supplied. */
	*res = acc * int_pow(10, scale - decimals);
	return 0;
}
3850
3851static ssize_t
3852safe_delay_show(struct mddev *mddev, char *page)
3853{
3854 int msec = (mddev->safemode_delay*1000)/HZ;
3855 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3856}
3857static ssize_t
3858safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3859{
3860 unsigned long msec;
3861
3862 if (mddev_is_clustered(mddev)) {
3863 pr_warn("md: Safemode is disabled for clustered mode\n");
3864 return -EINVAL;
3865 }
3866
3867 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3868 return -EINVAL;
3869 if (msec == 0)
3870 mddev->safemode_delay = 0;
3871 else {
3872 unsigned long old_delay = mddev->safemode_delay;
3873 unsigned long new_delay = (msec*HZ)/1000;
3874
3875 if (new_delay == 0)
3876 new_delay = 1;
3877 mddev->safemode_delay = new_delay;
3878 if (new_delay < old_delay || old_delay == 0)
3879 mod_timer(&mddev->safemode_timer, jiffies+1);
3880 }
3881 return len;
3882}
3883static struct md_sysfs_entry md_safe_delay =
3884__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3885
3886static ssize_t
3887level_show(struct mddev *mddev, char *page)
3888{
3889 struct md_personality *p;
3890 int ret;
3891 spin_lock(&mddev->lock);
3892 p = mddev->pers;
3893 if (p)
3894 ret = sprintf(page, "%s\n", p->name);
3895 else if (mddev->clevel[0])
3896 ret = sprintf(page, "%s\n", mddev->clevel);
3897 else if (mddev->level != LEVEL_NONE)
3898 ret = sprintf(page, "%d\n", mddev->level);
3899 else
3900 ret = 0;
3901 spin_unlock(&mddev->lock);
3902 return ret;
3903}
3904
/*
 * sysfs store for "level".
 *
 * With no personality active, the written string is simply recorded in
 * mddev->clevel (one trailing newline stripped) and mddev->level is reset
 * to LEVEL_NONE.
 *
 * With a personality active, the write requests an online personality
 * change: the named personality is looked up (after trying to load its
 * module), asked to accept the array through ->takeover and, if it does,
 * swapped in while the array is suspended.
 *
 * Returns @len on success or a negative errno:
 *   -EINVAL  empty or over-long input, personality not available, or
 *            missing ->quiesce / ->takeover support
 *   -EROFS   mddev->ro is set
 *   -EBUSY   sync/recovery/reshape in progress, or sysfs_active is set
 *   or whatever mddev_lock() / ->takeover reported.
 */
static ssize_t
level_store(struct mddev *mddev, const char *buf, size_t len)
{
	char clevel[16];
	ssize_t rv;
	size_t slen = len;
	struct md_personality *pers, *oldpers;
	long level;
	void *priv, *oldpriv;
	struct md_rdev *rdev;

	/* Input must fit in clevel[] including the terminating NUL. */
	if (slen == 0 || slen >= sizeof(clevel))
		return -EINVAL;

	rv = mddev_lock(mddev);
	if (rv)
		return rv;

	if (mddev->pers == NULL) {
		/* Array not running: just remember the requested level name. */
		strncpy(mddev->clevel, buf, slen);
		if (mddev->clevel[slen-1] == '\n')
			slen--;
		mddev->clevel[slen] = 0;
		mddev->level = LEVEL_NONE;
		rv = len;
		goto out_unlock;
	}
	rv = -EROFS;
	if (mddev->ro)
		goto out_unlock;

	/*
	 * Request to change the personality of a running array.
	 * Refuse while any resync/recovery/reshape is in flight or
	 * while sysfs_active is set.
	 */

	rv = -EBUSY;
	if (mddev->sync_thread ||
	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
	    mddev->reshape_position != MaxSector ||
	    mddev->sysfs_active)
		goto out_unlock;

	/* The current personality must provide ->quiesce. */
	rv = -EINVAL;
	if (!mddev->pers->quiesce) {
		pr_warn("md: %s: %s does not support online personality change\n",
			mdname(mddev), mddev->pers->name);
		goto out_unlock;
	}

	/* Now find the new personality, by number or by name. */
	strncpy(clevel, buf, slen);
	if (clevel[slen-1] == '\n')
		slen--;
	clevel[slen] = 0;
	if (kstrtol(clevel, 10, &level))
		level = LEVEL_NONE;

	/* Try both module alias forms before looking the personality up. */
	if (request_module("md-%s", clevel) != 0)
		request_module("md-level-%s", clevel);
	spin_lock(&pers_lock);
	pers = find_pers(level, clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		pr_warn("md: personality %s not loaded\n", clevel);
		rv = -EINVAL;
		goto out_unlock;
	}
	spin_unlock(&pers_lock);

	if (pers == mddev->pers) {
		/* Already running this personality: nothing to do. */
		module_put(pers->owner);
		rv = len;
		goto out_unlock;
	}
	if (!pers->takeover) {
		module_put(pers->owner);
		pr_warn("md: %s: %s does not support personality takeover\n",
			mdname(mddev), clevel);
		rv = -EINVAL;
		goto out_unlock;
	}

	/* Start from the current slot assignment; ->takeover may change it. */
	rdev_for_each(rdev, mddev)
		rdev->new_raid_disk = rdev->raid_disk;

	/*
	 * On success the code below adopts new_level / new_layout /
	 * new_chunk_sectors, so ->takeover is expected to have set them.
	 * On failure they are reset to the current geometry.
	 */
	priv = pers->takeover(mddev);
	if (IS_ERR(priv)) {
		mddev->new_level = mddev->level;
		mddev->new_layout = mddev->layout;
		mddev->new_chunk_sectors = mddev->chunk_sectors;
		mddev->raid_disks -= mddev->delta_disks;
		mddev->delta_disks = 0;
		mddev->reshape_backwards = 0;
		module_put(pers->owner);
		pr_warn("md: %s: %s would not accept array\n",
			mdname(mddev), clevel);
		rv = PTR_ERR(priv);
		goto out_unlock;
	}

	/* The new personality accepted the array: switch over. */
	mddev_suspend(mddev);
	mddev_detach(mddev);

	/* Swap personality and geometry atomically w.r.t. mddev->lock users. */
	spin_lock(&mddev->lock);
	oldpers = mddev->pers;
	oldpriv = mddev->private;
	mddev->pers = pers;
	mddev->private = priv;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->delta_disks = 0;
	mddev->reshape_backwards = 0;
	mddev->degraded = 0;
	spin_unlock(&mddev->lock);

	if (oldpers->sync_request == NULL &&
	    mddev->external) {
		/*
		 * The old personality had no ->sync_request and metadata
		 * is managed externally: mark the array not in_sync and
		 * disable safemode.
		 * NOTE(review): presumably so that writes do not block on
		 * a clean->dirty transition before external management
		 * takes over - confirm.
		 */
		mddev->in_sync = 0;
		mddev->safemode_delay = 0;
		mddev->safemode = 0;
	}

	oldpers->free(mddev, oldpriv);

	if (oldpers->sync_request == NULL &&
	    pers->sync_request != NULL) {
		/* Gained ->sync_request: add the md_redundancy_group attributes. */
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			pr_warn("md: cannot register extra attributes for %s\n",
				mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
		mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
		mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
	}
	if (oldpers->sync_request != NULL &&
	    pers->sync_request == NULL) {
		/* Lost ->sync_request: queue md_redundancy_group for removal. */
		if (mddev->to_remove == NULL)
			mddev->to_remove = &md_redundancy_group;
	}

	module_put(oldpers->owner);

	/*
	 * Re-slot devices in two passes: first drop the sysfs links of every
	 * device whose slot changes, then assign the new slots and re-link.
	 */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk >= mddev->raid_disks)
			rdev->new_raid_disk = -1;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		sysfs_unlink_rdev(mddev, rdev);
	}
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		rdev->raid_disk = rdev->new_raid_disk;
		if (rdev->raid_disk < 0)
			clear_bit(In_sync, &rdev->flags);
		else {
			if (sysfs_link_rdev(mddev, rdev))
				pr_warn("md: cannot register rd%d for %s after level change\n",
					rdev->raid_disk, mdname(mddev));
		}
	}

	if (pers->sync_request == NULL) {
		/*
		 * The new personality has no ->sync_request: force in_sync
		 * and stop the safemode timer.
		 */
		mddev->in_sync = 1;
		del_timer_sync(&mddev->safemode_timer);
	}
	blk_set_stacking_limits(&mddev->queue->limits);
	/* NOTE(review): the return value of ->run is ignored here. */
	pers->run(mddev);
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
	mddev_resume(mddev);
	/* Without an md thread, write the superblock update directly. */
	if (!mddev->thread)
		md_update_sb(mddev, 1);
	sysfs_notify_dirent_safe(mddev->sysfs_level);
	md_new_event(mddev);
	rv = len;
out_unlock:
	mddev_unlock(mddev);
	return rv;
}

static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4111
4112static ssize_t
4113layout_show(struct mddev *mddev, char *page)
4114{
4115
4116 if (mddev->reshape_position != MaxSector &&
4117 mddev->layout != mddev->new_layout)
4118 return sprintf(page, "%d (%d)\n",
4119 mddev->new_layout, mddev->layout);
4120 return sprintf(page, "%d\n", mddev->layout);
4121}
4122
4123static ssize_t
4124layout_store(struct mddev *mddev, const char *buf, size_t len)
4125{
4126 unsigned int n;
4127 int err;
4128
4129 err = kstrtouint(buf, 10, &n);
4130 if (err < 0)
4131 return err;
4132 err = mddev_lock(mddev);
4133 if (err)
4134 return err;
4135
4136 if (mddev->pers) {
4137 if (mddev->pers->check_reshape == NULL)
4138 err = -EBUSY;
4139 else if (mddev->ro)
4140 err = -EROFS;
4141 else {
4142 mddev->new_layout = n;
4143 err = mddev->pers->check_reshape(mddev);
4144 if (err)
4145 mddev->new_layout = mddev->layout;
4146 }
4147 } else {
4148 mddev->new_layout = n;
4149 if (mddev->reshape_position == MaxSector)
4150 mddev->layout = n;
4151 }
4152 mddev_unlock(mddev);
4153 return err ?: len;
4154}
4155static struct md_sysfs_entry md_layout =
4156__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4157
4158static ssize_t
4159raid_disks_show(struct mddev *mddev, char *page)
4160{
4161 if (mddev->raid_disks == 0)
4162 return 0;
4163 if (mddev->reshape_position != MaxSector &&
4164 mddev->delta_disks != 0)
4165 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4166 mddev->raid_disks - mddev->delta_disks);
4167 return sprintf(page, "%d\n", mddev->raid_disks);
4168}
4169
4170static int update_raid_disks(struct mddev *mddev, int raid_disks);
4171
4172static ssize_t
4173raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4174{
4175 unsigned int n;
4176 int err;
4177
4178 err = kstrtouint(buf, 10, &n);
4179 if (err < 0)
4180 return err;
4181
4182 err = mddev_lock(mddev);
4183 if (err)
4184 return err;
4185 if (mddev->pers)
4186 err = update_raid_disks(mddev, n);
4187 else if (mddev->reshape_position != MaxSector) {
4188 struct md_rdev *rdev;
4189 int olddisks = mddev->raid_disks - mddev->delta_disks;
4190
4191 err = -EINVAL;
4192 rdev_for_each(rdev, mddev) {
4193 if (olddisks < n &&
4194 rdev->data_offset < rdev->new_data_offset)
4195 goto out_unlock;
4196 if (olddisks > n &&
4197 rdev->data_offset > rdev->new_data_offset)
4198 goto out_unlock;
4199 }
4200 err = 0;
4201 mddev->delta_disks = n - olddisks;
4202 mddev->raid_disks = n;
4203 mddev->reshape_backwards = (mddev->delta_disks < 0);
4204 } else
4205 mddev->raid_disks = n;
4206out_unlock:
4207 mddev_unlock(mddev);
4208 return err ? err : len;
4209}
4210static struct md_sysfs_entry md_raid_disks =
4211__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4212
4213static ssize_t
4214uuid_show(struct mddev *mddev, char *page)
4215{
4216 return sprintf(page, "%pU\n", mddev->uuid);
4217}
4218static struct md_sysfs_entry md_uuid =
4219__ATTR(uuid, S_IRUGO, uuid_show, NULL);
4220
4221static ssize_t
4222chunk_size_show(struct mddev *mddev, char *page)
4223{
4224 if (mddev->reshape_position != MaxSector &&
4225 mddev->chunk_sectors != mddev->new_chunk_sectors)
4226 return sprintf(page, "%d (%d)\n",
4227 mddev->new_chunk_sectors << 9,
4228 mddev->chunk_sectors << 9);
4229 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4230}
4231
4232static ssize_t
4233chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4234{
4235 unsigned long n;
4236 int err;
4237
4238 err = kstrtoul(buf, 10, &n);
4239 if (err < 0)
4240 return err;
4241
4242 err = mddev_lock(mddev);
4243 if (err)
4244 return err;
4245 if (mddev->pers) {
4246 if (mddev->pers->check_reshape == NULL)
4247 err = -EBUSY;
4248 else if (mddev->ro)
4249 err = -EROFS;
4250 else {
4251 mddev->new_chunk_sectors = n >> 9;
4252 err = mddev->pers->check_reshape(mddev);
4253 if (err)
4254 mddev->new_chunk_sectors = mddev->chunk_sectors;
4255 }
4256 } else {
4257 mddev->new_chunk_sectors = n >> 9;
4258 if (mddev->reshape_position == MaxSector)
4259 mddev->chunk_sectors = n >> 9;
4260 }
4261 mddev_unlock(mddev);
4262 return err ?: len;
4263}
4264static struct md_sysfs_entry md_chunk_size =
4265__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4266
4267static ssize_t
4268resync_start_show(struct mddev *mddev, char *page)
4269{
4270 if (mddev->recovery_cp == MaxSector)
4271 return sprintf(page, "none\n");
4272 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4273}
4274
4275static ssize_t
4276resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4277{
4278 unsigned long long n;
4279 int err;
4280
4281 if (cmd_match(buf, "none"))
4282 n = MaxSector;
4283 else {
4284 err = kstrtoull(buf, 10, &n);
4285 if (err < 0)
4286 return err;
4287 if (n != (sector_t)n)
4288 return -EINVAL;
4289 }
4290
4291 err = mddev_lock(mddev);
4292 if (err)
4293 return err;
4294 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4295 err = -EBUSY;
4296
4297 if (!err) {
4298 mddev->recovery_cp = n;
4299 if (mddev->pers)
4300 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4301 }
4302 mddev_unlock(mddev);
4303 return err ?: len;
4304}
4305static struct md_sysfs_entry md_resync_start =
4306__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4307 resync_start_show, resync_start_store);
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
/*
 * States exposed through the "array_state" sysfs attribute.  As reported
 * by array_state_show():
 *
 *   clear         no personality/not ready, and no devices, raid_disks == 0
 *                 and dev_sectors == 0
 *   inactive      no personality/not ready, but something is configured
 *   suspended     never reported; writing it is rejected with -EINVAL
 *   readonly      mddev->ro == 1
 *   read_auto     mddev->ro == 2
 *   clean         writable and mddev->in_sync
 *   active        writable, neither in_sync nor in safemode
 *   write_pending writable with MD_SB_CHANGE_PENDING set
 *   active_idle   writable, not in_sync, mddev->safemode set
 *   broken        would be "clean" but MD_BROKEN is set
 *   bad_word      not a real state: the index match_word() returns for
 *                 an unrecognised word (the NULL terminator's slot)
 *
 * array_states[] must stay in the same order as the enum; it is indexed
 * by enum value and NULL-terminated for match_word().
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, broken, bad_word};
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", "broken", NULL };
4355
/*
 * Return the index of the first entry in the NULL-terminated @list that
 * cmd_match() accepts for @word, or the index of the terminating NULL if
 * nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
4364
4365static ssize_t
4366array_state_show(struct mddev *mddev, char *page)
4367{
4368 enum array_state st = inactive;
4369
4370 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4371 switch(mddev->ro) {
4372 case 1:
4373 st = readonly;
4374 break;
4375 case 2:
4376 st = read_auto;
4377 break;
4378 case 0:
4379 spin_lock(&mddev->lock);
4380 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4381 st = write_pending;
4382 else if (mddev->in_sync)
4383 st = clean;
4384 else if (mddev->safemode)
4385 st = active_idle;
4386 else
4387 st = active;
4388 spin_unlock(&mddev->lock);
4389 }
4390
4391 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4392 st = broken;
4393 } else {
4394 if (list_empty(&mddev->disks) &&
4395 mddev->raid_disks == 0 &&
4396 mddev->dev_sectors == 0)
4397 st = clear;
4398 else
4399 st = inactive;
4400 }
4401 return sprintf(page, "%s\n", array_states[st]);
4402}
4403
static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
static int restart_array(struct mddev *mddev);

/*
 * sysfs store for "array_state".
 *
 * The written word is mapped to an enum array_state by match_word();
 * unknown words map to bad_word and fail with -EINVAL, as do the states
 * that cannot be requested (suspended, write_pending, active_idle,
 * broken).
 *
 * Switching a running, not read-only (ro != 1) array between "active"
 * and "clean" is handled on a fast path under the mddev->lock spinlock,
 * without taking mddev_lock().  Every other transition runs under
 * mddev_lock().
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
array_state_store(struct mddev *mddev, const char *buf, size_t len)
{
	int err = 0;
	enum array_state st = match_word(buf, array_states);

	if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
		/*
		 * Fast path: do not take mddev_lock() when toggling
		 * between clean and active.
		 */
		spin_lock(&mddev->lock);
		if (st == active) {
			restart_array(mddev);
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			md_wakeup_thread(mddev->thread);
			wake_up(&mddev->sb_wait);
		} else {
			/* st == clean */
			restart_array(mddev);
			if (!set_in_sync(mddev))
				err = -EBUSY;
		}
		if (!err)
			sysfs_notify_dirent_safe(mddev->sysfs_state);
		spin_unlock(&mddev->lock);
		return err ?: len;
	}
	err = mddev_lock(mddev);
	if (err)
		return err;
	/* Default for every case that does not set err explicitly. */
	err = -EINVAL;
	switch(st) {
	case bad_word:
		break;
	case clear:
		/* do_md_stop() with mode 0 */
		err = do_md_stop(mddev, 0, NULL);
		break;
	case inactive:
		/* do_md_stop() with mode 2, only if a personality is active */
		if (mddev->pers)
			err = do_md_stop(mddev, 2, NULL);
		else
			err = 0; /* already inactive */
		break;
	case suspended:
		/* not handled: stays -EINVAL */
		break;
	case readonly:
		if (mddev->pers)
			err = md_set_readonly(mddev, NULL);
		else {
			/* not running yet: start it read-only */
			mddev->ro = 1;
			set_disk_ro(mddev->gendisk, 1);
			err = do_md_run(mddev);
		}
		break;
	case read_auto:
		if (mddev->pers) {
			/*
			 * From ro == 0 go through md_set_readonly(), from
			 * ro == 1 through restart_array().  If ro is already
			 * 2, err keeps its -EINVAL default.
			 */
			if (mddev->ro == 0)
				err = md_set_readonly(mddev, NULL);
			else if (mddev->ro == 1)
				err = restart_array(mddev);
			if (err == 0) {
				mddev->ro = 2;
				set_disk_ro(mddev->gendisk, 0);
			}
		} else {
			mddev->ro = 2;
			err = do_md_run(mddev);
		}
		break;
	case clean:
		/* Reached with a personality only when ro == 1 (see fast path). */
		if (mddev->pers) {
			err = restart_array(mddev);
			if (err)
				break;
			spin_lock(&mddev->lock);
			if (!set_in_sync(mddev))
				err = -EBUSY;
			spin_unlock(&mddev->lock);
		} else
			err = -EINVAL;
		break;
	case active:
		/* Reached with a personality only when ro == 1 (see fast path). */
		if (mddev->pers) {
			err = restart_array(mddev);
			if (err)
				break;
			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			wake_up(&mddev->sb_wait);
			err = 0;
		} else {
			/* not running yet: start it read-write */
			mddev->ro = 0;
			set_disk_ro(mddev->gendisk, 0);
			err = do_md_run(mddev);
		}
		break;
	case write_pending:
	case active_idle:
	case broken:
		/* these cannot be set */
		break;
	}

	if (!err) {
		/* A successful state change ends an UNTIL_IOCTL hold. */
		if (mddev->hold_active == UNTIL_IOCTL)
			mddev->hold_active = 0;
		sysfs_notify_dirent_safe(mddev->sysfs_state);
	}
	mddev_unlock(mddev);
	return err ?: len;
}
static struct md_sysfs_entry md_array_state =
__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4521
4522static ssize_t
4523max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4524 return sprintf(page, "%d\n",
4525 atomic_read(&mddev->max_corr_read_errors));
4526}
4527
4528static ssize_t
4529max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4530{
4531 unsigned int n;
4532 int rv;
4533
4534 rv = kstrtouint(buf, 10, &n);
4535 if (rv < 0)
4536 return rv;
4537 atomic_set(&mddev->max_corr_read_errors, n);
4538 return len;
4539}
4540
4541static struct md_sysfs_entry max_corr_read_errors =
4542__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4543 max_corrected_read_errors_store);
4544
4545static ssize_t
4546null_show(struct mddev *mddev, char *page)
4547{
4548 return -EINVAL;
4549}
4550
4551
4552static void flush_rdev_wq(struct mddev *mddev)
4553{
4554 struct md_rdev *rdev;
4555
4556 rcu_read_lock();
4557 rdev_for_each_rcu(rdev, mddev)
4558 if (work_pending(&rdev->del_work)) {
4559 flush_workqueue(md_rdev_misc_wq);
4560 break;
4561 }
4562 rcu_read_unlock();
4563}
4564
4565static ssize_t
4566new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4567{
4568
4569
4570
4571
4572
4573
4574
4575 char *e;
4576 int major = simple_strtoul(buf, &e, 10);
4577 int minor;
4578 dev_t dev;
4579 struct md_rdev *rdev;
4580 int err;
4581
4582 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4583 return -EINVAL;
4584 minor = simple_strtoul(e+1, &e, 10);
4585 if (*e && *e != '\n')
4586 return -EINVAL;
4587 dev = MKDEV(major, minor);
4588 if (major != MAJOR(dev) ||
4589 minor != MINOR(dev))
4590 return -EOVERFLOW;
4591
4592 flush_rdev_wq(mddev);
4593 err = mddev_lock(mddev);
4594 if (err)
4595 return err;
4596 if (mddev->persistent) {
4597 rdev = md_import_device(dev, mddev->major_version,
4598 mddev->minor_version);
4599 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4600 struct md_rdev *rdev0
4601 = list_entry(mddev->disks.next,
4602 struct md_rdev, same_set);
4603 err = super_types[mddev->major_version]
4604 .load_super(rdev, rdev0, mddev->minor_version);
4605 if (err < 0)
4606 goto out;
4607 }
4608 } else if (mddev->external)
4609 rdev = md_import_device(dev, -2, -1);
4610 else
4611 rdev = md_import_device(dev, -1, -1);
4612
4613 if (IS_ERR(rdev)) {
4614 mddev_unlock(mddev);
4615 return PTR_ERR(rdev);
4616 }
4617 err = bind_rdev_to_array(rdev, mddev);
4618 out:
4619 if (err)
4620 export_rdev(rdev);
4621 mddev_unlock(mddev);
4622 if (!err)
4623 md_new_event(mddev);
4624 return err ? err : len;
4625}
4626
4627static struct md_sysfs_entry md_new_device =
4628__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4629
4630static ssize_t
4631bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4632{
4633 char *end;
4634 unsigned long chunk, end_chunk;
4635 int err;
4636
4637 err = mddev_lock(mddev);
4638 if (err)
4639 return err;
4640 if (!mddev->bitmap)
4641 goto out;
4642
4643 while (*buf) {
4644 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4645 if (buf == end) break;
4646 if (*end == '-') {
4647 buf = end + 1;
4648 end_chunk = simple_strtoul(buf, &end, 0);
4649 if (buf == end) break;
4650 }
4651 if (*end && !isspace(*end)) break;
4652 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4653 buf = skip_spaces(end);
4654 }
4655 md_bitmap_unplug(mddev->bitmap);
4656out:
4657 mddev_unlock(mddev);
4658 return len;
4659}
4660
4661static struct md_sysfs_entry md_bitmap =
4662__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4663
4664static ssize_t
4665size_show(struct mddev *mddev, char *page)
4666{
4667 return sprintf(page, "%llu\n",
4668 (unsigned long long)mddev->dev_sectors / 2);
4669}
4670
4671static int update_size(struct mddev *mddev, sector_t num_sectors);
4672
4673static ssize_t
4674size_store(struct mddev *mddev, const char *buf, size_t len)
4675{
4676
4677
4678
4679
4680 sector_t sectors;
4681 int err = strict_blocks_to_sectors(buf, §ors);
4682
4683 if (err < 0)
4684 return err;
4685 err = mddev_lock(mddev);
4686 if (err)
4687 return err;
4688 if (mddev->pers) {
4689 err = update_size(mddev, sectors);
4690 if (err == 0)
4691 md_update_sb(mddev, 1);
4692 } else {
4693 if (mddev->dev_sectors == 0 ||
4694 mddev->dev_sectors > sectors)
4695 mddev->dev_sectors = sectors;
4696 else
4697 err = -ENOSPC;
4698 }
4699 mddev_unlock(mddev);
4700 return err ? err : len;
4701}
4702
4703static struct md_sysfs_entry md_size =
4704__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4705
4706
4707
4708
4709
4710
4711
4712static ssize_t
4713metadata_show(struct mddev *mddev, char *page)
4714{
4715 if (mddev->persistent)
4716 return sprintf(page, "%d.%d\n",
4717 mddev->major_version, mddev->minor_version);
4718 else if (mddev->external)
4719 return sprintf(page, "external:%s\n", mddev->metadata_type);
4720 else
4721 return sprintf(page, "none\n");
4722}
4723
4724static ssize_t
4725metadata_store(struct mddev *mddev, const char *buf, size_t len)
4726{
4727 int major, minor;
4728 char *e;
4729 int err;
4730
4731
4732
4733
4734
4735 err = mddev_lock(mddev);
4736 if (err)
4737 return err;
4738 err = -EBUSY;
4739 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4740 ;
4741 else if (!list_empty(&mddev->disks))
4742 goto out_unlock;
4743
4744 err = 0;
4745 if (cmd_match(buf, "none")) {
4746 mddev->persistent = 0;
4747 mddev->external = 0;
4748 mddev->major_version = 0;
4749 mddev->minor_version = 90;
4750 goto out_unlock;
4751 }
4752 if (strncmp(buf, "external:", 9) == 0) {
4753 size_t namelen = len-9;
4754 if (namelen >= sizeof(mddev->metadata_type))
4755 namelen = sizeof(mddev->metadata_type)-1;
4756 strncpy(mddev->metadata_type, buf+9, namelen);
4757 mddev->metadata_type[namelen] = 0;
4758 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4759 mddev->metadata_type[--namelen] = 0;
4760 mddev->persistent = 0;
4761 mddev->external = 1;
4762 mddev->major_version = 0;
4763 mddev->minor_version = 90;
4764 goto out_unlock;
4765 }
4766 major = simple_strtoul(buf, &e, 10);
4767 err = -EINVAL;
4768 if (e==buf || *e != '.')
4769 goto out_unlock;
4770 buf = e+1;
4771 minor = simple_strtoul(buf, &e, 10);
4772 if (e==buf || (*e && *e != '\n') )
4773 goto out_unlock;
4774 err = -ENOENT;
4775 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4776 goto out_unlock;
4777 mddev->major_version = major;
4778 mddev->minor_version = minor;
4779 mddev->persistent = 1;
4780 mddev->external = 0;
4781 err = 0;
4782out_unlock:
4783 mddev_unlock(mddev);
4784 return err ?: len;
4785}
4786
4787static struct md_sysfs_entry md_metadata =
4788__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4789
4790static ssize_t
4791action_show(struct mddev *mddev, char *page)
4792{
4793 char *type = "idle";
4794 unsigned long recovery = mddev->recovery;
4795 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4796 type = "frozen";
4797 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4798 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4799 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4800 type = "reshape";
4801 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4802 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4803 type = "resync";
4804 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4805 type = "check";
4806 else
4807 type = "repair";
4808 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4809 type = "recover";
4810 else if (mddev->reshape_position != MaxSector)
4811 type = "reshape";
4812 }
4813 return sprintf(page, "%s\n", type);
4814}
4815
/*
 * sysfs store for "sync_action".
 *
 * Only valid on a running array whose personality has ->sync_request
 * (-EINVAL otherwise).  Recognised words:
 *
 *   idle / frozen  clear / set MD_RECOVERY_FROZEN and, if a sync thread
 *                  is running, interrupt and reap it
 *   resync         clear MD_RECOVERY_FROZEN
 *   recover        clear MD_RECOVERY_FROZEN, set MD_RECOVERY_RECOVER
 *   reshape        call the personality's ->start_reshape
 *   check / repair request a sync pass (REQUESTED + SYNC, plus CHECK
 *                  for "check")
 *
 * Everything except idle/frozen fails with -EBUSY while
 * MD_RECOVERY_RUNNING is set; unknown words fail with -EINVAL.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
	if (!mddev->pers || !mddev->pers->sync_request)
		return -EINVAL;


	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
		if (cmd_match(page, "frozen"))
			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
		else
			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
		/*
		 * Stop a running sync.  If mddev_lock() fails the flag
		 * change above still stands and the write succeeds.
		 */
		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    mddev_lock(mddev) == 0) {
			if (work_pending(&mddev->del_work))
				flush_workqueue(md_misc_wq);
			if (mddev->sync_thread) {
				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
				md_reap_sync_thread(mddev);
			}
			mddev_unlock(mddev);
		}
	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		return -EBUSY;
	else if (cmd_match(page, "resync"))
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	else if (cmd_match(page, "recover")) {
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	} else if (cmd_match(page, "reshape")) {
		int err;
		if (mddev->pers->start_reshape == NULL)
			return -EINVAL;
		err = mddev_lock(mddev);
		if (!err) {
			/* re-check under the lock: a sync may have started */
			if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
				err = -EBUSY;
			else {
				clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
				err = mddev->pers->start_reshape(mddev);
			}
			mddev_unlock(mddev);
		}
		if (err)
			return err;
		sysfs_notify_dirent_safe(mddev->sysfs_degraded);
	} else {
		/* "check" or "repair"; anything else is rejected */
		if (cmd_match(page, "check"))
			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
		else if (!cmd_match(page, "repair"))
			return -EINVAL;
		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	}
	if (mddev->ro == 2) {
		/*
		 * A write to sync_action takes the array out of
		 * read-auto mode (ro == 2) and makes it read-write.
		 */
		mddev->ro = 0;
		md_wakeup_thread(mddev->sync_thread);
	}
	/* Let the md thread act on the updated recovery flags. */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	sysfs_notify_dirent_safe(mddev->sysfs_action);
	return len;
}

static struct md_sysfs_entry md_scan_mode =
__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4886
4887static ssize_t
4888last_sync_action_show(struct mddev *mddev, char *page)
4889{
4890 return sprintf(page, "%s\n", mddev->last_sync_action);
4891}
4892
4893static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4894
4895static ssize_t
4896mismatch_cnt_show(struct mddev *mddev, char *page)
4897{
4898 return sprintf(page, "%llu\n",
4899 (unsigned long long)
4900 atomic64_read(&mddev->resync_mismatches));
4901}
4902
4903static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4904
4905static ssize_t
4906sync_min_show(struct mddev *mddev, char *page)
4907{
4908 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4909 mddev->sync_speed_min ? "local": "system");
4910}
4911
4912static ssize_t
4913sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4914{
4915 unsigned int min;
4916 int rv;
4917
4918 if (strncmp(buf, "system", 6)==0) {
4919 min = 0;
4920 } else {
4921 rv = kstrtouint(buf, 10, &min);
4922 if (rv < 0)
4923 return rv;
4924 if (min == 0)
4925 return -EINVAL;
4926 }
4927 mddev->sync_speed_min = min;
4928 return len;
4929}
4930
4931static struct md_sysfs_entry md_sync_min =
4932__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4933
4934static ssize_t
4935sync_max_show(struct mddev *mddev, char *page)
4936{
4937 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4938 mddev->sync_speed_max ? "local": "system");
4939}
4940
4941static ssize_t
4942sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4943{
4944 unsigned int max;
4945 int rv;
4946
4947 if (strncmp(buf, "system", 6)==0) {
4948 max = 0;
4949 } else {
4950 rv = kstrtouint(buf, 10, &max);
4951 if (rv < 0)
4952 return rv;
4953 if (max == 0)
4954 return -EINVAL;
4955 }
4956 mddev->sync_speed_max = max;
4957 return len;
4958}
4959
4960static struct md_sysfs_entry md_sync_max =
4961__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4962
4963static ssize_t
4964degraded_show(struct mddev *mddev, char *page)
4965{
4966 return sprintf(page, "%d\n", mddev->degraded);
4967}
4968static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4969
4970static ssize_t
4971sync_force_parallel_show(struct mddev *mddev, char *page)
4972{
4973 return sprintf(page, "%d\n", mddev->parallel_resync);
4974}
4975
4976static ssize_t
4977sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4978{
4979 long n;
4980
4981 if (kstrtol(buf, 10, &n))
4982 return -EINVAL;
4983
4984 if (n != 0 && n != 1)
4985 return -EINVAL;
4986
4987 mddev->parallel_resync = n;
4988
4989 if (mddev->sync_thread)
4990 wake_up(&resync_wait);
4991
4992 return len;
4993}
4994
4995
4996static struct md_sysfs_entry md_sync_force_parallel =
4997__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4998 sync_force_parallel_show, sync_force_parallel_store);
4999
5000static ssize_t
5001sync_speed_show(struct mddev *mddev, char *page)
5002{
5003 unsigned long resync, dt, db;
5004 if (mddev->curr_resync == 0)
5005 return sprintf(page, "none\n");
5006 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5007 dt = (jiffies - mddev->resync_mark) / HZ;
5008 if (!dt) dt++;
5009 db = resync - mddev->resync_mark_cnt;
5010 return sprintf(page, "%lu\n", db/dt/2);
5011}
5012
5013static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
5014
5015static ssize_t
5016sync_completed_show(struct mddev *mddev, char *page)
5017{
5018 unsigned long long max_sectors, resync;
5019
5020 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5021 return sprintf(page, "none\n");
5022
5023 if (mddev->curr_resync == 1 ||
5024 mddev->curr_resync == 2)
5025 return sprintf(page, "delayed\n");
5026
5027 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5028 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5029 max_sectors = mddev->resync_max_sectors;
5030 else
5031 max_sectors = mddev->dev_sectors;
5032
5033 resync = mddev->curr_resync_completed;
5034 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5035}
5036
5037static struct md_sysfs_entry md_sync_completed =
5038 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5039
5040static ssize_t
5041min_sync_show(struct mddev *mddev, char *page)
5042{
5043 return sprintf(page, "%llu\n",
5044 (unsigned long long)mddev->resync_min);
5045}
5046static ssize_t
5047min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5048{
5049 unsigned long long min;
5050 int err;
5051
5052 if (kstrtoull(buf, 10, &min))
5053 return -EINVAL;
5054
5055 spin_lock(&mddev->lock);
5056 err = -EINVAL;
5057 if (min > mddev->resync_max)
5058 goto out_unlock;
5059
5060 err = -EBUSY;
5061 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5062 goto out_unlock;
5063
5064
5065 mddev->resync_min = round_down(min, 8);
5066 err = 0;
5067
5068out_unlock:
5069 spin_unlock(&mddev->lock);
5070 return err ?: len;
5071}
5072
5073static struct md_sysfs_entry md_min_sync =
5074__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5075
5076static ssize_t
5077max_sync_show(struct mddev *mddev, char *page)
5078{
5079 if (mddev->resync_max == MaxSector)
5080 return sprintf(page, "max\n");
5081 else
5082 return sprintf(page, "%llu\n",
5083 (unsigned long long)mddev->resync_max);
5084}
5085static ssize_t
5086max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5087{
5088 int err;
5089 spin_lock(&mddev->lock);
5090 if (strncmp(buf, "max", 3) == 0)
5091 mddev->resync_max = MaxSector;
5092 else {
5093 unsigned long long max;
5094 int chunk;
5095
5096 err = -EINVAL;
5097 if (kstrtoull(buf, 10, &max))
5098 goto out_unlock;
5099 if (max < mddev->resync_min)
5100 goto out_unlock;
5101
5102 err = -EBUSY;
5103 if (max < mddev->resync_max &&
5104 mddev->ro == 0 &&
5105 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5106 goto out_unlock;
5107
5108
5109 chunk = mddev->chunk_sectors;
5110 if (chunk) {
5111 sector_t temp = max;
5112
5113 err = -EINVAL;
5114 if (sector_div(temp, chunk))
5115 goto out_unlock;
5116 }
5117 mddev->resync_max = max;
5118 }
5119 wake_up(&mddev->recovery_wait);
5120 err = 0;
5121out_unlock:
5122 spin_unlock(&mddev->lock);
5123 return err ?: len;
5124}
5125
5126static struct md_sysfs_entry md_max_sync =
5127__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
5128
5129static ssize_t
5130suspend_lo_show(struct mddev *mddev, char *page)
5131{
5132 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5133}
5134
5135static ssize_t
5136suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5137{
5138 unsigned long long new;
5139 int err;
5140
5141 err = kstrtoull(buf, 10, &new);
5142 if (err < 0)
5143 return err;
5144 if (new != (sector_t)new)
5145 return -EINVAL;
5146
5147 err = mddev_lock(mddev);
5148 if (err)
5149 return err;
5150 err = -EINVAL;
5151 if (mddev->pers == NULL ||
5152 mddev->pers->quiesce == NULL)
5153 goto unlock;
5154 mddev_suspend(mddev);
5155 mddev->suspend_lo = new;
5156 mddev_resume(mddev);
5157
5158 err = 0;
5159unlock:
5160 mddev_unlock(mddev);
5161 return err ?: len;
5162}
5163static struct md_sysfs_entry md_suspend_lo =
5164__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5165
5166static ssize_t
5167suspend_hi_show(struct mddev *mddev, char *page)
5168{
5169 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5170}
5171
5172static ssize_t
5173suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5174{
5175 unsigned long long new;
5176 int err;
5177
5178 err = kstrtoull(buf, 10, &new);
5179 if (err < 0)
5180 return err;
5181 if (new != (sector_t)new)
5182 return -EINVAL;
5183
5184 err = mddev_lock(mddev);
5185 if (err)
5186 return err;
5187 err = -EINVAL;
5188 if (mddev->pers == NULL)
5189 goto unlock;
5190
5191 mddev_suspend(mddev);
5192 mddev->suspend_hi = new;
5193 mddev_resume(mddev);
5194
5195 err = 0;
5196unlock:
5197 mddev_unlock(mddev);
5198 return err ?: len;
5199}
5200static struct md_sysfs_entry md_suspend_hi =
5201__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5202
5203static ssize_t
5204reshape_position_show(struct mddev *mddev, char *page)
5205{
5206 if (mddev->reshape_position != MaxSector)
5207 return sprintf(page, "%llu\n",
5208 (unsigned long long)mddev->reshape_position);
5209 strcpy(page, "none\n");
5210 return 5;
5211}
5212
5213static ssize_t
5214reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5215{
5216 struct md_rdev *rdev;
5217 unsigned long long new;
5218 int err;
5219
5220 err = kstrtoull(buf, 10, &new);
5221 if (err < 0)
5222 return err;
5223 if (new != (sector_t)new)
5224 return -EINVAL;
5225 err = mddev_lock(mddev);
5226 if (err)
5227 return err;
5228 err = -EBUSY;
5229 if (mddev->pers)
5230 goto unlock;
5231 mddev->reshape_position = new;
5232 mddev->delta_disks = 0;
5233 mddev->reshape_backwards = 0;
5234 mddev->new_level = mddev->level;
5235 mddev->new_layout = mddev->layout;
5236 mddev->new_chunk_sectors = mddev->chunk_sectors;
5237 rdev_for_each(rdev, mddev)
5238 rdev->new_data_offset = rdev->data_offset;
5239 err = 0;
5240unlock:
5241 mddev_unlock(mddev);
5242 return err ?: len;
5243}
5244
5245static struct md_sysfs_entry md_reshape_position =
5246__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5247 reshape_position_store);
5248
5249static ssize_t
5250reshape_direction_show(struct mddev *mddev, char *page)
5251{
5252 return sprintf(page, "%s\n",
5253 mddev->reshape_backwards ? "backwards" : "forwards");
5254}
5255
5256static ssize_t
5257reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5258{
5259 int backwards = 0;
5260 int err;
5261
5262 if (cmd_match(buf, "forwards"))
5263 backwards = 0;
5264 else if (cmd_match(buf, "backwards"))
5265 backwards = 1;
5266 else
5267 return -EINVAL;
5268 if (mddev->reshape_backwards == backwards)
5269 return len;
5270
5271 err = mddev_lock(mddev);
5272 if (err)
5273 return err;
5274
5275 if (mddev->delta_disks)
5276 err = -EBUSY;
5277 else if (mddev->persistent &&
5278 mddev->major_version == 0)
5279 err = -EINVAL;
5280 else
5281 mddev->reshape_backwards = backwards;
5282 mddev_unlock(mddev);
5283 return err ?: len;
5284}
5285
5286static struct md_sysfs_entry md_reshape_direction =
5287__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5288 reshape_direction_store);
5289
5290static ssize_t
5291array_size_show(struct mddev *mddev, char *page)
5292{
5293 if (mddev->external_size)
5294 return sprintf(page, "%llu\n",
5295 (unsigned long long)mddev->array_sectors/2);
5296 else
5297 return sprintf(page, "default\n");
5298}
5299
5300static ssize_t
5301array_size_store(struct mddev *mddev, const char *buf, size_t len)
5302{
5303 sector_t sectors;
5304 int err;
5305
5306 err = mddev_lock(mddev);
5307 if (err)
5308 return err;
5309
5310
5311 if (mddev_is_clustered(mddev)) {
5312 mddev_unlock(mddev);
5313 return -EINVAL;
5314 }
5315
5316 if (strncmp(buf, "default", 7) == 0) {
5317 if (mddev->pers)
5318 sectors = mddev->pers->size(mddev, 0, 0);
5319 else
5320 sectors = mddev->array_sectors;
5321
5322 mddev->external_size = 0;
5323 } else {
5324 if (strict_blocks_to_sectors(buf, §ors) < 0)
5325 err = -EINVAL;
5326 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5327 err = -E2BIG;
5328 else
5329 mddev->external_size = 1;
5330 }
5331
5332 if (!err) {
5333 mddev->array_sectors = sectors;
5334 if (mddev->pers)
5335 set_capacity_and_notify(mddev->gendisk,
5336 mddev->array_sectors);
5337 }
5338 mddev_unlock(mddev);
5339 return err ?: len;
5340}
5341
5342static struct md_sysfs_entry md_array_size =
5343__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5344 array_size_store);
5345
5346static ssize_t
5347consistency_policy_show(struct mddev *mddev, char *page)
5348{
5349 int ret;
5350
5351 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5352 ret = sprintf(page, "journal\n");
5353 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5354 ret = sprintf(page, "ppl\n");
5355 } else if (mddev->bitmap) {
5356 ret = sprintf(page, "bitmap\n");
5357 } else if (mddev->pers) {
5358 if (mddev->pers->sync_request)
5359 ret = sprintf(page, "resync\n");
5360 else
5361 ret = sprintf(page, "none\n");
5362 } else {
5363 ret = sprintf(page, "unknown\n");
5364 }
5365
5366 return ret;
5367}
5368
5369static ssize_t
5370consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5371{
5372 int err = 0;
5373
5374 if (mddev->pers) {
5375 if (mddev->pers->change_consistency_policy)
5376 err = mddev->pers->change_consistency_policy(mddev, buf);
5377 else
5378 err = -EBUSY;
5379 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5380 set_bit(MD_HAS_PPL, &mddev->flags);
5381 } else {
5382 err = -EINVAL;
5383 }
5384
5385 return err ? err : len;
5386}
5387
5388static struct md_sysfs_entry md_consistency_policy =
5389__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5390 consistency_policy_store);
5391
5392static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5393{
5394 return sprintf(page, "%d\n", mddev->fail_last_dev);
5395}
5396
5397
5398
5399
5400
5401static ssize_t
5402fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5403{
5404 int ret;
5405 bool value;
5406
5407 ret = kstrtobool(buf, &value);
5408 if (ret)
5409 return ret;
5410
5411 if (value != mddev->fail_last_dev)
5412 mddev->fail_last_dev = value;
5413
5414 return len;
5415}
5416static struct md_sysfs_entry md_fail_last_dev =
5417__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5418 fail_last_dev_store);
5419
5420static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5421{
5422 if (mddev->pers == NULL || (mddev->pers->level != 1))
5423 return sprintf(page, "n/a\n");
5424 else
5425 return sprintf(page, "%d\n", mddev->serialize_policy);
5426}
5427
5428
5429
5430
5431
5432static ssize_t
5433serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5434{
5435 int err;
5436 bool value;
5437
5438 err = kstrtobool(buf, &value);
5439 if (err)
5440 return err;
5441
5442 if (value == mddev->serialize_policy)
5443 return len;
5444
5445 err = mddev_lock(mddev);
5446 if (err)
5447 return err;
5448 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5449 pr_err("md: serialize_policy is only effective for raid1\n");
5450 err = -EINVAL;
5451 goto unlock;
5452 }
5453
5454 mddev_suspend(mddev);
5455 if (value)
5456 mddev_create_serial_pool(mddev, NULL, true);
5457 else
5458 mddev_destroy_serial_pool(mddev, NULL, true);
5459 mddev->serialize_policy = value;
5460 mddev_resume(mddev);
5461unlock:
5462 mddev_unlock(mddev);
5463 return err ?: len;
5464}
5465
5466static struct md_sysfs_entry md_serialize_policy =
5467__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5468 serialize_policy_store);
5469
5470
/*
 * NULL-terminated list of the sysfs attributes installed for every md kobject;
 * it is referenced from md_ktype.default_attrs.
 */
static struct attribute *md_default_attrs[] = {
	&md_level.attr,
	&md_layout.attr,
	&md_raid_disks.attr,
	&md_uuid.attr,
	&md_chunk_size.attr,
	&md_size.attr,
	&md_resync_start.attr,
	&md_metadata.attr,
	&md_new_device.attr,
	&md_safe_delay.attr,
	&md_array_state.attr,
	&md_reshape_position.attr,
	&md_reshape_direction.attr,
	&md_array_size.attr,
	&max_corr_read_errors.attr,
	&md_consistency_policy.attr,
	&md_fail_last_dev.attr,
	&md_serialize_policy.attr,
	NULL,
};
5492
/*
 * NULL-terminated list of the extra sysfs attributes used by personalities
 * that provide ->sync_request.  md_run() creates the group on the md kobject
 * and __md_stop() queues it for removal via mddev->to_remove.
 */
static struct attribute *md_redundancy_attrs[] = {
	&md_scan_mode.attr,
	&md_last_scan_mode.attr,
	&md_mismatches.attr,
	&md_sync_min.attr,
	&md_sync_max.attr,
	&md_sync_speed.attr,
	&md_sync_force_parallel.attr,
	&md_sync_completed.attr,
	&md_min_sync.attr,
	&md_max_sync.attr,
	&md_suspend_lo.attr,
	&md_suspend_hi.attr,
	&md_bitmap.attr,
	&md_degraded.attr,
	NULL,
};
/* Unnamed group: the attributes appear directly in the md kobject directory. */
static const struct attribute_group md_redundancy_group = {
	.name = NULL,
	.attrs = md_redundancy_attrs,
};
5514
5515static ssize_t
5516md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5517{
5518 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5519 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5520 ssize_t rv;
5521
5522 if (!entry->show)
5523 return -EIO;
5524 spin_lock(&all_mddevs_lock);
5525 if (list_empty(&mddev->all_mddevs)) {
5526 spin_unlock(&all_mddevs_lock);
5527 return -EBUSY;
5528 }
5529 mddev_get(mddev);
5530 spin_unlock(&all_mddevs_lock);
5531
5532 rv = entry->show(mddev, page);
5533 mddev_put(mddev);
5534 return rv;
5535}
5536
5537static ssize_t
5538md_attr_store(struct kobject *kobj, struct attribute *attr,
5539 const char *page, size_t length)
5540{
5541 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5542 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5543 ssize_t rv;
5544
5545 if (!entry->store)
5546 return -EIO;
5547 if (!capable(CAP_SYS_ADMIN))
5548 return -EACCES;
5549 spin_lock(&all_mddevs_lock);
5550 if (list_empty(&mddev->all_mddevs)) {
5551 spin_unlock(&all_mddevs_lock);
5552 return -EBUSY;
5553 }
5554 mddev_get(mddev);
5555 spin_unlock(&all_mddevs_lock);
5556 rv = entry->store(mddev, page, length);
5557 mddev_put(mddev);
5558 return rv;
5559}
5560
5561static void md_free(struct kobject *ko)
5562{
5563 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5564
5565 if (mddev->sysfs_state)
5566 sysfs_put(mddev->sysfs_state);
5567 if (mddev->sysfs_level)
5568 sysfs_put(mddev->sysfs_level);
5569
5570 if (mddev->gendisk) {
5571 del_gendisk(mddev->gendisk);
5572 blk_cleanup_disk(mddev->gendisk);
5573 }
5574 percpu_ref_exit(&mddev->writes_pending);
5575
5576 bioset_exit(&mddev->bio_set);
5577 bioset_exit(&mddev->sync_set);
5578 if (mddev->level != 1 && mddev->level != 10)
5579 bioset_exit(&mddev->io_acct_set);
5580 kfree(mddev);
5581}
5582
/* Route every md attribute read/write through the md_attr_* dispatchers. */
static const struct sysfs_ops md_sysfs_ops = {
	.show = md_attr_show,
	.store = md_attr_store,
};
/*
 * kobject type of an mddev: md_free() releases it, and md_default_attrs is
 * the attribute set installed by default.
 */
static struct kobj_type md_ktype = {
	.release = md_free,
	.sysfs_ops = &md_sysfs_ops,
	.default_attrs = md_default_attrs,
};

/* Non-static and initialised to 0; where it gets assigned is not visible in this part of the file. */
int mdp_major = 0;
5594
5595static void mddev_delayed_delete(struct work_struct *ws)
5596{
5597 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5598
5599 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5600 kobject_del(&mddev->kobj);
5601 kobject_put(&mddev->kobj);
5602}
5603
/* Release callback for writes_pending; intentionally does nothing. */
static void no_op(struct percpu_ref *r)
{
}
5605
5606int mddev_init_writes_pending(struct mddev *mddev)
5607{
5608 if (mddev->writes_pending.percpu_count_ptr)
5609 return 0;
5610 if (percpu_ref_init(&mddev->writes_pending, no_op,
5611 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5612 return -ENOMEM;
5613
5614 percpu_ref_put(&mddev->writes_pending);
5615 return 0;
5616}
5617EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5618
/*
 * Allocate an mddev for @dev / @name and register its gendisk and md kobject.
 *
 * Callers visible in this file: md_probe() passes (dev, NULL);
 * add_named_array() passes (0, "md_...") for named arrays and
 * (MKDEV(MD_MAJOR, n), NULL) for "mdN".
 *
 * The disk name is @name when given, otherwise "md_d<unit>" for devices whose
 * major differs from MD_MAJOR and "md<unit>" for the rest.
 *
 * Returns 0 on success, the mddev_alloc() error, -EEXIST if a named array
 * with the same disk name already exists, or -ENOMEM if the disk cannot be
 * allocated.  A kobject_add() failure is logged and deliberately ignored.
 */
static int md_alloc(dev_t dev, char *name)
{
	/*
	 * disks_mutex serialises the whole allocate-and-register sequence
	 * across concurrent callers.
	 */
	static DEFINE_MUTEX(disks_mutex);
	struct mddev *mddev;
	struct gendisk *disk;
	int partitioned;
	int shift;
	int unit;
	int error ;

	/*
	 * Drain md_misc_wq first - presumably so that a pending
	 * mddev_delayed_delete() for a previous instance of this device has
	 * finished before a new one is created (TODO confirm what else is
	 * queued there).
	 */
	flush_workqueue(md_misc_wq);

	mutex_lock(&disks_mutex);
	mddev = mddev_alloc(dev);
	if (IS_ERR(mddev)) {
		mutex_unlock(&disks_mutex);
		return PTR_ERR(mddev);
	}

	/* Devices on a major other than MD_MAJOR use MdpMinorShift minor bits per unit. */
	partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
	shift = partitioned ? MdpMinorShift : 0;
	unit = MINOR(mddev->unit) >> shift;

	if (name && !dev) {
		/*
		 * Named array without a fixed device number: make sure no
		 * existing array's gendisk already uses this name.
		 */
		struct mddev *mddev2;
		spin_lock(&all_mddevs_lock);

		list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
			if (mddev2->gendisk &&
			    strcmp(mddev2->gendisk->disk_name, name) == 0) {
				spin_unlock(&all_mddevs_lock);
				error = -EEXIST;
				goto abort;
			}
		spin_unlock(&all_mddevs_lock);
	}
	if (name && dev)
		/*
		 * Both a name and an explicit device number were supplied:
		 * keep the array held active until it is stopped.
		 */
		mddev->hold_active = UNTIL_STOP;

	error = -ENOMEM;
	disk = blk_alloc_disk(NUMA_NO_NODE);
	if (!disk)
		goto abort;

	disk->major = MAJOR(mddev->unit);
	disk->first_minor = unit << shift;
	disk->minors = 1 << shift;
	if (name)
		strcpy(disk->disk_name, name);
	else if (partitioned)
		sprintf(disk->disk_name, "md_d%d", unit);
	else
		sprintf(disk->disk_name, "md%d", unit);
	disk->fops = &md_fops;
	disk->private_data = mddev;

	mddev->queue = disk->queue;
	blk_set_stacking_limits(&mddev->queue->limits);
	blk_queue_write_cache(mddev->queue, true, true);
	/*
	 * Enable extended device numbers and media-change events on the disk,
	 * then publish it through mddev->gendisk.
	 */
	disk->flags |= GENHD_FL_EXT_DEVT;
	disk->events |= DISK_EVENT_MEDIA_CHANGE;
	mddev->gendisk = disk;
	/*
	 * open_mutex is held across add_disk() and the kobject registration -
	 * presumably so that an open triggered by the new disk appearing
	 * blocks until setup is complete (TODO confirm against md_open()).
	 */
	mutex_lock(&mddev->open_mutex);
	add_disk(disk);

	error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
		/*
		 * Failure to add the "md" kobject is only logged; the error
		 * is cleared so the function still reports success.
		 */
		pr_debug("md: cannot register %s/md - name in use\n",
			 disk->disk_name);
		error = 0;
	}
	if (mddev->kobj.sd &&
	    sysfs_create_group(&mddev->kobj, &md_bitmap_group))
		pr_debug("pointless warning\n");
	mutex_unlock(&mddev->open_mutex);
 abort:
	mutex_unlock(&disks_mutex);
	/* Only announce the kobject and cache its dirents if it was really added. */
	if (!error && mddev->kobj.sd) {
		kobject_uevent(&mddev->kobj, KOBJ_ADD);
		mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
		mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
	}
	/* Drop a reference on both paths - presumably the one returned by mddev_alloc(). */
	mddev_put(mddev);
	return error;
}
5732
5733static void md_probe(dev_t dev)
5734{
5735 if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
5736 return;
5737 if (create_on_open)
5738 md_alloc(dev, NULL);
5739}
5740
5741static int add_named_array(const char *val, const struct kernel_param *kp)
5742{
5743
5744
5745
5746
5747
5748
5749
5750 int len = strlen(val);
5751 char buf[DISK_NAME_LEN];
5752 unsigned long devnum;
5753
5754 while (len && val[len-1] == '\n')
5755 len--;
5756 if (len >= DISK_NAME_LEN)
5757 return -E2BIG;
5758 strlcpy(buf, val, len+1);
5759 if (strncmp(buf, "md_", 3) == 0)
5760 return md_alloc(0, buf);
5761 if (strncmp(buf, "md", 2) == 0 &&
5762 isdigit(buf[2]) &&
5763 kstrtoul(buf+2, 10, &devnum) == 0 &&
5764 devnum <= MINORMASK)
5765 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5766
5767 return -EINVAL;
5768}
5769
5770static void md_safemode_timeout(struct timer_list *t)
5771{
5772 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5773
5774 mddev->safemode = 1;
5775 if (mddev->external)
5776 sysfs_notify_dirent_safe(mddev->sysfs_state);
5777
5778 md_wakeup_thread(mddev->thread);
5779}
5780
/* Copied into mddev->ok_start_degraded by md_run(); zero unless set elsewhere in the file. */
static int start_dirty_degraded;
5782
/*
 * md_run() - bring a configured array into the running state.
 *
 * In order: validate the member devices, set up the biosets, look up and pin
 * the personality, call its ->run() method, create the bitmap and the serial
 * pool if configured, set queue flags, register the redundancy sysfs group,
 * and finally publish the personality in mddev->pers.
 *
 * Returns 0 on success or a negative errno.  Failures after the biosets exist
 * unwind through the labels at the bottom of the function.
 */
int md_run(struct mddev *mddev)
{
	int err;
	struct md_rdev *rdev;
	struct md_personality *pers;

	if (list_empty(&mddev->disks))
		/* an array without member devices cannot be started */
		return -EINVAL;

	/* already running */
	if (mddev->pers)
		return -EBUSY;
	/* NOTE(review): sysfs_active presumably marks a teardown still in flight - confirm */
	if (mddev->sysfs_active)
		return -EBUSY;

	/*
	 * raid_disks == 0 means the geometry has not been set yet; for
	 * persistent arrays it is obtained via analyze_sbs().
	 */
	if (!mddev->raid_disks) {
		if (!mddev->persistent)
			return -EINVAL;
		err = analyze_sbs(mddev);
		if (err)
			return -EINVAL;
	}

	/* Try to load the personality module, by numeric level or by name. */
	if (mddev->level != LEVEL_NONE)
		request_module("md-level-%d", mddev->level);
	else if (mddev->clevel[0])
		request_module("md-%s", mddev->clevel);

	/*
	 * Per-device pass over the non-Faulty members: flush and invalidate
	 * the page cache of the underlying block device, propagate read-only
	 * status to the array, note whether any device carries a superblock
	 * page, and check that data and metadata do not overlap.
	 */
	mddev->has_superblocks = false;
	rdev_for_each(rdev, mddev) {
		if (test_bit(Faulty, &rdev->flags))
			continue;
		sync_blockdev(rdev->bdev);
		invalidate_bdev(rdev->bdev);
		if (mddev->ro != 1 && rdev_read_only(rdev)) {
			mddev->ro = 1;
			if (mddev->gendisk)
				set_disk_ro(mddev->gendisk, 1);
		}

		if (rdev->sb_page)
			mddev->has_superblocks = true;

		/*
		 * Overlap check: with a separate metadata device there is
		 * nothing to verify; otherwise whichever of data/superblock
		 * comes first must end before the other begins.
		 */
		if (rdev->meta_bdev) {
			/* separate metadata device: nothing to check */
			;
		} else if (rdev->data_offset < rdev->sb_start) {
			if (mddev->dev_sectors &&
			    rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start) {
				pr_warn("md: %s: data overlaps metadata\n",
					mdname(mddev));
				return -EINVAL;
			}
		} else {
			if (rdev->sb_start + rdev->sb_size/512
			    > rdev->data_offset) {
				pr_warn("md: %s: metadata overlaps data\n",
					mdname(mddev));
				return -EINVAL;
			}
		}
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	}

	/* Biosets are initialised lazily and may survive from an earlier run. */
	if (!bioset_initialized(&mddev->bio_set)) {
		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			return err;
	}
	if (!bioset_initialized(&mddev->sync_set)) {
		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			goto exit_bio_set;
	}
	/* io_acct_set is not used for levels 1 and 10. */
	if (mddev->level != 1 && mddev->level != 10 &&
	    !bioset_initialized(&mddev->io_acct_set)) {
		err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
				  offsetof(struct md_io_acct, bio_clone), 0);
		if (err)
			goto exit_sync_set;
	}

	/* Look up the personality and pin its module while holding pers_lock. */
	spin_lock(&pers_lock);
	pers = find_pers(mddev->level, mddev->clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		if (mddev->level != LEVEL_NONE)
			pr_warn("md: personality for level %d is not loaded!\n",
				mddev->level);
		else
			pr_warn("md: personality for level %s is not loaded!\n",
				mddev->clevel);
		err = -EINVAL;
		goto abort;
	}
	spin_unlock(&pers_lock);
	/* Make the numeric level and the name agree with the personality found. */
	if (mddev->level != pers->level) {
		mddev->level = pers->level;
		mddev->new_level = pers->level;
	}
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
		/* a reshape is recorded but this personality cannot reshape */
		module_put(pers->owner);
		err = -EINVAL;
		goto abort;
	}

	if (pers->sync_request) {
		/*
		 * Warn about member pairs that share the same gendisk.  The
		 * pointer comparison makes each pair get reported once.
		 */
		char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
		struct md_rdev *rdev2;
		int warned = 0;

		rdev_for_each(rdev, mddev)
			rdev_for_each(rdev2, mddev) {
				if (rdev < rdev2 &&
				    rdev->bdev->bd_disk ==
				    rdev2->bdev->bd_disk) {
					pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
						mdname(mddev),
						bdevname(rdev->bdev,b),
						bdevname(rdev2->bdev,b2));
					warned = 1;
				}
			}

		if (warned)
			pr_warn("True protection against single-disk failure might be compromised.\n");
	}

	mddev->recovery = 0;
	/* default value, set before ->run() gets a chance to change it */
	mddev->resync_max_sectors = mddev->dev_sectors;

	mddev->ok_start_degraded = start_dirty_degraded;

	/* ro == 2 is converted back to 0 below for personalities without ->sync_request */
	if (start_readonly && mddev->ro == 0)
		mddev->ro = 2;

	err = pers->run(mddev);
	if (err)
		pr_warn("md: pers->run() failed ...\n");
	else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
		/* the configured array size exceeds what the personality provides */
		WARN_ONCE(!mddev->external_size,
			  "%s: default size too small, but 'external_size' not in effect?\n",
			  __func__);
		pr_warn("md: invalid array_size %llu > default size %llu\n",
			(unsigned long long)mddev->array_sectors / 2,
			(unsigned long long)pers->size(mddev, 0, 0) / 2);
		err = -EINVAL;
	}
	/* Create the bitmap only if one is configured (file or offset). */
	if (err == 0 && pers->sync_request &&
	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
		struct bitmap *bitmap;

		bitmap = md_bitmap_create(mddev, -1);
		if (IS_ERR(bitmap)) {
			err = PTR_ERR(bitmap);
			pr_warn("%s: failed to create bitmap (%d)\n",
				mdname(mddev), err);
		} else
			mddev->bitmap = bitmap;

	}
	if (err)
		goto bitmap_abort;

	/*
	 * With write-behind enabled, initialise serialisation on WriteMostly
	 * members and create the shared serial_info pool if any of them asks
	 * for it (rdev_init_serial() returning non-zero).
	 */
	if (mddev->bitmap_info.max_write_behind > 0) {
		bool create_pool = false;

		rdev_for_each(rdev, mddev) {
			if (test_bit(WriteMostly, &rdev->flags) &&
			    rdev_init_serial(rdev))
				create_pool = true;
		}
		if (create_pool && mddev->serial_info_pool == NULL) {
			mddev->serial_info_pool =
				mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
			if (!mddev->serial_info_pool) {
				err = -ENOMEM;
				goto bitmap_abort;
			}
		}
	}

	/*
	 * The array queue is flagged non-rotational only if every active
	 * member is non-rotational and the array is not degraded.
	 */
	if (mddev->queue) {
		bool nonrot = true;

		rdev_for_each(rdev, mddev) {
			if (rdev->raid_disk >= 0 &&
			    !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
				nonrot = false;
				break;
			}
		}
		if (mddev->degraded)
			nonrot = false;
		if (nonrot)
			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
		else
			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
	}
	/* Personalities with ->sync_request get the redundancy attribute group. */
	if (pers->sync_request) {
		if (mddev->kobj.sd &&
		    sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			pr_warn("md: cannot register extra attributes for %s\n",
				mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
		mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
		mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
	} else if (mddev->ro == 2)
		mddev->ro = 0;

	atomic_set(&mddev->max_corr_read_errors,
		   MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
	mddev->safemode = 0;
	if (mddev_is_clustered(mddev))
		mddev->safemode_delay = 0;
	else
		mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
	mddev->in_sync = 1;
	/*
	 * Write barrier before publishing ->pers under mddev->lock -
	 * presumably so that anyone who observes ->pers also observes the
	 * fields initialised above (TODO confirm the pairing read side).
	 */
	smp_wmb();
	spin_lock(&mddev->lock);
	mddev->pers = pers;
	spin_unlock(&mddev->lock);
	/* The return value of sysfs_link_rdev() is not checked. */
	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk >= 0)
			sysfs_link_rdev(mddev, rdev);

	if (mddev->degraded && !mddev->ro)
		/*
		 * Only this first set_bit() is conditional;
		 * MD_RECOVERY_NEEDED below is always set.
		 */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

	if (mddev->sb_flags)
		md_update_sb(mddev, 0);

	md_new_event(mddev);
	return 0;

	/* Error unwinding: each label undoes progressively less. */
bitmap_abort:
	mddev_detach(mddev);
	if (mddev->private)
		pers->free(mddev, mddev->private);
	mddev->private = NULL;
	module_put(pers->owner);
	md_bitmap_destroy(mddev);
abort:
	if (mddev->level != 1 && mddev->level != 10)
		bioset_exit(&mddev->io_acct_set);
exit_sync_set:
	bioset_exit(&mddev->sync_set);
exit_bio_set:
	bioset_exit(&mddev->bio_set);
	return err;
}
EXPORT_SYMBOL_GPL(md_run);
6062
6063int do_md_run(struct mddev *mddev)
6064{
6065 int err;
6066
6067 set_bit(MD_NOT_READY, &mddev->flags);
6068 err = md_run(mddev);
6069 if (err)
6070 goto out;
6071 err = md_bitmap_load(mddev);
6072 if (err) {
6073 md_bitmap_destroy(mddev);
6074 goto out;
6075 }
6076
6077 if (mddev_is_clustered(mddev))
6078 md_allow_write(mddev);
6079
6080
6081 md_start(mddev);
6082
6083 md_wakeup_thread(mddev->thread);
6084 md_wakeup_thread(mddev->sync_thread);
6085
6086 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6087 clear_bit(MD_NOT_READY, &mddev->flags);
6088 mddev->changed = 1;
6089 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6090 sysfs_notify_dirent_safe(mddev->sysfs_state);
6091 sysfs_notify_dirent_safe(mddev->sysfs_action);
6092 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6093out:
6094 clear_bit(MD_NOT_READY, &mddev->flags);
6095 return err;
6096}
6097
6098int md_start(struct mddev *mddev)
6099{
6100 int ret = 0;
6101
6102 if (mddev->pers->start) {
6103 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6104 md_wakeup_thread(mddev->thread);
6105 ret = mddev->pers->start(mddev);
6106 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6107 md_wakeup_thread(mddev->sync_thread);
6108 }
6109 return ret;
6110}
6111EXPORT_SYMBOL_GPL(md_start);
6112
6113static int restart_array(struct mddev *mddev)
6114{
6115 struct gendisk *disk = mddev->gendisk;
6116 struct md_rdev *rdev;
6117 bool has_journal = false;
6118 bool has_readonly = false;
6119
6120
6121 if (list_empty(&mddev->disks))
6122 return -ENXIO;
6123 if (!mddev->pers)
6124 return -EINVAL;
6125 if (!mddev->ro)
6126 return -EBUSY;
6127
6128 rcu_read_lock();
6129 rdev_for_each_rcu(rdev, mddev) {
6130 if (test_bit(Journal, &rdev->flags) &&
6131 !test_bit(Faulty, &rdev->flags))
6132 has_journal = true;
6133 if (rdev_read_only(rdev))
6134 has_readonly = true;
6135 }
6136 rcu_read_unlock();
6137 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6138
6139 return -EINVAL;
6140 if (has_readonly)
6141 return -EROFS;
6142
6143 mddev->safemode = 0;
6144 mddev->ro = 0;
6145 set_disk_ro(disk, 0);
6146 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6147
6148 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6149 md_wakeup_thread(mddev->thread);
6150 md_wakeup_thread(mddev->sync_thread);
6151 sysfs_notify_dirent_safe(mddev->sysfs_state);
6152 return 0;
6153}
6154
6155static void md_clean(struct mddev *mddev)
6156{
6157 mddev->array_sectors = 0;
6158 mddev->external_size = 0;
6159 mddev->dev_sectors = 0;
6160 mddev->raid_disks = 0;
6161 mddev->recovery_cp = 0;
6162 mddev->resync_min = 0;
6163 mddev->resync_max = MaxSector;
6164 mddev->reshape_position = MaxSector;
6165 mddev->external = 0;
6166 mddev->persistent = 0;
6167 mddev->level = LEVEL_NONE;
6168 mddev->clevel[0] = 0;
6169 mddev->flags = 0;
6170 mddev->sb_flags = 0;
6171 mddev->ro = 0;
6172 mddev->metadata_type[0] = 0;
6173 mddev->chunk_sectors = 0;
6174 mddev->ctime = mddev->utime = 0;
6175 mddev->layout = 0;
6176 mddev->max_disks = 0;
6177 mddev->events = 0;
6178 mddev->can_decrease_events = 0;
6179 mddev->delta_disks = 0;
6180 mddev->reshape_backwards = 0;
6181 mddev->new_level = LEVEL_NONE;
6182 mddev->new_layout = 0;
6183 mddev->new_chunk_sectors = 0;
6184 mddev->curr_resync = 0;
6185 atomic64_set(&mddev->resync_mismatches, 0);
6186 mddev->suspend_lo = mddev->suspend_hi = 0;
6187 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6188 mddev->recovery = 0;
6189 mddev->in_sync = 0;
6190 mddev->changed = 0;
6191 mddev->degraded = 0;
6192 mddev->safemode = 0;
6193 mddev->private = NULL;
6194 mddev->cluster_info = NULL;
6195 mddev->bitmap_info.offset = 0;
6196 mddev->bitmap_info.default_offset = 0;
6197 mddev->bitmap_info.default_space = 0;
6198 mddev->bitmap_info.chunksize = 0;
6199 mddev->bitmap_info.daemon_sleep = 0;
6200 mddev->bitmap_info.max_write_behind = 0;
6201 mddev->bitmap_info.nodes = 0;
6202}
6203
/*
 * Quiesce an array so that no further writes are issued.
 *
 * Freezes recovery, reaps a running sync thread, stops the safemode timer,
 * cycles the personality through quiesce, flushes the bitmap and, for a
 * read-write array, writes the superblock.  Finally the serialisation policy
 * is switched off and its pool destroyed.
 *
 * NOTE(review): md_stop_writes() calls this with the mddev lock held;
 * presumably every caller must do so - confirm.
 */
static void __md_stop_writes(struct mddev *mddev)
{
	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
	/* let a queued delayed delete finish first */
	if (work_pending(&mddev->del_work))
		flush_workqueue(md_misc_wq);
	if (mddev->sync_thread) {
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
		md_reap_sync_thread(mddev);
	}

	del_timer_sync(&mddev->safemode_timer);

	/* quiesce and immediately un-quiesce the personality */
	if (mddev->pers && mddev->pers->quiesce) {
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
	md_bitmap_flush(mddev);

	if (mddev->ro == 0 &&
	    ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
	     mddev->sb_flags)) {
		/* non-clustered arrays are marked in_sync before the superblock write */
		if (!mddev_is_clustered(mddev))
			mddev->in_sync = 1;
		md_update_sb(mddev, 1);
	}
	/* turn serialisation off and release its pool */
	mddev->serialize_policy = 0;
	mddev_destroy_serial_pool(mddev, NULL, true);
}
6234
/*
 * Exported wrapper: run __md_stop_writes() with the mddev lock held
 * (taken with mddev_lock_nointr()).
 */
void md_stop_writes(struct mddev *mddev)
{
	mddev_lock_nointr(mddev);
	__md_stop_writes(mddev);
	mddev_unlock(mddev);
}
EXPORT_SYMBOL_GPL(md_stop_writes);
6242
/*
 * Detach the running machinery from an array: wait for outstanding
 * write-behind I/O, cycle the personality through quiesce (unless the array
 * is already suspended), unregister the md thread and sync the request queue.
 */
static void mddev_detach(struct mddev *mddev)
{
	md_bitmap_wait_behind_writes(mddev);
	/* skipped when mddev->suspended is already set */
	if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
	md_unregister_thread(&mddev->thread);
	if (mddev->queue)
		blk_sync_queue(mddev->queue);
}
6254
/*
 * Tear down the personality of a running array.
 *
 * Destroys the bitmap, detaches threads and queue, clears mddev->pers under
 * mddev->lock, frees the personality's private data, schedules removal of the
 * redundancy sysfs group (via mddev->to_remove), drops the module reference
 * and unfreezes recovery.
 *
 * mddev->pers is dereferenced unconditionally, so the array must have a
 * personality attached when this is called.
 */
static void __md_stop(struct mddev *mddev)
{
	struct md_personality *pers = mddev->pers;
	md_bitmap_destroy(mddev);
	mddev_detach(mddev);
	/* flush md_misc_wq if event_work has a handler installed */
	if (mddev->event_work.func)
		flush_workqueue(md_misc_wq);
	spin_lock(&mddev->lock);
	mddev->pers = NULL;
	spin_unlock(&mddev->lock);
	pers->free(mddev, mddev->private);
	mddev->private = NULL;
	/* do not overwrite a removal that is already pending */
	if (pers->sync_request && mddev->to_remove == NULL)
		mddev->to_remove = &md_redundancy_group;
	module_put(pers->owner);
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
6273
/*
 * Exported stop routine: run __md_stop() and then release the biosets that
 * md_run() set up (io_acct_set only for levels other than 1 and 10).
 */
void md_stop(struct mddev *mddev)
{
	/* personality teardown first, then the biosets */
	__md_stop(mddev);
	bioset_exit(&mddev->bio_set);
	bioset_exit(&mddev->sync_set);
	if (mddev->level != 1 && mddev->level != 10)
		bioset_exit(&mddev->io_acct_set);
}

EXPORT_SYMBOL_GPL(md_stop);
6287
6288static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6289{
6290 int err = 0;
6291 int did_freeze = 0;
6292
6293 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6294 did_freeze = 1;
6295 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6296 md_wakeup_thread(mddev->thread);
6297 }
6298 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6299 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6300 if (mddev->sync_thread)
6301
6302
6303 wake_up_process(mddev->sync_thread->tsk);
6304
6305 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6306 return -EBUSY;
6307 mddev_unlock(mddev);
6308 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6309 &mddev->recovery));
6310 wait_event(mddev->sb_wait,
6311 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6312 mddev_lock_nointr(mddev);
6313
6314 mutex_lock(&mddev->open_mutex);
6315 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6316 mddev->sync_thread ||
6317 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6318 pr_warn("md: %s still in use.\n",mdname(mddev));
6319 if (did_freeze) {
6320 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6321 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6322 md_wakeup_thread(mddev->thread);
6323 }
6324 err = -EBUSY;
6325 goto out;
6326 }
6327 if (mddev->pers) {
6328 __md_stop_writes(mddev);
6329
6330 err = -ENXIO;
6331 if (mddev->ro==1)
6332 goto out;
6333 mddev->ro = 1;
6334 set_disk_ro(mddev->gendisk, 1);
6335 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6336 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6337 md_wakeup_thread(mddev->thread);
6338 sysfs_notify_dirent_safe(mddev->sysfs_state);
6339 err = 0;
6340 }
6341out:
6342 mutex_unlock(&mddev->open_mutex);
6343 return err;
6344}
6345
6346
6347
6348
6349
/*
 * Stop the array @mddev.
 *
 * @mode: when 0 the array is additionally disassembled after being
 *        stopped (bitmap file released, member devices exported, state
 *        cleaned).  Any other value only stops the personality.
 * @bdev: when non-NULL, one opener of the array is tolerated (presumably
 *        the caller's own open - confirm against callers).
 *
 * The reconfig lock is dropped and re-taken while waiting for resync to
 * end, so the caller must hold it on entry.
 *
 * Returns 0 on success, or -EBUSY if the array is still in use.
 */
static int do_md_stop(struct mddev *mddev, int mode,
 struct block_device *bdev)
{
 struct gendisk *disk = mddev->gendisk;
 struct md_rdev *rdev;
 int did_freeze = 0;

 /* Freeze recovery; remember whether we did it so we can undo it on failure. */
 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
 did_freeze = 1;
 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 md_wakeup_thread(mddev->thread);
 }
 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 if (mddev->sync_thread)
 /* Kick the sync thread so it gets a chance to see the flags. */

 wake_up_process(mddev->sync_thread->tsk);

 /* Wait, without the reconfig lock, until no sync thread is left. */
 mddev_unlock(mddev);
 wait_event(resync_wait, (mddev->sync_thread == NULL &&
 !test_bit(MD_RECOVERY_RUNNING,
 &mddev->recovery)));
 mddev_lock_nointr(mddev);

 mutex_lock(&mddev->open_mutex);
 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
 mddev->sysfs_active ||
 mddev->sync_thread ||
 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
 pr_warn("md: %s still in use.\n",mdname(mddev));
 mutex_unlock(&mddev->open_mutex);
 /* Undo our own freeze so recovery can carry on. */
 if (did_freeze) {
 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 md_wakeup_thread(mddev->thread);
 }
 return -EBUSY;
 }
 if (mddev->pers) {
 /* A read-only array has its disk flipped back to read-write first. */
 if (mddev->ro)
 set_disk_ro(disk, 0);

 __md_stop_writes(mddev);
 __md_stop(mddev);

 /* Notify sysfs 'state' watchers now that the personality is gone. */
 sysfs_notify_dirent_safe(mddev->sysfs_state);

 /* Remove the sysfs links of every rdev that occupied a raid slot. */
 rdev_for_each(rdev, mddev)
 if (rdev->raid_disk >= 0)
 sysfs_unlink_rdev(mddev, rdev);

 set_capacity_and_notify(disk, 0);
 mutex_unlock(&mddev->open_mutex);
 mddev->changed = 1;

 if (mddev->ro)
 mddev->ro = 0;
 } else
 mutex_unlock(&mddev->open_mutex);

 /*
  * mode 0: also release the bitmap file, export all member devices and
  * reset the array state.
  */
 if (mode == 0) {
 pr_info("md: %s stopped.\n", mdname(mddev));

 if (mddev->bitmap_info.file) {
 struct file *f = mddev->bitmap_info.file;
 /* Detach the pointer under the spinlock, drop the reference outside it. */
 spin_lock(&mddev->lock);
 mddev->bitmap_info.file = NULL;
 spin_unlock(&mddev->lock);
 fput(f);
 }
 mddev->bitmap_info.offset = 0;

 export_array(mddev);

 md_clean(mddev);
 if (mddev->hold_active == UNTIL_STOP)
 mddev->hold_active = 0;
 }
 md_new_event(mddev);
 sysfs_notify_dirent_safe(mddev->sysfs_state);
 return 0;
}
6436
6437#ifndef MODULE
6438static void autorun_array(struct mddev *mddev)
6439{
6440 struct md_rdev *rdev;
6441 int err;
6442
6443 if (list_empty(&mddev->disks))
6444 return;
6445
6446 pr_info("md: running: ");
6447
6448 rdev_for_each(rdev, mddev) {
6449 char b[BDEVNAME_SIZE];
6450 pr_cont("<%s>", bdevname(rdev->bdev,b));
6451 }
6452 pr_cont("\n");
6453
6454 err = do_md_run(mddev);
6455 if (err) {
6456 pr_warn("md: do_md_run() returned %d\n", err);
6457 do_md_stop(mddev, 0, NULL);
6458 }
6459}
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
/*
 * Group the devices on pending_raid_disks into arrays and try to start
 * each array.
 *
 * Each pass takes the first pending device as the reference (rdev0),
 * moves every pending device for which super_90_load() against rdev0
 * returns >= 0 onto a local candidate list, then binds the candidates
 * to the mddev selected by rdev0->preferred_minor and runs it.
 *
 * @part: non-zero selects the mdp_major device number space (minor
 *        shifted by MdpMinorShift); zero selects MD_MAJOR.
 */
static void autorun_devices(int part)
{
 struct md_rdev *rdev0, *rdev, *tmp;
 struct mddev *mddev;
 char b[BDEVNAME_SIZE];

 pr_info("md: autorun ...\n");
 while (!list_empty(&pending_raid_disks)) {
 int unit;
 dev_t dev;
 LIST_HEAD(candidates);
 rdev0 = list_entry(pending_raid_disks.next,
 struct md_rdev, same_set);

 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
 INIT_LIST_HEAD(&candidates);
 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
 if (super_90_load(rdev, rdev0, 0) >= 0) {
 pr_debug("md: adding %s ...\n",
 bdevname(rdev->bdev,b));
 list_move(&rdev->same_set, &candidates);
 }
 /*
  * 'candidates' now holds the devices that go together with rdev0
  * (rdev0 itself is still on pending_raid_disks when the scan
  * starts, so it is tested against itself like any other entry).
  * Build the device number from the preferred minor and round-trip
  * it through MINOR() to detect values that do not fit.
  */
 if (part) {
 dev = MKDEV(mdp_major,
 rdev0->preferred_minor << MdpMinorShift);
 unit = MINOR(dev) >> MdpMinorShift;
 } else {
 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
 unit = MINOR(dev);
 }
 /*
  * NOTE(review): this break (and the one below) leaves the loop
  * without reaching the export loop at the bottom, so devices already
  * moved to 'candidates' are not exported here - confirm that this is
  * intended.
  */
 if (rdev0->preferred_minor != unit) {
 pr_warn("md: unit number in %s is bad: %d\n",
 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
 break;
 }

 md_probe(dev);
 mddev = mddev_find(dev);
 if (!mddev)
 break;

 if (mddev_lock(mddev))
 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
 else if (mddev->raid_disks || mddev->major_version
 || !list_empty(&mddev->disks)) {
 pr_warn("md: %s already running, cannot run %s\n",
 mdname(mddev), bdevname(rdev0->bdev,b));
 mddev_unlock(mddev);
 } else {
 pr_debug("md: created %s\n", mdname(mddev));
 mddev->persistent = 1;
 /* Bind every candidate; one that fails to bind is exported at once. */
 rdev_for_each_list(rdev, tmp, &candidates) {
 list_del_init(&rdev->same_set);
 if (bind_rdev_to_array(rdev, mddev))
 export_rdev(rdev);
 }
 autorun_array(mddev);
 mddev_unlock(mddev);
 }
 /*
  * Whatever is still on 'candidates' (i.e. the array could not be
  * locked or was already in use) is exported.
  */

 rdev_for_each_list(rdev, tmp, &candidates) {
 list_del_init(&rdev->same_set);
 export_rdev(rdev);
 }
 mddev_put(mddev);
 }
 pr_info("md: ... autorun DONE.\n");
}
6548#endif
6549
6550static int get_version(void __user *arg)
6551{
6552 mdu_version_t ver;
6553
6554 ver.major = MD_MAJOR_VERSION;
6555 ver.minor = MD_MINOR_VERSION;
6556 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6557
6558 if (copy_to_user(arg, &ver, sizeof(ver)))
6559 return -EFAULT;
6560
6561 return 0;
6562}
6563
6564static int get_array_info(struct mddev *mddev, void __user *arg)
6565{
6566 mdu_array_info_t info;
6567 int nr,working,insync,failed,spare;
6568 struct md_rdev *rdev;
6569
6570 nr = working = insync = failed = spare = 0;
6571 rcu_read_lock();
6572 rdev_for_each_rcu(rdev, mddev) {
6573 nr++;
6574 if (test_bit(Faulty, &rdev->flags))
6575 failed++;
6576 else {
6577 working++;
6578 if (test_bit(In_sync, &rdev->flags))
6579 insync++;
6580 else if (test_bit(Journal, &rdev->flags))
6581
6582 ;
6583 else
6584 spare++;
6585 }
6586 }
6587 rcu_read_unlock();
6588
6589 info.major_version = mddev->major_version;
6590 info.minor_version = mddev->minor_version;
6591 info.patch_version = MD_PATCHLEVEL_VERSION;
6592 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6593 info.level = mddev->level;
6594 info.size = mddev->dev_sectors / 2;
6595 if (info.size != mddev->dev_sectors / 2)
6596 info.size = -1;
6597 info.nr_disks = nr;
6598 info.raid_disks = mddev->raid_disks;
6599 info.md_minor = mddev->md_minor;
6600 info.not_persistent= !mddev->persistent;
6601
6602 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6603 info.state = 0;
6604 if (mddev->in_sync)
6605 info.state = (1<<MD_SB_CLEAN);
6606 if (mddev->bitmap && mddev->bitmap_info.offset)
6607 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6608 if (mddev_is_clustered(mddev))
6609 info.state |= (1<<MD_SB_CLUSTERED);
6610 info.active_disks = insync;
6611 info.working_disks = working;
6612 info.failed_disks = failed;
6613 info.spare_disks = spare;
6614
6615 info.layout = mddev->layout;
6616 info.chunk_size = mddev->chunk_sectors << 9;
6617
6618 if (copy_to_user(arg, &info, sizeof(info)))
6619 return -EFAULT;
6620
6621 return 0;
6622}
6623
6624static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6625{
6626 mdu_bitmap_file_t *file = NULL;
6627 char *ptr;
6628 int err;
6629
6630 file = kzalloc(sizeof(*file), GFP_NOIO);
6631 if (!file)
6632 return -ENOMEM;
6633
6634 err = 0;
6635 spin_lock(&mddev->lock);
6636
6637 if (mddev->bitmap_info.file) {
6638 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6639 sizeof(file->pathname));
6640 if (IS_ERR(ptr))
6641 err = PTR_ERR(ptr);
6642 else
6643 memmove(file->pathname, ptr,
6644 sizeof(file->pathname)-(ptr-file->pathname));
6645 }
6646 spin_unlock(&mddev->lock);
6647
6648 if (err == 0 &&
6649 copy_to_user(arg, file, sizeof(*file)))
6650 err = -EFAULT;
6651
6652 kfree(file);
6653 return err;
6654}
6655
6656static int get_disk_info(struct mddev *mddev, void __user * arg)
6657{
6658 mdu_disk_info_t info;
6659 struct md_rdev *rdev;
6660
6661 if (copy_from_user(&info, arg, sizeof(info)))
6662 return -EFAULT;
6663
6664 rcu_read_lock();
6665 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6666 if (rdev) {
6667 info.major = MAJOR(rdev->bdev->bd_dev);
6668 info.minor = MINOR(rdev->bdev->bd_dev);
6669 info.raid_disk = rdev->raid_disk;
6670 info.state = 0;
6671 if (test_bit(Faulty, &rdev->flags))
6672 info.state |= (1<<MD_DISK_FAULTY);
6673 else if (test_bit(In_sync, &rdev->flags)) {
6674 info.state |= (1<<MD_DISK_ACTIVE);
6675 info.state |= (1<<MD_DISK_SYNC);
6676 }
6677 if (test_bit(Journal, &rdev->flags))
6678 info.state |= (1<<MD_DISK_JOURNAL);
6679 if (test_bit(WriteMostly, &rdev->flags))
6680 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6681 if (test_bit(FailFast, &rdev->flags))
6682 info.state |= (1<<MD_DISK_FAILFAST);
6683 } else {
6684 info.major = info.minor = 0;
6685 info.raid_disk = -1;
6686 info.state = (1<<MD_DISK_REMOVED);
6687 }
6688 rcu_read_unlock();
6689
6690 if (copy_to_user(arg, &info, sizeof(info)))
6691 return -EFAULT;
6692
6693 return 0;
6694}
6695
/*
 * Add the device described by @info (major/minor/state/raid_disk) to
 * @mddev.
 *
 * Three situations are handled, in this order:
 *  1. mddev->raid_disks == 0: the array is being assembled.  The device
 *     is imported with the array's superblock version and, if the array
 *     already has a disk, its superblock is checked against the first
 *     one before it is bound.
 *  2. A personality is running: the device is hot-added.
 *  3. Otherwise: the array has been described but is not running; only
 *     major_version 0 is accepted and the rdev is set up from @info.
 *
 * Returns 0 on success or a negative errno.
 */
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
{
 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
 struct md_rdev *rdev;
 dev_t dev = MKDEV(info->major,info->minor);

 /* A clustered array only accepts CLUSTER_ADD or CANDIDATE requests. */
 if (mddev_is_clustered(mddev) &&
 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
 pr_warn("%s: Cannot add to clustered mddev.\n",
 mdname(mddev));
 return -EINVAL;
 }

 /* Reject major/minor values that do not survive the MKDEV round trip. */
 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
 return -EOVERFLOW;

 /* Case 1: array not yet described - assemble from on-disk superblocks. */
 if (!mddev->raid_disks) {
 int err;

 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
 if (IS_ERR(rdev)) {
 pr_warn("md: md_import_device returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 /* Compare the new device's superblock with that of the first disk. */
 if (!list_empty(&mddev->disks)) {
 struct md_rdev *rdev0
 = list_entry(mddev->disks.next,
 struct md_rdev, same_set);
 err = super_types[mddev->major_version]
 .load_super(rdev, rdev0, mddev->minor_version);
 if (err < 0) {
 pr_warn("md: %s has different UUID to %s\n",
 bdevname(rdev->bdev,b),
 bdevname(rdev0->bdev,b2));
 export_rdev(rdev);
 return -EINVAL;
 }
 }
 err = bind_rdev_to_array(rdev, mddev);
 if (err)
 export_rdev(rdev);
 return err;
 }

 /*
  * Case 2: a personality is running, so this is a hot-add.  The
  * personality must provide a hot_add_disk hook.
  */
 if (mddev->pers) {
 int err;
 if (!mddev->pers->hot_add_disk) {
 pr_warn("%s: personality does not support diskops!\n",
 mdname(mddev));
 return -EINVAL;
 }
 /* Non-persistent arrays import with version -1/-1. */
 if (mddev->persistent)
 rdev = md_import_device(dev, mddev->major_version,
 mddev->minor_version);
 else
 rdev = md_import_device(dev, -1, -1);
 if (IS_ERR(rdev)) {
 pr_warn("md: md_import_device returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 /*
  * Without a persistent superblock the slot comes from @info;
  * otherwise the superblock is validated against the array.
  */
 if (!mddev->persistent) {
 if (info->state & (1<<MD_DISK_SYNC) &&
 info->raid_disk < mddev->raid_disks) {
 rdev->raid_disk = info->raid_disk;
 set_bit(In_sync, &rdev->flags);
 clear_bit(Bitmap_sync, &rdev->flags);
 } else
 rdev->raid_disk = -1;
 rdev->saved_raid_disk = rdev->raid_disk;
 } else
 super_types[mddev->major_version].
 validate_super(mddev, rdev);
 if ((info->state & (1<<MD_DISK_SYNC)) &&
 rdev->raid_disk != info->raid_disk) {
 /*
  * The caller claimed the device is in sync in a given slot,
  * but that is not the slot determined above: refuse.
  */
 export_rdev(rdev);
 return -EINVAL;
 }

 clear_bit(In_sync, &rdev->flags);
 /* WriteMostly and FailFast simply mirror the requested state bits. */
 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 set_bit(WriteMostly, &rdev->flags);
 else
 clear_bit(WriteMostly, &rdev->flags);
 if (info->state & (1<<MD_DISK_FAILFAST))
 set_bit(FailFast, &rdev->flags);
 else
 clear_bit(FailFast, &rdev->flags);

 if (info->state & (1<<MD_DISK_JOURNAL)) {
 struct md_rdev *rdev2;
 bool has_journal = false;

 /* Refuse if the array already has a journal device or a bitmap. */
 rdev_for_each(rdev2, mddev) {
 if (test_bit(Journal, &rdev2->flags)) {
 has_journal = true;
 break;
 }
 }
 if (has_journal || mddev->bitmap) {
 export_rdev(rdev);
 return -EBUSY;
 }
 set_bit(Journal, &rdev->flags);
 }
 /*
  * Clustered arrays: a CANDIDATE request only marks the rdev, while a
  * CLUSTER_ADD request goes through md_cluster_ops->add_new_disk().
  */
 if (mddev_is_clustered(mddev)) {
 if (info->state & (1 << MD_DISK_CANDIDATE))
 set_bit(Candidate, &rdev->flags);
 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
 /* On failure the rdev is exported and the error returned. */
 err = md_cluster_ops->add_new_disk(mddev, rdev);
 if (err) {
 export_rdev(rdev);
 return err;
 }
 }
 }

 /* The device is bound without a slot (raid_disk == -1). */
 rdev->raid_disk = -1;
 err = bind_rdev_to_array(rdev, mddev);

 if (err)
 export_rdev(rdev);

 if (mddev_is_clustered(mddev)) {
 if (info->state & (1 << MD_DISK_CANDIDATE)) {
 /*
  * Acknowledge only after a successful bind (err is 0 here, so
  * the second argument is always true); kick the rdev out
  * again if the acknowledgement fails.
  */
 if (!err) {
 err = md_cluster_ops->new_disk_ack(mddev,
 err == 0);
 if (err)
 md_kick_rdev_from_array(rdev);
 }
 } else {
 /* Cancel the cluster-wide add if the bind failed. */
 if (err)
 md_cluster_ops->add_new_disk_cancel(mddev);
 else
 err = add_bound_rdev(rdev);
 }

 } else if (!err)
 err = add_bound_rdev(rdev);

 return err;
 }

 /*
  * Case 3: array described but not running.  Only major_version 0 is
  * supported here.
  */
 if (mddev->major_version != 0) {
 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
 return -EINVAL;
 }

 /* A device flagged FAULTY in @info is not imported at all. */
 if (!(info->state & (1<<MD_DISK_FAULTY))) {
 int err;
 rdev = md_import_device(dev, -1, 0);
 if (IS_ERR(rdev)) {
 pr_warn("md: error, md_import_device() returned %ld\n",
 PTR_ERR(rdev));
 return PTR_ERR(rdev);
 }
 rdev->desc_nr = info->number;
 if (info->raid_disk < mddev->raid_disks)
 rdev->raid_disk = info->raid_disk;
 else
 rdev->raid_disk = -1;

 if (rdev->raid_disk < mddev->raid_disks)
 if (info->state & (1<<MD_DISK_SYNC))
 set_bit(In_sync, &rdev->flags);

 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 set_bit(WriteMostly, &rdev->flags);
 if (info->state & (1<<MD_DISK_FAILFAST))
 set_bit(FailFast, &rdev->flags);

 /*
  * sb_start is the device size in 512-byte units for non-persistent
  * arrays, or the computed superblock offset otherwise; the usable
  * size is set to the same value.
  */
 if (!mddev->persistent) {
 pr_debug("md: nonpersistent superblock ...\n");
 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
 } else
 rdev->sb_start = calc_dev_sboffset(rdev);
 rdev->sectors = rdev->sb_start;

 err = bind_rdev_to_array(rdev, mddev);
 if (err) {
 export_rdev(rdev);
 return err;
 }
 }

 return 0;
}
6902
6903static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6904{
6905 char b[BDEVNAME_SIZE];
6906 struct md_rdev *rdev;
6907
6908 if (!mddev->pers)
6909 return -ENODEV;
6910
6911 rdev = find_rdev(mddev, dev);
6912 if (!rdev)
6913 return -ENXIO;
6914
6915 if (rdev->raid_disk < 0)
6916 goto kick_rdev;
6917
6918 clear_bit(Blocked, &rdev->flags);
6919 remove_and_add_spares(mddev, rdev);
6920
6921 if (rdev->raid_disk >= 0)
6922 goto busy;
6923
6924kick_rdev:
6925 if (mddev_is_clustered(mddev)) {
6926 if (md_cluster_ops->remove_disk(mddev, rdev))
6927 goto busy;
6928 }
6929
6930 md_kick_rdev_from_array(rdev);
6931 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6932 if (mddev->thread)
6933 md_wakeup_thread(mddev->thread);
6934 else
6935 md_update_sb(mddev, 1);
6936 md_new_event(mddev);
6937
6938 return 0;
6939busy:
6940 pr_debug("md: cannot remove active disk %s from %s ...\n",
6941 bdevname(rdev->bdev,b), mdname(mddev));
6942 return -EBUSY;
6943}
6944
6945static int hot_add_disk(struct mddev *mddev, dev_t dev)
6946{
6947 char b[BDEVNAME_SIZE];
6948 int err;
6949 struct md_rdev *rdev;
6950
6951 if (!mddev->pers)
6952 return -ENODEV;
6953
6954 if (mddev->major_version != 0) {
6955 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6956 mdname(mddev));
6957 return -EINVAL;
6958 }
6959 if (!mddev->pers->hot_add_disk) {
6960 pr_warn("%s: personality does not support diskops!\n",
6961 mdname(mddev));
6962 return -EINVAL;
6963 }
6964
6965 rdev = md_import_device(dev, -1, 0);
6966 if (IS_ERR(rdev)) {
6967 pr_warn("md: error, md_import_device() returned %ld\n",
6968 PTR_ERR(rdev));
6969 return -EINVAL;
6970 }
6971
6972 if (mddev->persistent)
6973 rdev->sb_start = calc_dev_sboffset(rdev);
6974 else
6975 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6976
6977 rdev->sectors = rdev->sb_start;
6978
6979 if (test_bit(Faulty, &rdev->flags)) {
6980 pr_warn("md: can not hot-add faulty %s disk to %s!\n",
6981 bdevname(rdev->bdev,b), mdname(mddev));
6982 err = -EINVAL;
6983 goto abort_export;
6984 }
6985
6986 clear_bit(In_sync, &rdev->flags);
6987 rdev->desc_nr = -1;
6988 rdev->saved_raid_disk = -1;
6989 err = bind_rdev_to_array(rdev, mddev);
6990 if (err)
6991 goto abort_export;
6992
6993
6994
6995
6996
6997
6998 rdev->raid_disk = -1;
6999
7000 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7001 if (!mddev->thread)
7002 md_update_sb(mddev, 1);
7003
7004
7005
7006
7007 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7008 md_wakeup_thread(mddev->thread);
7009 md_new_event(mddev);
7010 return 0;
7011
7012abort_export:
7013 export_rdev(rdev);
7014 return err;
7015}
7016
/*
 * Attach or detach a file-backed bitmap.
 *
 * @fd >= 0: attach the file behind descriptor @fd as the bitmap file.
 * @fd <  0: detach the current bitmap (and release its file, if any).
 *
 * Returns 0 on success or a negative errno: -EBUSY when a running array
 * cannot be changed right now or the file is already in use, -EEXIST
 * when a bitmap or bitmap file is already present, -EBADF for an
 * unusable descriptor, -ENOENT when asked to remove a bitmap that does
 * not exist, or the error from creating/loading the bitmap.
 */
static int set_bitmap_file(struct mddev *mddev, int fd)
{
 int err = 0;

 /*
  * On a running array the personality must provide ->quiesce, an md
  * thread must exist, and no recovery activity may be in progress.
  */
 if (mddev->pers) {
 if (!mddev->pers->quiesce || !mddev->thread)
 return -EBUSY;
 if (mddev->recovery || mddev->sync_thread)
 return -EBUSY;

 }

 if (fd >= 0) {
 struct inode *inode;
 struct file *f;

 if (mddev->bitmap || mddev->bitmap_info.file)
 return -EEXIST;
 f = fget(fd);

 if (f == NULL) {
 pr_warn("%s: error: failed to get bitmap file\n",
 mdname(mddev));
 return -EBADF;
 }

 /*
  * The file must be a regular file, opened for writing, with an
  * inode write count of exactly 1.
  */
 inode = f->f_mapping->host;
 if (!S_ISREG(inode->i_mode)) {
 pr_warn("%s: error: bitmap file must be a regular file\n",
 mdname(mddev));
 err = -EBADF;
 } else if (!(f->f_mode & FMODE_WRITE)) {
 pr_warn("%s: error: bitmap file must open for write\n",
 mdname(mddev));
 err = -EBADF;
 } else if (atomic_read(&inode->i_writecount) != 1) {
 pr_warn("%s: error: bitmap file is already in use\n",
 mdname(mddev));
 err = -EBUSY;
 }
 if (err) {
 fput(f);
 return err;
 }
 /* Keep the reference taken by fget() for as long as it is attached. */
 mddev->bitmap_info.file = f;
 mddev->bitmap_info.offset = 0;
 } else if (mddev->bitmap == NULL)
 return -ENOENT;
 err = 0;
 if (mddev->pers) {
 if (fd >= 0) {
 struct bitmap *bitmap;

 /* The bitmap is created first; the array is suspended only to install and load it. */
 bitmap = md_bitmap_create(mddev, -1);
 mddev_suspend(mddev);
 if (!IS_ERR(bitmap)) {
 mddev->bitmap = bitmap;
 err = md_bitmap_load(mddev);
 } else
 err = PTR_ERR(bitmap);
 /*
  * On failure destroy the bitmap and flip fd to -1 so that the
  * block at the end of the function releases the file.
  */
 if (err) {
 md_bitmap_destroy(mddev);
 fd = -1;
 }
 mddev_resume(mddev);
 } else if (fd < 0) {
 /* Removal: destroy the bitmap with the array suspended. */
 mddev_suspend(mddev);
 md_bitmap_destroy(mddev);
 mddev_resume(mddev);
 }
 }
 /* Removal or failed attach: detach the file under the lock, then drop it. */
 if (fd < 0) {
 struct file *f = mddev->bitmap_info.file;
 if (f) {
 spin_lock(&mddev->lock);
 mddev->bitmap_info.file = NULL;
 spin_unlock(&mddev->lock);
 fput(f);
 }
 }

 return err;
}
7100
7101
7102
7103
7104
7105
7106
7107
7108
7109
7110
7111
7112
7113
7114int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
7115{
7116 if (info->raid_disks == 0) {
7117
7118 if (info->major_version < 0 ||
7119 info->major_version >= ARRAY_SIZE(super_types) ||
7120 super_types[info->major_version].name == NULL) {
7121
7122 pr_warn("md: superblock version %d not known\n",
7123 info->major_version);
7124 return -EINVAL;
7125 }
7126 mddev->major_version = info->major_version;
7127 mddev->minor_version = info->minor_version;
7128 mddev->patch_version = info->patch_version;
7129 mddev->persistent = !info->not_persistent;
7130
7131
7132
7133 mddev->ctime = ktime_get_real_seconds();
7134 return 0;
7135 }
7136 mddev->major_version = MD_MAJOR_VERSION;
7137 mddev->minor_version = MD_MINOR_VERSION;
7138 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7139 mddev->ctime = ktime_get_real_seconds();
7140
7141 mddev->level = info->level;
7142 mddev->clevel[0] = 0;
7143 mddev->dev_sectors = 2 * (sector_t)info->size;
7144 mddev->raid_disks = info->raid_disks;
7145
7146
7147
7148 if (info->state & (1<<MD_SB_CLEAN))
7149 mddev->recovery_cp = MaxSector;
7150 else
7151 mddev->recovery_cp = 0;
7152 mddev->persistent = ! info->not_persistent;
7153 mddev->external = 0;
7154
7155 mddev->layout = info->layout;
7156 if (mddev->level == 0)
7157
7158 mddev->layout = -1;
7159 mddev->chunk_sectors = info->chunk_size >> 9;
7160
7161 if (mddev->persistent) {
7162 mddev->max_disks = MD_SB_DISKS;
7163 mddev->flags = 0;
7164 mddev->sb_flags = 0;
7165 }
7166 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7167
7168 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7169 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7170 mddev->bitmap_info.offset = 0;
7171
7172 mddev->reshape_position = MaxSector;
7173
7174
7175
7176
7177 get_random_bytes(mddev->uuid, 16);
7178
7179 mddev->new_level = mddev->level;
7180 mddev->new_chunk_sectors = mddev->chunk_sectors;
7181 mddev->new_layout = mddev->layout;
7182 mddev->delta_disks = 0;
7183 mddev->reshape_backwards = 0;
7184
7185 return 0;
7186}
7187
7188void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7189{
7190 lockdep_assert_held(&mddev->reconfig_mutex);
7191
7192 if (mddev->external_size)
7193 return;
7194
7195 mddev->array_sectors = array_sectors;
7196}
7197EXPORT_SYMBOL(md_set_array_sectors);
7198
7199static int update_size(struct mddev *mddev, sector_t num_sectors)
7200{
7201 struct md_rdev *rdev;
7202 int rv;
7203 int fit = (num_sectors == 0);
7204 sector_t old_dev_sectors = mddev->dev_sectors;
7205
7206 if (mddev->pers->resize == NULL)
7207 return -EINVAL;
7208
7209
7210
7211
7212
7213
7214
7215
7216
7217 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7218 mddev->sync_thread)
7219 return -EBUSY;
7220 if (mddev->ro)
7221 return -EROFS;
7222
7223 rdev_for_each(rdev, mddev) {
7224 sector_t avail = rdev->sectors;
7225
7226 if (fit && (num_sectors == 0 || num_sectors > avail))
7227 num_sectors = avail;
7228 if (avail < num_sectors)
7229 return -ENOSPC;
7230 }
7231 rv = mddev->pers->resize(mddev, num_sectors);
7232 if (!rv) {
7233 if (mddev_is_clustered(mddev))
7234 md_cluster_ops->update_size(mddev, old_dev_sectors);
7235 else if (mddev->queue) {
7236 set_capacity_and_notify(mddev->gendisk,
7237 mddev->array_sectors);
7238 }
7239 }
7240 return rv;
7241}
7242
7243static int update_raid_disks(struct mddev *mddev, int raid_disks)
7244{
7245 int rv;
7246 struct md_rdev *rdev;
7247
7248 if (mddev->pers->check_reshape == NULL)
7249 return -EINVAL;
7250 if (mddev->ro)
7251 return -EROFS;
7252 if (raid_disks <= 0 ||
7253 (mddev->max_disks && raid_disks >= mddev->max_disks))
7254 return -EINVAL;
7255 if (mddev->sync_thread ||
7256 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7257 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7258 mddev->reshape_position != MaxSector)
7259 return -EBUSY;
7260
7261 rdev_for_each(rdev, mddev) {
7262 if (mddev->raid_disks < raid_disks &&
7263 rdev->data_offset < rdev->new_data_offset)
7264 return -EINVAL;
7265 if (mddev->raid_disks > raid_disks &&
7266 rdev->data_offset > rdev->new_data_offset)
7267 return -EINVAL;
7268 }
7269
7270 mddev->delta_disks = raid_disks - mddev->raid_disks;
7271 if (mddev->delta_disks < 0)
7272 mddev->reshape_backwards = 1;
7273 else if (mddev->delta_disks > 0)
7274 mddev->reshape_backwards = 0;
7275
7276 rv = mddev->pers->check_reshape(mddev);
7277 if (rv < 0) {
7278 mddev->delta_disks = 0;
7279 mddev->reshape_backwards = 0;
7280 }
7281 return rv;
7282}
7283
7284
7285
7286
7287
7288
7289
7290
7291
/*
 * Apply a change to a running array from a user-supplied @info.
 *
 * The identity fields (versions, ctime, level, persistence, chunk size
 * and the upper state bits) must match the array.  Of size, raid_disks,
 * layout and bitmap presence, exactly one may differ per call: no
 * difference returns 0, more than one returns -EINVAL.
 *
 * mddev->pers is dereferenced without a check, so a personality must be
 * running.
 *
 * Returns 0 on success or a negative errno.
 */
static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
{
 int rv = 0;
 int cnt = 0;
 int state = 0;

 /* Build the current state bits to compare against info->state. */
 if (mddev->bitmap && mddev->bitmap_info.offset)
 state |= (1 << MD_SB_BITMAP_PRESENT);

 if (mddev->major_version != info->major_version ||
 mddev->minor_version != info->minor_version ||

 mddev->ctime != info->ctime ||
 mddev->level != info->level ||

 mddev->persistent != !info->not_persistent ||
 mddev->chunk_sectors != info->chunk_size >> 9 ||
 /* Only the low 9 state bits may differ. */
 ((state^info->state) & 0xfffffe00)
 )
 return -EINVAL;

 /* Count how many of the changeable attributes differ. */
 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
 cnt++;
 if (mddev->raid_disks != info->raid_disks)
 cnt++;
 if (mddev->layout != info->layout)
 cnt++;
 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
 cnt++;
 if (cnt == 0)
 return 0;
 if (cnt > 1)
 return -EINVAL;

 if (mddev->layout != info->layout) {
 /*
  * Layout change: hand the new layout to the personality's
  * check_reshape() hook and restore new_layout if it refuses.
  * This path returns without calling md_update_sb().
  */
 if (mddev->pers->check_reshape == NULL)
 return -EINVAL;
 else {
 mddev->new_layout = info->layout;
 rv = mddev->pers->check_reshape(mddev);
 if (rv)
 mddev->new_layout = mddev->layout;
 return rv;
 }
 }
 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
 rv = update_size(mddev, (sector_t)info->size * 2);

 if (mddev->raid_disks != info->raid_disks)
 rv = update_raid_disks(mddev, info->raid_disks);

 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
 /* Same preconditions as for attaching a bitmap file. */
 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
 rv = -EINVAL;
 goto err;
 }
 if (mddev->recovery || mddev->sync_thread) {
 rv = -EBUSY;
 goto err;
 }
 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
 struct bitmap *bitmap;

 /* Add a bitmap at the default offset with the default space. */
 if (mddev->bitmap) {
 rv = -EEXIST;
 goto err;
 }
 if (mddev->bitmap_info.default_offset == 0) {
 rv = -EINVAL;
 goto err;
 }
 mddev->bitmap_info.offset =
 mddev->bitmap_info.default_offset;
 mddev->bitmap_info.space =
 mddev->bitmap_info.default_space;
 bitmap = md_bitmap_create(mddev, -1);
 mddev_suspend(mddev);
 if (!IS_ERR(bitmap)) {
 mddev->bitmap = bitmap;
 rv = md_bitmap_load(mddev);
 } else
 rv = PTR_ERR(bitmap);
 if (rv)
 md_bitmap_destroy(mddev);
 mddev_resume(mddev);
 } else {
 /* Remove the bitmap; a file-backed bitmap is refused here. */
 if (!mddev->bitmap) {
 rv = -ENOENT;
 goto err;
 }
 if (mddev->bitmap->storage.file) {
 rv = -EINVAL;
 goto err;
 }
 if (mddev->bitmap_info.nodes) {
 /*
  * Clustered bitmap: all bitmap locks must be obtained
  * before the array may leave the cluster.
  */
 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
 rv = -EPERM;
 md_cluster_ops->unlock_all_bitmaps(mddev);
 goto err;
 }

 mddev->bitmap_info.nodes = 0;
 md_cluster_ops->leave(mddev);
 module_put(md_cluster_mod);
 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
 }
 mddev_suspend(mddev);
 md_bitmap_destroy(mddev);
 mddev_resume(mddev);
 mddev->bitmap_info.offset = 0;
 }
 }
 /*
  * Reached for size, raid_disks and bitmap changes that did not jump to
  * 'err'; the superblock is written even when rv is non-zero.
  */
 md_update_sb(mddev, 1);
 return rv;
err:
 return rv;
}
7418
7419static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7420{
7421 struct md_rdev *rdev;
7422 int err = 0;
7423
7424 if (mddev->pers == NULL)
7425 return -ENODEV;
7426
7427 rcu_read_lock();
7428 rdev = md_find_rdev_rcu(mddev, dev);
7429 if (!rdev)
7430 err = -ENODEV;
7431 else {
7432 md_error(mddev, rdev);
7433 if (!test_bit(Faulty, &rdev->flags))
7434 err = -EBUSY;
7435 }
7436 rcu_read_unlock();
7437 return err;
7438}
7439
7440
7441
7442
7443
7444
7445
7446static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7447{
7448 struct mddev *mddev = bdev->bd_disk->private_data;
7449
7450 geo->heads = 2;
7451 geo->sectors = 4;
7452 geo->cylinders = mddev->array_sectors / 8;
7453 return 0;
7454}
7455
7456static inline bool md_ioctl_valid(unsigned int cmd)
7457{
7458 switch (cmd) {
7459 case ADD_NEW_DISK:
7460 case GET_ARRAY_INFO:
7461 case GET_BITMAP_FILE:
7462 case GET_DISK_INFO:
7463 case HOT_ADD_DISK:
7464 case HOT_REMOVE_DISK:
7465 case RAID_VERSION:
7466 case RESTART_ARRAY_RW:
7467 case RUN_ARRAY:
7468 case SET_ARRAY_INFO:
7469 case SET_BITMAP_FILE:
7470 case SET_DISK_FAULTY:
7471 case STOP_ARRAY:
7472 case STOP_ARRAY_RO:
7473 case CLUSTERED_DISK_NACK:
7474 return true;
7475 default:
7476 return false;
7477 }
7478}
7479
7480static int md_ioctl(struct block_device *bdev, fmode_t mode,
7481 unsigned int cmd, unsigned long arg)
7482{
7483 int err = 0;
7484 void __user *argp = (void __user *)arg;
7485 struct mddev *mddev = NULL;
7486 bool did_set_md_closing = false;
7487
7488 if (!md_ioctl_valid(cmd))
7489 return -ENOTTY;
7490
7491 switch (cmd) {
7492 case RAID_VERSION:
7493 case GET_ARRAY_INFO:
7494 case GET_DISK_INFO:
7495 break;
7496 default:
7497 if (!capable(CAP_SYS_ADMIN))
7498 return -EACCES;
7499 }
7500
7501
7502
7503
7504
7505 switch (cmd) {
7506 case RAID_VERSION:
7507 err = get_version(argp);
7508 goto out;
7509 default:;
7510 }
7511
7512
7513
7514
7515
7516 mddev = bdev->bd_disk->private_data;
7517
7518 if (!mddev) {
7519 BUG();
7520 goto out;
7521 }
7522
7523
7524 switch (cmd) {
7525 case GET_ARRAY_INFO:
7526 if (!mddev->raid_disks && !mddev->external)
7527 err = -ENODEV;
7528 else
7529 err = get_array_info(mddev, argp);
7530 goto out;
7531
7532 case GET_DISK_INFO:
7533 if (!mddev->raid_disks && !mddev->external)
7534 err = -ENODEV;
7535 else
7536 err = get_disk_info(mddev, argp);
7537 goto out;
7538
7539 case SET_DISK_FAULTY:
7540 err = set_disk_faulty(mddev, new_decode_dev(arg));
7541 goto out;
7542
7543 case GET_BITMAP_FILE:
7544 err = get_bitmap_file(mddev, argp);
7545 goto out;
7546
7547 }
7548
7549 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7550 flush_rdev_wq(mddev);
7551
7552 if (cmd == HOT_REMOVE_DISK)
7553
7554 wait_event_interruptible_timeout(mddev->sb_wait,
7555 !test_bit(MD_RECOVERY_NEEDED,
7556 &mddev->recovery),
7557 msecs_to_jiffies(5000));
7558 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
7559
7560
7561
7562 mutex_lock(&mddev->open_mutex);
7563 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7564 mutex_unlock(&mddev->open_mutex);
7565 err = -EBUSY;
7566 goto out;
7567 }
7568 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7569 mutex_unlock(&mddev->open_mutex);
7570 err = -EBUSY;
7571 goto out;
7572 }
7573 did_set_md_closing = true;
7574 mutex_unlock(&mddev->open_mutex);
7575 sync_blockdev(bdev);
7576 }
7577 err = mddev_lock(mddev);
7578 if (err) {
7579 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7580 err, cmd);
7581 goto out;
7582 }
7583
7584 if (cmd == SET_ARRAY_INFO) {
7585 mdu_array_info_t info;
7586 if (!arg)
7587 memset(&info, 0, sizeof(info));
7588 else if (copy_from_user(&info, argp, sizeof(info))) {
7589 err = -EFAULT;
7590 goto unlock;
7591 }
7592 if (mddev->pers) {
7593 err = update_array_info(mddev, &info);
7594 if (err) {
7595 pr_warn("md: couldn't update array info. %d\n", err);
7596 goto unlock;
7597 }
7598 goto unlock;
7599 }
7600 if (!list_empty(&mddev->disks)) {
7601 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7602 err = -EBUSY;
7603 goto unlock;
7604 }
7605 if (mddev->raid_disks) {
7606 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7607 err = -EBUSY;
7608 goto unlock;
7609 }
7610 err = md_set_array_info(mddev, &info);
7611 if (err) {
7612 pr_warn("md: couldn't set array info. %d\n", err);
7613 goto unlock;
7614 }
7615 goto unlock;
7616 }
7617
7618
7619
7620
7621
7622
7623 if ((!mddev->raid_disks && !mddev->external)
7624 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7625 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7626 && cmd != GET_BITMAP_FILE) {
7627 err = -ENODEV;
7628 goto unlock;
7629 }
7630
7631
7632
7633
7634 switch (cmd) {
7635 case RESTART_ARRAY_RW:
7636 err = restart_array(mddev);
7637 goto unlock;
7638
7639 case STOP_ARRAY:
7640 err = do_md_stop(mddev, 0, bdev);
7641 goto unlock;
7642
7643 case STOP_ARRAY_RO:
7644 err = md_set_readonly(mddev, bdev);
7645 goto unlock;
7646
7647 case HOT_REMOVE_DISK:
7648 err = hot_remove_disk(mddev, new_decode_dev(arg));
7649 goto unlock;
7650
7651 case ADD_NEW_DISK:
7652
7653
7654
7655
7656 if (mddev->pers) {
7657 mdu_disk_info_t info;
7658 if (copy_from_user(&info, argp, sizeof(info)))
7659 err = -EFAULT;
7660 else if (!(info.state & (1<<MD_DISK_SYNC)))
7661
7662 break;
7663 else
7664 err = md_add_new_disk(mddev, &info);
7665 goto unlock;
7666 }
7667 break;
7668 }
7669
7670
7671
7672
7673
7674 if (mddev->ro && mddev->pers) {
7675 if (mddev->ro == 2) {
7676 mddev->ro = 0;
7677 sysfs_notify_dirent_safe(mddev->sysfs_state);
7678 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7679
7680
7681
7682
7683 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7684 mddev_unlock(mddev);
7685 wait_event(mddev->sb_wait,
7686 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7687 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7688 mddev_lock_nointr(mddev);
7689 }
7690 } else {
7691 err = -EROFS;
7692 goto unlock;
7693 }
7694 }
7695
7696 switch (cmd) {
7697 case ADD_NEW_DISK:
7698 {
7699 mdu_disk_info_t info;
7700 if (copy_from_user(&info, argp, sizeof(info)))
7701 err = -EFAULT;
7702 else
7703 err = md_add_new_disk(mddev, &info);
7704 goto unlock;
7705 }
7706
7707 case CLUSTERED_DISK_NACK:
7708 if (mddev_is_clustered(mddev))
7709 md_cluster_ops->new_disk_ack(mddev, false);
7710 else
7711 err = -EINVAL;
7712 goto unlock;
7713
7714 case HOT_ADD_DISK:
7715 err = hot_add_disk(mddev, new_decode_dev(arg));
7716 goto unlock;
7717
7718 case RUN_ARRAY:
7719 err = do_md_run(mddev);
7720 goto unlock;
7721
7722 case SET_BITMAP_FILE:
7723 err = set_bitmap_file(mddev, (int)arg);
7724 goto unlock;
7725
7726 default:
7727 err = -EINVAL;
7728 goto unlock;
7729 }
7730
7731unlock:
7732 if (mddev->hold_active == UNTIL_IOCTL &&
7733 err != -EINVAL)
7734 mddev->hold_active = 0;
7735 mddev_unlock(mddev);
7736out:
7737 if(did_set_md_closing)
7738 clear_bit(MD_CLOSING, &mddev->flags);
7739 return err;
7740}
7741#ifdef CONFIG_COMPAT
7742static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7743 unsigned int cmd, unsigned long arg)
7744{
7745 switch (cmd) {
7746 case HOT_REMOVE_DISK:
7747 case HOT_ADD_DISK:
7748 case SET_DISK_FAULTY:
7749 case SET_BITMAP_FILE:
7750
7751 break;
7752 default:
7753 arg = (unsigned long)compat_ptr(arg);
7754 break;
7755 }
7756
7757 return md_ioctl(bdev, mode, cmd, arg);
7758}
7759#endif
7760
7761static int md_set_read_only(struct block_device *bdev, bool ro)
7762{
7763 struct mddev *mddev = bdev->bd_disk->private_data;
7764 int err;
7765
7766 err = mddev_lock(mddev);
7767 if (err)
7768 return err;
7769
7770 if (!mddev->raid_disks && !mddev->external) {
7771 err = -ENODEV;
7772 goto out_unlock;
7773 }
7774
7775
7776
7777
7778
7779 if (!ro && mddev->ro == 1 && mddev->pers) {
7780 err = restart_array(mddev);
7781 if (err)
7782 goto out_unlock;
7783 mddev->ro = 2;
7784 }
7785
7786out_unlock:
7787 mddev_unlock(mddev);
7788 return err;
7789}
7790
7791static int md_open(struct block_device *bdev, fmode_t mode)
7792{
7793
7794
7795
7796
7797 struct mddev *mddev = mddev_find(bdev->bd_dev);
7798 int err;
7799
7800 if (!mddev)
7801 return -ENODEV;
7802
7803 if (mddev->gendisk != bdev->bd_disk) {
7804
7805
7806
7807 mddev_put(mddev);
7808
7809 if (work_pending(&mddev->del_work))
7810 flush_workqueue(md_misc_wq);
7811 return -EBUSY;
7812 }
7813 BUG_ON(mddev != bdev->bd_disk->private_data);
7814
7815 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7816 goto out;
7817
7818 if (test_bit(MD_CLOSING, &mddev->flags)) {
7819 mutex_unlock(&mddev->open_mutex);
7820 err = -ENODEV;
7821 goto out;
7822 }
7823
7824 err = 0;
7825 atomic_inc(&mddev->openers);
7826 mutex_unlock(&mddev->open_mutex);
7827
7828 bdev_check_media_change(bdev);
7829 out:
7830 if (err)
7831 mddev_put(mddev);
7832 return err;
7833}
7834
/*
 * ->release handler for md block devices: undoes a successful md_open()
 * by decrementing mddev->openers and dropping one mddev reference.
 */
static void md_release(struct gendisk *disk, fmode_t mode)
{
	struct mddev *mddev = disk->private_data;

	BUG_ON(!mddev);
	atomic_dec(&mddev->openers);
	mddev_put(mddev);
}
7843
7844static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
7845{
7846 struct mddev *mddev = disk->private_data;
7847 unsigned int ret = 0;
7848
7849 if (mddev->changed)
7850 ret = DISK_EVENT_MEDIA_CHANGE;
7851 mddev->changed = 0;
7852 return ret;
7853}
7854
/*
 * Block device operations table for md devices.  It wires the md
 * open/release/ioctl/bio-submission handlers into the block layer; the
 * compat ioctl handler is only present when CONFIG_COMPAT is enabled.
 */
const struct block_device_operations md_fops =
{
	.owner		= THIS_MODULE,
	.submit_bio	= md_submit_bio,
	.open		= md_open,
	.release	= md_release,
	.ioctl		= md_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= md_compat_ioctl,
#endif
	.getgeo		= md_getgeo,
	.check_events	= md_check_events,
	.set_read_only	= md_set_read_only,
};
7869
/*
 * Body of every kernel thread started by md_register_thread().
 *
 * @arg is the struct md_thread describing this thread.  The loop sleeps
 * until THREAD_WAKEUP is set, a stop/park is requested or thread->timeout
 * elapses, then calls thread->run() unless a stop was requested.
 * Always returns 0.
 */
static int md_thread(void *arg)
{
	struct md_thread *thread = arg;

	/* Let SIGKILL be delivered to this kernel thread. */
	allow_signal(SIGKILL);
	while (!kthread_should_stop()) {

		/*
		 * Discard any pending signal before sleeping; otherwise the
		 * interruptible wait below would return straight away.
		 */
		if (signal_pending(current))
			flush_signals(current);

		wait_event_interruptible_timeout
			(thread->wqueue,
			 test_bit(THREAD_WAKEUP, &thread->flags)
			 || kthread_should_stop() || kthread_should_park(),
			 thread->timeout);

		/* Consume the wake-up request before running the handler. */
		clear_bit(THREAD_WAKEUP, &thread->flags);
		if (kthread_should_park())
			kthread_parkme();
		/* Do not run the handler if we were woken in order to stop. */
		if (!kthread_should_stop())
			thread->run(thread);
	}

	return 0;
}
7912
7913void md_wakeup_thread(struct md_thread *thread)
7914{
7915 if (thread) {
7916 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7917 set_bit(THREAD_WAKEUP, &thread->flags);
7918 wake_up(&thread->wqueue);
7919 }
7920}
7921EXPORT_SYMBOL(md_wakeup_thread);
7922
7923struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7924 struct mddev *mddev, const char *name)
7925{
7926 struct md_thread *thread;
7927
7928 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7929 if (!thread)
7930 return NULL;
7931
7932 init_waitqueue_head(&thread->wqueue);
7933
7934 thread->run = run;
7935 thread->mddev = mddev;
7936 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7937 thread->tsk = kthread_run(md_thread, thread,
7938 "%s_%s",
7939 mdname(thread->mddev),
7940 name);
7941 if (IS_ERR(thread->tsk)) {
7942 kfree(thread);
7943 return NULL;
7944 }
7945 return thread;
7946}
7947EXPORT_SYMBOL(md_register_thread);
7948
7949void md_unregister_thread(struct md_thread **threadp)
7950{
7951 struct md_thread *thread = *threadp;
7952 if (!thread)
7953 return;
7954 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7955
7956
7957
7958 spin_lock(&pers_lock);
7959 *threadp = NULL;
7960 spin_unlock(&pers_lock);
7961
7962 kthread_stop(thread->tsk);
7963 kfree(thread);
7964}
7965EXPORT_SYMBOL(md_unregister_thread);
7966
7967void md_error(struct mddev *mddev, struct md_rdev *rdev)
7968{
7969 if (!rdev || test_bit(Faulty, &rdev->flags))
7970 return;
7971
7972 if (!mddev->pers || !mddev->pers->error_handler)
7973 return;
7974 mddev->pers->error_handler(mddev,rdev);
7975 if (mddev->degraded)
7976 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7977 sysfs_notify_dirent_safe(rdev->sysfs_state);
7978 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7979 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7980 md_wakeup_thread(mddev->thread);
7981 if (mddev->event_work.func)
7982 queue_work(md_misc_wq, &mddev->event_work);
7983 md_new_event(mddev);
7984}
7985EXPORT_SYMBOL(md_error);
7986
7987
7988
7989static void status_unused(struct seq_file *seq)
7990{
7991 int i = 0;
7992 struct md_rdev *rdev;
7993
7994 seq_printf(seq, "unused devices: ");
7995
7996 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7997 char b[BDEVNAME_SIZE];
7998 i++;
7999 seq_printf(seq, "%s ",
8000 bdevname(rdev->bdev,b));
8001 }
8002 if (!i)
8003 seq_printf(seq, "<none>");
8004
8005 seq_printf(seq, "\n");
8006}
8007
/*
 * Print the resync/recovery/reshape/check progress of @mddev into @seq.
 *
 * Returns 1 if something was printed (a progress bar with percentage,
 * estimated finish time and speed, or one of the PENDING/DELAYED/REMOTE
 * markers) and 0 if there is nothing to report.
 */
static int status_resync(struct seq_file *seq, struct mddev *mddev)
{
	sector_t max_sectors, resync, res;
	unsigned long dt, db = 0;
	sector_t rt, curr_mark_cnt, resync_mark_cnt;
	int scale, recovery_active;
	unsigned int per_milli;

	/* Total amount of work depends on which kind of pass is running. */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
	    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->resync_max_sectors;
	else
		max_sectors = mddev->dev_sectors;

	resync = mddev->curr_resync;
	if (resync <= 3) {
		/*
		 * NOTE(review): curr_resync values 0..3 appear to be state
		 * markers rather than real positions - confirm against
		 * md_do_sync().
		 */
		if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
			/* Pass is done: show it as fully complete. */
			resync = max_sectors;
	} else if (resync > max_sectors)
		resync = max_sectors;
	else
		/* Exclude sectors that were issued but have not completed. */
		resync -= atomic_read(&mddev->recovery_active);

	if (resync == 0) {
		if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
			struct md_rdev *rdev;

			/*
			 * A partially recovered, non-faulty member means the
			 * remote operation is a recovery.
			 */
			rdev_for_each(rdev, mddev)
				if (rdev->raid_disk >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    rdev->recovery_offset != MaxSector &&
				    rdev->recovery_offset) {
					seq_printf(seq, "\trecover=REMOTE");
					return 1;
				}
			if (mddev->reshape_position != MaxSector)
				seq_printf(seq, "\treshape=REMOTE");
			else
				seq_printf(seq, "\tresync=REMOTE");
			return 1;
		}
		/* Nothing running, but the array is not fully in sync. */
		if (mddev->recovery_cp < MaxSector) {
			seq_printf(seq, "\tresync=PENDING");
			return 1;
		}
		return 0;
	}
	if (resync < 3) {
		seq_printf(seq, "\tresync=DELAYED");
		return 1;
	}

	WARN_ON(max_sectors == 0);

	/*
	 * Scale both values down before dividing: the divisor passed to
	 * sector_div() is cast to u32, so on configurations where sector_t
	 * is wider than unsigned long the shift is grown until
	 * max_sectors >> scale is small enough.
	 */
	scale = 10;
	if (sizeof(sector_t) > sizeof(unsigned long)) {
		while ( max_sectors/2 > (1ULL<<(scale+32)))
			scale++;
	}
	res = (resync>>scale)*1000;
	sector_div(res, (u32)((max_sectors>>scale)+1));

	/* Progress in tenths of a percent. */
	per_milli = res;
	{
		/* 20-character bar: one '=' per 5% completed. */
		int i, x = per_milli/50, y = 20-x;
		seq_printf(seq, "[");
		for (i = 0; i < x; i++)
			seq_printf(seq, "=");
		seq_printf(seq, ">");
		for (i = 0; i < y; i++)
			seq_printf(seq, ".");
		seq_printf(seq, "] ");
	}
	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
		    "reshape" :
		    (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
		     "check" :
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		      "resync" : "recovery"))),
		   per_milli/10, per_milli % 10,
		   (unsigned long long) resync/2,
		   (unsigned long long) max_sectors/2);

	/*
	 * Estimate the remaining time:
	 *   dt = whole seconds since the resync_mark timestamp (at least 1)
	 *   db = sectors completed since that mark (0 if the counters are
	 *        momentarily inconsistent)
	 *   rt = remaining sectors / (db/32 + 1) * dt / 32, which
	 *        approximates remaining * dt / db in seconds without
	 *        dividing by zero.
	 */
	dt = ((jiffies - mddev->resync_mark) / HZ);
	if (!dt) dt++;

	/* Snapshot the counters once so the comparison and subtraction agree. */
	curr_mark_cnt = mddev->curr_mark_cnt;
	recovery_active = atomic_read(&mddev->recovery_active);
	resync_mark_cnt = mddev->resync_mark_cnt;

	if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
		db = curr_mark_cnt - (recovery_active + resync_mark_cnt);

	rt = max_sectors - resync;
	rt = div64_u64(rt, db/32+1);
	rt *= dt;
	rt >>= 5;

	/* Minutes, plus tenths of a minute (6 seconds each). */
	seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
		   ((unsigned long)rt % 60)/6);

	/* NOTE(review): db is unsigned long but is printed with %ld. */
	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
	return 1;
}
8135
8136static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8137{
8138 struct list_head *tmp;
8139 loff_t l = *pos;
8140 struct mddev *mddev;
8141
8142 if (l == 0x10000) {
8143 ++*pos;
8144 return (void *)2;
8145 }
8146 if (l > 0x10000)
8147 return NULL;
8148 if (!l--)
8149
8150 return (void*)1;
8151
8152 spin_lock(&all_mddevs_lock);
8153 list_for_each(tmp,&all_mddevs)
8154 if (!l--) {
8155 mddev = list_entry(tmp, struct mddev, all_mddevs);
8156 mddev_get(mddev);
8157 spin_unlock(&all_mddevs_lock);
8158 return mddev;
8159 }
8160 spin_unlock(&all_mddevs_lock);
8161 if (!l--)
8162 return (void*)2;
8163 return NULL;
8164}
8165
8166static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8167{
8168 struct list_head *tmp;
8169 struct mddev *next_mddev, *mddev = v;
8170
8171 ++*pos;
8172 if (v == (void*)2)
8173 return NULL;
8174
8175 spin_lock(&all_mddevs_lock);
8176 if (v == (void*)1)
8177 tmp = all_mddevs.next;
8178 else
8179 tmp = mddev->all_mddevs.next;
8180 if (tmp != &all_mddevs)
8181 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8182 else {
8183 next_mddev = (void*)2;
8184 *pos = 0x10000;
8185 }
8186 spin_unlock(&all_mddevs_lock);
8187
8188 if (v != (void*)1)
8189 mddev_put(mddev);
8190 return next_mddev;
8191
8192}
8193
/*
 * seq_file ->stop for /proc/mdstat: drops the reference on the current
 * mddev.  NULL and the header/trailer tokens (void *)1 and (void *)2
 * carry no reference and are ignored.
 */
static void md_seq_stop(struct seq_file *seq, void *v)
{
	struct mddev *mddev = v;

	if (!mddev || v == (void *)1 || v == (void *)2)
		return;

	mddev_put(mddev);
}
8201
/*
 * seq_file ->show for /proc/mdstat.
 *
 * @v is either the header token (void *)1 (prints the registered
 * personalities), the trailer token (void *)2 (prints unused devices),
 * or a struct mddev whose status is printed.  Always returns 0.
 */
static int md_seq_show(struct seq_file *seq, void *v)
{
	struct mddev *mddev = v;
	sector_t sectors;
	struct md_rdev *rdev;

	if (v == (void*)1) {
		struct md_personality *pers;
		seq_printf(seq, "Personalities : ");
		spin_lock(&pers_lock);
		list_for_each_entry(pers, &pers_list, list)
			seq_printf(seq, "[%s] ", pers->name);

		spin_unlock(&pers_lock);
		seq_printf(seq, "\n");
		/*
		 * Remember the event count this reader has seen;
		 * mdstat_poll() compares against it.
		 */
		seq->poll_event = atomic_read(&md_event_count);
		return 0;
	}
	if (v == (void*)2) {
		status_unused(seq);
		return 0;
	}

	/* mddev->lock is held for the whole time the array is described. */
	spin_lock(&mddev->lock);
	/* Arrays with no personality, no raid_disks and no members are not listed. */
	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
		seq_printf(seq, "%s : %sactive", mdname(mddev),
						mddev->pers ? "" : "in");
		if (mddev->pers) {
			if (mddev->ro==1)
				seq_printf(seq, " (read-only)");
			if (mddev->ro==2)
				seq_printf(seq, " (auto-read-only)");
			seq_printf(seq, " %s", mddev->pers->name);
		}

		/* List the members, summing the size of the non-faulty ones. */
		sectors = 0;
		rcu_read_lock();
		rdev_for_each_rcu(rdev, mddev) {
			char b[BDEVNAME_SIZE];
			seq_printf(seq, " %s[%d]",
				bdevname(rdev->bdev,b), rdev->desc_nr);
			if (test_bit(WriteMostly, &rdev->flags))
				seq_printf(seq, "(W)");
			if (test_bit(Journal, &rdev->flags))
				seq_printf(seq, "(J)");
			if (test_bit(Faulty, &rdev->flags)) {
				seq_printf(seq, "(F)");
				continue;
			}
			if (rdev->raid_disk < 0)
				seq_printf(seq, "(S)");
			if (test_bit(Replacement, &rdev->flags))
				seq_printf(seq, "(R)");
			sectors += rdev->sectors;
		}
		rcu_read_unlock();

		/*
		 * A running array reports its own size; a stopped one reports
		 * the sum computed above.  Sector counts are halved for the
		 * "blocks" figure.
		 */
		if (!list_empty(&mddev->disks)) {
			if (mddev->pers)
				seq_printf(seq, "\n %llu blocks",
					   (unsigned long long)
					   mddev->array_sectors / 2);
			else
				seq_printf(seq, "\n %llu blocks",
					   (unsigned long long)sectors / 2);
		}
		/* The superblock version is omitted for persistent 0.90 metadata. */
		if (mddev->persistent) {
			if (mddev->major_version != 0 ||
			    mddev->minor_version != 90) {
				seq_printf(seq," super %d.%d",
					   mddev->major_version,
					   mddev->minor_version);
			}
		} else if (mddev->external)
			seq_printf(seq, " super external:%s",
				   mddev->metadata_type);
		else
			seq_printf(seq, " super non-persistent");

		if (mddev->pers) {
			/* Personality-specific status, then sync progress if supported. */
			mddev->pers->status(seq, mddev);
			seq_printf(seq, "\n ");
			if (mddev->pers->sync_request) {
				if (status_resync(seq, mddev))
					seq_printf(seq, "\n ");
			}
		} else
			seq_printf(seq, "\n ");

		md_bitmap_status(seq, mddev->bitmap);

		seq_printf(seq, "\n");
	}
	spin_unlock(&mddev->lock);

	return 0;
}
8299
/* seq_file iterator used by md_seq_open() to produce /proc/mdstat output. */
static const struct seq_operations md_seq_ops = {
	.start  = md_seq_start,
	.next   = md_seq_next,
	.stop   = md_seq_stop,
	.show   = md_seq_show,
};
8306
8307static int md_seq_open(struct inode *inode, struct file *file)
8308{
8309 struct seq_file *seq;
8310 int error;
8311
8312 error = seq_open(file, &md_seq_ops);
8313 if (error)
8314 return error;
8315
8316 seq = file->private_data;
8317 seq->poll_event = atomic_read(&md_event_count);
8318 return error;
8319}
8320
8321static int md_unloading;
8322static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8323{
8324 struct seq_file *seq = filp->private_data;
8325 __poll_t mask;
8326
8327 if (md_unloading)
8328 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8329 poll_wait(filp, &md_event_waiters, wait);
8330
8331
8332 mask = EPOLLIN | EPOLLRDNORM;
8333
8334 if (seq->poll_event != atomic_read(&md_event_count))
8335 mask |= EPOLLERR | EPOLLPRI;
8336 return mask;
8337}
8338
/*
 * proc_ops for the mdstat file: a standard seq_file reader plus
 * mdstat_poll() so readers can wait for md events.
 */
static const struct proc_ops mdstat_proc_ops = {
	.proc_open	= md_seq_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
	.proc_poll	= mdstat_poll,
};
8346
/*
 * Add personality @p to the tail of pers_list under pers_lock.
 * No duplicate check is performed.  Always returns 0.
 */
int register_md_personality(struct md_personality *p)
{
	pr_debug("md: %s personality registered for level %d\n",
		 p->name, p->level);
	spin_lock(&pers_lock);
	list_add_tail(&p->list, &pers_list);
	spin_unlock(&pers_lock);
	return 0;
}
8356EXPORT_SYMBOL(register_md_personality);
8357
/*
 * Remove personality @p from pers_list under pers_lock, leaving its list
 * node re-initialised.  Always returns 0.
 */
int unregister_md_personality(struct md_personality *p)
{
	pr_debug("md: %s personality unregistered\n", p->name);
	spin_lock(&pers_lock);
	list_del_init(&p->list);
	spin_unlock(&pers_lock);
	return 0;
}
8366EXPORT_SYMBOL(unregister_md_personality);
8367
8368int register_md_cluster_operations(struct md_cluster_operations *ops,
8369 struct module *module)
8370{
8371 int ret = 0;
8372 spin_lock(&pers_lock);
8373 if (md_cluster_ops != NULL)
8374 ret = -EALREADY;
8375 else {
8376 md_cluster_ops = ops;
8377 md_cluster_mod = module;
8378 }
8379 spin_unlock(&pers_lock);
8380 return ret;
8381}
8382EXPORT_SYMBOL(register_md_cluster_operations);
8383
/*
 * Clear the global md_cluster_ops pointer under pers_lock.
 * md_cluster_mod is left untouched.  Always returns 0.
 */
int unregister_md_cluster_operations(void)
{
	spin_lock(&pers_lock);
	md_cluster_ops = NULL;
	spin_unlock(&pers_lock);
	return 0;
}
8391EXPORT_SYMBOL(unregister_md_cluster_operations);
8392
8393int md_setup_cluster(struct mddev *mddev, int nodes)
8394{
8395 int ret;
8396 if (!md_cluster_ops)
8397 request_module("md-cluster");
8398 spin_lock(&pers_lock);
8399
8400 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
8401 pr_warn("can't find md-cluster module or get it's reference.\n");
8402 spin_unlock(&pers_lock);
8403 return -ENOENT;
8404 }
8405 spin_unlock(&pers_lock);
8406
8407 ret = md_cluster_ops->join(mddev, nodes);
8408 if (!ret)
8409 mddev->safemode_delay = 0;
8410 return ret;
8411}
8412
8413void md_cluster_stop(struct mddev *mddev)
8414{
8415 if (!md_cluster_ops)
8416 return;
8417 md_cluster_ops->leave(mddev);
8418 module_put(md_cluster_mod);
8419}
8420
8421static int is_mddev_idle(struct mddev *mddev, int init)
8422{
8423 struct md_rdev *rdev;
8424 int idle;
8425 int curr_events;
8426
8427 idle = 1;
8428 rcu_read_lock();
8429 rdev_for_each_rcu(rdev, mddev) {
8430 struct gendisk *disk = rdev->bdev->bd_disk;
8431 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8432 atomic_read(&disk->sync_io);
8433
8434
8435
8436
8437
8438
8439
8440
8441
8442
8443
8444
8445
8446
8447
8448
8449
8450
8451
8452
8453
8454
8455 if (init || curr_events - rdev->last_events > 64) {
8456 rdev->last_events = curr_events;
8457 idle = 0;
8458 }
8459 }
8460 rcu_read_unlock();
8461 return idle;
8462}
8463
8464void md_done_sync(struct mddev *mddev, int blocks, int ok)
8465{
8466
8467 atomic_sub(blocks, &mddev->recovery_active);
8468 wake_up(&mddev->recovery_wait);
8469 if (!ok) {
8470 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8471 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8472 md_wakeup_thread(mddev->thread);
8473
8474 }
8475}
8476EXPORT_SYMBOL(md_done_sync);
8477
8478
8479
8480
8481
8482
8483
8484
/*
 * Prepare @mddev for the write described by @bi.
 *
 * Non-write bios return true immediately.  For a write, an
 * auto-read-only array (ro == 2) is switched to read-write, a reference
 * is taken on mddev->writes_pending, and an in_sync array is marked
 * not-in_sync with a superblock update requested.  If the array has
 * superblocks the caller then waits for MD_SB_CHANGE_PENDING to clear or
 * for the array to be suspended.
 *
 * Returns true when the write may proceed.  Returns false, with the
 * writes_pending reference already dropped, if MD_SB_CHANGE_PENDING is
 * still set after the wait.
 */
bool md_write_start(struct mddev *mddev, struct bio *bi)
{
	int did_change = 0;

	if (bio_data_dir(bi) != WRITE)
		return true;

	/* Writes must never reach a read-only (ro == 1) array. */
	BUG_ON(mddev->ro == 1);
	if (mddev->ro == 2) {
		/* need to switch to read/write */
		mddev->ro = 0;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		md_wakeup_thread(mddev->sync_thread);
		did_change = 1;
	}
	rcu_read_lock();
	percpu_ref_get(&mddev->writes_pending);
	/*
	 * NOTE(review): presumably pairs with a barrier on the path that
	 * sets in_sync, so that either this side sees in_sync or that side
	 * sees the new reference - confirm.
	 */
	smp_mb();
	if (mddev->safemode == 1)
		mddev->safemode = 0;

	/* Unlocked peek first; in_sync is re-checked under mddev->lock. */
	if (mddev->in_sync || mddev->sync_checkers) {
		spin_lock(&mddev->lock);
		if (mddev->in_sync) {
			mddev->in_sync = 0;
			set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
			set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
			md_wakeup_thread(mddev->thread);
			did_change = 1;
		}
		spin_unlock(&mddev->lock);
	}
	rcu_read_unlock();
	if (did_change)
		sysfs_notify_dirent_safe(mddev->sysfs_state);
	/* Without superblocks there is no metadata update to wait for. */
	if (!mddev->has_superblocks)
		return true;
	wait_event(mddev->sb_wait,
		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
		   mddev->suspended);
	/* Woken because of suspension while the update is still pending. */
	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
		percpu_ref_put(&mddev->writes_pending);
		return false;
	}
	return true;
}
8532EXPORT_SYMBOL(md_write_start);
8533
8534
8535
8536
8537
8538
8539
8540
8541
8542void md_write_inc(struct mddev *mddev, struct bio *bi)
8543{
8544 if (bio_data_dir(bi) != WRITE)
8545 return;
8546 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8547 percpu_ref_get(&mddev->writes_pending);
8548}
8549EXPORT_SYMBOL(md_write_inc);
8550
8551void md_write_end(struct mddev *mddev)
8552{
8553 percpu_ref_put(&mddev->writes_pending);
8554
8555 if (mddev->safemode == 2)
8556 md_wakeup_thread(mddev->thread);
8557 else if (mddev->safemode_delay)
8558
8559
8560
8561 mod_timer(&mddev->safemode_timer,
8562 roundup(jiffies, mddev->safemode_delay) +
8563 mddev->safemode_delay);
8564}
8565
8566EXPORT_SYMBOL(md_write_end);
8567
8568
8569void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
8570 struct bio *bio, sector_t start, sector_t size)
8571{
8572 struct bio *discard_bio = NULL;
8573
8574 if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
8575 &discard_bio) || !discard_bio)
8576 return;
8577
8578 bio_chain(discard_bio, bio);
8579 bio_clone_blkg_association(discard_bio, bio);
8580 if (mddev->gendisk)
8581 trace_block_bio_remap(discard_bio,
8582 disk_devt(mddev->gendisk),
8583 bio->bi_iter.bi_sector);
8584 submit_bio_noacct(discard_bio);
8585}
8586EXPORT_SYMBOL_GPL(md_submit_discard_bio);
8587
8588static void md_end_io_acct(struct bio *bio)
8589{
8590 struct md_io_acct *md_io_acct = bio->bi_private;
8591 struct bio *orig_bio = md_io_acct->orig_bio;
8592
8593 orig_bio->bi_status = bio->bi_status;
8594
8595 bio_end_io_acct(orig_bio, md_io_acct->start_time);
8596 bio_put(bio);
8597 bio_endio(orig_bio);
8598}
8599
8600
8601
8602
8603
8604void md_account_bio(struct mddev *mddev, struct bio **bio)
8605{
8606 struct md_io_acct *md_io_acct;
8607 struct bio *clone;
8608
8609 if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
8610 return;
8611
8612 clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
8613 md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
8614 md_io_acct->orig_bio = *bio;
8615 md_io_acct->start_time = bio_start_io_acct(*bio);
8616
8617 clone->bi_end_io = md_end_io_acct;
8618 clone->bi_private = md_io_acct;
8619 *bio = clone;
8620}
8621EXPORT_SYMBOL_GPL(md_account_bio);
8622
8623
8624
8625
8626
8627
8628
8629void md_allow_write(struct mddev *mddev)
8630{
8631 if (!mddev->pers)
8632 return;
8633 if (mddev->ro)
8634 return;
8635 if (!mddev->pers->sync_request)
8636 return;
8637
8638 spin_lock(&mddev->lock);
8639 if (mddev->in_sync) {
8640 mddev->in_sync = 0;
8641 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8642 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8643 if (mddev->safemode_delay &&
8644 mddev->safemode == 0)
8645 mddev->safemode = 1;
8646 spin_unlock(&mddev->lock);
8647 md_update_sb(mddev, 0);
8648 sysfs_notify_dirent_safe(mddev->sysfs_state);
8649
8650 wait_event(mddev->sb_wait,
8651 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8652 } else
8653 spin_unlock(&mddev->lock);
8654}
8655EXPORT_SYMBOL_GPL(md_allow_write);
8656
8657#define SYNC_MARKS 10
8658#define SYNC_MARK_STEP (3*HZ)
8659#define UPDATE_FREQUENCY (5*60*HZ)
8660void md_do_sync(struct md_thread *thread)
8661{
8662 struct mddev *mddev = thread->mddev;
8663 struct mddev *mddev2;
8664 unsigned int currspeed = 0, window;
8665 sector_t max_sectors,j, io_sectors, recovery_done;
8666 unsigned long mark[SYNC_MARKS];
8667 unsigned long update_time;
8668 sector_t mark_cnt[SYNC_MARKS];
8669 int last_mark,m;
8670 struct list_head *tmp;
8671 sector_t last_check;
8672 int skipped = 0;
8673 struct md_rdev *rdev;
8674 char *desc, *action = NULL;
8675 struct blk_plug plug;
8676 int ret;
8677
8678
8679 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8680 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8681 return;
8682 if (mddev->ro) {
8683 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8684 return;
8685 }
8686
8687 if (mddev_is_clustered(mddev)) {
8688 ret = md_cluster_ops->resync_start(mddev);
8689 if (ret)
8690 goto skip;
8691
8692 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8693 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8694 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8695 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8696 && ((unsigned long long)mddev->curr_resync_completed
8697 < (unsigned long long)mddev->resync_max_sectors))
8698 goto skip;
8699 }
8700
8701 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8702 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8703 desc = "data-check";
8704 action = "check";
8705 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8706 desc = "requested-resync";
8707 action = "repair";
8708 } else
8709 desc = "resync";
8710 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8711 desc = "reshape";
8712 else
8713 desc = "recovery";
8714
8715 mddev->last_sync_action = action ?: desc;
8716
8717
8718
8719
8720
8721
8722
8723
8724
8725
8726
8727
8728
8729
8730
8731
8732
8733 do {
8734 int mddev2_minor = -1;
8735 mddev->curr_resync = 2;
8736
8737 try_again:
8738 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8739 goto skip;
8740 for_each_mddev(mddev2, tmp) {
8741 if (mddev2 == mddev)
8742 continue;
8743 if (!mddev->parallel_resync
8744 && mddev2->curr_resync
8745 && match_mddev_units(mddev, mddev2)) {
8746 DEFINE_WAIT(wq);
8747 if (mddev < mddev2 && mddev->curr_resync == 2) {
8748
8749 mddev->curr_resync = 1;
8750 wake_up(&resync_wait);
8751 }
8752 if (mddev > mddev2 && mddev->curr_resync == 1)
8753
8754
8755
8756 continue;
8757
8758
8759
8760
8761 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8762 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8763 mddev2->curr_resync >= mddev->curr_resync) {
8764 if (mddev2_minor != mddev2->md_minor) {
8765 mddev2_minor = mddev2->md_minor;
8766 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8767 desc, mdname(mddev),
8768 mdname(mddev2));
8769 }
8770 mddev_put(mddev2);
8771 if (signal_pending(current))
8772 flush_signals(current);
8773 schedule();
8774 finish_wait(&resync_wait, &wq);
8775 goto try_again;
8776 }
8777 finish_wait(&resync_wait, &wq);
8778 }
8779 }
8780 } while (mddev->curr_resync < 2);
8781
8782 j = 0;
8783 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8784
8785
8786
8787 max_sectors = mddev->resync_max_sectors;
8788 atomic64_set(&mddev->resync_mismatches, 0);
8789
8790 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8791 j = mddev->resync_min;
8792 else if (!mddev->bitmap)
8793 j = mddev->recovery_cp;
8794
8795 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8796 max_sectors = mddev->resync_max_sectors;
8797
8798
8799
8800
8801
8802 if (mddev_is_clustered(mddev) &&
8803 mddev->reshape_position != MaxSector)
8804 j = mddev->reshape_position;
8805 } else {
8806
8807 max_sectors = mddev->dev_sectors;
8808 j = MaxSector;
8809 rcu_read_lock();
8810 rdev_for_each_rcu(rdev, mddev)
8811 if (rdev->raid_disk >= 0 &&
8812 !test_bit(Journal, &rdev->flags) &&
8813 !test_bit(Faulty, &rdev->flags) &&
8814 !test_bit(In_sync, &rdev->flags) &&
8815 rdev->recovery_offset < j)
8816 j = rdev->recovery_offset;
8817 rcu_read_unlock();
8818
8819
8820
8821
8822
8823
8824
8825
8826
8827 if (mddev->bitmap) {
8828 mddev->pers->quiesce(mddev, 1);
8829 mddev->pers->quiesce(mddev, 0);
8830 }
8831 }
8832
8833 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8834 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8835 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8836 speed_max(mddev), desc);
8837
8838 is_mddev_idle(mddev, 1);
8839
8840 io_sectors = 0;
8841 for (m = 0; m < SYNC_MARKS; m++) {
8842 mark[m] = jiffies;
8843 mark_cnt[m] = io_sectors;
8844 }
8845 last_mark = 0;
8846 mddev->resync_mark = mark[last_mark];
8847 mddev->resync_mark_cnt = mark_cnt[last_mark];
8848
8849
8850
8851
8852 window = 32 * (PAGE_SIZE / 512);
8853 pr_debug("md: using %dk window, over a total of %lluk.\n",
8854 window/2, (unsigned long long)max_sectors/2);
8855
8856 atomic_set(&mddev->recovery_active, 0);
8857 last_check = 0;
8858
8859 if (j>2) {
8860 pr_debug("md: resuming %s of %s from checkpoint.\n",
8861 desc, mdname(mddev));
8862 mddev->curr_resync = j;
8863 } else
8864 mddev->curr_resync = 3;
8865 mddev->curr_resync_completed = j;
8866 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8867 md_new_event(mddev);
8868 update_time = jiffies;
8869
8870 blk_start_plug(&plug);
8871 while (j < max_sectors) {
8872 sector_t sectors;
8873
8874 skipped = 0;
8875
8876 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8877 ((mddev->curr_resync > mddev->curr_resync_completed &&
8878 (mddev->curr_resync - mddev->curr_resync_completed)
8879 > (max_sectors >> 4)) ||
8880 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8881 (j - mddev->curr_resync_completed)*2
8882 >= mddev->resync_max - mddev->curr_resync_completed ||
8883 mddev->curr_resync_completed > mddev->resync_max
8884 )) {
8885
8886 wait_event(mddev->recovery_wait,
8887 atomic_read(&mddev->recovery_active) == 0);
8888 mddev->curr_resync_completed = j;
8889 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8890 j > mddev->recovery_cp)
8891 mddev->recovery_cp = j;
8892 update_time = jiffies;
8893 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8894 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8895 }
8896
8897 while (j >= mddev->resync_max &&
8898 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8899
8900
8901
8902
8903 flush_signals(current);
8904 wait_event_interruptible(mddev->recovery_wait,
8905 mddev->resync_max > j
8906 || test_bit(MD_RECOVERY_INTR,
8907 &mddev->recovery));
8908 }
8909
8910 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8911 break;
8912
8913 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8914 if (sectors == 0) {
8915 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8916 break;
8917 }
8918
8919 if (!skipped) {
8920 io_sectors += sectors;
8921 atomic_add(sectors, &mddev->recovery_active);
8922 }
8923
8924 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8925 break;
8926
8927 j += sectors;
8928 if (j > max_sectors)
8929
8930 j = max_sectors;
8931 if (j > 2)
8932 mddev->curr_resync = j;
8933 mddev->curr_mark_cnt = io_sectors;
8934 if (last_check == 0)
8935
8936
8937
8938 md_new_event(mddev);
8939
8940 if (last_check + window > io_sectors || j == max_sectors)
8941 continue;
8942
8943 last_check = io_sectors;
8944 repeat:
8945 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8946
8947 int next = (last_mark+1) % SYNC_MARKS;
8948
8949 mddev->resync_mark = mark[next];
8950 mddev->resync_mark_cnt = mark_cnt[next];
8951 mark[next] = jiffies;
8952 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8953 last_mark = next;
8954 }
8955
8956 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8957 break;
8958
8959
8960
8961
8962
8963
8964
8965
8966
8967 cond_resched();
8968
8969 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8970 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8971 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8972
8973 if (currspeed > speed_min(mddev)) {
8974 if (currspeed > speed_max(mddev)) {
8975 msleep(500);
8976 goto repeat;
8977 }
8978 if (!is_mddev_idle(mddev, 0)) {
8979
8980
8981
8982
8983 wait_event(mddev->recovery_wait,
8984 !atomic_read(&mddev->recovery_active));
8985 }
8986 }
8987 }
8988 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8989 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8990 ? "interrupted" : "done");
8991
8992
8993
8994 blk_finish_plug(&plug);
8995 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8996
8997 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8998 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8999 mddev->curr_resync > 3) {
9000 mddev->curr_resync_completed = mddev->curr_resync;
9001 sysfs_notify_dirent_safe(mddev->sysfs_completed);
9002 }
9003 mddev->pers->sync_request(mddev, max_sectors, &skipped);
9004
9005 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9006 mddev->curr_resync > 3) {
9007 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9008 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9009 if (mddev->curr_resync >= mddev->recovery_cp) {
9010 pr_debug("md: checkpointing %s of %s.\n",
9011 desc, mdname(mddev));
9012 if (test_bit(MD_RECOVERY_ERROR,
9013 &mddev->recovery))
9014 mddev->recovery_cp =
9015 mddev->curr_resync_completed;
9016 else
9017 mddev->recovery_cp =
9018 mddev->curr_resync;
9019 }
9020 } else
9021 mddev->recovery_cp = MaxSector;
9022 } else {
9023 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9024 mddev->curr_resync = MaxSector;
9025 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9026 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9027 rcu_read_lock();
9028 rdev_for_each_rcu(rdev, mddev)
9029 if (rdev->raid_disk >= 0 &&
9030 mddev->delta_disks >= 0 &&
9031 !test_bit(Journal, &rdev->flags) &&
9032 !test_bit(Faulty, &rdev->flags) &&
9033 !test_bit(In_sync, &rdev->flags) &&
9034 rdev->recovery_offset < mddev->curr_resync)
9035 rdev->recovery_offset = mddev->curr_resync;
9036 rcu_read_unlock();
9037 }
9038 }
9039 }
9040 skip:
9041
9042
9043
9044 set_mask_bits(&mddev->sb_flags, 0,
9045 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
9046
9047 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9048 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9049 mddev->delta_disks > 0 &&
9050 mddev->pers->finish_reshape &&
9051 mddev->pers->size &&
9052 mddev->queue) {
9053 mddev_lock_nointr(mddev);
9054 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9055 mddev_unlock(mddev);
9056 if (!mddev_is_clustered(mddev))
9057 set_capacity_and_notify(mddev->gendisk,
9058 mddev->array_sectors);
9059 }
9060
9061 spin_lock(&mddev->lock);
9062 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9063
9064 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9065 mddev->resync_min = 0;
9066 mddev->resync_max = MaxSector;
9067 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9068 mddev->resync_min = mddev->curr_resync_completed;
9069 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9070 mddev->curr_resync = 0;
9071 spin_unlock(&mddev->lock);
9072
9073 wake_up(&resync_wait);
9074 md_wakeup_thread(mddev->thread);
9075 return;
9076}
9077EXPORT_SYMBOL_GPL(md_do_sync);
9078
9079static int remove_and_add_spares(struct mddev *mddev,
9080 struct md_rdev *this)
9081{
9082 struct md_rdev *rdev;
9083 int spares = 0;
9084 int removed = 0;
9085 bool remove_some = false;
9086
9087 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9088
9089 return 0;
9090
9091 rdev_for_each(rdev, mddev) {
9092 if ((this == NULL || rdev == this) &&
9093 rdev->raid_disk >= 0 &&
9094 !test_bit(Blocked, &rdev->flags) &&
9095 test_bit(Faulty, &rdev->flags) &&
9096 atomic_read(&rdev->nr_pending)==0) {
9097
9098
9099
9100
9101
9102 remove_some = true;
9103 set_bit(RemoveSynchronized, &rdev->flags);
9104 }
9105 }
9106
9107 if (remove_some)
9108 synchronize_rcu();
9109 rdev_for_each(rdev, mddev) {
9110 if ((this == NULL || rdev == this) &&
9111 rdev->raid_disk >= 0 &&
9112 !test_bit(Blocked, &rdev->flags) &&
9113 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9114 (!test_bit(In_sync, &rdev->flags) &&
9115 !test_bit(Journal, &rdev->flags))) &&
9116 atomic_read(&rdev->nr_pending)==0)) {
9117 if (mddev->pers->hot_remove_disk(
9118 mddev, rdev) == 0) {
9119 sysfs_unlink_rdev(mddev, rdev);
9120 rdev->saved_raid_disk = rdev->raid_disk;
9121 rdev->raid_disk = -1;
9122 removed++;
9123 }
9124 }
9125 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9126 clear_bit(RemoveSynchronized, &rdev->flags);
9127 }
9128
9129 if (removed && mddev->kobj.sd)
9130 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9131
9132 if (this && removed)
9133 goto no_add;
9134
9135 rdev_for_each(rdev, mddev) {
9136 if (this && this != rdev)
9137 continue;
9138 if (test_bit(Candidate, &rdev->flags))
9139 continue;
9140 if (rdev->raid_disk >= 0 &&
9141 !test_bit(In_sync, &rdev->flags) &&
9142 !test_bit(Journal, &rdev->flags) &&
9143 !test_bit(Faulty, &rdev->flags))
9144 spares++;
9145 if (rdev->raid_disk >= 0)
9146 continue;
9147 if (test_bit(Faulty, &rdev->flags))
9148 continue;
9149 if (!test_bit(Journal, &rdev->flags)) {
9150 if (mddev->ro &&
9151 ! (rdev->saved_raid_disk >= 0 &&
9152 !test_bit(Bitmap_sync, &rdev->flags)))
9153 continue;
9154
9155 rdev->recovery_offset = 0;
9156 }
9157 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9158
9159 sysfs_link_rdev(mddev, rdev);
9160 if (!test_bit(Journal, &rdev->flags))
9161 spares++;
9162 md_new_event(mddev);
9163 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9164 }
9165 }
9166no_add:
9167 if (removed)
9168 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9169 return spares;
9170}
9171
9172static void md_start_sync(struct work_struct *ws)
9173{
9174 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9175
9176 mddev->sync_thread = md_register_thread(md_do_sync,
9177 mddev,
9178 "resync");
9179 if (!mddev->sync_thread) {
9180 pr_warn("%s: could not start resync thread...\n",
9181 mdname(mddev));
9182
9183 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9184 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9185 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9186 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9187 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9188 wake_up(&resync_wait);
9189 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9190 &mddev->recovery))
9191 if (mddev->sysfs_action)
9192 sysfs_notify_dirent_safe(mddev->sysfs_action);
9193 } else
9194 md_wakeup_thread(mddev->sync_thread);
9195 sysfs_notify_dirent_safe(mddev->sysfs_action);
9196 md_new_event(mddev);
9197}
9198
9199
9200
9201
9202
9203
9204
9205
9206
9207
9208
9209
9210
9211
9212
9213
9214
9215
9216
9217
9218
9219
9220
9221void md_check_recovery(struct mddev *mddev)
9222{
9223 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9224
9225
9226
9227 set_bit(MD_UPDATING_SB, &mddev->flags);
9228 smp_mb__after_atomic();
9229 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9230 md_update_sb(mddev, 0);
9231 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9232 wake_up(&mddev->sb_wait);
9233 }
9234
9235 if (mddev->suspended)
9236 return;
9237
9238 if (mddev->bitmap)
9239 md_bitmap_daemon_work(mddev);
9240
9241 if (signal_pending(current)) {
9242 if (mddev->pers->sync_request && !mddev->external) {
9243 pr_debug("md: %s in immediate safe mode\n",
9244 mdname(mddev));
9245 mddev->safemode = 2;
9246 }
9247 flush_signals(current);
9248 }
9249
9250 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9251 return;
9252 if ( ! (
9253 (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
9254 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9255 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9256 (mddev->external == 0 && mddev->safemode == 1) ||
9257 (mddev->safemode == 2
9258 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9259 ))
9260 return;
9261
9262 if (mddev_trylock(mddev)) {
9263 int spares = 0;
9264 bool try_set_sync = mddev->safemode != 0;
9265
9266 if (!mddev->external && mddev->safemode == 1)
9267 mddev->safemode = 0;
9268
9269 if (mddev->ro) {
9270 struct md_rdev *rdev;
9271 if (!mddev->external && mddev->in_sync)
9272
9273
9274
9275
9276
9277 rdev_for_each(rdev, mddev)
9278 clear_bit(Blocked, &rdev->flags);
9279
9280
9281
9282
9283
9284
9285
9286 remove_and_add_spares(mddev, NULL);
9287
9288
9289
9290 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9291 md_reap_sync_thread(mddev);
9292 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9293 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9294 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9295 goto unlock;
9296 }
9297
9298 if (mddev_is_clustered(mddev)) {
9299 struct md_rdev *rdev, *tmp;
9300
9301
9302
9303 rdev_for_each_safe(rdev, tmp, mddev) {
9304 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9305 rdev->raid_disk < 0)
9306 md_kick_rdev_from_array(rdev);
9307 }
9308 }
9309
9310 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9311 spin_lock(&mddev->lock);
9312 set_in_sync(mddev);
9313 spin_unlock(&mddev->lock);
9314 }
9315
9316 if (mddev->sb_flags)
9317 md_update_sb(mddev, 0);
9318
9319 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9320 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9321
9322 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9323 goto unlock;
9324 }
9325 if (mddev->sync_thread) {
9326 md_reap_sync_thread(mddev);
9327 goto unlock;
9328 }
9329
9330
9331
9332 mddev->curr_resync_completed = 0;
9333 spin_lock(&mddev->lock);
9334 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9335 spin_unlock(&mddev->lock);
9336
9337
9338
9339 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9340 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9341
9342 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9343 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9344 goto not_running;
9345
9346
9347
9348
9349
9350
9351
9352 if (mddev->reshape_position != MaxSector) {
9353 if (mddev->pers->check_reshape == NULL ||
9354 mddev->pers->check_reshape(mddev) != 0)
9355
9356 goto not_running;
9357 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9358 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9359 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9360 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9361 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9362 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9363 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9364 } else if (mddev->recovery_cp < MaxSector) {
9365 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9366 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9367 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9368
9369 goto not_running;
9370
9371 if (mddev->pers->sync_request) {
9372 if (spares) {
9373
9374
9375
9376
9377 md_bitmap_write_all(mddev->bitmap);
9378 }
9379 INIT_WORK(&mddev->del_work, md_start_sync);
9380 queue_work(md_misc_wq, &mddev->del_work);
9381 goto unlock;
9382 }
9383 not_running:
9384 if (!mddev->sync_thread) {
9385 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9386 wake_up(&resync_wait);
9387 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9388 &mddev->recovery))
9389 if (mddev->sysfs_action)
9390 sysfs_notify_dirent_safe(mddev->sysfs_action);
9391 }
9392 unlock:
9393 wake_up(&mddev->sb_wait);
9394 mddev_unlock(mddev);
9395 }
9396}
9397EXPORT_SYMBOL(md_check_recovery);
9398
9399void md_reap_sync_thread(struct mddev *mddev)
9400{
9401 struct md_rdev *rdev;
9402 sector_t old_dev_sectors = mddev->dev_sectors;
9403 bool is_reshaped = false;
9404
9405
9406 md_unregister_thread(&mddev->sync_thread);
9407 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9408 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9409 mddev->degraded != mddev->raid_disks) {
9410
9411
9412 if (mddev->pers->spare_active(mddev)) {
9413 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9414 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9415 }
9416 }
9417 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9418 mddev->pers->finish_reshape) {
9419 mddev->pers->finish_reshape(mddev);
9420 if (mddev_is_clustered(mddev))
9421 is_reshaped = true;
9422 }
9423
9424
9425
9426
9427 if (!mddev->degraded)
9428 rdev_for_each(rdev, mddev)
9429 rdev->saved_raid_disk = -1;
9430
9431 md_update_sb(mddev, 1);
9432
9433
9434
9435 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9436 md_cluster_ops->resync_finish(mddev);
9437 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9438 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9439 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9440 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9441 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9442 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9443
9444
9445
9446
9447
9448 if (mddev_is_clustered(mddev) && is_reshaped
9449 && !test_bit(MD_CLOSING, &mddev->flags))
9450 md_cluster_ops->update_size(mddev, old_dev_sectors);
9451 wake_up(&resync_wait);
9452
9453 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9454 sysfs_notify_dirent_safe(mddev->sysfs_action);
9455 md_new_event(mddev);
9456 if (mddev->event_work.func)
9457 queue_work(md_misc_wq, &mddev->event_work);
9458}
9459EXPORT_SYMBOL(md_reap_sync_thread);
9460
9461void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9462{
9463 sysfs_notify_dirent_safe(rdev->sysfs_state);
9464 wait_event_timeout(rdev->blocked_wait,
9465 !test_bit(Blocked, &rdev->flags) &&
9466 !test_bit(BlockedBadBlocks, &rdev->flags),
9467 msecs_to_jiffies(5000));
9468 rdev_dec_pending(rdev, mddev);
9469}
9470EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9471
9472void md_finish_reshape(struct mddev *mddev)
9473{
9474
9475 struct md_rdev *rdev;
9476
9477 rdev_for_each(rdev, mddev) {
9478 if (rdev->data_offset > rdev->new_data_offset)
9479 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9480 else
9481 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9482 rdev->data_offset = rdev->new_data_offset;
9483 }
9484}
9485EXPORT_SYMBOL(md_finish_reshape);
9486
9487
9488
9489
9490int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9491 int is_new)
9492{
9493 struct mddev *mddev = rdev->mddev;
9494 int rv;
9495 if (is_new)
9496 s += rdev->new_data_offset;
9497 else
9498 s += rdev->data_offset;
9499 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9500 if (rv == 0) {
9501
9502 if (test_bit(ExternalBbl, &rdev->flags))
9503 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9504 sysfs_notify_dirent_safe(rdev->sysfs_state);
9505 set_mask_bits(&mddev->sb_flags, 0,
9506 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9507 md_wakeup_thread(rdev->mddev->thread);
9508 return 1;
9509 } else
9510 return 0;
9511}
9512EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9513
9514int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9515 int is_new)
9516{
9517 int rv;
9518 if (is_new)
9519 s += rdev->new_data_offset;
9520 else
9521 s += rdev->data_offset;
9522 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9523 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9524 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
9525 return rv;
9526}
9527EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9528
9529static int md_notify_reboot(struct notifier_block *this,
9530 unsigned long code, void *x)
9531{
9532 struct list_head *tmp;
9533 struct mddev *mddev;
9534 int need_delay = 0;
9535
9536 for_each_mddev(mddev, tmp) {
9537 if (mddev_trylock(mddev)) {
9538 if (mddev->pers)
9539 __md_stop_writes(mddev);
9540 if (mddev->persistent)
9541 mddev->safemode = 2;
9542 mddev_unlock(mddev);
9543 }
9544 need_delay = 1;
9545 }
9546
9547
9548
9549
9550
9551
9552 if (need_delay)
9553 mdelay(1000*1);
9554
9555 return NOTIFY_DONE;
9556}
9557
9558static struct notifier_block md_notifier = {
9559 .notifier_call = md_notify_reboot,
9560 .next = NULL,
9561 .priority = INT_MAX,
9562};
9563
9564static void md_geninit(void)
9565{
9566 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9567
9568 proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
9569}
9570
9571static int __init md_init(void)
9572{
9573 int ret = -ENOMEM;
9574
9575 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9576 if (!md_wq)
9577 goto err_wq;
9578
9579 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9580 if (!md_misc_wq)
9581 goto err_misc_wq;
9582
9583 md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
9584 if (!md_rdev_misc_wq)
9585 goto err_rdev_misc_wq;
9586
9587 ret = __register_blkdev(MD_MAJOR, "md", md_probe);
9588 if (ret < 0)
9589 goto err_md;
9590
9591 ret = __register_blkdev(0, "mdp", md_probe);
9592 if (ret < 0)
9593 goto err_mdp;
9594 mdp_major = ret;
9595
9596 register_reboot_notifier(&md_notifier);
9597 raid_table_header = register_sysctl_table(raid_root_table);
9598
9599 md_geninit();
9600 return 0;
9601
9602err_mdp:
9603 unregister_blkdev(MD_MAJOR, "md");
9604err_md:
9605 destroy_workqueue(md_rdev_misc_wq);
9606err_rdev_misc_wq:
9607 destroy_workqueue(md_misc_wq);
9608err_misc_wq:
9609 destroy_workqueue(md_wq);
9610err_wq:
9611 return ret;
9612}
9613
9614static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9615{
9616 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9617 struct md_rdev *rdev2, *tmp;
9618 int role, ret;
9619 char b[BDEVNAME_SIZE];
9620
9621
9622
9623
9624
9625 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9626 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9627 if (ret)
9628 pr_info("md-cluster: resize failed\n");
9629 else
9630 md_bitmap_update_sb(mddev->bitmap);
9631 }
9632
9633
9634 rdev_for_each_safe(rdev2, tmp, mddev) {
9635 if (test_bit(Faulty, &rdev2->flags))
9636 continue;
9637
9638
9639 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9640
9641 if (test_bit(Candidate, &rdev2->flags)) {
9642 if (role == 0xfffe) {
9643 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
9644 md_kick_rdev_from_array(rdev2);
9645 continue;
9646 }
9647 else
9648 clear_bit(Candidate, &rdev2->flags);
9649 }
9650
9651 if (role != rdev2->raid_disk) {
9652
9653
9654
9655 if (rdev2->raid_disk == -1 && role != 0xffff &&
9656 !(le32_to_cpu(sb->feature_map) &
9657 MD_FEATURE_RESHAPE_ACTIVE)) {
9658 rdev2->saved_raid_disk = role;
9659 ret = remove_and_add_spares(mddev, rdev2);
9660 pr_info("Activated spare: %s\n",
9661 bdevname(rdev2->bdev,b));
9662
9663
9664 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9665 md_wakeup_thread(mddev->thread);
9666 }
9667
9668
9669
9670
9671
9672 if ((role == 0xfffe) || (role == 0xfffd)) {
9673 md_error(mddev, rdev2);
9674 clear_bit(Blocked, &rdev2->flags);
9675 }
9676 }
9677 }
9678
9679 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9680 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9681 if (ret)
9682 pr_warn("md: updating array disks failed. %d\n", ret);
9683 }
9684
9685
9686
9687
9688
9689 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9690 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9691
9692
9693
9694
9695 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9696 if (mddev->pers->update_reshape_pos)
9697 mddev->pers->update_reshape_pos(mddev);
9698 if (mddev->pers->start_reshape)
9699 mddev->pers->start_reshape(mddev);
9700 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9701 mddev->reshape_position != MaxSector &&
9702 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9703
9704 mddev->reshape_position = MaxSector;
9705 if (mddev->pers->update_reshape_pos)
9706 mddev->pers->update_reshape_pos(mddev);
9707 }
9708
9709
9710 mddev->events = le64_to_cpu(sb->events);
9711}
9712
9713static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9714{
9715 int err;
9716 struct page *swapout = rdev->sb_page;
9717 struct mdp_superblock_1 *sb;
9718
9719
9720
9721
9722 rdev->sb_page = NULL;
9723 err = alloc_disk_sb(rdev);
9724 if (err == 0) {
9725 ClearPageUptodate(rdev->sb_page);
9726 rdev->sb_loaded = 0;
9727 err = super_types[mddev->major_version].
9728 load_super(rdev, NULL, mddev->minor_version);
9729 }
9730 if (err < 0) {
9731 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9732 __func__, __LINE__, rdev->desc_nr, err);
9733 if (rdev->sb_page)
9734 put_page(rdev->sb_page);
9735 rdev->sb_page = swapout;
9736 rdev->sb_loaded = 1;
9737 return err;
9738 }
9739
9740 sb = page_address(rdev->sb_page);
9741
9742
9743
9744
9745 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9746 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9747
9748
9749
9750
9751 if (rdev->recovery_offset == MaxSector &&
9752 !test_bit(In_sync, &rdev->flags) &&
9753 mddev->pers->spare_active(mddev))
9754 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9755
9756 put_page(swapout);
9757 return 0;
9758}
9759
9760void md_reload_sb(struct mddev *mddev, int nr)
9761{
9762 struct md_rdev *rdev;
9763 int err;
9764
9765
9766 rdev_for_each_rcu(rdev, mddev) {
9767 if (rdev->desc_nr == nr)
9768 break;
9769 }
9770
9771 if (!rdev || rdev->desc_nr != nr) {
9772 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9773 return;
9774 }
9775
9776 err = read_rdev(mddev, rdev);
9777 if (err < 0)
9778 return;
9779
9780 check_sb_changes(mddev, rdev);
9781
9782
9783 rdev_for_each_rcu(rdev, mddev) {
9784 if (!test_bit(Faulty, &rdev->flags))
9785 read_rdev(mddev, rdev);
9786 }
9787}
9788EXPORT_SYMBOL(md_reload_sb);
9789
9790#ifndef MODULE
9791
9792
9793
9794
9795
9796
9797static DEFINE_MUTEX(detected_devices_mutex);
9798static LIST_HEAD(all_detected_devices);
9799struct detected_devices_node {
9800 struct list_head list;
9801 dev_t dev;
9802};
9803
9804void md_autodetect_dev(dev_t dev)
9805{
9806 struct detected_devices_node *node_detected_dev;
9807
9808 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9809 if (node_detected_dev) {
9810 node_detected_dev->dev = dev;
9811 mutex_lock(&detected_devices_mutex);
9812 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9813 mutex_unlock(&detected_devices_mutex);
9814 }
9815}
9816
9817void md_autostart_arrays(int part)
9818{
9819 struct md_rdev *rdev;
9820 struct detected_devices_node *node_detected_dev;
9821 dev_t dev;
9822 int i_scanned, i_passed;
9823
9824 i_scanned = 0;
9825 i_passed = 0;
9826
9827 pr_info("md: Autodetecting RAID arrays.\n");
9828
9829 mutex_lock(&detected_devices_mutex);
9830 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9831 i_scanned++;
9832 node_detected_dev = list_entry(all_detected_devices.next,
9833 struct detected_devices_node, list);
9834 list_del(&node_detected_dev->list);
9835 dev = node_detected_dev->dev;
9836 kfree(node_detected_dev);
9837 mutex_unlock(&detected_devices_mutex);
9838 rdev = md_import_device(dev,0, 90);
9839 mutex_lock(&detected_devices_mutex);
9840 if (IS_ERR(rdev))
9841 continue;
9842
9843 if (test_bit(Faulty, &rdev->flags))
9844 continue;
9845
9846 set_bit(AutoDetected, &rdev->flags);
9847 list_add(&rdev->same_set, &pending_raid_disks);
9848 i_passed++;
9849 }
9850 mutex_unlock(&detected_devices_mutex);
9851
9852 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9853
9854 autorun_devices(part);
9855}
9856
9857#endif
9858
9859static __exit void md_exit(void)
9860{
9861 struct mddev *mddev;
9862 struct list_head *tmp;
9863 int delay = 1;
9864
9865 unregister_blkdev(MD_MAJOR,"md");
9866 unregister_blkdev(mdp_major, "mdp");
9867 unregister_reboot_notifier(&md_notifier);
9868 unregister_sysctl_table(raid_table_header);
9869
9870
9871
9872
9873 md_unloading = 1;
9874 while (waitqueue_active(&md_event_waiters)) {
9875
9876 wake_up(&md_event_waiters);
9877 msleep(delay);
9878 delay += delay;
9879 }
9880 remove_proc_entry("mdstat", NULL);
9881
9882 for_each_mddev(mddev, tmp) {
9883 export_array(mddev);
9884 mddev->ctime = 0;
9885 mddev->hold_active = 0;
9886
9887
9888
9889
9890
9891
9892 }
9893 destroy_workqueue(md_rdev_misc_wq);
9894 destroy_workqueue(md_misc_wq);
9895 destroy_workqueue(md_wq);
9896}
9897
/* md_init() is registered as a subsys initcall, md_exit() as the exit hook. */
subsys_initcall(md_init);
module_exit(md_exit);
9900
9901static int get_ro(char *buffer, const struct kernel_param *kp)
9902{
9903 return sprintf(buffer, "%d\n", start_readonly);
9904}
9905static int set_ro(const char *val, const struct kernel_param *kp)
9906{
9907 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9908}
9909
9910module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9911module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9912module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9913module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9914
9915MODULE_LICENSE("GPL");
9916MODULE_DESCRIPTION("MD RAID framework");
9917MODULE_ALIAS("md");
9918MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9919