/*
 * md.c : Multiple Devices driver for Linux
 */
40#include <linux/sched/mm.h>
41#include <linux/sched/signal.h>
42#include <linux/kthread.h>
43#include <linux/blkdev.h>
44#include <linux/badblocks.h>
45#include <linux/sysctl.h>
46#include <linux/seq_file.h>
47#include <linux/fs.h>
48#include <linux/poll.h>
49#include <linux/ctype.h>
50#include <linux/string.h>
51#include <linux/hdreg.h>
52#include <linux/proc_fs.h>
53#include <linux/random.h>
54#include <linux/module.h>
55#include <linux/reboot.h>
56#include <linux/file.h>
57#include <linux/compat.h>
58#include <linux/delay.h>
59#include <linux/raid/md_p.h>
60#include <linux/raid/md_u.h>
61#include <linux/raid/detect.h>
62#include <linux/slab.h>
63#include <linux/percpu-refcount.h>
64#include <linux/part_stat.h>
65
66#include <trace/events/block.h>
67#include "md.h"
68#include "md-bitmap.h"
69#include "md-cluster.h"
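
/*
 * pers_list is the list of registered md personalities; it is protected
 * by pers_lock.  pers_lock is also taken around md_wakeup_thread() in
 * mddev_unlock() so the thread cannot disappear once reconfig_mutex has
 * been dropped.
 */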
76static LIST_HEAD(pers_list);
77static DEFINE_SPINLOCK(pers_lock);
78
79static struct kobj_type md_ktype;
80
81struct md_cluster_operations *md_cluster_ops;
82EXPORT_SYMBOL(md_cluster_ops);
83static struct module *md_cluster_mod;
84
85static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
86static struct workqueue_struct *md_wq;
87static struct workqueue_struct *md_misc_wq;
88static struct workqueue_struct *md_rdev_misc_wq;
89
90static int remove_and_add_spares(struct mddev *mddev,
91 struct md_rdev *this);
92static void mddev_detach(struct mddev *mddev);
93
94
95
96
97
98
99#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
100
101#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
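
/*
 * Default resync/recovery speed limits, in KB/sec per device.  Resync is
 * throttled towards speed_limit_min while other I/O is active and may use
 * up to speed_limit_max when the array is idle.  Both are tunable through
 * /proc/sys/dev/raid/speed_limit_{min,max}; per-array values in
 * mddev->sync_speed_min/max override them (see speed_min()/speed_max()).
 */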
115static int sysctl_speed_limit_min = 1000;
116static int sysctl_speed_limit_max = 200000;
117static inline int speed_min(struct mddev *mddev)
118{
119 return mddev->sync_speed_min ?
120 mddev->sync_speed_min : sysctl_speed_limit_min;
121}
122
123static inline int speed_max(struct mddev *mddev)
124{
125 return mddev->sync_speed_max ?
126 mddev->sync_speed_max : sysctl_speed_limit_max;
127}
128
129static void rdev_uninit_serial(struct md_rdev *rdev)
130{
131 if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
132 return;
133
134 kvfree(rdev->serial);
135 rdev->serial = NULL;
136}
137
138static void rdevs_uninit_serial(struct mddev *mddev)
139{
140 struct md_rdev *rdev;
141
142 rdev_for_each(rdev, mddev)
143 rdev_uninit_serial(rdev);
144}
145
146static int rdev_init_serial(struct md_rdev *rdev)
147{
148
149 int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
150 struct serial_in_rdev *serial = NULL;
151
152 if (test_bit(CollisionCheck, &rdev->flags))
153 return 0;
154
155 serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
156 GFP_KERNEL);
157 if (!serial)
158 return -ENOMEM;
159
160 for (i = 0; i < serial_nums; i++) {
161 struct serial_in_rdev *serial_tmp = &serial[i];
162
163 spin_lock_init(&serial_tmp->serial_lock);
164 serial_tmp->serial_rb = RB_ROOT_CACHED;
165 init_waitqueue_head(&serial_tmp->serial_io_wait);
166 }
167
168 rdev->serial = serial;
169 set_bit(CollisionCheck, &rdev->flags);
170
171 return 0;
172}
173
174static int rdevs_init_serial(struct mddev *mddev)
175{
176 struct md_rdev *rdev;
177 int ret = 0;
178
179 rdev_for_each(rdev, mddev) {
180 ret = rdev_init_serial(rdev);
181 if (ret)
182 break;
183 }
184
185
186 if (ret && !mddev->serial_info_pool)
187 rdevs_uninit_serial(mddev);
188
189 return ret;
190}
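
/*
 * An rdev needs write serialization only when write-behind is enabled
 * (bitmap_info.max_write_behind > 0), the underlying queue is multi-queue,
 * and the device is flagged WriteMostly.
 */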
197static int rdev_need_serial(struct md_rdev *rdev)
198{
199 return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
200 rdev->bdev->bd_disk->queue->nr_hw_queues != 1 &&
201 test_bit(WriteMostly, &rdev->flags));
202}
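
/*
 * Init the serialization resources for one rdev (or for all rdevs when
 * rdev is NULL), allocate the shared serial_info mempool, and mark each
 * initialised rdev with CollisionCheck.
 */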
209void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
210 bool is_suspend)
211{
212 int ret = 0;
213
214 if (rdev && !rdev_need_serial(rdev) &&
215 !test_bit(CollisionCheck, &rdev->flags))
216 return;
217
218 if (!is_suspend)
219 mddev_suspend(mddev);
220
221 if (!rdev)
222 ret = rdevs_init_serial(mddev);
223 else
224 ret = rdev_init_serial(rdev);
225 if (ret)
226 goto abort;
227
228 if (mddev->serial_info_pool == NULL) {
229
230
231
232
233 mddev->serial_info_pool =
234 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
235 sizeof(struct serial_info));
236 if (!mddev->serial_info_pool) {
237 rdevs_uninit_serial(mddev);
238 pr_err("can't alloc memory pool for serialization\n");
239 }
240 }
241
242abort:
243 if (!is_suspend)
244 mddev_resume(mddev);
245}
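
/*
 * Free the serialization resources of rdev (or of every rdev that no
 * longer needs them when rdev is NULL), and destroy serial_info_pool
 * once no remaining device is still using it.
 */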
253void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
254 bool is_suspend)
255{
256 if (rdev && !test_bit(CollisionCheck, &rdev->flags))
257 return;
258
259 if (mddev->serial_info_pool) {
260 struct md_rdev *temp;
261 int num = 0;
262
263 if (!is_suspend)
264 mddev_suspend(mddev);
265 rdev_for_each(temp, mddev) {
266 if (!rdev) {
267 if (!mddev->serialize_policy ||
268 !rdev_need_serial(temp))
269 rdev_uninit_serial(temp);
270 else
271 num++;
272 } else if (temp != rdev &&
273 test_bit(CollisionCheck, &temp->flags))
274 num++;
275 }
276
277 if (rdev)
278 rdev_uninit_serial(rdev);
279
280 if (num)
281 pr_info("The mempool could be used by other devices\n");
282 else {
283 mempool_destroy(mddev->serial_info_pool);
284 mddev->serial_info_pool = NULL;
285 }
286 if (!is_suspend)
287 mddev_resume(mddev);
288 }
289}
290
291static struct ctl_table_header *raid_table_header;
292
293static struct ctl_table raid_table[] = {
294 {
295 .procname = "speed_limit_min",
296 .data = &sysctl_speed_limit_min,
297 .maxlen = sizeof(int),
298 .mode = S_IRUGO|S_IWUSR,
299 .proc_handler = proc_dointvec,
300 },
301 {
302 .procname = "speed_limit_max",
303 .data = &sysctl_speed_limit_max,
304 .maxlen = sizeof(int),
305 .mode = S_IRUGO|S_IWUSR,
306 .proc_handler = proc_dointvec,
307 },
308 { }
309};
310
311static struct ctl_table raid_dir_table[] = {
312 {
313 .procname = "raid",
314 .maxlen = 0,
315 .mode = S_IRUGO|S_IXUGO,
316 .child = raid_table,
317 },
318 { }
319};
320
321static struct ctl_table raid_root_table[] = {
322 {
323 .procname = "dev",
324 .maxlen = 0,
325 .mode = 0555,
326 .child = raid_dir_table,
327 },
328 { }
329};
330
331static int start_readonly;
332
333
334
335
336
337
338
339
340
341static bool create_on_open = true;
342
343
344
345
346
347
348
349
350
351
352
353static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
354static atomic_t md_event_count;
355void md_new_event(struct mddev *mddev)
356{
357 atomic_inc(&md_event_count);
358 wake_up(&md_event_waiters);
359}
360EXPORT_SYMBOL_GPL(md_new_event);
361
362
363
364
365
366static LIST_HEAD(all_mddevs);
367static DEFINE_SPINLOCK(all_mddevs_lock);
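
/*
 * Iterate over all mddevs in the system.  all_mddevs_lock is taken while
 * walking the list and a reference is held on the current mddev while the
 * loop body runs unlocked; code that breaks out of the loop must drop
 * that reference with mddev_put().
 */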
376#define for_each_mddev(_mddev,_tmp) \
377 \
378 for (({ spin_lock(&all_mddevs_lock); \
379 _tmp = all_mddevs.next; \
380 _mddev = NULL;}); \
381 ({ if (_tmp != &all_mddevs) \
382 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
383 spin_unlock(&all_mddevs_lock); \
384 if (_mddev) mddev_put(_mddev); \
385 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
386 _tmp != &all_mddevs;}); \
387 ({ spin_lock(&all_mddevs_lock); \
388 _tmp = _tmp->next;}) \
389 )
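
/*
 * Rather than calling directly into the personality's make_request
 * function, IO requests come here first so that we can check whether the
 * device is being suspended pending a reconfiguration.  active_io is
 * raised across the call into ->make_request so that mddev_suspend() can
 * wait for in-flight requests to drain.
 */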
398static bool is_suspended(struct mddev *mddev, struct bio *bio)
399{
400 if (mddev->suspended)
401 return true;
402 if (bio_data_dir(bio) != WRITE)
403 return false;
404 if (mddev->suspend_lo >= mddev->suspend_hi)
405 return false;
406 if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
407 return false;
408 if (bio_end_sector(bio) < mddev->suspend_lo)
409 return false;
410 return true;
411}
412
413void md_handle_request(struct mddev *mddev, struct bio *bio)
414{
415check_suspended:
416 rcu_read_lock();
417 if (is_suspended(mddev, bio)) {
418 DEFINE_WAIT(__wait);
419 for (;;) {
420 prepare_to_wait(&mddev->sb_wait, &__wait,
421 TASK_UNINTERRUPTIBLE);
422 if (!is_suspended(mddev, bio))
423 break;
424 rcu_read_unlock();
425 schedule();
426 rcu_read_lock();
427 }
428 finish_wait(&mddev->sb_wait, &__wait);
429 }
430 atomic_inc(&mddev->active_io);
431 rcu_read_unlock();
432
433 if (!mddev->pers->make_request(mddev, bio)) {
434 atomic_dec(&mddev->active_io);
435 wake_up(&mddev->sb_wait);
436 goto check_suspended;
437 }
438
439 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
440 wake_up(&mddev->sb_wait);
441}
442EXPORT_SYMBOL(md_handle_request);
443
444static blk_qc_t md_submit_bio(struct bio *bio)
445{
446 const int rw = bio_data_dir(bio);
447 struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
448
449 if (mddev == NULL || mddev->pers == NULL) {
450 bio_io_error(bio);
451 return BLK_QC_T_NONE;
452 }
453
454 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
455 bio_io_error(bio);
456 return BLK_QC_T_NONE;
457 }
458
459 blk_queue_split(&bio);
460
461 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
462 if (bio_sectors(bio) != 0)
463 bio->bi_status = BLK_STS_IOERR;
464 bio_endio(bio);
465 return BLK_QC_T_NONE;
466 }
467
468
469 bio->bi_opf &= ~REQ_NOMERGE;
470
471 md_handle_request(mddev, bio);
472
473 return BLK_QC_T_NONE;
474}
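
/*
 * mddev_suspend() makes sure no new requests are submitted to the array,
 * and that any requests already submitted are completely handled: it
 * waits for active_io to drop to zero and then quiesces the personality.
 */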
482void mddev_suspend(struct mddev *mddev)
483{
484 WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
485 lockdep_assert_held(&mddev->reconfig_mutex);
486 if (mddev->suspended++)
487 return;
488 synchronize_rcu();
489 wake_up(&mddev->sb_wait);
490 set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
491 smp_mb__after_atomic();
492 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
493 mddev->pers->quiesce(mddev, 1);
494 clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
495 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
496
497 del_timer_sync(&mddev->safemode_timer);
498
499 mddev->noio_flag = memalloc_noio_save();
500}
501EXPORT_SYMBOL_GPL(mddev_suspend);
502
503void mddev_resume(struct mddev *mddev)
504{
505
506 memalloc_noio_restore(mddev->noio_flag);
507 lockdep_assert_held(&mddev->reconfig_mutex);
508 if (--mddev->suspended)
509 return;
510 wake_up(&mddev->sb_wait);
511 mddev->pers->quiesce(mddev, 0);
512
513 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
514 md_wakeup_thread(mddev->thread);
515 md_wakeup_thread(mddev->sync_thread);
516}
517EXPORT_SYMBOL_GPL(mddev_resume);
518
519
520
521
522
523static void md_end_flush(struct bio *bio)
524{
525 struct md_rdev *rdev = bio->bi_private;
526 struct mddev *mddev = rdev->mddev;
527
528 rdev_dec_pending(rdev, mddev);
529
530 if (atomic_dec_and_test(&mddev->flush_pending)) {
531
532 queue_work(md_wq, &mddev->flush_work);
533 }
534 bio_put(bio);
535}
536
537static void md_submit_flush_data(struct work_struct *ws);
538
539static void submit_flushes(struct work_struct *ws)
540{
541 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
542 struct md_rdev *rdev;
543
544 mddev->start_flush = ktime_get_boottime();
545 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
546 atomic_set(&mddev->flush_pending, 1);
547 rcu_read_lock();
548 rdev_for_each_rcu(rdev, mddev)
549 if (rdev->raid_disk >= 0 &&
550 !test_bit(Faulty, &rdev->flags)) {
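 /* Take two references: one is dropped
  * when the request finishes, one after
  * we reclaim rcu_read_lock.
  */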
555 struct bio *bi;
556 atomic_inc(&rdev->nr_pending);
557 atomic_inc(&rdev->nr_pending);
558 rcu_read_unlock();
559 bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
560 bi->bi_end_io = md_end_flush;
561 bi->bi_private = rdev;
562 bio_set_dev(bi, rdev->bdev);
563 bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
564 atomic_inc(&mddev->flush_pending);
565 submit_bio(bi);
566 rcu_read_lock();
567 rdev_dec_pending(rdev, mddev);
568 }
569 rcu_read_unlock();
570 if (atomic_dec_and_test(&mddev->flush_pending))
571 queue_work(md_wq, &mddev->flush_work);
572}
573
574static void md_submit_flush_data(struct work_struct *ws)
575{
576 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
577 struct bio *bio = mddev->flush_bio;
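
 /*
  * flush_bio must be cleared before calling into md_handle_request(),
  * otherwise bios that already passed the suspend check could wait on
  * this flush while md_handle_request() below waits on them, deadlocking.
  */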
585 spin_lock_irq(&mddev->lock);
586 mddev->prev_flush_start = mddev->start_flush;
587 mddev->flush_bio = NULL;
588 spin_unlock_irq(&mddev->lock);
589 wake_up(&mddev->sb_wait);
590
591 if (bio->bi_iter.bi_size == 0) {
592
593 bio_endio(bio);
594 } else {
595 bio->bi_opf &= ~REQ_PREFLUSH;
596 md_handle_request(mddev, bio);
597 }
598}
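
/*
 * Manages consolidation of flushes and submitting any flushes needed for
 * a bio with REQ_PREFLUSH.  Returns true if the bio is finished or has
 * been deferred to the flush worker, and false if the caller should
 * submit it normally (with REQ_PREFLUSH cleared).
 */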
606bool md_flush_request(struct mddev *mddev, struct bio *bio)
607{
608 ktime_t req_start = ktime_get_boottime();
609 spin_lock_irq(&mddev->lock);
610
611
612
613 wait_event_lock_irq(mddev->sb_wait,
614 !mddev->flush_bio ||
615 ktime_before(req_start, mddev->prev_flush_start),
616 mddev->lock);
617
618 if (ktime_after(req_start, mddev->prev_flush_start)) {
619 WARN_ON(mddev->flush_bio);
620 mddev->flush_bio = bio;
621 bio = NULL;
622 }
623 spin_unlock_irq(&mddev->lock);
624
625 if (!bio) {
626 INIT_WORK(&mddev->flush_work, submit_flushes);
627 queue_work(md_wq, &mddev->flush_work);
628 } else {
629
630 if (bio->bi_iter.bi_size == 0)
631
632 bio_endio(bio);
633 else {
634 bio->bi_opf &= ~REQ_PREFLUSH;
635 return false;
636 }
637 }
638 return true;
639}
640EXPORT_SYMBOL(md_flush_request);
641
642static inline struct mddev *mddev_get(struct mddev *mddev)
643{
644 atomic_inc(&mddev->active);
645 return mddev;
646}
647
648static void mddev_delayed_delete(struct work_struct *ws);
649
650static void mddev_put(struct mddev *mddev)
651{
652 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
653 return;
654 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
655 mddev->ctime == 0 && !mddev->hold_active) {
656
657
658 list_del_init(&mddev->all_mddevs);
659
660
661
662
663
664
665 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
666 queue_work(md_misc_wq, &mddev->del_work);
667 }
668 spin_unlock(&all_mddevs_lock);
669}
670
671static void md_safemode_timeout(struct timer_list *t);
672
673void mddev_init(struct mddev *mddev)
674{
675 kobject_init(&mddev->kobj, &md_ktype);
676 mutex_init(&mddev->open_mutex);
677 mutex_init(&mddev->reconfig_mutex);
678 mutex_init(&mddev->bitmap_info.mutex);
679 INIT_LIST_HEAD(&mddev->disks);
680 INIT_LIST_HEAD(&mddev->all_mddevs);
681 timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
682 atomic_set(&mddev->active, 1);
683 atomic_set(&mddev->openers, 0);
684 atomic_set(&mddev->active_io, 0);
685 spin_lock_init(&mddev->lock);
686 atomic_set(&mddev->flush_pending, 0);
687 init_waitqueue_head(&mddev->sb_wait);
688 init_waitqueue_head(&mddev->recovery_wait);
689 mddev->reshape_position = MaxSector;
690 mddev->reshape_backwards = 0;
691 mddev->last_sync_action = "none";
692 mddev->resync_min = 0;
693 mddev->resync_max = MaxSector;
694 mddev->level = LEVEL_NONE;
695}
696EXPORT_SYMBOL_GPL(mddev_init);
697
698static struct mddev *mddev_find_locked(dev_t unit)
699{
700 struct mddev *mddev;
701
702 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
703 if (mddev->unit == unit)
704 return mddev;
705
706 return NULL;
707}
708
709
710static dev_t mddev_alloc_unit(void)
711{
712 static int next_minor = 512;
713 int start = next_minor;
 bool is_free = false;
715 dev_t dev = 0;
716
717 while (!is_free) {
718 dev = MKDEV(MD_MAJOR, next_minor);
719 next_minor++;
720 if (next_minor > MINORMASK)
721 next_minor = 0;
722 if (next_minor == start)
723 return 0;
724 is_free = !mddev_find_locked(dev);
725 }
726
727 return dev;
728}
729
730static struct mddev *mddev_find(dev_t unit)
731{
732 struct mddev *mddev;
733
734 if (MAJOR(unit) != MD_MAJOR)
735 unit &= ~((1 << MdpMinorShift) - 1);
736
737 spin_lock(&all_mddevs_lock);
738 mddev = mddev_find_locked(unit);
739 if (mddev)
740 mddev_get(mddev);
741 spin_unlock(&all_mddevs_lock);
742
743 return mddev;
744}
745
746static struct mddev *mddev_alloc(dev_t unit)
747{
748 struct mddev *new;
749 int error;
750
751 if (unit && MAJOR(unit) != MD_MAJOR)
752 unit &= ~((1 << MdpMinorShift) - 1);
753
754 new = kzalloc(sizeof(*new), GFP_KERNEL);
755 if (!new)
756 return ERR_PTR(-ENOMEM);
757 mddev_init(new);
758
759 spin_lock(&all_mddevs_lock);
760 if (unit) {
761 error = -EEXIST;
762 if (mddev_find_locked(unit))
763 goto out_free_new;
764 new->unit = unit;
765 if (MAJOR(unit) == MD_MAJOR)
766 new->md_minor = MINOR(unit);
767 else
768 new->md_minor = MINOR(unit) >> MdpMinorShift;
769 new->hold_active = UNTIL_IOCTL;
770 } else {
771 error = -ENODEV;
772 new->unit = mddev_alloc_unit();
773 if (!new->unit)
774 goto out_free_new;
775 new->md_minor = MINOR(new->unit);
776 new->hold_active = UNTIL_STOP;
777 }
778
779 list_add(&new->all_mddevs, &all_mddevs);
780 spin_unlock(&all_mddevs_lock);
781 return new;
782out_free_new:
783 spin_unlock(&all_mddevs_lock);
784 kfree(new);
785 return ERR_PTR(error);
786}
787
788static const struct attribute_group md_redundancy_group;
789
790void mddev_unlock(struct mddev *mddev)
791{
792 if (mddev->to_remove) {
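 /* These sysfs groups cannot be removed while reconfig_mutex is held,
  * because an access to the attribute files may itself try to take
  * reconfig_mutex while sysfs keeps the file pinned, which would
  * deadlock.  So set sysfs_active while the removal happens; anything
  * else that might change the sysfs namespace tests sysfs_active under
  * the same mutex and fails with -EBUSY while it is set.
  */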
805 const struct attribute_group *to_remove = mddev->to_remove;
806 mddev->to_remove = NULL;
807 mddev->sysfs_active = 1;
808 mutex_unlock(&mddev->reconfig_mutex);
809
810 if (mddev->kobj.sd) {
811 if (to_remove != &md_redundancy_group)
812 sysfs_remove_group(&mddev->kobj, to_remove);
813 if (mddev->pers == NULL ||
814 mddev->pers->sync_request == NULL) {
815 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
816 if (mddev->sysfs_action)
817 sysfs_put(mddev->sysfs_action);
818 if (mddev->sysfs_completed)
819 sysfs_put(mddev->sysfs_completed);
820 if (mddev->sysfs_degraded)
821 sysfs_put(mddev->sysfs_degraded);
822 mddev->sysfs_action = NULL;
823 mddev->sysfs_completed = NULL;
824 mddev->sysfs_degraded = NULL;
825 }
826 }
827 mddev->sysfs_active = 0;
828 } else
829 mutex_unlock(&mddev->reconfig_mutex);
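
 /* As we've dropped the mutex we need a spinlock to
  * make sure the thread doesn't disappear.
  */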
834 spin_lock(&pers_lock);
835 md_wakeup_thread(mddev->thread);
836 wake_up(&mddev->sb_wait);
837 spin_unlock(&pers_lock);
838}
839EXPORT_SYMBOL_GPL(mddev_unlock);
840
841struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
842{
843 struct md_rdev *rdev;
844
845 rdev_for_each_rcu(rdev, mddev)
846 if (rdev->desc_nr == nr)
847 return rdev;
848
849 return NULL;
850}
851EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
852
853static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
854{
855 struct md_rdev *rdev;
856
857 rdev_for_each(rdev, mddev)
858 if (rdev->bdev->bd_dev == dev)
859 return rdev;
860
861 return NULL;
862}
863
864struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
865{
866 struct md_rdev *rdev;
867
868 rdev_for_each_rcu(rdev, mddev)
869 if (rdev->bdev->bd_dev == dev)
870 return rdev;
871
872 return NULL;
873}
874EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
875
876static struct md_personality *find_pers(int level, char *clevel)
877{
878 struct md_personality *pers;
879 list_for_each_entry(pers, &pers_list, list) {
880 if (level != LEVEL_NONE && pers->level == level)
881 return pers;
882 if (strcmp(pers->name, clevel)==0)
883 return pers;
884 }
885 return NULL;
886}
887
888
889static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
890{
891 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
892 return MD_NEW_SIZE_SECTORS(num_sectors);
893}
894
895static int alloc_disk_sb(struct md_rdev *rdev)
896{
897 rdev->sb_page = alloc_page(GFP_KERNEL);
898 if (!rdev->sb_page)
899 return -ENOMEM;
900 return 0;
901}
902
903void md_rdev_clear(struct md_rdev *rdev)
904{
905 if (rdev->sb_page) {
906 put_page(rdev->sb_page);
907 rdev->sb_loaded = 0;
908 rdev->sb_page = NULL;
909 rdev->sb_start = 0;
910 rdev->sectors = 0;
911 }
912 if (rdev->bb_page) {
913 put_page(rdev->bb_page);
914 rdev->bb_page = NULL;
915 }
916 badblocks_exit(&rdev->badblocks);
917}
918EXPORT_SYMBOL_GPL(md_rdev_clear);
919
920static void super_written(struct bio *bio)
921{
922 struct md_rdev *rdev = bio->bi_private;
923 struct mddev *mddev = rdev->mddev;
924
925 if (bio->bi_status) {
926 pr_err("md: %s gets error=%d\n", __func__,
927 blk_status_to_errno(bio->bi_status));
928 md_error(mddev, rdev);
929 if (!test_bit(Faulty, &rdev->flags)
930 && (bio->bi_opf & MD_FAILFAST)) {
931 set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
932 set_bit(LastDev, &rdev->flags);
933 }
934 } else
935 clear_bit(LastDev, &rdev->flags);
936
937 if (atomic_dec_and_test(&mddev->pending_writes))
938 wake_up(&mddev->sb_wait);
939 rdev_dec_pending(rdev, mddev);
940 bio_put(bio);
941}
942
943void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
944 sector_t sector, int size, struct page *page)
{
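 /* Write the first size bytes of page to sector of rdev.
  * mddev->pending_writes is incremented before returning and dropped in
  * super_written() on completion, waking up sb_wait when it reaches zero.
  * If an error occurred, super_written() calls md_error().
  */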
952 struct bio *bio;
953 int ff = 0;
954
955 if (!page)
956 return;
957
958 if (test_bit(Faulty, &rdev->flags))
959 return;
960
961 bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
962
963 atomic_inc(&rdev->nr_pending);
964
965 bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev);
966 bio->bi_iter.bi_sector = sector;
967 bio_add_page(bio, page, size, 0);
968 bio->bi_private = rdev;
969 bio->bi_end_io = super_written;
970
971 if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
972 test_bit(FailFast, &rdev->flags) &&
973 !test_bit(LastDev, &rdev->flags))
974 ff = MD_FAILFAST;
975 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff;
976
977 atomic_inc(&mddev->pending_writes);
978 submit_bio(bio);
979}
980
981int md_super_wait(struct mddev *mddev)
982{
983
984 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
985 if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
986 return -EAGAIN;
987 return 0;
988}
989
990int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
991 struct page *page, int op, int op_flags, bool metadata_op)
992{
993 struct bio bio;
994 struct bio_vec bvec;
995
996 bio_init(&bio, &bvec, 1);
997
998 if (metadata_op && rdev->meta_bdev)
999 bio_set_dev(&bio, rdev->meta_bdev);
1000 else
1001 bio_set_dev(&bio, rdev->bdev);
1002 bio.bi_opf = op | op_flags;
1003 if (metadata_op)
1004 bio.bi_iter.bi_sector = sector + rdev->sb_start;
1005 else if (rdev->mddev->reshape_position != MaxSector &&
1006 (rdev->mddev->reshape_backwards ==
1007 (sector >= rdev->mddev->reshape_position)))
1008 bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
1009 else
1010 bio.bi_iter.bi_sector = sector + rdev->data_offset;
1011 bio_add_page(&bio, page, size, 0);
1012
1013 submit_bio_wait(&bio);
1014
1015 return !bio.bi_status;
1016}
1017EXPORT_SYMBOL_GPL(sync_page_io);
1018
1019static int read_disk_sb(struct md_rdev *rdev, int size)
1020{
1021 char b[BDEVNAME_SIZE];
1022
1023 if (rdev->sb_loaded)
1024 return 0;
1025
1026 if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
1027 goto fail;
1028 rdev->sb_loaded = 1;
1029 return 0;
1030
1031fail:
1032 pr_err("md: disabled device %s, could not read superblock.\n",
1033 bdevname(rdev->bdev,b));
1034 return -EINVAL;
1035}
1036
1037static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1038{
1039 return sb1->set_uuid0 == sb2->set_uuid0 &&
1040 sb1->set_uuid1 == sb2->set_uuid1 &&
1041 sb1->set_uuid2 == sb2->set_uuid2 &&
1042 sb1->set_uuid3 == sb2->set_uuid3;
1043}
1044
1045static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
1046{
1047 int ret;
1048 mdp_super_t *tmp1, *tmp2;
1049
1050 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
1051 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
1052
1053 if (!tmp1 || !tmp2) {
1054 ret = 0;
1055 goto abort;
1056 }
1057
1058 *tmp1 = *sb1;
1059 *tmp2 = *sb2;
1060
1061
1062
1063
1064 tmp1->nr_disks = 0;
1065 tmp2->nr_disks = 0;
1066
1067 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
1068abort:
1069 kfree(tmp1);
1070 kfree(tmp2);
1071 return ret;
1072}
1073
1074static u32 md_csum_fold(u32 csum)
1075{
1076 csum = (csum & 0xffff) + (csum >> 16);
1077 return (csum & 0xffff) + (csum >> 16);
1078}
1079
1080static unsigned int calc_sb_csum(mdp_super_t *sb)
1081{
1082 u64 newcsum = 0;
1083 u32 *sb32 = (u32*)sb;
1084 int i;
1085 unsigned int disk_csum, csum;
1086
1087 disk_csum = sb->sb_csum;
1088 sb->sb_csum = 0;
1089
1090 for (i = 0; i < MD_SB_BYTES/4 ; i++)
1091 newcsum += sb32[i];
1092 csum = (newcsum & 0xffffffff) + (newcsum>>32);
1093
1094#ifdef CONFIG_ALPHA
1095
1096
1097
1098
1099
1100
1101
1102
1103 sb->sb_csum = md_csum_fold(disk_csum);
1104#else
1105 sb->sb_csum = disk_csum;
1106#endif
1107 return csum;
1108}
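
/*
 * Handle superblock details.
 * We want to be able to handle multiple superblock formats
 * so we have a handler per format.
 * A handler knows how to load a superblock from a device and validate it
 * against the rest of the array (load_super/validate_super), write it
 * back out (sync_super), resize the data area (rdev_size_change) and
 * decide whether a new data offset is acceptable (allow_new_offset).
 */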
1140struct super_type {
1141 char *name;
1142 struct module *owner;
1143 int (*load_super)(struct md_rdev *rdev,
1144 struct md_rdev *refdev,
1145 int minor_version);
1146 int (*validate_super)(struct mddev *mddev,
1147 struct md_rdev *rdev);
1148 void (*sync_super)(struct mddev *mddev,
1149 struct md_rdev *rdev);
1150 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
1151 sector_t num_sectors);
1152 int (*allow_new_offset)(struct md_rdev *rdev,
1153 unsigned long long new_offset);
1154};
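
/*
 * Check that the given mddev has no bitmap.
 *
 * This function is called from the run method of all personalities that
 * do not support bitmaps.  It prints an error message and returns non-zero
 * if mddev has a bitmap.  Otherwise, it returns 0.
 */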
1164int md_check_no_bitmap(struct mddev *mddev)
1165{
1166 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1167 return 0;
1168 pr_warn("%s: bitmaps are not supported for %s\n",
1169 mdname(mddev), mddev->pers->name);
1170 return 1;
1171}
1172EXPORT_SYMBOL(md_check_no_bitmap);
1173
1174
1175
1176
1177static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1178{
1179 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1180 mdp_super_t *sb;
1181 int ret;
1182 bool spare_disk = true;
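
 /*
  * Calculate the position of the superblock (in 512 byte sectors);
  * it's at the end of the disk, and also happens to be a multiple of 4Kb.
  */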
1190 rdev->sb_start = calc_dev_sboffset(rdev);
1191
1192 ret = read_disk_sb(rdev, MD_SB_BYTES);
1193 if (ret)
1194 return ret;
1195
1196 ret = -EINVAL;
1197
1198 bdevname(rdev->bdev, b);
1199 sb = page_address(rdev->sb_page);
1200
1201 if (sb->md_magic != MD_SB_MAGIC) {
1202 pr_warn("md: invalid raid superblock magic on %s\n", b);
1203 goto abort;
1204 }
1205
1206 if (sb->major_version != 0 ||
1207 sb->minor_version < 90 ||
1208 sb->minor_version > 91) {
1209 pr_warn("Bad version number %d.%d on %s\n",
1210 sb->major_version, sb->minor_version, b);
1211 goto abort;
1212 }
1213
1214 if (sb->raid_disks <= 0)
1215 goto abort;
1216
1217 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1218 pr_warn("md: invalid superblock checksum on %s\n", b);
1219 goto abort;
1220 }
1221
1222 rdev->preferred_minor = sb->md_minor;
1223 rdev->data_offset = 0;
1224 rdev->new_data_offset = 0;
1225 rdev->sb_size = MD_SB_BYTES;
1226 rdev->badblocks.shift = -1;
1227
1228 if (sb->level == LEVEL_MULTIPATH)
1229 rdev->desc_nr = -1;
1230 else
1231 rdev->desc_nr = sb->this_disk.number;
1232
1233
1234 if (sb->level == LEVEL_MULTIPATH ||
1235 (rdev->desc_nr >= 0 &&
1236 rdev->desc_nr < MD_SB_DISKS &&
1237 sb->disks[rdev->desc_nr].state &
1238 ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
1239 spare_disk = false;
1240
1241 if (!refdev) {
1242 if (!spare_disk)
1243 ret = 1;
1244 else
1245 ret = 0;
1246 } else {
1247 __u64 ev1, ev2;
1248 mdp_super_t *refsb = page_address(refdev->sb_page);
1249 if (!md_uuid_equal(refsb, sb)) {
1250 pr_warn("md: %s has different UUID to %s\n",
1251 b, bdevname(refdev->bdev,b2));
1252 goto abort;
1253 }
1254 if (!md_sb_equal(refsb, sb)) {
1255 pr_warn("md: %s has same UUID but different superblock to %s\n",
1256 b, bdevname(refdev->bdev, b2));
1257 goto abort;
1258 }
1259 ev1 = md_event(sb);
1260 ev2 = md_event(refsb);
1261
1262 if (!spare_disk && ev1 > ev2)
1263 ret = 1;
1264 else
1265 ret = 0;
1266 }
1267 rdev->sectors = rdev->sb_start;
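
 /* Limit to 4TB as the 0.90 metadata cannot record more than that
  * (not needed for Linear and RAID0, as their metadata doesn't
  * record this size).
  */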
1272 if ((u64)rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1273 rdev->sectors = (sector_t)(2ULL << 32) - 2;
1274
1275 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1276
1277 ret = -EINVAL;
1278
1279 abort:
1280 return ret;
1281}
1282
1283
1284
1285
1286static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1287{
1288 mdp_disk_t *desc;
1289 mdp_super_t *sb = page_address(rdev->sb_page);
1290 __u64 ev1 = md_event(sb);
1291
1292 rdev->raid_disk = -1;
1293 clear_bit(Faulty, &rdev->flags);
1294 clear_bit(In_sync, &rdev->flags);
1295 clear_bit(Bitmap_sync, &rdev->flags);
1296 clear_bit(WriteMostly, &rdev->flags);
1297
1298 if (mddev->raid_disks == 0) {
1299 mddev->major_version = 0;
1300 mddev->minor_version = sb->minor_version;
1301 mddev->patch_version = sb->patch_version;
1302 mddev->external = 0;
1303 mddev->chunk_sectors = sb->chunk_size >> 9;
1304 mddev->ctime = sb->ctime;
1305 mddev->utime = sb->utime;
1306 mddev->level = sb->level;
1307 mddev->clevel[0] = 0;
1308 mddev->layout = sb->layout;
1309 mddev->raid_disks = sb->raid_disks;
1310 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1311 mddev->events = ev1;
1312 mddev->bitmap_info.offset = 0;
1313 mddev->bitmap_info.space = 0;
1314
1315 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1316 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1317 mddev->reshape_backwards = 0;
1318
1319 if (mddev->minor_version >= 91) {
1320 mddev->reshape_position = sb->reshape_position;
1321 mddev->delta_disks = sb->delta_disks;
1322 mddev->new_level = sb->new_level;
1323 mddev->new_layout = sb->new_layout;
1324 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1325 if (mddev->delta_disks < 0)
1326 mddev->reshape_backwards = 1;
1327 } else {
1328 mddev->reshape_position = MaxSector;
1329 mddev->delta_disks = 0;
1330 mddev->new_level = mddev->level;
1331 mddev->new_layout = mddev->layout;
1332 mddev->new_chunk_sectors = mddev->chunk_sectors;
1333 }
1334 if (mddev->level == 0)
1335 mddev->layout = -1;
1336
1337 if (sb->state & (1<<MD_SB_CLEAN))
1338 mddev->recovery_cp = MaxSector;
1339 else {
1340 if (sb->events_hi == sb->cp_events_hi &&
1341 sb->events_lo == sb->cp_events_lo) {
1342 mddev->recovery_cp = sb->recovery_cp;
1343 } else
1344 mddev->recovery_cp = 0;
1345 }
1346
1347 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1348 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1349 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1350 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1351
1352 mddev->max_disks = MD_SB_DISKS;
1353
1354 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1355 mddev->bitmap_info.file == NULL) {
1356 mddev->bitmap_info.offset =
1357 mddev->bitmap_info.default_offset;
1358 mddev->bitmap_info.space =
1359 mddev->bitmap_info.default_space;
1360 }
1361
1362 } else if (mddev->pers == NULL) {
1363
1364
1365 ++ev1;
1366 if (sb->disks[rdev->desc_nr].state & (
1367 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1368 if (ev1 < mddev->events)
1369 return -EINVAL;
1370 } else if (mddev->bitmap) {
1371
1372
1373
1374 if (ev1 < mddev->bitmap->events_cleared)
1375 return 0;
1376 if (ev1 < mddev->events)
1377 set_bit(Bitmap_sync, &rdev->flags);
1378 } else {
1379 if (ev1 < mddev->events)
1380
1381 return 0;
1382 }
1383
1384 if (mddev->level != LEVEL_MULTIPATH) {
1385 desc = sb->disks + rdev->desc_nr;
1386
1387 if (desc->state & (1<<MD_DISK_FAULTY))
1388 set_bit(Faulty, &rdev->flags);
 else if (desc->state & (1<<MD_DISK_SYNC)) {
1391 set_bit(In_sync, &rdev->flags);
1392 rdev->raid_disk = desc->raid_disk;
1393 rdev->saved_raid_disk = desc->raid_disk;
1394 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1395
1396
1397
1398 if (mddev->minor_version >= 91) {
1399 rdev->recovery_offset = 0;
1400 rdev->raid_disk = desc->raid_disk;
1401 }
1402 }
1403 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1404 set_bit(WriteMostly, &rdev->flags);
1405 if (desc->state & (1<<MD_DISK_FAILFAST))
1406 set_bit(FailFast, &rdev->flags);
1407 } else
1408 set_bit(In_sync, &rdev->flags);
1409 return 0;
1410}
1411
1412
1413
1414
1415static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1416{
1417 mdp_super_t *sb;
1418 struct md_rdev *rdev2;
1419 int next_spare = mddev->raid_disks;
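
 /* Make rdev->sb match the mddev data:
  *
  * 1/ zero out disks
  * 2/ add info for each disk, keeping track of the highest desc_nr
  *    (next_spare)
  * 3/ any empty disks < next_spare become removed
  *
  * disks[0] holds the MD_DISK_REMOVED template; spares take descriptor
  * numbers from next_spare upwards.
  */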
1431 int i;
1432 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1433
1434 rdev->sb_size = MD_SB_BYTES;
1435
1436 sb = page_address(rdev->sb_page);
1437
1438 memset(sb, 0, sizeof(*sb));
1439
1440 sb->md_magic = MD_SB_MAGIC;
1441 sb->major_version = mddev->major_version;
1442 sb->patch_version = mddev->patch_version;
1443 sb->gvalid_words = 0;
1444 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1445 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1446 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1447 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1448
1449 sb->ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
1450 sb->level = mddev->level;
1451 sb->size = mddev->dev_sectors / 2;
1452 sb->raid_disks = mddev->raid_disks;
1453 sb->md_minor = mddev->md_minor;
1454 sb->not_persistent = 0;
1455 sb->utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
1456 sb->state = 0;
1457 sb->events_hi = (mddev->events>>32);
1458 sb->events_lo = (u32)mddev->events;
1459
1460 if (mddev->reshape_position == MaxSector)
1461 sb->minor_version = 90;
1462 else {
1463 sb->minor_version = 91;
1464 sb->reshape_position = mddev->reshape_position;
1465 sb->new_level = mddev->new_level;
1466 sb->delta_disks = mddev->delta_disks;
1467 sb->new_layout = mddev->new_layout;
1468 sb->new_chunk = mddev->new_chunk_sectors << 9;
1469 }
1470 mddev->minor_version = sb->minor_version;
1471 if (mddev->in_sync)
1472 {
1473 sb->recovery_cp = mddev->recovery_cp;
1474 sb->cp_events_hi = (mddev->events>>32);
1475 sb->cp_events_lo = (u32)mddev->events;
1476 if (mddev->recovery_cp == MaxSector)
1477 sb->state = (1<< MD_SB_CLEAN);
1478 } else
1479 sb->recovery_cp = 0;
1480
1481 sb->layout = mddev->layout;
1482 sb->chunk_size = mddev->chunk_sectors << 9;
1483
1484 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1485 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1486
1487 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1488 rdev_for_each(rdev2, mddev) {
1489 mdp_disk_t *d;
1490 int desc_nr;
1491 int is_active = test_bit(In_sync, &rdev2->flags);
1492
1493 if (rdev2->raid_disk >= 0 &&
1494 sb->minor_version >= 91)
1495
1496
1497
1498
1499 is_active = 1;
1500 if (rdev2->raid_disk < 0 ||
1501 test_bit(Faulty, &rdev2->flags))
1502 is_active = 0;
1503 if (is_active)
1504 desc_nr = rdev2->raid_disk;
1505 else
1506 desc_nr = next_spare++;
1507 rdev2->desc_nr = desc_nr;
1508 d = &sb->disks[rdev2->desc_nr];
1509 nr_disks++;
1510 d->number = rdev2->desc_nr;
1511 d->major = MAJOR(rdev2->bdev->bd_dev);
1512 d->minor = MINOR(rdev2->bdev->bd_dev);
1513 if (is_active)
1514 d->raid_disk = rdev2->raid_disk;
1515 else
1516 d->raid_disk = rdev2->desc_nr;
1517 if (test_bit(Faulty, &rdev2->flags))
1518 d->state = (1<<MD_DISK_FAULTY);
1519 else if (is_active) {
1520 d->state = (1<<MD_DISK_ACTIVE);
1521 if (test_bit(In_sync, &rdev2->flags))
1522 d->state |= (1<<MD_DISK_SYNC);
1523 active++;
1524 working++;
1525 } else {
1526 d->state = 0;
1527 spare++;
1528 working++;
1529 }
1530 if (test_bit(WriteMostly, &rdev2->flags))
1531 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1532 if (test_bit(FailFast, &rdev2->flags))
1533 d->state |= (1<<MD_DISK_FAILFAST);
1534 }
1535
1536 for (i=0 ; i < mddev->raid_disks ; i++) {
1537 mdp_disk_t *d = &sb->disks[i];
1538 if (d->state == 0 && d->number == 0) {
1539 d->number = i;
1540 d->raid_disk = i;
1541 d->state = (1<<MD_DISK_REMOVED);
1542 d->state |= (1<<MD_DISK_FAULTY);
1543 failed++;
1544 }
1545 }
1546 sb->nr_disks = nr_disks;
1547 sb->active_disks = active;
1548 sb->working_disks = working;
1549 sb->failed_disks = failed;
1550 sb->spare_disks = spare;
1551
1552 sb->this_disk = sb->disks[rdev->desc_nr];
1553 sb->sb_csum = calc_sb_csum(sb);
1554}
1555
1556
1557
1558
1559static unsigned long long
1560super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1561{
1562 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1563 return 0;
1564 if (rdev->mddev->bitmap_info.offset)
1565 return 0;
1566 rdev->sb_start = calc_dev_sboffset(rdev);
1567 if (!num_sectors || num_sectors > rdev->sb_start)
1568 num_sectors = rdev->sb_start;
1569
1570
1571
1572 if ((u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1573 num_sectors = (sector_t)(2ULL << 32) - 2;
1574 do {
1575 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1576 rdev->sb_page);
1577 } while (md_super_wait(rdev->mddev) < 0);
1578 return num_sectors;
1579}
1580
1581static int
1582super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1583{
1584
1585 return new_offset == 0;
1586}
1587
1588
1589
1590
1591
1592static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
1593{
1594 __le32 disk_csum;
1595 u32 csum;
1596 unsigned long long newcsum;
1597 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1598 __le32 *isuper = (__le32*)sb;
1599
1600 disk_csum = sb->sb_csum;
1601 sb->sb_csum = 0;
1602 newcsum = 0;
1603 for (; size >= 4; size -= 4)
1604 newcsum += le32_to_cpu(*isuper++);
1605
1606 if (size == 2)
1607 newcsum += le16_to_cpu(*(__le16*) isuper);
1608
1609 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1610 sb->sb_csum = disk_csum;
1611 return cpu_to_le32(csum);
1612}
1613
1614static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1615{
1616 struct mdp_superblock_1 *sb;
1617 int ret;
1618 sector_t sb_start;
1619 sector_t sectors;
1620 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1621 int bmask;
1622 bool spare_disk = true;
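
 /*
  * Calculate the position of the superblock in 512 byte sectors.
  * It is always aligned to a 4K boundary and
  * depending on minor_version, it can be:
  * 0: At least 8K, but less than 12K, from end of device
  * 1: At start of device
  * 2: 4K from start of device.
  */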
1632 switch(minor_version) {
1633 case 0:
1634 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1635 sb_start -= 8*2;
1636 sb_start &= ~(sector_t)(4*2-1);
1637 break;
1638 case 1:
1639 sb_start = 0;
1640 break;
1641 case 2:
1642 sb_start = 8;
1643 break;
1644 default:
1645 return -EINVAL;
1646 }
1647 rdev->sb_start = sb_start;
1648
1649
1650
1651
1652 ret = read_disk_sb(rdev, 4096);
1653 if (ret) return ret;
1654
1655 sb = page_address(rdev->sb_page);
1656
1657 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1658 sb->major_version != cpu_to_le32(1) ||
1659 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1660 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1661 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1662 return -EINVAL;
1663
1664 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1665 pr_warn("md: invalid superblock checksum on %s\n",
1666 bdevname(rdev->bdev,b));
1667 return -EINVAL;
1668 }
1669 if (le64_to_cpu(sb->data_size) < 10) {
1670 pr_warn("md: data_size too small on %s\n",
1671 bdevname(rdev->bdev,b));
1672 return -EINVAL;
1673 }
1674 if (sb->pad0 ||
1675 sb->pad3[0] ||
1676 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1677
1678 return -EINVAL;
1679
1680 rdev->preferred_minor = 0xffff;
1681 rdev->data_offset = le64_to_cpu(sb->data_offset);
1682 rdev->new_data_offset = rdev->data_offset;
1683 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1684 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1685 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1686 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1687
1688 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1689 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1690 if (rdev->sb_size & bmask)
1691 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1692
1693 if (minor_version
1694 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1695 return -EINVAL;
1696 if (minor_version
1697 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1698 return -EINVAL;
1699
1700 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1701 rdev->desc_nr = -1;
1702 else
1703 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1704
1705 if (!rdev->bb_page) {
1706 rdev->bb_page = alloc_page(GFP_KERNEL);
1707 if (!rdev->bb_page)
1708 return -ENOMEM;
1709 }
1710 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1711 rdev->badblocks.count == 0) {
1712
1713
1714
1715 s32 offset;
1716 sector_t bb_sector;
1717 __le64 *bbp;
1718 int i;
1719 int sectors = le16_to_cpu(sb->bblog_size);
1720 if (sectors > (PAGE_SIZE / 512))
1721 return -EINVAL;
1722 offset = le32_to_cpu(sb->bblog_offset);
1723 if (offset == 0)
1724 return -EINVAL;
1725 bb_sector = (long long)offset;
1726 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1727 rdev->bb_page, REQ_OP_READ, 0, true))
1728 return -EIO;
1729 bbp = (__le64 *)page_address(rdev->bb_page);
1730 rdev->badblocks.shift = sb->bblog_shift;
1731 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1732 u64 bb = le64_to_cpu(*bbp);
1733 int count = bb & (0x3ff);
1734 u64 sector = bb >> 10;
1735 sector <<= sb->bblog_shift;
1736 count <<= sb->bblog_shift;
1737 if (bb + 1 == 0)
1738 break;
1739 if (badblocks_set(&rdev->badblocks, sector, count, 1))
1740 return -EINVAL;
1741 }
1742 } else if (sb->bblog_offset != 0)
1743 rdev->badblocks.shift = 0;
1744
1745 if ((le32_to_cpu(sb->feature_map) &
1746 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS))) {
1747 rdev->ppl.offset = (__s16)le16_to_cpu(sb->ppl.offset);
1748 rdev->ppl.size = le16_to_cpu(sb->ppl.size);
1749 rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
1750 }
1751
1752 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
1753 sb->level != 0)
1754 return -EINVAL;
1755
1756
1757 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
1758 (rdev->desc_nr >= 0 &&
1759 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1760 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1761 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
1762 spare_disk = false;
1763
1764 if (!refdev) {
1765 if (!spare_disk)
1766 ret = 1;
1767 else
1768 ret = 0;
1769 } else {
1770 __u64 ev1, ev2;
1771 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1772
1773 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1774 sb->level != refsb->level ||
1775 sb->layout != refsb->layout ||
1776 sb->chunksize != refsb->chunksize) {
1777 pr_warn("md: %s has strangely different superblock to %s\n",
1778 bdevname(rdev->bdev,b),
1779 bdevname(refdev->bdev,b2));
1780 return -EINVAL;
1781 }
1782 ev1 = le64_to_cpu(sb->events);
1783 ev2 = le64_to_cpu(refsb->events);
1784
1785 if (!spare_disk && ev1 > ev2)
1786 ret = 1;
1787 else
1788 ret = 0;
1789 }
1790 if (minor_version) {
1791 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1792 sectors -= rdev->data_offset;
1793 } else
1794 sectors = rdev->sb_start;
1795 if (sectors < le64_to_cpu(sb->data_size))
1796 return -EINVAL;
1797 rdev->sectors = le64_to_cpu(sb->data_size);
1798 return ret;
1799}
1800
1801static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1802{
1803 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1804 __u64 ev1 = le64_to_cpu(sb->events);
1805
1806 rdev->raid_disk = -1;
1807 clear_bit(Faulty, &rdev->flags);
1808 clear_bit(In_sync, &rdev->flags);
1809 clear_bit(Bitmap_sync, &rdev->flags);
1810 clear_bit(WriteMostly, &rdev->flags);
1811
1812 if (mddev->raid_disks == 0) {
1813 mddev->major_version = 1;
1814 mddev->patch_version = 0;
1815 mddev->external = 0;
1816 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1817 mddev->ctime = le64_to_cpu(sb->ctime);
1818 mddev->utime = le64_to_cpu(sb->utime);
1819 mddev->level = le32_to_cpu(sb->level);
1820 mddev->clevel[0] = 0;
1821 mddev->layout = le32_to_cpu(sb->layout);
1822 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1823 mddev->dev_sectors = le64_to_cpu(sb->size);
1824 mddev->events = ev1;
1825 mddev->bitmap_info.offset = 0;
1826 mddev->bitmap_info.space = 0;
1827
1828
1829
1830 mddev->bitmap_info.default_offset = 1024 >> 9;
1831 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1832 mddev->reshape_backwards = 0;
1833
1834 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1835 memcpy(mddev->uuid, sb->set_uuid, 16);
1836
1837 mddev->max_disks = (4096-256)/2;
1838
1839 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1840 mddev->bitmap_info.file == NULL) {
1841 mddev->bitmap_info.offset =
1842 (__s32)le32_to_cpu(sb->bitmap_offset);
1843
1844
1845
1846
1847
1848 if (mddev->minor_version > 0)
1849 mddev->bitmap_info.space = 0;
1850 else if (mddev->bitmap_info.offset > 0)
1851 mddev->bitmap_info.space =
1852 8 - mddev->bitmap_info.offset;
1853 else
1854 mddev->bitmap_info.space =
1855 -mddev->bitmap_info.offset;
1856 }
1857
1858 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1859 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1860 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1861 mddev->new_level = le32_to_cpu(sb->new_level);
1862 mddev->new_layout = le32_to_cpu(sb->new_layout);
1863 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1864 if (mddev->delta_disks < 0 ||
1865 (mddev->delta_disks == 0 &&
1866 (le32_to_cpu(sb->feature_map)
1867 & MD_FEATURE_RESHAPE_BACKWARDS)))
1868 mddev->reshape_backwards = 1;
1869 } else {
1870 mddev->reshape_position = MaxSector;
1871 mddev->delta_disks = 0;
1872 mddev->new_level = mddev->level;
1873 mddev->new_layout = mddev->layout;
1874 mddev->new_chunk_sectors = mddev->chunk_sectors;
1875 }
1876
1877 if (mddev->level == 0 &&
1878 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
1879 mddev->layout = -1;
1880
1881 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
1882 set_bit(MD_HAS_JOURNAL, &mddev->flags);
1883
1884 if (le32_to_cpu(sb->feature_map) &
1885 (MD_FEATURE_PPL | MD_FEATURE_MULTIPLE_PPLS)) {
1886 if (le32_to_cpu(sb->feature_map) &
1887 (MD_FEATURE_BITMAP_OFFSET | MD_FEATURE_JOURNAL))
1888 return -EINVAL;
1889 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_PPL) &&
1890 (le32_to_cpu(sb->feature_map) &
1891 MD_FEATURE_MULTIPLE_PPLS))
1892 return -EINVAL;
1893 set_bit(MD_HAS_PPL, &mddev->flags);
1894 }
1895 } else if (mddev->pers == NULL) {
1896
1897
1898 ++ev1;
1899 if (rdev->desc_nr >= 0 &&
1900 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1901 (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
1902 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
1903 if (ev1 < mddev->events)
1904 return -EINVAL;
1905 } else if (mddev->bitmap) {
1906
1907
1908
1909 if (ev1 < mddev->bitmap->events_cleared)
1910 return 0;
1911 if (ev1 < mddev->events)
1912 set_bit(Bitmap_sync, &rdev->flags);
1913 } else {
1914 if (ev1 < mddev->events)
1915
1916 return 0;
1917 }
1918 if (mddev->level != LEVEL_MULTIPATH) {
1919 int role;
1920 if (rdev->desc_nr < 0 ||
1921 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1922 role = MD_DISK_ROLE_SPARE;
1923 rdev->desc_nr = -1;
1924 } else
1925 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1926 switch(role) {
1927 case MD_DISK_ROLE_SPARE:
1928 break;
1929 case MD_DISK_ROLE_FAULTY:
1930 set_bit(Faulty, &rdev->flags);
1931 break;
1932 case MD_DISK_ROLE_JOURNAL:
1933 if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
1934
1935 pr_warn("md: journal device provided without journal feature, ignoring the device\n");
1936 return -EINVAL;
1937 }
1938 set_bit(Journal, &rdev->flags);
1939 rdev->journal_tail = le64_to_cpu(sb->journal_tail);
1940 rdev->raid_disk = 0;
1941 break;
1942 default:
1943 rdev->saved_raid_disk = role;
1944 if ((le32_to_cpu(sb->feature_map) &
1945 MD_FEATURE_RECOVERY_OFFSET)) {
1946 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1947 if (!(le32_to_cpu(sb->feature_map) &
1948 MD_FEATURE_RECOVERY_BITMAP))
1949 rdev->saved_raid_disk = -1;
1950 } else {
1951
1952
1953
1954
1955 if (!test_bit(MD_RECOVERY_FROZEN,
1956 &mddev->recovery))
1957 set_bit(In_sync, &rdev->flags);
1958 }
1959 rdev->raid_disk = role;
1960 break;
1961 }
1962 if (sb->devflags & WriteMostly1)
1963 set_bit(WriteMostly, &rdev->flags);
1964 if (sb->devflags & FailFast1)
1965 set_bit(FailFast, &rdev->flags);
1966 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1967 set_bit(Replacement, &rdev->flags);
1968 } else
1969 set_bit(In_sync, &rdev->flags);
1970
1971 return 0;
1972}
1973
1974static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1975{
1976 struct mdp_superblock_1 *sb;
1977 struct md_rdev *rdev2;
1978 int max_dev, i;
1979
1980
1981 sb = page_address(rdev->sb_page);
1982
1983 sb->feature_map = 0;
1984 sb->pad0 = 0;
1985 sb->recovery_offset = cpu_to_le64(0);
1986 memset(sb->pad3, 0, sizeof(sb->pad3));
1987
1988 sb->utime = cpu_to_le64((__u64)mddev->utime);
1989 sb->events = cpu_to_le64(mddev->events);
1990 if (mddev->in_sync)
1991 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1992 else if (test_bit(MD_JOURNAL_CLEAN, &mddev->flags))
1993 sb->resync_offset = cpu_to_le64(MaxSector);
1994 else
1995 sb->resync_offset = cpu_to_le64(0);
1996
1997 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1998
1999 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
2000 sb->size = cpu_to_le64(mddev->dev_sectors);
2001 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
2002 sb->level = cpu_to_le32(mddev->level);
2003 sb->layout = cpu_to_le32(mddev->layout);
2004 if (test_bit(FailFast, &rdev->flags))
2005 sb->devflags |= FailFast1;
2006 else
2007 sb->devflags &= ~FailFast1;
2008
2009 if (test_bit(WriteMostly, &rdev->flags))
2010 sb->devflags |= WriteMostly1;
2011 else
2012 sb->devflags &= ~WriteMostly1;
2013 sb->data_offset = cpu_to_le64(rdev->data_offset);
2014 sb->data_size = cpu_to_le64(rdev->sectors);
2015
2016 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
2017 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
2018 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
2019 }
2020
2021 if (rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags) &&
2022 !test_bit(In_sync, &rdev->flags)) {
2023 sb->feature_map |=
2024 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
2025 sb->recovery_offset =
2026 cpu_to_le64(rdev->recovery_offset);
2027 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
2028 sb->feature_map |=
2029 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
2030 }
2031
2032 if (test_bit(Journal, &rdev->flags))
2033 sb->journal_tail = cpu_to_le64(rdev->journal_tail);
2034 if (test_bit(Replacement, &rdev->flags))
2035 sb->feature_map |=
2036 cpu_to_le32(MD_FEATURE_REPLACEMENT);
2037
2038 if (mddev->reshape_position != MaxSector) {
2039 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
2040 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
2041 sb->new_layout = cpu_to_le32(mddev->new_layout);
2042 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
2043 sb->new_level = cpu_to_le32(mddev->new_level);
2044 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
2045 if (mddev->delta_disks == 0 &&
2046 mddev->reshape_backwards)
2047 sb->feature_map
2048 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
2049 if (rdev->new_data_offset != rdev->data_offset) {
2050 sb->feature_map
2051 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
2052 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
2053 - rdev->data_offset));
2054 }
2055 }
2056
2057 if (mddev_is_clustered(mddev))
2058 sb->feature_map |= cpu_to_le32(MD_FEATURE_CLUSTERED);
2059
2060 if (rdev->badblocks.count == 0)
2061 ;
2062 else if (sb->bblog_offset == 0)
2063
2064 md_error(mddev, rdev);
2065 else {
2066 struct badblocks *bb = &rdev->badblocks;
2067 __le64 *bbp = (__le64 *)page_address(rdev->bb_page);
2068 u64 *p = bb->page;
2069 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
2070 if (bb->changed) {
2071 unsigned seq;
2072
2073retry:
2074 seq = read_seqbegin(&bb->lock);
2075
2076 memset(bbp, 0xff, PAGE_SIZE);
2077
2078 for (i = 0 ; i < bb->count ; i++) {
2079 u64 internal_bb = p[i];
2080 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
2081 | BB_LEN(internal_bb));
2082 bbp[i] = cpu_to_le64(store_bb);
2083 }
2084 bb->changed = 0;
2085 if (read_seqretry(&bb->lock, seq))
2086 goto retry;
2087
2088 bb->sector = (rdev->sb_start +
2089 (int)le32_to_cpu(sb->bblog_offset));
2090 bb->size = le16_to_cpu(sb->bblog_size);
2091 }
2092 }
2093
2094 max_dev = 0;
2095 rdev_for_each(rdev2, mddev)
2096 if (rdev2->desc_nr+1 > max_dev)
2097 max_dev = rdev2->desc_nr+1;
2098
2099 if (max_dev > le32_to_cpu(sb->max_dev)) {
2100 int bmask;
2101 sb->max_dev = cpu_to_le32(max_dev);
2102 rdev->sb_size = max_dev * 2 + 256;
2103 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
2104 if (rdev->sb_size & bmask)
2105 rdev->sb_size = (rdev->sb_size | bmask) + 1;
2106 } else
2107 max_dev = le32_to_cpu(sb->max_dev);
2108
2109 for (i=0; i<max_dev;i++)
2110 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2111
2112 if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
2113 sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
2114
2115 if (test_bit(MD_HAS_PPL, &mddev->flags)) {
2116 if (test_bit(MD_HAS_MULTIPLE_PPLS, &mddev->flags))
2117 sb->feature_map |=
2118 cpu_to_le32(MD_FEATURE_MULTIPLE_PPLS);
2119 else
2120 sb->feature_map |= cpu_to_le32(MD_FEATURE_PPL);
2121 sb->ppl.offset = cpu_to_le16(rdev->ppl.offset);
2122 sb->ppl.size = cpu_to_le16(rdev->ppl.size);
2123 }
2124
2125 rdev_for_each(rdev2, mddev) {
2126 i = rdev2->desc_nr;
2127 if (test_bit(Faulty, &rdev2->flags))
2128 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
2129 else if (test_bit(In_sync, &rdev2->flags))
2130 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2131 else if (test_bit(Journal, &rdev2->flags))
2132 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_JOURNAL);
2133 else if (rdev2->raid_disk >= 0)
2134 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
2135 else
2136 sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
2137 }
2138
2139 sb->sb_csum = calc_sb_1_csum(sb);
2140}
2141
2142static sector_t super_1_choose_bm_space(sector_t dev_size)
2143{
2144 sector_t bm_space;
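
 /* Reserve 128K for the bitmap if the device is bigger than 200GiB,
  * 64K if bigger than 8GiB, otherwise 4K (or nothing for tiny devices).
  */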
2149 if (dev_size < 64*2)
2150 bm_space = 0;
2151 else if (dev_size - 64*2 >= 200*1024*1024*2)
2152 bm_space = 128*2;
2153 else if (dev_size - 4*2 > 8*1024*1024*2)
2154 bm_space = 64*2;
2155 else
2156 bm_space = 4*2;
2157 return bm_space;
2158}
2159
2160static unsigned long long
2161super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
2162{
2163 struct mdp_superblock_1 *sb;
2164 sector_t max_sectors;
2165 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
2166 return 0;
2167 if (rdev->data_offset != rdev->new_data_offset)
2168 return 0;
2169 if (rdev->sb_start < rdev->data_offset) {
2170
2171 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
2172 max_sectors -= rdev->data_offset;
2173 if (!num_sectors || num_sectors > max_sectors)
2174 num_sectors = max_sectors;
2175 } else if (rdev->mddev->bitmap_info.offset) {
2176
2177 return 0;
2178 } else {
2179
2180 sector_t sb_start, bm_space;
2181 sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
2182
2183
2184 sb_start = dev_size - 8*2;
2185 sb_start &= ~(sector_t)(4*2 - 1);
2186
2187 bm_space = super_1_choose_bm_space(dev_size);
2188
2189
2190
2191
2192 max_sectors = sb_start - bm_space - 4*2;
2193
2194 if (!num_sectors || num_sectors > max_sectors)
2195 num_sectors = max_sectors;
2196 }
2197 sb = page_address(rdev->sb_page);
2198 sb->data_size = cpu_to_le64(num_sectors);
2199 sb->super_offset = cpu_to_le64(rdev->sb_start);
2200 sb->sb_csum = calc_sb_1_csum(sb);
2201 do {
2202 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
2203 rdev->sb_page);
2204 } while (md_super_wait(rdev->mddev) < 0);
2205 return num_sectors;
2206
2207}
2208
2209static int
2210super_1_allow_new_offset(struct md_rdev *rdev,
2211 unsigned long long new_offset)
2212{
2213
2214 struct bitmap *bitmap;
2215 if (new_offset >= rdev->data_offset)
2216 return 1;
2217
2218
2219
2220 if (rdev->mddev->minor_version == 0)
2221 return 1;
2222
2223
2224
2225
2226
2227
2228
2229 if (rdev->sb_start + (32+4)*2 > new_offset)
2230 return 0;
2231 bitmap = rdev->mddev->bitmap;
2232 if (bitmap && !rdev->mddev->bitmap_info.file &&
2233 rdev->sb_start + rdev->mddev->bitmap_info.offset +
2234 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
2235 return 0;
2236 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
2237 return 0;
2238
2239 return 1;
2240}
2241
2242static struct super_type super_types[] = {
2243 [0] = {
2244 .name = "0.90.0",
2245 .owner = THIS_MODULE,
2246 .load_super = super_90_load,
2247 .validate_super = super_90_validate,
2248 .sync_super = super_90_sync,
2249 .rdev_size_change = super_90_rdev_size_change,
2250 .allow_new_offset = super_90_allow_new_offset,
2251 },
2252 [1] = {
2253 .name = "md-1",
2254 .owner = THIS_MODULE,
2255 .load_super = super_1_load,
2256 .validate_super = super_1_validate,
2257 .sync_super = super_1_sync,
2258 .rdev_size_change = super_1_rdev_size_change,
2259 .allow_new_offset = super_1_allow_new_offset,
2260 },
2261};
2262
2263static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
2264{
2265 if (mddev->sync_super) {
2266 mddev->sync_super(mddev, rdev);
2267 return;
2268 }
2269
2270 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
2271
2272 super_types[mddev->major_version].sync_super(mddev, rdev);
2273}
2274
2275static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
2276{
2277 struct md_rdev *rdev, *rdev2;
2278
2279 rcu_read_lock();
2280 rdev_for_each_rcu(rdev, mddev1) {
2281 if (test_bit(Faulty, &rdev->flags) ||
2282 test_bit(Journal, &rdev->flags) ||
2283 rdev->raid_disk == -1)
2284 continue;
2285 rdev_for_each_rcu(rdev2, mddev2) {
2286 if (test_bit(Faulty, &rdev2->flags) ||
2287 test_bit(Journal, &rdev2->flags) ||
2288 rdev2->raid_disk == -1)
2289 continue;
2290 if (rdev->bdev->bd_disk == rdev2->bdev->bd_disk) {
2291 rcu_read_unlock();
2292 return 1;
2293 }
2294 }
2295 }
2296 rcu_read_unlock();
2297 return 0;
2298}
2299
2300static LIST_HEAD(pending_raid_disks);
2301
2302 /*
2303  * Try to register data integrity profile for an mddev
2304  *
2305  * This is called when an array is started and after a disk has been kicked
2306  * from the array. It only succeeds if all working and active component devices
2307  * are integrity capable with matching profiles.
2308  */
2309int md_integrity_register(struct mddev *mddev)
2310{
2311 struct md_rdev *rdev, *reference = NULL;
2312
2313 if (list_empty(&mddev->disks))
2314 return 0;
2315 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2316 return 0;
2317 rdev_for_each(rdev, mddev) {
2318 		/* skip spares and non-functional disks */
2319 if (test_bit(Faulty, &rdev->flags))
2320 continue;
2321 if (rdev->raid_disk < 0)
2322 continue;
2323 if (!reference) {
2324 			/* Use the first rdev as the reference */
2325 reference = rdev;
2326 continue;
2327 }
2328 		/* does this rdev's profile match the reference profile? */
2329 if (blk_integrity_compare(reference->bdev->bd_disk,
2330 rdev->bdev->bd_disk) < 0)
2331 return -EINVAL;
2332 }
2333 if (!reference || !bdev_get_integrity(reference->bdev))
2334 return 0;
2335
2336 	/* All component devices are integrity capable and have matching
2337 	 * profiles, so register the common profile for the md device.
2338 	 */
2339 blk_integrity_register(mddev->gendisk,
2340 bdev_get_integrity(reference->bdev));
2341
2342 pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
2343 if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
2344 (mddev->level != 1 && mddev->level != 10 &&
2345 bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
2346 		/*
2347 		 * No need to handle the failure of bioset_integrity_create
2348 		 * here, because this function is called from md_run() ->
2349 		 * pers->run(), and md_run() calls bioset_exit() ->
2350 		 * bioset_integrity_free() in the failure case.
2351 		 */
2352 pr_err("md: failed to create integrity pool for %s\n",
2353 mdname(mddev));
2354 return -EINVAL;
2355 }
2356 return 0;
2357}
2358EXPORT_SYMBOL(md_integrity_register);
2359
2360 /*
2361  * Attempt to add an rdev, but only if it is consistent with the current
2362  * integrity profile
2363  */
2364int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2365{
2366 struct blk_integrity *bi_mddev;
2367 char name[BDEVNAME_SIZE];
2368
2369 if (!mddev->gendisk)
2370 return 0;
2371
2372 bi_mddev = blk_get_integrity(mddev->gendisk);
2373
2374 if (!bi_mddev)
2375 return 0;
2376
2377 if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
2378 pr_err("%s: incompatible integrity profile for %s\n",
2379 mdname(mddev), bdevname(rdev->bdev, name));
2380 return -ENXIO;
2381 }
2382
2383 return 0;
2384}
2385EXPORT_SYMBOL(md_integrity_add_rdev);
2386
2387static bool rdev_read_only(struct md_rdev *rdev)
2388{
2389 return bdev_read_only(rdev->bdev) ||
2390 (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
2391}
2392
2393static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
2394{
2395 char b[BDEVNAME_SIZE];
2396 int err;
2397
2398 	/* prevent duplicates */
2399 if (find_rdev(mddev, rdev->bdev->bd_dev))
2400 return -EEXIST;
2401
2402 if (rdev_read_only(rdev) && mddev->pers)
2403 return -EROFS;
2404
2405 	/* make sure rdev->sectors exceeds mddev->dev_sectors */
2406 if (!test_bit(Journal, &rdev->flags) &&
2407 rdev->sectors &&
2408 (mddev->dev_sectors == 0 || rdev->sectors < mddev->dev_sectors)) {
2409 if (mddev->pers) {
2410 			/* Cannot change size, so fail.
2411 			 * If mddev->level <= 0, then we don't care
2412 			 * about aligning sizes (e.g. linear).
2413 			 */
2414 if (mddev->level > 0)
2415 return -ENOSPC;
2416 } else
2417 mddev->dev_sectors = rdev->sectors;
2418 }
2419
2420 	/* Verify rdev->desc_nr is unique.
2421 	 * If it is -1, assign a free number, else
2422 	 * check that the number is not already in use.
2423 	 */
2424 rcu_read_lock();
2425 if (rdev->desc_nr < 0) {
2426 int choice = 0;
2427 if (mddev->pers)
2428 choice = mddev->raid_disks;
2429 while (md_find_rdev_nr_rcu(mddev, choice))
2430 choice++;
2431 rdev->desc_nr = choice;
2432 } else {
2433 if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
2434 rcu_read_unlock();
2435 return -EBUSY;
2436 }
2437 }
2438 rcu_read_unlock();
2439 if (!test_bit(Journal, &rdev->flags) &&
2440 mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2441 pr_warn("md: %s: array is limited to %d devices\n",
2442 mdname(mddev), mddev->max_disks);
2443 return -EBUSY;
2444 }
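	/* sysfs kobject names cannot contain '/', so map any '/' in the
	 * block device name to '!'
	 */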
2445 bdevname(rdev->bdev,b);
2446 strreplace(b, '/', '!');
2447
2448 rdev->mddev = mddev;
2449 pr_debug("md: bind<%s>\n", b);
2450
2451 if (mddev->raid_disks)
2452 mddev_create_serial_pool(mddev, rdev, false);
2453
2454 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2455 goto fail;
2456
2457 	/* failure here is OK */
2458 err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
2459 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2460 rdev->sysfs_unack_badblocks =
2461 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
2462 rdev->sysfs_badblocks =
2463 sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
2464
2465 list_add_rcu(&rdev->same_set, &mddev->disks);
2466 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2467
2468 	/* May as well allow recovery to be retried once */
2469 mddev->recovery_disabled++;
2470
2471 return 0;
2472
2473 fail:
2474 pr_warn("md: failed to register dev-%s for %s\n",
2475 b, mdname(mddev));
2476 return err;
2477}
2478
2479static void rdev_delayed_delete(struct work_struct *ws)
2480{
2481 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2482 kobject_del(&rdev->kobj);
2483 kobject_put(&rdev->kobj);
2484}
2485
2486static void unbind_rdev_from_array(struct md_rdev *rdev)
2487{
2488 char b[BDEVNAME_SIZE];
2489
2490 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2491 list_del_rcu(&rdev->same_set);
2492 pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
2493 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
2494 rdev->mddev = NULL;
2495 sysfs_remove_link(&rdev->kobj, "block");
2496 sysfs_put(rdev->sysfs_state);
2497 sysfs_put(rdev->sysfs_unack_badblocks);
2498 sysfs_put(rdev->sysfs_badblocks);
2499 rdev->sysfs_state = NULL;
2500 rdev->sysfs_unack_badblocks = NULL;
2501 rdev->sysfs_badblocks = NULL;
2502 rdev->badblocks.count = 0;
2503 	/* We need to delay this, otherwise we can deadlock when
2504 	 * writing to 'remove' to "dev/state".  We also need
2505 	 * to delay it due to rcu usage.
2506 	 */
2507 synchronize_rcu();
2508 INIT_WORK(&rdev->del_work, rdev_delayed_delete);
2509 kobject_get(&rdev->kobj);
2510 queue_work(md_rdev_misc_wq, &rdev->del_work);
2511}
2512
2513 /*
2514  * prevent the device from being mounted, repartitioned or
2515  * otherwise reused by a RAID array (or any other kernel
2516  * subsystem), by bd_claiming the device.
2517  */
2518static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2519{
2520 int err = 0;
2521 struct block_device *bdev;
2522
2523 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2524 shared ? (struct md_rdev *)lock_rdev : rdev);
2525 if (IS_ERR(bdev)) {
2526 pr_warn("md: could not open device unknown-block(%u,%u).\n",
2527 MAJOR(dev), MINOR(dev));
2528 return PTR_ERR(bdev);
2529 }
2530 rdev->bdev = bdev;
2531 return err;
2532}
2533
2534static void unlock_rdev(struct md_rdev *rdev)
2535{
2536 struct block_device *bdev = rdev->bdev;
2537 rdev->bdev = NULL;
2538 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2539}
2540
2541void md_autodetect_dev(dev_t dev);
2542
2543static void export_rdev(struct md_rdev *rdev)
2544{
2545 char b[BDEVNAME_SIZE];
2546
2547 pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
2548 md_rdev_clear(rdev);
2549#ifndef MODULE
2550 if (test_bit(AutoDetected, &rdev->flags))
2551 md_autodetect_dev(rdev->bdev->bd_dev);
2552#endif
2553 unlock_rdev(rdev);
2554 kobject_put(&rdev->kobj);
2555}
2556
2557void md_kick_rdev_from_array(struct md_rdev *rdev)
2558{
2559 unbind_rdev_from_array(rdev);
2560 export_rdev(rdev);
2561}
2562EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
2563
2564static void export_array(struct mddev *mddev)
2565{
2566 struct md_rdev *rdev;
2567
2568 while (!list_empty(&mddev->disks)) {
2569 rdev = list_first_entry(&mddev->disks, struct md_rdev,
2570 same_set);
2571 md_kick_rdev_from_array(rdev);
2572 }
2573 mddev->raid_disks = 0;
2574 mddev->major_version = 0;
2575}
2576
2577static bool set_in_sync(struct mddev *mddev)
2578{
2579 lockdep_assert_held(&mddev->lock);
2580 if (!mddev->in_sync) {
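		/* switch writes_pending to atomic mode so that a zero count
		 * can be detected reliably under mddev->lock below
		 */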
2581 mddev->sync_checkers++;
2582 spin_unlock(&mddev->lock);
2583 percpu_ref_switch_to_atomic_sync(&mddev->writes_pending);
2584 spin_lock(&mddev->lock);
2585 if (!mddev->in_sync &&
2586 percpu_ref_is_zero(&mddev->writes_pending)) {
2587 mddev->in_sync = 1;
2588 			/*
2589 			 * Ensure ->in_sync is visible before we clear
2590 			 * ->sync_checkers.
2591 			 */
2592 smp_mb();
2593 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2594 sysfs_notify_dirent_safe(mddev->sysfs_state);
2595 }
2596 if (--mddev->sync_checkers == 0)
2597 percpu_ref_switch_to_percpu(&mddev->writes_pending);
2598 }
2599 if (mddev->safemode == 1)
2600 mddev->safemode = 0;
2601 return mddev->in_sync;
2602}
2603
2604static void sync_sbs(struct mddev *mddev, int nospares)
2605{
2606 	/* Update each superblock (in memory image), but
2607 	 * if we are allowed to, skip spares which already
2608 	 * have the right event counter, or have one earlier
2609 	 * (which would mean they aren't being marked as dirty
2610 	 * with the rest of the array)
2611 	 */
2612 struct md_rdev *rdev;
2613 rdev_for_each(rdev, mddev) {
2614 if (rdev->sb_events == mddev->events ||
2615 (nospares &&
2616 rdev->raid_disk < 0 &&
2617 rdev->sb_events+1 == mddev->events)) {
2618 			/* Don't update this superblock */
2619 rdev->sb_loaded = 2;
2620 } else {
2621 sync_super(mddev, rdev);
2622 rdev->sb_loaded = 1;
2623 }
2624 }
2625}
2626
2627static bool does_sb_need_changing(struct mddev *mddev)
2628{
2629 struct md_rdev *rdev;
2630 struct mdp_superblock_1 *sb;
2631 int role;
2632
2633
2634 rdev_for_each(rdev, mddev)
2635 if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
2636 break;
2637
2638
2639 if (!rdev)
2640 return false;
2641
2642 sb = page_address(rdev->sb_page);
2643
2644 rdev_for_each(rdev, mddev) {
2645 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
2646 		/* Device activated? */
2647 if (role == 0xffff && rdev->raid_disk >=0 &&
2648 !test_bit(Faulty, &rdev->flags))
2649 return true;
2650 		/* Device turned faulty? */
2651 if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
2652 return true;
2653 }
2654
2655
2656 if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
2657 (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
2658 (mddev->layout != le32_to_cpu(sb->layout)) ||
2659 (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
2660 (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
2661 return true;
2662
2663 return false;
2664}
2665
2666void md_update_sb(struct mddev *mddev, int force_change)
2667{
2668 struct md_rdev *rdev;
2669 int sync_req;
2670 int nospares = 0;
2671 int any_badblocks_changed = 0;
2672 int ret = -1;
2673
2674 if (mddev->ro) {
2675 if (force_change)
2676 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2677 return;
2678 }
2679
2680repeat:
2681 if (mddev_is_clustered(mddev)) {
2682 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2683 force_change = 1;
2684 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2685 nospares = 1;
2686 ret = md_cluster_ops->metadata_update_start(mddev);
2687
2688 if (!does_sb_need_changing(mddev)) {
2689 if (ret == 0)
2690 md_cluster_ops->metadata_update_cancel(mddev);
2691 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2692 BIT(MD_SB_CHANGE_DEVS) |
2693 BIT(MD_SB_CHANGE_CLEAN));
2694 return;
2695 }
2696 }
2697
2698 	/*
2699 	 * First make sure individual recovery_offsets are correct.
2700 	 * curr_resync_completed can only be used during recovery.
2701 	 * During reshape/resync it might use array-addresses rather
2702 	 * than device addresses.
2703 	 */
2704 rdev_for_each(rdev, mddev) {
2705 if (rdev->raid_disk >= 0 &&
2706 mddev->delta_disks >= 0 &&
2707 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
2708 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
2709 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
2710 !test_bit(Journal, &rdev->flags) &&
2711 !test_bit(In_sync, &rdev->flags) &&
2712 mddev->curr_resync_completed > rdev->recovery_offset)
2713 rdev->recovery_offset = mddev->curr_resync_completed;
2714
2715 }
2716 if (!mddev->persistent) {
2717 clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
2718 clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2719 if (!mddev->external) {
2720 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
2721 rdev_for_each(rdev, mddev) {
2722 if (rdev->badblocks.changed) {
2723 rdev->badblocks.changed = 0;
2724 ack_all_badblocks(&rdev->badblocks);
2725 md_error(mddev, rdev);
2726 }
2727 clear_bit(Blocked, &rdev->flags);
2728 clear_bit(BlockedBadBlocks, &rdev->flags);
2729 wake_up(&rdev->blocked_wait);
2730 }
2731 }
2732 wake_up(&mddev->sb_wait);
2733 return;
2734 }
2735
2736 spin_lock(&mddev->lock);
2737
2738 mddev->utime = ktime_get_real_seconds();
2739
2740 if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
2741 force_change = 1;
2742 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
2743 		/* just a clean<->dirty transition, possibly leave spares alone,
2744 		 * though if events isn't the right even/odd, we will have to do
2745 		 * spares after all
2746 		 */
2747 nospares = 1;
2748 if (force_change)
2749 nospares = 0;
2750 if (mddev->degraded)
2751 		/* If the array is degraded, then skipping spares is both
2752 		 * dangerous and fairly pointless.
2753 		 * Dangerous because a device that was removed from the array
2754 		 * might have an event_count that still looks up-to-date,
2755 		 * so it can be re-added without a resync.
2756 		 * Pointless because if there are any spares to skip,
2757 		 * then a recovery will happen and soon that array won't
2758 		 * be degraded any more and the spare can go back to sleep then.
2759 		 */
2760 nospares = 0;
2761
2762 sync_req = mddev->in_sync;
2763
2764 	/* If this is just a dirty<->clean transition, and the array is clean
2765 	 * and 'events' is odd, we can roll back to the previous clean state */
2766 if (nospares
2767 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2768 && mddev->can_decrease_events
2769 && mddev->events != 1) {
2770 mddev->events--;
2771 mddev->can_decrease_events = 0;
2772 } else {
2773
2774 mddev->events ++;
2775 mddev->can_decrease_events = nospares;
2776 }
2777
2778 	/*
2779 	 * This 64-bit counter should never wrap.
2780 	 * Either we are in around ~1 trillion A.C., assuming
2781 	 * 1 reboot per second, or we have a bug...
2782 	 */
2783 WARN_ON(mddev->events == 0);
2784
2785 rdev_for_each(rdev, mddev) {
2786 if (rdev->badblocks.changed)
2787 any_badblocks_changed++;
2788 if (test_bit(Faulty, &rdev->flags))
2789 set_bit(FaultRecorded, &rdev->flags);
2790 }
2791
2792 sync_sbs(mddev, nospares);
2793 spin_unlock(&mddev->lock);
2794
2795 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2796 mdname(mddev), mddev->in_sync);
2797
2798 if (mddev->queue)
2799 blk_add_trace_msg(mddev->queue, "md md_update_sb");
2800rewrite:
2801 md_bitmap_update_sb(mddev->bitmap);
2802 rdev_for_each(rdev, mddev) {
2803 char b[BDEVNAME_SIZE];
2804
2805 if (rdev->sb_loaded != 1)
2806 continue;
2807
2808 if (!test_bit(Faulty, &rdev->flags)) {
2809 md_super_write(mddev,rdev,
2810 rdev->sb_start, rdev->sb_size,
2811 rdev->sb_page);
2812 pr_debug("md: (write) %s's sb offset: %llu\n",
2813 bdevname(rdev->bdev, b),
2814 (unsigned long long)rdev->sb_start);
2815 rdev->sb_events = mddev->events;
2816 if (rdev->badblocks.size) {
2817 md_super_write(mddev, rdev,
2818 rdev->badblocks.sector,
2819 rdev->badblocks.size << 9,
2820 rdev->bb_page);
2821 rdev->badblocks.size = 0;
2822 }
2823
2824 } else
2825 pr_debug("md: %s (skipping faulty)\n",
2826 bdevname(rdev->bdev, b));
2827
2828 if (mddev->level == LEVEL_MULTIPATH)
2829
2830 break;
2831 }
2832 if (md_super_wait(mddev) < 0)
2833 goto rewrite;
2834
2835
2836 if (mddev_is_clustered(mddev) && ret == 0)
2837 md_cluster_ops->metadata_update_finish(mddev);
2838
2839 if (mddev->in_sync != sync_req ||
2840 !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
2841 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
2842
2843 goto repeat;
2844 wake_up(&mddev->sb_wait);
2845 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2846 sysfs_notify_dirent_safe(mddev->sysfs_completed);
2847
2848 rdev_for_each(rdev, mddev) {
2849 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2850 clear_bit(Blocked, &rdev->flags);
2851
2852 if (any_badblocks_changed)
2853 ack_all_badblocks(&rdev->badblocks);
2854 clear_bit(BlockedBadBlocks, &rdev->flags);
2855 wake_up(&rdev->blocked_wait);
2856 }
2857}
2858EXPORT_SYMBOL(md_update_sb);
2859
2860static int add_bound_rdev(struct md_rdev *rdev)
2861{
2862 struct mddev *mddev = rdev->mddev;
2863 int err = 0;
2864 bool add_journal = test_bit(Journal, &rdev->flags);
2865
2866 if (!mddev->pers->hot_remove_disk || add_journal) {
2867 		/* If there is hot_add_disk but no hot_remove_disk
2868 		 * then added disks are for geometry changes,
2869 		 * and should be added immediately.
2870 		 */
2871 super_types[mddev->major_version].
2872 validate_super(mddev, rdev);
2873 if (add_journal)
2874 mddev_suspend(mddev);
2875 err = mddev->pers->hot_add_disk(mddev, rdev);
2876 if (add_journal)
2877 mddev_resume(mddev);
2878 if (err) {
2879 md_kick_rdev_from_array(rdev);
2880 return err;
2881 }
2882 }
2883 sysfs_notify_dirent_safe(rdev->sysfs_state);
2884
2885 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
2886 if (mddev->degraded)
2887 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
2888 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2889 md_new_event(mddev);
2890 md_wakeup_thread(mddev->thread);
2891 return 0;
2892}
2893
2894 /* words written to sysfs files may, or may not, be \n terminated.
2895  * We want to accept either case. For this we use cmd_match.
2896  */
2897static int cmd_match(const char *cmd, const char *str)
2898{
2899 	/* See if cmd, written into a sysfs file, matches
2900 	 * str.  They must either be the same, or cmd can
2901 	 * have a trailing newline.
2902 	 */
2903 while (*cmd && *str && *cmd == *str) {
2904 cmd++;
2905 str++;
2906 }
2907 if (*cmd == '\n')
2908 cmd++;
2909 if (*str || *cmd)
2910 return 0;
2911 return 1;
2912}
2913
2914struct rdev_sysfs_entry {
2915 struct attribute attr;
2916 ssize_t (*show)(struct md_rdev *, char *);
2917 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2918};
2919
2920static ssize_t
2921state_show(struct md_rdev *rdev, char *page)
2922{
2923 char *sep = ",";
2924 size_t len = 0;
2925 unsigned long flags = READ_ONCE(rdev->flags);
2926
2927 if (test_bit(Faulty, &flags) ||
2928 (!test_bit(ExternalBbl, &flags) &&
2929 rdev->badblocks.unacked_exist))
2930 len += sprintf(page+len, "faulty%s", sep);
2931 if (test_bit(In_sync, &flags))
2932 len += sprintf(page+len, "in_sync%s", sep);
2933 if (test_bit(Journal, &flags))
2934 len += sprintf(page+len, "journal%s", sep);
2935 if (test_bit(WriteMostly, &flags))
2936 len += sprintf(page+len, "write_mostly%s", sep);
2937 if (test_bit(Blocked, &flags) ||
2938 (rdev->badblocks.unacked_exist
2939 && !test_bit(Faulty, &flags)))
2940 len += sprintf(page+len, "blocked%s", sep);
2941 if (!test_bit(Faulty, &flags) &&
2942 !test_bit(Journal, &flags) &&
2943 !test_bit(In_sync, &flags))
2944 len += sprintf(page+len, "spare%s", sep);
2945 if (test_bit(WriteErrorSeen, &flags))
2946 len += sprintf(page+len, "write_error%s", sep);
2947 if (test_bit(WantReplacement, &flags))
2948 len += sprintf(page+len, "want_replacement%s", sep);
2949 if (test_bit(Replacement, &flags))
2950 len += sprintf(page+len, "replacement%s", sep);
2951 if (test_bit(ExternalBbl, &flags))
2952 len += sprintf(page+len, "external_bbl%s", sep);
2953 if (test_bit(FailFast, &flags))
2954 len += sprintf(page+len, "failfast%s", sep);
2955
2956 if (len)
2957 len -= strlen(sep);
2958
2959 return len+sprintf(page+len, "\n");
2960}
2961
2962static ssize_t
2963state_store(struct md_rdev *rdev, const char *buf, size_t len)
2964{
2965 	/* can write
2966 	 *  faulty  - simulates an error on the device
2967 	 *  remove  - disconnects the device
2968 	 *  writemostly - sets write_mostly
2969 	 *  -writemostly - clears write_mostly
2970 	 *  blocked - sets the Blocked flag
2971 	 *  -blocked - clears the Blocked flag and possibly simulates an error
2972 	 *  insync - sets In_sync providing the device isn't active
2973 	 *  -insync - clear In_sync for a device with a slot assigned,
2974 	 *            so that it is not reinserted prematurely.
2975 	 *  write_error - sets WriteErrorSeen
2976 	 *  -write_error - clears WriteErrorSeen
2977 	 *  {,-}failfast - set/clear FailFast
2978 	 */
2979 int err = -EINVAL;
2980 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2981 md_error(rdev->mddev, rdev);
2982 if (test_bit(Faulty, &rdev->flags))
2983 err = 0;
2984 else
2985 err = -EBUSY;
2986 } else if (cmd_match(buf, "remove")) {
2987 if (rdev->mddev->pers) {
2988 clear_bit(Blocked, &rdev->flags);
2989 remove_and_add_spares(rdev->mddev, rdev);
2990 }
2991 if (rdev->raid_disk >= 0)
2992 err = -EBUSY;
2993 else {
2994 struct mddev *mddev = rdev->mddev;
2995 err = 0;
2996 if (mddev_is_clustered(mddev))
2997 err = md_cluster_ops->remove_disk(mddev, rdev);
2998
2999 if (err == 0) {
3000 md_kick_rdev_from_array(rdev);
3001 if (mddev->pers) {
3002 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
3003 md_wakeup_thread(mddev->thread);
3004 }
3005 md_new_event(mddev);
3006 }
3007 }
3008 } else if (cmd_match(buf, "writemostly")) {
3009 set_bit(WriteMostly, &rdev->flags);
3010 mddev_create_serial_pool(rdev->mddev, rdev, false);
3011 err = 0;
3012 } else if (cmd_match(buf, "-writemostly")) {
3013 mddev_destroy_serial_pool(rdev->mddev, rdev, false);
3014 clear_bit(WriteMostly, &rdev->flags);
3015 err = 0;
3016 } else if (cmd_match(buf, "blocked")) {
3017 set_bit(Blocked, &rdev->flags);
3018 err = 0;
3019 } else if (cmd_match(buf, "-blocked")) {
3020 if (!test_bit(Faulty, &rdev->flags) &&
3021 !test_bit(ExternalBbl, &rdev->flags) &&
3022 rdev->badblocks.unacked_exist) {
3023 			/* metadata handler doesn't understand badblocks,
3024 			 * so we need to fail the device
3025 			 */
3026 md_error(rdev->mddev, rdev);
3027 }
3028 clear_bit(Blocked, &rdev->flags);
3029 clear_bit(BlockedBadBlocks, &rdev->flags);
3030 wake_up(&rdev->blocked_wait);
3031 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3032 md_wakeup_thread(rdev->mddev->thread);
3033
3034 err = 0;
3035 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
3036 set_bit(In_sync, &rdev->flags);
3037 err = 0;
3038 } else if (cmd_match(buf, "failfast")) {
3039 set_bit(FailFast, &rdev->flags);
3040 err = 0;
3041 } else if (cmd_match(buf, "-failfast")) {
3042 clear_bit(FailFast, &rdev->flags);
3043 err = 0;
3044 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
3045 !test_bit(Journal, &rdev->flags)) {
3046 if (rdev->mddev->pers == NULL) {
3047 clear_bit(In_sync, &rdev->flags);
3048 rdev->saved_raid_disk = rdev->raid_disk;
3049 rdev->raid_disk = -1;
3050 err = 0;
3051 }
3052 } else if (cmd_match(buf, "write_error")) {
3053 set_bit(WriteErrorSeen, &rdev->flags);
3054 err = 0;
3055 } else if (cmd_match(buf, "-write_error")) {
3056 clear_bit(WriteErrorSeen, &rdev->flags);
3057 err = 0;
3058 } else if (cmd_match(buf, "want_replacement")) {
3059 		/* Any active, non-journal device that is not already a
3060 		 * replacement can be marked as wanting a replacement;
3061 		 * recovery may then be needed to populate the replacement.
3062 		 */
3063 if (rdev->raid_disk >= 0 &&
3064 !test_bit(Journal, &rdev->flags) &&
3065 !test_bit(Replacement, &rdev->flags))
3066 set_bit(WantReplacement, &rdev->flags);
3067 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3068 md_wakeup_thread(rdev->mddev->thread);
3069 err = 0;
3070 } else if (cmd_match(buf, "-want_replacement")) {
3071 		/* Clearing 'want_replacement' is always allowed.
3072 		 * Once replacement starts it is too late though.
3073 		 */
3074 err = 0;
3075 clear_bit(WantReplacement, &rdev->flags);
3076 } else if (cmd_match(buf, "replacement")) {
3077 		/* Can only set a device as a replacement when the array has
3078 		 * not yet been started.  Once running, replacement is automatic
3079 		 * from spares, or by assigning 'slot'.
3080 		 */
3081 if (rdev->mddev->pers)
3082 err = -EBUSY;
3083 else {
3084 set_bit(Replacement, &rdev->flags);
3085 err = 0;
3086 }
3087 } else if (cmd_match(buf, "-replacement")) {
3088
3089 if (rdev->mddev->pers)
3090 err = -EBUSY;
3091 else {
3092 clear_bit(Replacement, &rdev->flags);
3093 err = 0;
3094 }
3095 } else if (cmd_match(buf, "re-add")) {
3096 if (!rdev->mddev->pers)
3097 err = -EINVAL;
3098 else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
3099 rdev->saved_raid_disk >= 0) {
3100 			/* clear_bit is performed _after_ all the devices
3101 			 * have their local Faulty bit cleared. If any writes
3102 			 * happen in the meantime in the local node, they
3103 			 * will land in the local bitmap, which will be synced
3104 			 * by this node eventually
3105 			 */
3106 if (!mddev_is_clustered(rdev->mddev) ||
3107 (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
3108 clear_bit(Faulty, &rdev->flags);
3109 err = add_bound_rdev(rdev);
3110 }
3111 } else
3112 err = -EBUSY;
3113 } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
3114 set_bit(ExternalBbl, &rdev->flags);
3115 rdev->badblocks.shift = 0;
3116 err = 0;
3117 } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
3118 clear_bit(ExternalBbl, &rdev->flags);
3119 err = 0;
3120 }
3121 if (!err)
3122 sysfs_notify_dirent_safe(rdev->sysfs_state);
3123 return err ? err : len;
3124}
3125static struct rdev_sysfs_entry rdev_state =
3126__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
3127
3128static ssize_t
3129errors_show(struct md_rdev *rdev, char *page)
3130{
3131 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
3132}
3133
3134static ssize_t
3135errors_store(struct md_rdev *rdev, const char *buf, size_t len)
3136{
3137 unsigned int n;
3138 int rv;
3139
3140 rv = kstrtouint(buf, 10, &n);
3141 if (rv < 0)
3142 return rv;
3143 atomic_set(&rdev->corrected_errors, n);
3144 return len;
3145}
3146static struct rdev_sysfs_entry rdev_errors =
3147__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
3148
3149static ssize_t
3150slot_show(struct md_rdev *rdev, char *page)
3151{
3152 if (test_bit(Journal, &rdev->flags))
3153 return sprintf(page, "journal\n");
3154 else if (rdev->raid_disk < 0)
3155 return sprintf(page, "none\n");
3156 else
3157 return sprintf(page, "%d\n", rdev->raid_disk);
3158}
3159
3160static ssize_t
3161slot_store(struct md_rdev *rdev, const char *buf, size_t len)
3162{
3163 int slot;
3164 int err;
3165
3166 if (test_bit(Journal, &rdev->flags))
3167 return -EBUSY;
3168 if (strncmp(buf, "none", 4)==0)
3169 slot = -1;
3170 else {
3171 err = kstrtouint(buf, 10, (unsigned int *)&slot);
3172 if (err < 0)
3173 return err;
3174 }
3175 if (rdev->mddev->pers && slot == -1) {
3176 		/* Setting 'slot' on an active array requires also
3177 		 * updating the 'rd%d' link, and communicating
3178 		 * with the personality with ->hot_*_disk.
3179 		 * For now we only support removing
3180 		 * failed/spare devices.  This normally happens automatically,
3181 		 * but not when the metadata is externally managed.
3182 		 */
3183 if (rdev->raid_disk == -1)
3184 return -EEXIST;
3185 		/* personality does all needed checks */
3186 if (rdev->mddev->pers->hot_remove_disk == NULL)
3187 return -EINVAL;
3188 clear_bit(Blocked, &rdev->flags);
3189 remove_and_add_spares(rdev->mddev, rdev);
3190 if (rdev->raid_disk >= 0)
3191 return -EBUSY;
3192 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
3193 md_wakeup_thread(rdev->mddev->thread);
3194 } else if (rdev->mddev->pers) {
3195 		/* Activating a spare .. or possibly reactivating
3196 		 * if we ever get bitmaps working here.
3197 		 */
3198 int err;
3199
3200 if (rdev->raid_disk != -1)
3201 return -EBUSY;
3202
3203 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
3204 return -EBUSY;
3205
3206 if (rdev->mddev->pers->hot_add_disk == NULL)
3207 return -EINVAL;
3208
3209 if (slot >= rdev->mddev->raid_disks &&
3210 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3211 return -ENOSPC;
3212
3213 rdev->raid_disk = slot;
3214 if (test_bit(In_sync, &rdev->flags))
3215 rdev->saved_raid_disk = slot;
3216 else
3217 rdev->saved_raid_disk = -1;
3218 clear_bit(In_sync, &rdev->flags);
3219 clear_bit(Bitmap_sync, &rdev->flags);
3220 err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
3221 if (err) {
3222 rdev->raid_disk = -1;
3223 return err;
3224 } else
3225 sysfs_notify_dirent_safe(rdev->sysfs_state);
3226 		/* failure here is OK */;
3227 sysfs_link_rdev(rdev->mddev, rdev);
3228
3229 } else {
3230 if (slot >= rdev->mddev->raid_disks &&
3231 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
3232 return -ENOSPC;
3233 rdev->raid_disk = slot;
3234 		/* assume it is working */
3235 clear_bit(Faulty, &rdev->flags);
3236 clear_bit(WriteMostly, &rdev->flags);
3237 set_bit(In_sync, &rdev->flags);
3238 sysfs_notify_dirent_safe(rdev->sysfs_state);
3239 }
3240 return len;
3241}
3242
3243static struct rdev_sysfs_entry rdev_slot =
3244__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
3245
3246static ssize_t
3247offset_show(struct md_rdev *rdev, char *page)
3248{
3249 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
3250}
3251
3252static ssize_t
3253offset_store(struct md_rdev *rdev, const char *buf, size_t len)
3254{
3255 unsigned long long offset;
3256 if (kstrtoull(buf, 10, &offset) < 0)
3257 return -EINVAL;
3258 if (rdev->mddev->pers && rdev->raid_disk >= 0)
3259 return -EBUSY;
3260 if (rdev->sectors && rdev->mddev->external)
3261 		/* Must set offset before size, so overlap checks
3262 		 * can be sane */
3263 return -EBUSY;
3264 rdev->data_offset = offset;
3265 rdev->new_data_offset = offset;
3266 return len;
3267}
3268
3269static struct rdev_sysfs_entry rdev_offset =
3270__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
3271
3272static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
3273{
3274 return sprintf(page, "%llu\n",
3275 (unsigned long long)rdev->new_data_offset);
3276}
3277
3278static ssize_t new_offset_store(struct md_rdev *rdev,
3279 const char *buf, size_t len)
3280{
3281 unsigned long long new_offset;
3282 struct mddev *mddev = rdev->mddev;
3283
3284 if (kstrtoull(buf, 10, &new_offset) < 0)
3285 return -EINVAL;
3286
3287 if (mddev->sync_thread ||
3288 test_bit(MD_RECOVERY_RUNNING,&mddev->recovery))
3289 return -EBUSY;
3290 if (new_offset == rdev->data_offset)
3291 		/* reset is always permitted */
3292 ;
3293 else if (new_offset > rdev->data_offset) {
3294 		/* must not push array size beyond rdev->sectors */
3295 if (new_offset - rdev->data_offset
3296 + mddev->dev_sectors > rdev->sectors)
3297 return -E2BIG;
3298 }
3299 	/* Metadata worries about other space details. */
3300 
3301 	/* decreasing the offset is inconsistent with a backwards
3302 	 * reshape.
3303 	 */
3304 if (new_offset < rdev->data_offset &&
3305 mddev->reshape_backwards)
3306 return -EINVAL;
3307
3308 	/* Increasing the offset is inconsistent with a forwards
3309 	 * reshape; reshape then runs from the start to the end of the devices.
3310 	 */
3311 if (new_offset > rdev->data_offset &&
3312 !mddev->reshape_backwards)
3313 return -EINVAL;
3314
3315 if (mddev->pers && mddev->persistent &&
3316 !super_types[mddev->major_version]
3317 .allow_new_offset(rdev, new_offset))
3318 return -E2BIG;
3319 rdev->new_data_offset = new_offset;
3320 if (new_offset > rdev->data_offset)
3321 mddev->reshape_backwards = 1;
3322 else if (new_offset < rdev->data_offset)
3323 mddev->reshape_backwards = 0;
3324
3325 return len;
3326}
3327static struct rdev_sysfs_entry rdev_new_offset =
3328__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
3329
3330static ssize_t
3331rdev_size_show(struct md_rdev *rdev, char *page)
3332{
3333 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
3334}
3335
3336static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
3337{
3338 	/* check if two start/length pairs overlap */
3339 if (s1+l1 <= s2)
3340 return 0;
3341 if (s2+l2 <= s1)
3342 return 0;
3343 return 1;
3344}
3345
3346static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
3347{
3348 unsigned long long blocks;
3349 sector_t new;
3350
3351 if (kstrtoull(buf, 10, &blocks) < 0)
3352 return -EINVAL;
3353
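	/* 'blocks' is in 1K units; reject values whose top bit is set or
	 * that would overflow when doubled into 512-byte sectors below
	 */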
3354 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
3355 return -EINVAL;
3356
3357 new = blocks * 2;
3358 if (new != blocks * 2)
3359 return -EINVAL;
3360
3361 *sectors = new;
3362 return 0;
3363}
3364
3365static ssize_t
3366rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3367{
3368 struct mddev *my_mddev = rdev->mddev;
3369 sector_t oldsectors = rdev->sectors;
3370 sector_t sectors;
3371
3372 if (test_bit(Journal, &rdev->flags))
3373 return -EBUSY;
3374 	if (strict_blocks_to_sectors(buf, &sectors) < 0)
3375 return -EINVAL;
3376 if (rdev->data_offset != rdev->new_data_offset)
3377 return -EINVAL;
3378 if (my_mddev->pers && rdev->raid_disk >= 0) {
3379 if (my_mddev->persistent) {
3380 sectors = super_types[my_mddev->major_version].
3381 rdev_size_change(rdev, sectors);
3382 if (!sectors)
3383 return -EBUSY;
3384 } else if (!sectors)
3385 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
3386 rdev->data_offset;
3387 if (!my_mddev->pers->resize)
3388 			/* Cannot change size for RAID0 or Linear etc */
3389 return -EINVAL;
3390 }
3391 if (sectors < my_mddev->dev_sectors)
3392 return -EINVAL;
3393
3394 rdev->sectors = sectors;
3395 if (sectors > oldsectors && my_mddev->external) {
3396 		/* Need to check that all other rdevs with the same
3397 		 * ->bdev do not overlap.  'rcu' is sufficient to walk
3398 		 * the rdev lists safely.
3399 		 * This check does not provide a hard guarantee, it
3400 		 * just helps avoid dangerous mistakes.
3401 		 */
3402 struct mddev *mddev;
3403 int overlap = 0;
3404 struct list_head *tmp;
3405
3406 rcu_read_lock();
3407 for_each_mddev(mddev, tmp) {
3408 struct md_rdev *rdev2;
3409
3410 rdev_for_each(rdev2, mddev)
3411 if (rdev->bdev == rdev2->bdev &&
3412 rdev != rdev2 &&
3413 overlaps(rdev->data_offset, rdev->sectors,
3414 rdev2->data_offset,
3415 rdev2->sectors)) {
3416 overlap = 1;
3417 break;
3418 }
3419 if (overlap) {
3420 mddev_put(mddev);
3421 break;
3422 }
3423 }
3424 rcu_read_unlock();
3425 if (overlap) {
3426 			/* Someone else could have slipped in a size
3427 			 * change here, but doing so is just silly.
3428 			 * We put oldsectors back because we *know* it is
3429 			 * safe, and trust userspace not to race with
3430 			 * itself
3431 			 */
3432 rdev->sectors = oldsectors;
3433 return -EBUSY;
3434 }
3435 }
3436 return len;
3437}
3438
3439static struct rdev_sysfs_entry rdev_size =
3440__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3441
3442static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3443{
3444 unsigned long long recovery_start = rdev->recovery_offset;
3445
3446 if (test_bit(In_sync, &rdev->flags) ||
3447 recovery_start == MaxSector)
3448 return sprintf(page, "none\n");
3449
3450 return sprintf(page, "%llu\n", recovery_start);
3451}
3452
3453static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3454{
3455 unsigned long long recovery_start;
3456
3457 if (cmd_match(buf, "none"))
3458 recovery_start = MaxSector;
3459 else if (kstrtoull(buf, 10, &recovery_start))
3460 return -EINVAL;
3461
3462 if (rdev->mddev->pers &&
3463 rdev->raid_disk >= 0)
3464 return -EBUSY;
3465
3466 rdev->recovery_offset = recovery_start;
3467 if (recovery_start == MaxSector)
3468 set_bit(In_sync, &rdev->flags);
3469 else
3470 clear_bit(In_sync, &rdev->flags);
3471 return len;
3472}
3473
3474static struct rdev_sysfs_entry rdev_recovery_start =
3475__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3476
3477 /* sysfs access to the bad-blocks list.
3478  * We present two files.
3479  * 'bad_blocks' lists sector numbers and lengths of ranges that
3480  *    are recorded as bad.  The list is truncated to fit within
3481  *    the one-page limit of sysfs.
3482  *    Writing "sector length" to this file adds an acknowledged
3483  *    bad block range.
3484  * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
3485  *    been acknowledged.  Writing to this file adds bad blocks
3486  *    without acknowledging them.  This is largely for testing.
3487  */
3488static ssize_t bb_show(struct md_rdev *rdev, char *page)
3489{
3490 return badblocks_show(&rdev->badblocks, page, 0);
3491}
3492static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3493{
3494 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3495
3496 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3497 wake_up(&rdev->blocked_wait);
3498 return rv;
3499}
3500static struct rdev_sysfs_entry rdev_bad_blocks =
3501__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3502
3503static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3504{
3505 return badblocks_show(&rdev->badblocks, page, 1);
3506}
3507static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3508{
3509 return badblocks_store(&rdev->badblocks, page, len, 1);
3510}
3511static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3512__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3513
3514static ssize_t
3515ppl_sector_show(struct md_rdev *rdev, char *page)
3516{
3517 return sprintf(page, "%llu\n", (unsigned long long)rdev->ppl.sector);
3518}
3519
3520static ssize_t
3521ppl_sector_store(struct md_rdev *rdev, const char *buf, size_t len)
3522{
3523 unsigned long long sector;
3524
3525 	if (kstrtoull(buf, 10, &sector) < 0)
3526 return -EINVAL;
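	/* reject values that do not fit in sector_t (e.g. on 32-bit kernels) */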
3527 if (sector != (sector_t)sector)
3528 return -EINVAL;
3529
3530 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3531 rdev->raid_disk >= 0)
3532 return -EBUSY;
3533
3534 if (rdev->mddev->persistent) {
3535 if (rdev->mddev->major_version == 0)
3536 return -EINVAL;
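		/* ppl.offset in the v1 superblock is a signed 16-bit sector
		 * offset from sb_start, hence the S16 range checks below
		 */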
3537 if ((sector > rdev->sb_start &&
3538 sector - rdev->sb_start > S16_MAX) ||
3539 (sector < rdev->sb_start &&
3540 rdev->sb_start - sector > -S16_MIN))
3541 return -EINVAL;
3542 rdev->ppl.offset = sector - rdev->sb_start;
3543 } else if (!rdev->mddev->external) {
3544 return -EBUSY;
3545 }
3546 rdev->ppl.sector = sector;
3547 return len;
3548}
3549
3550static struct rdev_sysfs_entry rdev_ppl_sector =
3551__ATTR(ppl_sector, S_IRUGO|S_IWUSR, ppl_sector_show, ppl_sector_store);
3552
3553static ssize_t
3554ppl_size_show(struct md_rdev *rdev, char *page)
3555{
3556 return sprintf(page, "%u\n", rdev->ppl.size);
3557}
3558
3559static ssize_t
3560ppl_size_store(struct md_rdev *rdev, const char *buf, size_t len)
3561{
3562 unsigned int size;
3563
3564 if (kstrtouint(buf, 10, &size) < 0)
3565 return -EINVAL;
3566
3567 if (rdev->mddev->pers && test_bit(MD_HAS_PPL, &rdev->mddev->flags) &&
3568 rdev->raid_disk >= 0)
3569 return -EBUSY;
3570
3571 if (rdev->mddev->persistent) {
3572 if (rdev->mddev->major_version == 0)
3573 return -EINVAL;
3574 if (size > U16_MAX)
3575 return -EINVAL;
3576 } else if (!rdev->mddev->external) {
3577 return -EBUSY;
3578 }
3579 rdev->ppl.size = size;
3580 return len;
3581}
3582
3583static struct rdev_sysfs_entry rdev_ppl_size =
3584__ATTR(ppl_size, S_IRUGO|S_IWUSR, ppl_size_show, ppl_size_store);
3585
3586static struct attribute *rdev_default_attrs[] = {
3587 &rdev_state.attr,
3588 &rdev_errors.attr,
3589 &rdev_slot.attr,
3590 &rdev_offset.attr,
3591 &rdev_new_offset.attr,
3592 &rdev_size.attr,
3593 &rdev_recovery_start.attr,
3594 &rdev_bad_blocks.attr,
3595 &rdev_unack_bad_blocks.attr,
3596 &rdev_ppl_sector.attr,
3597 &rdev_ppl_size.attr,
3598 NULL,
3599};
3600static ssize_t
3601rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3602{
3603 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3604 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3605
3606 if (!entry->show)
3607 return -EIO;
3608 if (!rdev->mddev)
3609 return -ENODEV;
3610 return entry->show(rdev, page);
3611}
3612
3613static ssize_t
3614rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3615 const char *page, size_t length)
3616{
3617 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3618 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3619 ssize_t rv;
3620 struct mddev *mddev = rdev->mddev;
3621
3622 if (!entry->store)
3623 return -EIO;
3624 if (!capable(CAP_SYS_ADMIN))
3625 return -EACCES;
3626 rv = mddev ? mddev_lock(mddev) : -ENODEV;
3627 if (!rv) {
3628 if (rdev->mddev == NULL)
3629 rv = -ENODEV;
3630 else
3631 rv = entry->store(rdev, page, length);
3632 mddev_unlock(mddev);
3633 }
3634 return rv;
3635}
3636
3637static void rdev_free(struct kobject *ko)
3638{
3639 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3640 kfree(rdev);
3641}
3642static const struct sysfs_ops rdev_sysfs_ops = {
3643 .show = rdev_attr_show,
3644 .store = rdev_attr_store,
3645};
3646static struct kobj_type rdev_ktype = {
3647 .release = rdev_free,
3648 .sysfs_ops = &rdev_sysfs_ops,
3649 .default_attrs = rdev_default_attrs,
3650};
3651
3652int md_rdev_init(struct md_rdev *rdev)
3653{
3654 rdev->desc_nr = -1;
3655 rdev->saved_raid_disk = -1;
3656 rdev->raid_disk = -1;
3657 rdev->flags = 0;
3658 rdev->data_offset = 0;
3659 rdev->new_data_offset = 0;
3660 rdev->sb_events = 0;
3661 rdev->last_read_error = 0;
3662 rdev->sb_loaded = 0;
3663 rdev->bb_page = NULL;
3664 atomic_set(&rdev->nr_pending, 0);
3665 atomic_set(&rdev->read_errors, 0);
3666 atomic_set(&rdev->corrected_errors, 0);
3667
3668 INIT_LIST_HEAD(&rdev->same_set);
3669 init_waitqueue_head(&rdev->blocked_wait);
3670
3671 	/* Add space to store bad block list.
3672 	 * This reserves the space even on arrays where it cannot
3673 	 * be used - I wonder if that matters
3674 	 */
3675 return badblocks_init(&rdev->badblocks, 0);
3676}
3677EXPORT_SYMBOL_GPL(md_rdev_init);
3678
3679 /*
3680  * Import a device. If 'super_format' >= 0, then sanity check the superblock
3681  *
3682  * mark the device faulty if:
3683  *   - the device is nonexistent (zero size)
3684  *   - the device has no valid superblock
3685  *
3686  * a faulty rdev _never_ has any "IO" or bitmap pages allocated.
3687  */
3688static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3689{
3690 char b[BDEVNAME_SIZE];
3691 int err;
3692 struct md_rdev *rdev;
3693 sector_t size;
3694
3695 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3696 if (!rdev)
3697 return ERR_PTR(-ENOMEM);
3698
3699 err = md_rdev_init(rdev);
3700 if (err)
3701 goto abort_free;
3702 err = alloc_disk_sb(rdev);
3703 if (err)
3704 goto abort_free;
3705
3706 err = lock_rdev(rdev, newdev, super_format == -2);
3707 if (err)
3708 goto abort_free;
3709
3710 kobject_init(&rdev->kobj, &rdev_ktype);
3711
3712 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3713 if (!size) {
3714 pr_warn("md: %s has zero or unknown size, marking faulty!\n",
3715 bdevname(rdev->bdev,b));
3716 err = -EINVAL;
3717 goto abort_free;
3718 }
3719
3720 if (super_format >= 0) {
3721 err = super_types[super_format].
3722 load_super(rdev, NULL, super_minor);
3723 if (err == -EINVAL) {
3724 pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
3725 bdevname(rdev->bdev,b),
3726 super_format, super_minor);
3727 goto abort_free;
3728 }
3729 if (err < 0) {
3730 pr_warn("md: could not read %s's sb, not importing!\n",
3731 bdevname(rdev->bdev,b));
3732 goto abort_free;
3733 }
3734 }
3735
3736 return rdev;
3737
3738abort_free:
3739 if (rdev->bdev)
3740 unlock_rdev(rdev);
3741 md_rdev_clear(rdev);
3742 kfree(rdev);
3743 return ERR_PTR(err);
3744}
3745
3746
3747 /*
3748  * Check a full RAID array for plausibility
3749  */
3750static int analyze_sbs(struct mddev *mddev)
3751{
3752 int i;
3753 struct md_rdev *rdev, *freshest, *tmp;
3754 char b[BDEVNAME_SIZE];
3755
3756 freshest = NULL;
3757 rdev_for_each_safe(rdev, tmp, mddev)
3758 switch (super_types[mddev->major_version].
3759 load_super(rdev, freshest, mddev->minor_version)) {
3760 case 1:
3761 freshest = rdev;
3762 break;
3763 case 0:
3764 break;
3765 default:
3766 pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
3767 bdevname(rdev->bdev,b));
3768 md_kick_rdev_from_array(rdev);
3769 }
3770
3771
3772 if (!freshest) {
3773 pr_warn("md: cannot find a valid disk\n");
3774 return -EINVAL;
3775 }
3776
3777 super_types[mddev->major_version].
3778 validate_super(mddev, freshest);
3779
3780 i = 0;
3781 rdev_for_each_safe(rdev, tmp, mddev) {
3782 if (mddev->max_disks &&
3783 (rdev->desc_nr >= mddev->max_disks ||
3784 i > mddev->max_disks)) {
3785 pr_warn("md: %s: %s: only %d devices permitted\n",
3786 mdname(mddev), bdevname(rdev->bdev, b),
3787 mddev->max_disks);
3788 md_kick_rdev_from_array(rdev);
3789 continue;
3790 }
3791 if (rdev != freshest) {
3792 if (super_types[mddev->major_version].
3793 validate_super(mddev, rdev)) {
3794 pr_warn("md: kicking non-fresh %s from array!\n",
3795 bdevname(rdev->bdev,b));
3796 md_kick_rdev_from_array(rdev);
3797 continue;
3798 }
3799 }
3800 if (mddev->level == LEVEL_MULTIPATH) {
3801 rdev->desc_nr = i++;
3802 rdev->raid_disk = rdev->desc_nr;
3803 set_bit(In_sync, &rdev->flags);
3804 } else if (rdev->raid_disk >=
3805 (mddev->raid_disks - min(0, mddev->delta_disks)) &&
3806 !test_bit(Journal, &rdev->flags)) {
3807 rdev->raid_disk = -1;
3808 clear_bit(In_sync, &rdev->flags);
3809 }
3810 }
3811
3812 return 0;
3813}
3814
3815 /* Read a fixed-point number.
3816  * Numbers in sysfs attributes should be in "standard" units where
3817  * possible, so time should be in seconds.
3818  * However we internally use a much smaller unit such as
3819  * milliseconds or jiffies.
3820  * This function takes a decimal number with a possible fractional
3821  * component, and produces an integer which is the result of
3822  * multiplying that number by 10^'scale'.
3823  * all without any floating-point arithmetic.
3824  */
3825int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3826{
3827 unsigned long result = 0;
3828 long decimals = -1;
3829 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3830 if (*cp == '.')
3831 decimals = 0;
3832 else if (decimals < scale) {
3833 unsigned int value;
3834 value = *cp - '0';
3835 result = result * 10 + value;
3836 if (decimals >= 0)
3837 decimals++;
3838 }
3839 cp++;
3840 }
3841 if (*cp == '\n')
3842 cp++;
3843 if (*cp)
3844 return -EINVAL;
3845 if (decimals < 0)
3846 decimals = 0;
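	/* e.g. parsing "1.5" with scale 3: result=15, decimals=1,
	 * so *res = 15 * 10^(3-1) = 1500
	 */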
3847 *res = result * int_pow(10, scale - decimals);
3848 return 0;
3849}
3850
3851static ssize_t
3852safe_delay_show(struct mddev *mddev, char *page)
3853{
3854 int msec = (mddev->safemode_delay*1000)/HZ;
3855 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3856}
3857static ssize_t
3858safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3859{
3860 unsigned long msec;
3861
3862 if (mddev_is_clustered(mddev)) {
3863 pr_warn("md: Safemode is disabled for clustered mode\n");
3864 return -EINVAL;
3865 }
3866
3867 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3868 return -EINVAL;
3869 if (msec == 0)
3870 mddev->safemode_delay = 0;
3871 else {
3872 unsigned long old_delay = mddev->safemode_delay;
3873 unsigned long new_delay = (msec*HZ)/1000;
3874
3875 if (new_delay == 0)
3876 new_delay = 1;
3877 mddev->safemode_delay = new_delay;
3878 if (new_delay < old_delay || old_delay == 0)
3879 mod_timer(&mddev->safemode_timer, jiffies+1);
3880 }
3881 return len;
3882}
3883static struct md_sysfs_entry md_safe_delay =
3884__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3885
3886static ssize_t
3887level_show(struct mddev *mddev, char *page)
3888{
3889 struct md_personality *p;
3890 int ret;
3891 spin_lock(&mddev->lock);
3892 p = mddev->pers;
3893 if (p)
3894 ret = sprintf(page, "%s\n", p->name);
3895 else if (mddev->clevel[0])
3896 ret = sprintf(page, "%s\n", mddev->clevel);
3897 else if (mddev->level != LEVEL_NONE)
3898 ret = sprintf(page, "%d\n", mddev->level);
3899 else
3900 ret = 0;
3901 spin_unlock(&mddev->lock);
3902 return ret;
3903}
3904
3905static ssize_t
3906level_store(struct mddev *mddev, const char *buf, size_t len)
3907{
3908 char clevel[16];
3909 ssize_t rv;
3910 size_t slen = len;
3911 struct md_personality *pers, *oldpers;
3912 long level;
3913 void *priv, *oldpriv;
3914 struct md_rdev *rdev;
3915
3916 if (slen == 0 || slen >= sizeof(clevel))
3917 return -EINVAL;
3918
3919 rv = mddev_lock(mddev);
3920 if (rv)
3921 return rv;
3922
3923 if (mddev->pers == NULL) {
3924 strncpy(mddev->clevel, buf, slen);
3925 if (mddev->clevel[slen-1] == '\n')
3926 slen--;
3927 mddev->clevel[slen] = 0;
3928 mddev->level = LEVEL_NONE;
3929 rv = len;
3930 goto out_unlock;
3931 }
3932 rv = -EROFS;
3933 if (mddev->ro)
3934 goto out_unlock;
3935
3936
3937 	/* request to change the personality.  Need to ensure:
3938 	 *  - array is not engaged in resync/recovery/reshape
3939 	 *  - old personality can be suspended
3940 	 *  - new personality will access other array.
3941 	 */
3942 rv = -EBUSY;
3943 if (mddev->sync_thread ||
3944 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3945 mddev->reshape_position != MaxSector ||
3946 mddev->sysfs_active)
3947 goto out_unlock;
3948
3949 rv = -EINVAL;
3950 if (!mddev->pers->quiesce) {
3951 pr_warn("md: %s: %s does not support online personality change\n",
3952 mdname(mddev), mddev->pers->name);
3953 goto out_unlock;
3954 }
3955
3956 	/* Now find the new personality */
3957 strncpy(clevel, buf, slen);
3958 if (clevel[slen-1] == '\n')
3959 slen--;
3960 clevel[slen] = 0;
3961 if (kstrtol(clevel, 10, &level))
3962 level = LEVEL_NONE;
3963
3964 if (request_module("md-%s", clevel) != 0)
3965 request_module("md-level-%s", clevel);
3966 spin_lock(&pers_lock);
3967 pers = find_pers(level, clevel);
3968 if (!pers || !try_module_get(pers->owner)) {
3969 spin_unlock(&pers_lock);
3970 pr_warn("md: personality %s not loaded\n", clevel);
3971 rv = -EINVAL;
3972 goto out_unlock;
3973 }
3974 spin_unlock(&pers_lock);
3975
3976 if (pers == mddev->pers) {
3977 		/* Nothing to do! */
3978 module_put(pers->owner);
3979 rv = len;
3980 goto out_unlock;
3981 }
3982 if (!pers->takeover) {
3983 module_put(pers->owner);
3984 pr_warn("md: %s: %s does not support personality takeover\n",
3985 mdname(mddev), clevel);
3986 rv = -EINVAL;
3987 goto out_unlock;
3988 }
3989
3990 rdev_for_each(rdev, mddev)
3991 rdev->new_raid_disk = rdev->raid_disk;
3992
3993 	/* ->takeover must set new_* and/or delta_disks
3994 	 * if it succeeds, and may set them when it fails.
3995 	 */
3996 priv = pers->takeover(mddev);
3997 if (IS_ERR(priv)) {
3998 mddev->new_level = mddev->level;
3999 mddev->new_layout = mddev->layout;
4000 mddev->new_chunk_sectors = mddev->chunk_sectors;
4001 mddev->raid_disks -= mddev->delta_disks;
4002 mddev->delta_disks = 0;
4003 mddev->reshape_backwards = 0;
4004 module_put(pers->owner);
4005 pr_warn("md: %s: %s would not accept array\n",
4006 mdname(mddev), clevel);
4007 rv = PTR_ERR(priv);
4008 goto out_unlock;
4009 }
4010
4011 	/* Looks like we have a winner */
4012 mddev_suspend(mddev);
4013 mddev_detach(mddev);
4014
4015 spin_lock(&mddev->lock);
4016 oldpers = mddev->pers;
4017 oldpriv = mddev->private;
4018 mddev->pers = pers;
4019 mddev->private = priv;
4020 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
4021 mddev->level = mddev->new_level;
4022 mddev->layout = mddev->new_layout;
4023 mddev->chunk_sectors = mddev->new_chunk_sectors;
4024 mddev->delta_disks = 0;
4025 mddev->reshape_backwards = 0;
4026 mddev->degraded = 0;
4027 spin_unlock(&mddev->lock);
4028
4029 if (oldpers->sync_request == NULL &&
4030 mddev->external) {
4031 		/* We are converting from a no-redundancy array to a
4032 		 * redundancy array and the metadata is managed
4033 		 * externally, so we need to be sure that writes
4034 		 * won't block waiting for a metadata update that
4035 		 * userspace has not yet taken over.  Keep the array
4036 		 * dirty and disable the safemode machinery for now.
4037 		 */
4038 mddev->in_sync = 0;
4039 mddev->safemode_delay = 0;
4040 mddev->safemode = 0;
4041 }
4042
4043 oldpers->free(mddev, oldpriv);
4044
4045 if (oldpers->sync_request == NULL &&
4046 pers->sync_request != NULL) {
4047
4048 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4049 pr_warn("md: cannot register extra attributes for %s\n",
4050 mdname(mddev));
4051 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
4052 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
4053 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
4054 }
4055 if (oldpers->sync_request != NULL &&
4056 pers->sync_request == NULL) {
4057
4058 if (mddev->to_remove == NULL)
4059 mddev->to_remove = &md_redundancy_group;
4060 }
4061
4062 module_put(oldpers->owner);
4063
4064 rdev_for_each(rdev, mddev) {
4065 if (rdev->raid_disk < 0)
4066 continue;
4067 if (rdev->new_raid_disk >= mddev->raid_disks)
4068 rdev->new_raid_disk = -1;
4069 if (rdev->new_raid_disk == rdev->raid_disk)
4070 continue;
4071 sysfs_unlink_rdev(mddev, rdev);
4072 }
4073 rdev_for_each(rdev, mddev) {
4074 if (rdev->raid_disk < 0)
4075 continue;
4076 if (rdev->new_raid_disk == rdev->raid_disk)
4077 continue;
4078 rdev->raid_disk = rdev->new_raid_disk;
4079 if (rdev->raid_disk < 0)
4080 clear_bit(In_sync, &rdev->flags);
4081 else {
4082 if (sysfs_link_rdev(mddev, rdev))
4083 pr_warn("md: cannot register rd%d for %s after level change\n",
4084 rdev->raid_disk, mdname(mddev));
4085 }
4086 }
4087
4088 if (pers->sync_request == NULL) {
4089 		/* this is now an array without redundancy, so
4090 		 * it must always be in_sync
4091 		 */
4092 mddev->in_sync = 1;
4093 del_timer_sync(&mddev->safemode_timer);
4094 }
4095 blk_set_stacking_limits(&mddev->queue->limits);
4096 pers->run(mddev);
4097 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4098 mddev_resume(mddev);
4099 if (!mddev->thread)
4100 md_update_sb(mddev, 1);
4101 sysfs_notify_dirent_safe(mddev->sysfs_level);
4102 md_new_event(mddev);
4103 rv = len;
4104out_unlock:
4105 mddev_unlock(mddev);
4106 return rv;
4107}
4108
4109static struct md_sysfs_entry md_level =
4110__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
4111
4112static ssize_t
4113layout_show(struct mddev *mddev, char *page)
4114{
4115
4116 if (mddev->reshape_position != MaxSector &&
4117 mddev->layout != mddev->new_layout)
4118 return sprintf(page, "%d (%d)\n",
4119 mddev->new_layout, mddev->layout);
4120 return sprintf(page, "%d\n", mddev->layout);
4121}
4122
4123static ssize_t
4124layout_store(struct mddev *mddev, const char *buf, size_t len)
4125{
4126 unsigned int n;
4127 int err;
4128
4129 err = kstrtouint(buf, 10, &n);
4130 if (err < 0)
4131 return err;
4132 err = mddev_lock(mddev);
4133 if (err)
4134 return err;
4135
4136 if (mddev->pers) {
4137 if (mddev->pers->check_reshape == NULL)
4138 err = -EBUSY;
4139 else if (mddev->ro)
4140 err = -EROFS;
4141 else {
4142 mddev->new_layout = n;
4143 err = mddev->pers->check_reshape(mddev);
4144 if (err)
4145 mddev->new_layout = mddev->layout;
4146 }
4147 } else {
4148 mddev->new_layout = n;
4149 if (mddev->reshape_position == MaxSector)
4150 mddev->layout = n;
4151 }
4152 mddev_unlock(mddev);
4153 return err ?: len;
4154}
4155static struct md_sysfs_entry md_layout =
4156__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
4157
4158static ssize_t
4159raid_disks_show(struct mddev *mddev, char *page)
4160{
4161 if (mddev->raid_disks == 0)
4162 return 0;
4163 if (mddev->reshape_position != MaxSector &&
4164 mddev->delta_disks != 0)
4165 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
4166 mddev->raid_disks - mddev->delta_disks);
4167 return sprintf(page, "%d\n", mddev->raid_disks);
4168}
4169
4170static int update_raid_disks(struct mddev *mddev, int raid_disks);
4171
4172static ssize_t
4173raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
4174{
4175 unsigned int n;
4176 int err;
4177
4178 err = kstrtouint(buf, 10, &n);
4179 if (err < 0)
4180 return err;
4181
4182 err = mddev_lock(mddev);
4183 if (err)
4184 return err;
4185 if (mddev->pers)
4186 err = update_raid_disks(mddev, n);
4187 else if (mddev->reshape_position != MaxSector) {
4188 struct md_rdev *rdev;
4189 int olddisks = mddev->raid_disks - mddev->delta_disks;
4190
4191 err = -EINVAL;
4192 rdev_for_each(rdev, mddev) {
4193 if (olddisks < n &&
4194 rdev->data_offset < rdev->new_data_offset)
4195 goto out_unlock;
4196 if (olddisks > n &&
4197 rdev->data_offset > rdev->new_data_offset)
4198 goto out_unlock;
4199 }
4200 err = 0;
4201 mddev->delta_disks = n - olddisks;
4202 mddev->raid_disks = n;
4203 mddev->reshape_backwards = (mddev->delta_disks < 0);
4204 } else
4205 mddev->raid_disks = n;
4206out_unlock:
4207 mddev_unlock(mddev);
4208 return err ? err : len;
4209}
4210static struct md_sysfs_entry md_raid_disks =
4211__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
4212
4213static ssize_t
4214uuid_show(struct mddev *mddev, char *page)
4215{
4216 return sprintf(page, "%pU\n", mddev->uuid);
4217}
4218static struct md_sysfs_entry md_uuid =
4219__ATTR(uuid, S_IRUGO, uuid_show, NULL);
4220
4221static ssize_t
4222chunk_size_show(struct mddev *mddev, char *page)
4223{
4224 if (mddev->reshape_position != MaxSector &&
4225 mddev->chunk_sectors != mddev->new_chunk_sectors)
4226 return sprintf(page, "%d (%d)\n",
4227 mddev->new_chunk_sectors << 9,
4228 mddev->chunk_sectors << 9);
4229 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
4230}
4231
4232static ssize_t
4233chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
4234{
4235 unsigned long n;
4236 int err;
4237
4238 err = kstrtoul(buf, 10, &n);
4239 if (err < 0)
4240 return err;
4241
4242 err = mddev_lock(mddev);
4243 if (err)
4244 return err;
4245 if (mddev->pers) {
4246 if (mddev->pers->check_reshape == NULL)
4247 err = -EBUSY;
4248 else if (mddev->ro)
4249 err = -EROFS;
4250 else {
4251 mddev->new_chunk_sectors = n >> 9;
4252 err = mddev->pers->check_reshape(mddev);
4253 if (err)
4254 mddev->new_chunk_sectors = mddev->chunk_sectors;
4255 }
4256 } else {
4257 mddev->new_chunk_sectors = n >> 9;
4258 if (mddev->reshape_position == MaxSector)
4259 mddev->chunk_sectors = n >> 9;
4260 }
4261 mddev_unlock(mddev);
4262 return err ?: len;
4263}
4264static struct md_sysfs_entry md_chunk_size =
4265__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
4266
4267static ssize_t
4268resync_start_show(struct mddev *mddev, char *page)
4269{
4270 if (mddev->recovery_cp == MaxSector)
4271 return sprintf(page, "none\n");
4272 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
4273}
4274
4275static ssize_t
4276resync_start_store(struct mddev *mddev, const char *buf, size_t len)
4277{
4278 unsigned long long n;
4279 int err;
4280
4281 if (cmd_match(buf, "none"))
4282 n = MaxSector;
4283 else {
4284 err = kstrtoull(buf, 10, &n);
4285 if (err < 0)
4286 return err;
4287 if (n != (sector_t)n)
4288 return -EINVAL;
4289 }
4290
4291 err = mddev_lock(mddev);
4292 if (err)
4293 return err;
4294 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4295 err = -EBUSY;
4296
4297 if (!err) {
4298 mddev->recovery_cp = n;
4299 if (mddev->pers)
4300 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
4301 }
4302 mddev_unlock(mddev);
4303 return err ?: len;
4304}
4305static struct md_sysfs_entry md_resync_start =
4306__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
4307 resync_start_show, resync_start_store);
4308
4309 /*
4310  * The array state can be:
4311  *
4312  * clear
4313  *     No devices, no size, no level
4314  *     Equivalent to STOP_ARRAY ioctl
4315  * inactive
4316  *     May have some settings and devices, but the array is not active
4317  *     an inactive array can become active by writing to it
4318  * suspended (not supported yet)
4319  *     All IO requests will block. The array can be reconfigured.
4320  *     Writing this, if accepted, will block until the array is quiescent
4321  * readonly
4322  *     no resync can happen.  no superblocks get written.
4323  *     write requests fail
4324  * read-auto
4325  *     like readonly, but behaves like 'clean' on a write request.
4326  *
4327  * clean - no pending writes, but otherwise active.
4328  *     When written to an inactive array, starts without resync
4329  *     If a write request arrives then
4330  *       if metadata is known, mark 'dirty' and switch to 'active'.
4331  *       if not known, block and switch to write-pending
4332  *     If written to an active array that has pending writes, then fails.
4333  * active
4334  *     fully active: IO and resync can be happening.
4335  *     When written to an inactive array, starts with resync
4336  *
4337  * write-pending
4338  *     clean, but writes are blocked waiting for 'active' to be written.
4339  *
4340  * active-idle
4341  *     like active, but no writes have been seen for a while (safe_mode_delay).
4342  *
4343  * broken
4344  *     The array is failed. It is useful because mounted arrays are not
4345  *     stopped when the array fails, so this state at least alerts the
4346  *     user that something is wrong.
4347  */
4350enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
4351 write_pending, active_idle, broken, bad_word};
4352static char *array_states[] = {
4353 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
4354 "write-pending", "active-idle", "broken", NULL };
4355
4356static int match_word(const char *word, char **list)
4357{
4358 int n;
4359 for (n=0; list[n]; n++)
4360 if (cmd_match(word, list[n]))
4361 break;
4362 return n;
4363}
4364
4365static ssize_t
4366array_state_show(struct mddev *mddev, char *page)
4367{
4368 enum array_state st = inactive;
4369
4370 if (mddev->pers && !test_bit(MD_NOT_READY, &mddev->flags)) {
4371 switch(mddev->ro) {
4372 case 1:
4373 st = readonly;
4374 break;
4375 case 2:
4376 st = read_auto;
4377 break;
4378 case 0:
4379 spin_lock(&mddev->lock);
4380 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
4381 st = write_pending;
4382 else if (mddev->in_sync)
4383 st = clean;
4384 else if (mddev->safemode)
4385 st = active_idle;
4386 else
4387 st = active;
4388 spin_unlock(&mddev->lock);
4389 }
4390
4391 if (test_bit(MD_BROKEN, &mddev->flags) && st == clean)
4392 st = broken;
4393 } else {
4394 if (list_empty(&mddev->disks) &&
4395 mddev->raid_disks == 0 &&
4396 mddev->dev_sectors == 0)
4397 st = clear;
4398 else
4399 st = inactive;
4400 }
4401 return sprintf(page, "%s\n", array_states[st]);
4402}
4403
4404static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
4405static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
4406static int restart_array(struct mddev *mddev);
4407
4408static ssize_t
4409array_state_store(struct mddev *mddev, const char *buf, size_t len)
4410{
4411 int err = 0;
4412 enum array_state st = match_word(buf, array_states);
4413
4414 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
4415 		/* don't take reconfig_mutex when toggling between
4416 		 * clean and active
4417 		 */
4418 spin_lock(&mddev->lock);
4419 if (st == active) {
4420 restart_array(mddev);
4421 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4422 md_wakeup_thread(mddev->thread);
4423 wake_up(&mddev->sb_wait);
4424 } else {
4425 restart_array(mddev);
4426 if (!set_in_sync(mddev))
4427 err = -EBUSY;
4428 }
4429 if (!err)
4430 sysfs_notify_dirent_safe(mddev->sysfs_state);
4431 spin_unlock(&mddev->lock);
4432 return err ?: len;
4433 }
4434 err = mddev_lock(mddev);
4435 if (err)
4436 return err;
4437 err = -EINVAL;
4438 switch(st) {
4439 case bad_word:
4440 break;
4441 case clear:
4442
4443 err = do_md_stop(mddev, 0, NULL);
4444 break;
4445 case inactive:
4446
4447 if (mddev->pers)
4448 err = do_md_stop(mddev, 2, NULL);
4449 else
4450 err = 0;
4451 break;
4452 case suspended:
4453 break;
4454 case readonly:
4455 if (mddev->pers)
4456 err = md_set_readonly(mddev, NULL);
4457 else {
4458 mddev->ro = 1;
4459 set_disk_ro(mddev->gendisk, 1);
4460 err = do_md_run(mddev);
4461 }
4462 break;
4463 case read_auto:
4464 if (mddev->pers) {
4465 if (mddev->ro == 0)
4466 err = md_set_readonly(mddev, NULL);
4467 else if (mddev->ro == 1)
4468 err = restart_array(mddev);
4469 if (err == 0) {
4470 mddev->ro = 2;
4471 set_disk_ro(mddev->gendisk, 0);
4472 }
4473 } else {
4474 mddev->ro = 2;
4475 err = do_md_run(mddev);
4476 }
4477 break;
4478 case clean:
4479 if (mddev->pers) {
4480 err = restart_array(mddev);
4481 if (err)
4482 break;
4483 spin_lock(&mddev->lock);
4484 if (!set_in_sync(mddev))
4485 err = -EBUSY;
4486 spin_unlock(&mddev->lock);
4487 } else
4488 err = -EINVAL;
4489 break;
4490 case active:
4491 if (mddev->pers) {
4492 err = restart_array(mddev);
4493 if (err)
4494 break;
4495 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
4496 wake_up(&mddev->sb_wait);
4497 err = 0;
4498 } else {
4499 mddev->ro = 0;
4500 set_disk_ro(mddev->gendisk, 0);
4501 err = do_md_run(mddev);
4502 }
4503 break;
4504 case write_pending:
4505 case active_idle:
4506 case broken:
4507
4508 break;
4509 }
4510
4511 if (!err) {
4512 if (mddev->hold_active == UNTIL_IOCTL)
4513 mddev->hold_active = 0;
4514 sysfs_notify_dirent_safe(mddev->sysfs_state);
4515 }
4516 mddev_unlock(mddev);
4517 return err ?: len;
4518}
4519static struct md_sysfs_entry md_array_state =
4520__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
4521
4522static ssize_t
4523max_corrected_read_errors_show(struct mddev *mddev, char *page) {
4524 return sprintf(page, "%d\n",
4525 atomic_read(&mddev->max_corr_read_errors));
4526}
4527
4528static ssize_t
4529max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
4530{
4531 unsigned int n;
4532 int rv;
4533
4534 rv = kstrtouint(buf, 10, &n);
4535 if (rv < 0)
4536 return rv;
4537 atomic_set(&mddev->max_corr_read_errors, n);
4538 return len;
4539}
4540
4541static struct md_sysfs_entry max_corr_read_errors =
4542__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
4543 max_corrected_read_errors_store);
4544
4545static ssize_t
4546null_show(struct mddev *mddev, char *page)
4547{
4548 return -EINVAL;
4549}
4550
4551/* wait for any pending rdev deletion work on md_rdev_misc_wq to finish */
4552static void flush_rdev_wq(struct mddev *mddev)
4553{
4554 struct md_rdev *rdev;
4555
4556 rcu_read_lock();
4557 rdev_for_each_rcu(rdev, mddev)
4558 if (work_pending(&rdev->del_work)) {
4559 flush_workqueue(md_rdev_misc_wq);
4560 break;
4561 }
4562 rcu_read_unlock();
4563}
4564
4565static ssize_t
4566new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4567{
4568 /* buf must be %d:%d\n? giving major and minor numbers */
4569 /* The new device is added to the array.
4570 * If the array has a persistent superblock, the new device's
4571 * superblock is loaded and checked against an existing member
4572 * for consistency before it is bound to the array.
4573 * For external or non-persistent arrays it is imported as-is.
4574 */
4575 char *e;
4576 int major = simple_strtoul(buf, &e, 10);
4577 int minor;
4578 dev_t dev;
4579 struct md_rdev *rdev;
4580 int err;
4581
4582 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4583 return -EINVAL;
4584 minor = simple_strtoul(e+1, &e, 10);
4585 if (*e && *e != '\n')
4586 return -EINVAL;
4587 dev = MKDEV(major, minor);
4588 if (major != MAJOR(dev) ||
4589 minor != MINOR(dev))
4590 return -EOVERFLOW;
4591
4592 flush_rdev_wq(mddev);
4593 err = mddev_lock(mddev);
4594 if (err)
4595 return err;
4596 if (mddev->persistent) {
4597 rdev = md_import_device(dev, mddev->major_version,
4598 mddev->minor_version);
4599 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4600 struct md_rdev *rdev0
4601 = list_entry(mddev->disks.next,
4602 struct md_rdev, same_set);
4603 err = super_types[mddev->major_version]
4604 .load_super(rdev, rdev0, mddev->minor_version);
4605 if (err < 0)
4606 goto out;
4607 }
4608 } else if (mddev->external)
4609 rdev = md_import_device(dev, -2, -1);
4610 else
4611 rdev = md_import_device(dev, -1, -1);
4612
4613 if (IS_ERR(rdev)) {
4614 mddev_unlock(mddev);
4615 return PTR_ERR(rdev);
4616 }
4617 err = bind_rdev_to_array(rdev, mddev);
4618 out:
4619 if (err)
4620 export_rdev(rdev);
4621 mddev_unlock(mddev);
4622 if (!err)
4623 md_new_event(mddev);
4624 return err ? err : len;
4625}
4626
4627static struct md_sysfs_entry md_new_device =
4628__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
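/*
 * Usage sketch (illustrative device numbers): new_dev takes "major:minor"
 * of the block device to add, e.g. for a disk at 8:16:
 *   echo 8:16 > /sys/block/md0/md/new_dev
 */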
4629
4630static ssize_t
4631bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4632{
4633 char *end;
4634 unsigned long chunk, end_chunk;
4635 int err;
4636
4637 err = mddev_lock(mddev);
4638 if (err)
4639 return err;
4640 if (!mddev->bitmap)
4641 goto out;
4642
4643 while (*buf) {
4644 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4645 if (buf == end) break;
4646 if (*end == '-') {
4647 buf = end + 1;
4648 end_chunk = simple_strtoul(buf, &end, 0);
4649 if (buf == end) break;
4650 }
4651 if (*end && !isspace(*end)) break;
4652 md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4653 buf = skip_spaces(end);
4654 }
4655 md_bitmap_unplug(mddev->bitmap);
4656out:
4657 mddev_unlock(mddev);
4658 return len;
4659}
4660
4661static struct md_sysfs_entry md_bitmap =
4662__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
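/*
 * Usage sketch (illustrative chunk numbers): bitmap_set_bits accepts a
 * single chunk or a "start-end" range to mark dirty in the write-intent
 * bitmap, so those chunks get resynced:
 *   echo 100-200 > /sys/block/md0/md/bitmap_set_bits
 */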
4663
4664static ssize_t
4665size_show(struct mddev *mddev, char *page)
4666{
4667 return sprintf(page, "%llu\n",
4668 (unsigned long long)mddev->dev_sectors / 2);
4669}
4670
4671static int update_size(struct mddev *mddev, sector_t num_sectors);
4672
4673static ssize_t
4674size_store(struct mddev *mddev, const char *buf, size_t len)
4675{
4676 /* If array is inactive, we can reduce the component size, but
4677 * not increase it (except from 0).
4678 * If array is active, we can try an on-line resize
4679 */
4680 sector_t sectors;
4681 int err = strict_blocks_to_sectors(buf, &sectors);
4682
4683 if (err < 0)
4684 return err;
4685 err = mddev_lock(mddev);
4686 if (err)
4687 return err;
4688 if (mddev->pers) {
4689 err = update_size(mddev, sectors);
4690 if (err == 0)
4691 md_update_sb(mddev, 1);
4692 } else {
4693 if (mddev->dev_sectors == 0 ||
4694 mddev->dev_sectors > sectors)
4695 mddev->dev_sectors = sectors;
4696 else
4697 err = -ENOSPC;
4698 }
4699 mddev_unlock(mddev);
4700 return err ? err : len;
4701}
4702
4703static struct md_sysfs_entry md_size =
4704__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4705
4706/* Metadata version.
4707 * This is one of
4708 *   'none' for arrays with no metadata (good luck...)
4709 *   'external' for arrays with externally managed metadata,
4710 * or N.M for internally known formats
4711 */
4712static ssize_t
4713metadata_show(struct mddev *mddev, char *page)
4714{
4715 if (mddev->persistent)
4716 return sprintf(page, "%d.%d\n",
4717 mddev->major_version, mddev->minor_version);
4718 else if (mddev->external)
4719 return sprintf(page, "external:%s\n", mddev->metadata_type);
4720 else
4721 return sprintf(page, "none\n");
4722}
4723
4724static ssize_t
4725metadata_store(struct mddev *mddev, const char *buf, size_t len)
4726{
4727 int major, minor;
4728 char *e;
4729 int err;
4730
4731 /* Changing the details of 'external' metadata is
4732 * always permitted.  Otherwise there must be
4733 * no devices attached to the array.
4734 */
4735 err = mddev_lock(mddev);
4736 if (err)
4737 return err;
4738 err = -EBUSY;
4739 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4740 ;
4741 else if (!list_empty(&mddev->disks))
4742 goto out_unlock;
4743
4744 err = 0;
4745 if (cmd_match(buf, "none")) {
4746 mddev->persistent = 0;
4747 mddev->external = 0;
4748 mddev->major_version = 0;
4749 mddev->minor_version = 90;
4750 goto out_unlock;
4751 }
4752 if (strncmp(buf, "external:", 9) == 0) {
4753 size_t namelen = len-9;
4754 if (namelen >= sizeof(mddev->metadata_type))
4755 namelen = sizeof(mddev->metadata_type)-1;
4756 strncpy(mddev->metadata_type, buf+9, namelen);
4757 mddev->metadata_type[namelen] = 0;
4758 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4759 mddev->metadata_type[--namelen] = 0;
4760 mddev->persistent = 0;
4761 mddev->external = 1;
4762 mddev->major_version = 0;
4763 mddev->minor_version = 90;
4764 goto out_unlock;
4765 }
4766 major = simple_strtoul(buf, &e, 10);
4767 err = -EINVAL;
4768 if (e==buf || *e != '.')
4769 goto out_unlock;
4770 buf = e+1;
4771 minor = simple_strtoul(buf, &e, 10);
4772 if (e==buf || (*e && *e != '\n') )
4773 goto out_unlock;
4774 err = -ENOENT;
4775 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4776 goto out_unlock;
4777 mddev->major_version = major;
4778 mddev->minor_version = minor;
4779 mddev->persistent = 1;
4780 mddev->external = 0;
4781 err = 0;
4782out_unlock:
4783 mddev_unlock(mddev);
4784 return err ?: len;
4785}
4786
4787static struct md_sysfs_entry md_metadata =
4788__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
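/*
 * Usage sketch for metadata_version (only allowed while the array has no
 * member devices, unless it is already external; "imsm" is just an
 * example external-metadata name):
 *   echo 1.2 > /sys/block/md0/md/metadata_version
 *   echo external:imsm > /sys/block/md0/md/metadata_version
 *   echo none > /sys/block/md0/md/metadata_version
 */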
4789
4790static ssize_t
4791action_show(struct mddev *mddev, char *page)
4792{
4793 char *type = "idle";
4794 unsigned long recovery = mddev->recovery;
4795 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
4796 type = "frozen";
4797 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
4798 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4799 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4800 type = "reshape";
4801 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4802 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4803 type = "resync";
4804 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4805 type = "check";
4806 else
4807 type = "repair";
4808 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4809 type = "recover";
4810 else if (mddev->reshape_position != MaxSector)
4811 type = "reshape";
4812 }
4813 return sprintf(page, "%s\n", type);
4814}
4815
4816static ssize_t
4817action_store(struct mddev *mddev, const char *page, size_t len)
4818{
4819 if (!mddev->pers || !mddev->pers->sync_request)
4820 return -EINVAL;
4821
4822
4823 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4824 if (cmd_match(page, "frozen"))
4825 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4826 else
4827 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4828 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
4829 mddev_lock(mddev) == 0) {
4830 if (work_pending(&mddev->del_work))
4831 flush_workqueue(md_misc_wq);
4832 if (mddev->sync_thread) {
4833 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4834 md_reap_sync_thread(mddev);
4835 }
4836 mddev_unlock(mddev);
4837 }
4838 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4839 return -EBUSY;
4840 else if (cmd_match(page, "resync"))
4841 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4842 else if (cmd_match(page, "recover")) {
4843 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4844 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4845 } else if (cmd_match(page, "reshape")) {
4846 int err;
4847 if (mddev->pers->start_reshape == NULL)
4848 return -EINVAL;
4849 err = mddev_lock(mddev);
4850 if (!err) {
4851 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4852 err = -EBUSY;
4853 else {
4854 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4855 err = mddev->pers->start_reshape(mddev);
4856 }
4857 mddev_unlock(mddev);
4858 }
4859 if (err)
4860 return err;
4861 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
4862 } else {
4863 if (cmd_match(page, "check"))
4864 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4865 else if (!cmd_match(page, "repair"))
4866 return -EINVAL;
4867 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4868 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4869 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4870 }
4871 if (mddev->ro == 2) {
4872 /* A write to sync_action is enough to justify
4873 * canceling read-auto mode
4874 */
4875 mddev->ro = 0;
4876 md_wakeup_thread(mddev->sync_thread);
4877 }
4878 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4879 md_wakeup_thread(mddev->thread);
4880 sysfs_notify_dirent_safe(mddev->sysfs_action);
4881 return len;
4882}
4883
4884static struct md_sysfs_entry md_scan_mode =
4885__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
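/*
 * Usage sketch: a typical scrub is requested and later frozen with
 *   echo check  > /sys/block/md0/md/sync_action
 *   echo frozen > /sys/block/md0/md/sync_action
 * The other words parsed by action_store() above are idle, resync,
 * recover, repair and reshape.
 */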
4886
4887static ssize_t
4888last_sync_action_show(struct mddev *mddev, char *page)
4889{
4890 return sprintf(page, "%s\n", mddev->last_sync_action);
4891}
4892
4893static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4894
4895static ssize_t
4896mismatch_cnt_show(struct mddev *mddev, char *page)
4897{
4898 return sprintf(page, "%llu\n",
4899 (unsigned long long)
4900 atomic64_read(&mddev->resync_mismatches));
4901}
4902
4903static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4904
4905static ssize_t
4906sync_min_show(struct mddev *mddev, char *page)
4907{
4908 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4909 mddev->sync_speed_min ? "local": "system");
4910}
4911
4912static ssize_t
4913sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4914{
4915 unsigned int min;
4916 int rv;
4917
4918 if (strncmp(buf, "system", 6)==0) {
4919 min = 0;
4920 } else {
4921 rv = kstrtouint(buf, 10, &min);
4922 if (rv < 0)
4923 return rv;
4924 if (min == 0)
4925 return -EINVAL;
4926 }
4927 mddev->sync_speed_min = min;
4928 return len;
4929}
4930
4931static struct md_sysfs_entry md_sync_min =
4932__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4933
4934static ssize_t
4935sync_max_show(struct mddev *mddev, char *page)
4936{
4937 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4938 mddev->sync_speed_max ? "local": "system");
4939}
4940
4941static ssize_t
4942sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4943{
4944 unsigned int max;
4945 int rv;
4946
4947 if (strncmp(buf, "system", 6)==0) {
4948 max = 0;
4949 } else {
4950 rv = kstrtouint(buf, 10, &max);
4951 if (rv < 0)
4952 return rv;
4953 if (max == 0)
4954 return -EINVAL;
4955 }
4956 mddev->sync_speed_max = max;
4957 return len;
4958}
4959
4960static struct md_sysfs_entry md_sync_max =
4961__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
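/*
 * Usage sketch: per-array resync speed limits in KiB/sec, falling back to
 * the system-wide defaults when set to "system" (see speed_min()/speed_max()):
 *   echo 50000  > /sys/block/md0/md/sync_speed_max
 *   echo system > /sys/block/md0/md/sync_speed_max
 */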
4962
4963static ssize_t
4964degraded_show(struct mddev *mddev, char *page)
4965{
4966 return sprintf(page, "%d\n", mddev->degraded);
4967}
4968static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4969
4970static ssize_t
4971sync_force_parallel_show(struct mddev *mddev, char *page)
4972{
4973 return sprintf(page, "%d\n", mddev->parallel_resync);
4974}
4975
4976static ssize_t
4977sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4978{
4979 long n;
4980
4981 if (kstrtol(buf, 10, &n))
4982 return -EINVAL;
4983
4984 if (n != 0 && n != 1)
4985 return -EINVAL;
4986
4987 mddev->parallel_resync = n;
4988
4989 if (mddev->sync_thread)
4990 wake_up(&resync_wait);
4991
4992 return len;
4993}
4994
4995
4996static struct md_sysfs_entry md_sync_force_parallel =
4997__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4998 sync_force_parallel_show, sync_force_parallel_store);
4999
5000static ssize_t
5001sync_speed_show(struct mddev *mddev, char *page)
5002{
5003 unsigned long resync, dt, db;
5004 if (mddev->curr_resync == 0)
5005 return sprintf(page, "none\n");
5006 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
5007 dt = (jiffies - mddev->resync_mark) / HZ;
5008 if (!dt) dt++;
5009 db = resync - mddev->resync_mark_cnt;
5010 return sprintf(page, "%lu\n", db/dt/2);
5011}
5012
5013static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
5014
5015static ssize_t
5016sync_completed_show(struct mddev *mddev, char *page)
5017{
5018 unsigned long long max_sectors, resync;
5019
5020 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5021 return sprintf(page, "none\n");
5022
5023 if (mddev->curr_resync == 1 ||
5024 mddev->curr_resync == 2)
5025 return sprintf(page, "delayed\n");
5026
5027 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
5028 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
5029 max_sectors = mddev->resync_max_sectors;
5030 else
5031 max_sectors = mddev->dev_sectors;
5032
5033 resync = mddev->curr_resync_completed;
5034 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
5035}
5036
5037static struct md_sysfs_entry md_sync_completed =
5038 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
5039
5040static ssize_t
5041min_sync_show(struct mddev *mddev, char *page)
5042{
5043 return sprintf(page, "%llu\n",
5044 (unsigned long long)mddev->resync_min);
5045}
5046static ssize_t
5047min_sync_store(struct mddev *mddev, const char *buf, size_t len)
5048{
5049 unsigned long long min;
5050 int err;
5051
5052 if (kstrtoull(buf, 10, &min))
5053 return -EINVAL;
5054
5055 spin_lock(&mddev->lock);
5056 err = -EINVAL;
5057 if (min > mddev->resync_max)
5058 goto out_unlock;
5059
5060 err = -EBUSY;
5061 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5062 goto out_unlock;
5063
5064 /* Round down to multiple of 4K for safety */
5065 mddev->resync_min = round_down(min, 8);
5066 err = 0;
5067
5068out_unlock:
5069 spin_unlock(&mddev->lock);
5070 return err ?: len;
5071}
5072
5073static struct md_sysfs_entry md_min_sync =
5074__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
5075
5076static ssize_t
5077max_sync_show(struct mddev *mddev, char *page)
5078{
5079 if (mddev->resync_max == MaxSector)
5080 return sprintf(page, "max\n");
5081 else
5082 return sprintf(page, "%llu\n",
5083 (unsigned long long)mddev->resync_max);
5084}
5085static ssize_t
5086max_sync_store(struct mddev *mddev, const char *buf, size_t len)
5087{
5088 int err;
5089 spin_lock(&mddev->lock);
5090 if (strncmp(buf, "max", 3) == 0)
5091 mddev->resync_max = MaxSector;
5092 else {
5093 unsigned long long max;
5094 int chunk;
5095
5096 err = -EINVAL;
5097 if (kstrtoull(buf, 10, &max))
5098 goto out_unlock;
5099 if (max < mddev->resync_min)
5100 goto out_unlock;
5101
5102 err = -EBUSY;
5103 if (max < mddev->resync_max &&
5104 mddev->ro == 0 &&
5105 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5106 goto out_unlock;
5107
5108 /* Must be a multiple of chunk_size */
5109 chunk = mddev->chunk_sectors;
5110 if (chunk) {
5111 sector_t temp = max;
5112
5113 err = -EINVAL;
5114 if (sector_div(temp, chunk))
5115 goto out_unlock;
5116 }
5117 mddev->resync_max = max;
5118 }
5119 wake_up(&mddev->recovery_wait);
5120 err = 0;
5121out_unlock:
5122 spin_unlock(&mddev->lock);
5123 return err ?: len;
5124}
5125
5126static struct md_sysfs_entry md_max_sync =
5127__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
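/*
 * Usage sketch: sync_min/sync_max bound the resync window in 512-byte
 * sectors; e.g. limit a check to the first 10 GiB of each member
 * (20971520 sectors), then remove the limit:
 *   echo 20971520 > /sys/block/md0/md/sync_max
 *   echo max > /sys/block/md0/md/sync_max
 */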
5128
5129static ssize_t
5130suspend_lo_show(struct mddev *mddev, char *page)
5131{
5132 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
5133}
5134
5135static ssize_t
5136suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
5137{
5138 unsigned long long new;
5139 int err;
5140
5141 err = kstrtoull(buf, 10, &new);
5142 if (err < 0)
5143 return err;
5144 if (new != (sector_t)new)
5145 return -EINVAL;
5146
5147 err = mddev_lock(mddev);
5148 if (err)
5149 return err;
5150 err = -EINVAL;
5151 if (mddev->pers == NULL ||
5152 mddev->pers->quiesce == NULL)
5153 goto unlock;
5154 mddev_suspend(mddev);
5155 mddev->suspend_lo = new;
5156 mddev_resume(mddev);
5157
5158 err = 0;
5159unlock:
5160 mddev_unlock(mddev);
5161 return err ?: len;
5162}
5163static struct md_sysfs_entry md_suspend_lo =
5164__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
5165
5166static ssize_t
5167suspend_hi_show(struct mddev *mddev, char *page)
5168{
5169 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
5170}
5171
5172static ssize_t
5173suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
5174{
5175 unsigned long long new;
5176 int err;
5177
5178 err = kstrtoull(buf, 10, &new);
5179 if (err < 0)
5180 return err;
5181 if (new != (sector_t)new)
5182 return -EINVAL;
5183
5184 err = mddev_lock(mddev);
5185 if (err)
5186 return err;
5187 err = -EINVAL;
5188 if (mddev->pers == NULL)
5189 goto unlock;
5190
5191 mddev_suspend(mddev);
5192 mddev->suspend_hi = new;
5193 mddev_resume(mddev);
5194
5195 err = 0;
5196unlock:
5197 mddev_unlock(mddev);
5198 return err ?: len;
5199}
5200static struct md_sysfs_entry md_suspend_hi =
5201__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
5202
5203static ssize_t
5204reshape_position_show(struct mddev *mddev, char *page)
5205{
5206 if (mddev->reshape_position != MaxSector)
5207 return sprintf(page, "%llu\n",
5208 (unsigned long long)mddev->reshape_position);
5209 strcpy(page, "none\n");
5210 return 5;
5211}
5212
5213static ssize_t
5214reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
5215{
5216 struct md_rdev *rdev;
5217 unsigned long long new;
5218 int err;
5219
5220 err = kstrtoull(buf, 10, &new);
5221 if (err < 0)
5222 return err;
5223 if (new != (sector_t)new)
5224 return -EINVAL;
5225 err = mddev_lock(mddev);
5226 if (err)
5227 return err;
5228 err = -EBUSY;
5229 if (mddev->pers)
5230 goto unlock;
5231 mddev->reshape_position = new;
5232 mddev->delta_disks = 0;
5233 mddev->reshape_backwards = 0;
5234 mddev->new_level = mddev->level;
5235 mddev->new_layout = mddev->layout;
5236 mddev->new_chunk_sectors = mddev->chunk_sectors;
5237 rdev_for_each(rdev, mddev)
5238 rdev->new_data_offset = rdev->data_offset;
5239 err = 0;
5240unlock:
5241 mddev_unlock(mddev);
5242 return err ?: len;
5243}
5244
5245static struct md_sysfs_entry md_reshape_position =
5246__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
5247 reshape_position_store);
5248
5249static ssize_t
5250reshape_direction_show(struct mddev *mddev, char *page)
5251{
5252 return sprintf(page, "%s\n",
5253 mddev->reshape_backwards ? "backwards" : "forwards");
5254}
5255
5256static ssize_t
5257reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
5258{
5259 int backwards = 0;
5260 int err;
5261
5262 if (cmd_match(buf, "forwards"))
5263 backwards = 0;
5264 else if (cmd_match(buf, "backwards"))
5265 backwards = 1;
5266 else
5267 return -EINVAL;
5268 if (mddev->reshape_backwards == backwards)
5269 return len;
5270
5271 err = mddev_lock(mddev);
5272 if (err)
5273 return err;
5274
5275 if (mddev->delta_disks)
5276 err = -EBUSY;
5277 else if (mddev->persistent &&
5278 mddev->major_version == 0)
5279 err = -EINVAL;
5280 else
5281 mddev->reshape_backwards = backwards;
5282 mddev_unlock(mddev);
5283 return err ?: len;
5284}
5285
5286static struct md_sysfs_entry md_reshape_direction =
5287__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
5288 reshape_direction_store);
5289
5290static ssize_t
5291array_size_show(struct mddev *mddev, char *page)
5292{
5293 if (mddev->external_size)
5294 return sprintf(page, "%llu\n",
5295 (unsigned long long)mddev->array_sectors/2);
5296 else
5297 return sprintf(page, "default\n");
5298}
5299
5300static ssize_t
5301array_size_store(struct mddev *mddev, const char *buf, size_t len)
5302{
5303 sector_t sectors;
5304 int err;
5305
5306 err = mddev_lock(mddev);
5307 if (err)
5308 return err;
5309
5310 /* cluster raid doesn't support change array_sectors */
5311 if (mddev_is_clustered(mddev)) {
5312 mddev_unlock(mddev);
5313 return -EINVAL;
5314 }
5315
5316 if (strncmp(buf, "default", 7) == 0) {
5317 if (mddev->pers)
5318 sectors = mddev->pers->size(mddev, 0, 0);
5319 else
5320 sectors = mddev->array_sectors;
5321
5322 mddev->external_size = 0;
5323 } else {
5324 if (strict_blocks_to_sectors(buf, &sectors) < 0)
5325 err = -EINVAL;
5326 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
5327 err = -E2BIG;
5328 else
5329 mddev->external_size = 1;
5330 }
5331
5332 if (!err) {
5333 mddev->array_sectors = sectors;
5334 if (mddev->pers)
5335 set_capacity_and_notify(mddev->gendisk,
5336 mddev->array_sectors);
5337 }
5338 mddev_unlock(mddev);
5339 return err ?: len;
5340}
5341
5342static struct md_sysfs_entry md_array_size =
5343__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
5344 array_size_store);
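/*
 * Usage sketch: array_size takes a size in KiB (parsed by
 * strict_blocks_to_sectors()) or "default" to return to the size the
 * personality computes:
 *   echo 1073741824 > /sys/block/md0/md/array_size   # clamp to 1 TiB
 *   echo default > /sys/block/md0/md/array_size
 */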
5345
5346static ssize_t
5347consistency_policy_show(struct mddev *mddev, char *page)
5348{
5349 int ret;
5350
5351 if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
5352 ret = sprintf(page, "journal\n");
5353 } else if (test_bit(MD_HAS_PPL, &mddev->flags)) {
5354 ret = sprintf(page, "ppl\n");
5355 } else if (mddev->bitmap) {
5356 ret = sprintf(page, "bitmap\n");
5357 } else if (mddev->pers) {
5358 if (mddev->pers->sync_request)
5359 ret = sprintf(page, "resync\n");
5360 else
5361 ret = sprintf(page, "none\n");
5362 } else {
5363 ret = sprintf(page, "unknown\n");
5364 }
5365
5366 return ret;
5367}
5368
5369static ssize_t
5370consistency_policy_store(struct mddev *mddev, const char *buf, size_t len)
5371{
5372 int err = 0;
5373
5374 if (mddev->pers) {
5375 if (mddev->pers->change_consistency_policy)
5376 err = mddev->pers->change_consistency_policy(mddev, buf);
5377 else
5378 err = -EBUSY;
5379 } else if (mddev->external && strncmp(buf, "ppl", 3) == 0) {
5380 set_bit(MD_HAS_PPL, &mddev->flags);
5381 } else {
5382 err = -EINVAL;
5383 }
5384
5385 return err ? err : len;
5386}
5387
5388static struct md_sysfs_entry md_consistency_policy =
5389__ATTR(consistency_policy, S_IRUGO | S_IWUSR, consistency_policy_show,
5390 consistency_policy_store);
5391
5392static ssize_t fail_last_dev_show(struct mddev *mddev, char *page)
5393{
5394 return sprintf(page, "%d\n", mddev->fail_last_dev);
5395}
5396
5397/*
5398 * Setting fail_last_dev to true to allow last device to be forcibly removed
5399 * from RAID1/RAID10.
5400 */
5401static ssize_t
5402fail_last_dev_store(struct mddev *mddev, const char *buf, size_t len)
5403{
5404 int ret;
5405 bool value;
5406
5407 ret = kstrtobool(buf, &value);
5408 if (ret)
5409 return ret;
5410
5411 if (value != mddev->fail_last_dev)
5412 mddev->fail_last_dev = value;
5413
5414 return len;
5415}
5416static struct md_sysfs_entry md_fail_last_dev =
5417__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
5418 fail_last_dev_store);
5419
5420static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
5421{
5422 if (mddev->pers == NULL || (mddev->pers->level != 1))
5423 return sprintf(page, "n/a\n");
5424 else
5425 return sprintf(page, "%d\n", mddev->serialize_policy);
5426}
5427
5428/*
5429 * Setting serialize_policy to true to enforce write IO is not reordered
5430 * for raid1.
5431 */
5432static ssize_t
5433serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
5434{
5435 int err;
5436 bool value;
5437
5438 err = kstrtobool(buf, &value);
5439 if (err)
5440 return err;
5441
5442 if (value == mddev->serialize_policy)
5443 return len;
5444
5445 err = mddev_lock(mddev);
5446 if (err)
5447 return err;
5448 if (mddev->pers == NULL || (mddev->pers->level != 1)) {
5449 pr_err("md: serialize_policy is only effective for raid1\n");
5450 err = -EINVAL;
5451 goto unlock;
5452 }
5453
5454 mddev_suspend(mddev);
5455 if (value)
5456 mddev_create_serial_pool(mddev, NULL, true);
5457 else
5458 mddev_destroy_serial_pool(mddev, NULL, true);
5459 mddev->serialize_policy = value;
5460 mddev_resume(mddev);
5461unlock:
5462 mddev_unlock(mddev);
5463 return err ?: len;
5464}
5465
5466static struct md_sysfs_entry md_serialize_policy =
5467__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
5468 serialize_policy_store);
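/*
 * Usage sketch: serialize_policy only applies to raid1 (see the level
 * check in serialize_policy_store() above):
 *   echo 1 > /sys/block/md0/md/serialize_policy   # enable write serialization
 *   echo 0 > /sys/block/md0/md/serialize_policy
 */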
5469
5470
5471static struct attribute *md_default_attrs[] = {
5472 &md_level.attr,
5473 &md_layout.attr,
5474 &md_raid_disks.attr,
5475 &md_uuid.attr,
5476 &md_chunk_size.attr,
5477 &md_size.attr,
5478 &md_resync_start.attr,
5479 &md_metadata.attr,
5480 &md_new_device.attr,
5481 &md_safe_delay.attr,
5482 &md_array_state.attr,
5483 &md_reshape_position.attr,
5484 &md_reshape_direction.attr,
5485 &md_array_size.attr,
5486 &max_corr_read_errors.attr,
5487 &md_consistency_policy.attr,
5488 &md_fail_last_dev.attr,
5489 &md_serialize_policy.attr,
5490 NULL,
5491};
5492
5493static struct attribute *md_redundancy_attrs[] = {
5494 &md_scan_mode.attr,
5495 &md_last_scan_mode.attr,
5496 &md_mismatches.attr,
5497 &md_sync_min.attr,
5498 &md_sync_max.attr,
5499 &md_sync_speed.attr,
5500 &md_sync_force_parallel.attr,
5501 &md_sync_completed.attr,
5502 &md_min_sync.attr,
5503 &md_max_sync.attr,
5504 &md_suspend_lo.attr,
5505 &md_suspend_hi.attr,
5506 &md_bitmap.attr,
5507 &md_degraded.attr,
5508 NULL,
5509};
5510static const struct attribute_group md_redundancy_group = {
5511 .name = NULL,
5512 .attrs = md_redundancy_attrs,
5513};
5514
5515static ssize_t
5516md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
5517{
5518 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5519 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5520 ssize_t rv;
5521
5522 if (!entry->show)
5523 return -EIO;
5524 spin_lock(&all_mddevs_lock);
5525 if (list_empty(&mddev->all_mddevs)) {
5526 spin_unlock(&all_mddevs_lock);
5527 return -EBUSY;
5528 }
5529 mddev_get(mddev);
5530 spin_unlock(&all_mddevs_lock);
5531
5532 rv = entry->show(mddev, page);
5533 mddev_put(mddev);
5534 return rv;
5535}
5536
5537static ssize_t
5538md_attr_store(struct kobject *kobj, struct attribute *attr,
5539 const char *page, size_t length)
5540{
5541 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
5542 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
5543 ssize_t rv;
5544
5545 if (!entry->store)
5546 return -EIO;
5547 if (!capable(CAP_SYS_ADMIN))
5548 return -EACCES;
5549 spin_lock(&all_mddevs_lock);
5550 if (list_empty(&mddev->all_mddevs)) {
5551 spin_unlock(&all_mddevs_lock);
5552 return -EBUSY;
5553 }
5554 mddev_get(mddev);
5555 spin_unlock(&all_mddevs_lock);
5556 rv = entry->store(mddev, page, length);
5557 mddev_put(mddev);
5558 return rv;
5559}
5560
5561static void md_free(struct kobject *ko)
5562{
5563 struct mddev *mddev = container_of(ko, struct mddev, kobj);
5564
5565 if (mddev->sysfs_state)
5566 sysfs_put(mddev->sysfs_state);
5567 if (mddev->sysfs_level)
5568 sysfs_put(mddev->sysfs_level);
5569
5570 if (mddev->gendisk) {
5571 del_gendisk(mddev->gendisk);
5572 blk_cleanup_disk(mddev->gendisk);
5573 }
5574 percpu_ref_exit(&mddev->writes_pending);
5575
5576 bioset_exit(&mddev->bio_set);
5577 bioset_exit(&mddev->sync_set);
5578 if (mddev->level != 1 && mddev->level != 10)
5579 bioset_exit(&mddev->io_acct_set);
5580 kfree(mddev);
5581}
5582
5583static const struct sysfs_ops md_sysfs_ops = {
5584 .show = md_attr_show,
5585 .store = md_attr_store,
5586};
5587static struct kobj_type md_ktype = {
5588 .release = md_free,
5589 .sysfs_ops = &md_sysfs_ops,
5590 .default_attrs = md_default_attrs,
5591};
5592
5593int mdp_major = 0;
5594
5595static void mddev_delayed_delete(struct work_struct *ws)
5596{
5597 struct mddev *mddev = container_of(ws, struct mddev, del_work);
5598
5599 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
5600 kobject_del(&mddev->kobj);
5601 kobject_put(&mddev->kobj);
5602}
5603
5604static void no_op(struct percpu_ref *r) {}
5605
5606int mddev_init_writes_pending(struct mddev *mddev)
5607{
5608 if (mddev->writes_pending.percpu_count_ptr)
5609 return 0;
5610 if (percpu_ref_init(&mddev->writes_pending, no_op,
5611 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
5612 return -ENOMEM;
5613
5614 percpu_ref_put(&mddev->writes_pending);
5615 return 0;
5616}
5617EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
5618
5619static int md_alloc(dev_t dev, char *name)
5620{
5621 /*
5622 * If dev is zero, name is the name of a device to allocate with
5623 * an arbitrary minor number.  It will be "md_???"
5624 * If dev is non-zero it must be a device number with a MAJOR of
5625 * MD_MAJOR or mdp_major.  In this case, if "name" is NULL, then
5626 * the device is being created by opening a node in /dev.
5627 * If "name" is not NULL, the device is being created by
5628 * writing to /sys/module/md_mod/parameters/new_array.
5629 */
5630 static DEFINE_MUTEX(disks_mutex);
5631 struct mddev *mddev;
5632 struct gendisk *disk;
5633 int partitioned;
5634 int shift;
5635 int unit;
5636 int error;
5637
5638 /*
5639 * Wait for any previous instance of this device to be completely
5640 * removed (mddev_delayed_delete).
5641 */
5642 flush_workqueue(md_misc_wq);
5643
5644 mutex_lock(&disks_mutex);
5645 mddev = mddev_alloc(dev);
5646 if (IS_ERR(mddev)) {
5647 mutex_unlock(&disks_mutex);
5648 return PTR_ERR(mddev);
5649 }
5650
5651 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
5652 shift = partitioned ? MdpMinorShift : 0;
5653 unit = MINOR(mddev->unit) >> shift;
5654
5655 if (name && !dev) {
5656 /* Need to ensure that 'name' is not a duplicate.
5657 */
5658 struct mddev *mddev2;
5659 spin_lock(&all_mddevs_lock);
5660
5661 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
5662 if (mddev2->gendisk &&
5663 strcmp(mddev2->gendisk->disk_name, name) == 0) {
5664 spin_unlock(&all_mddevs_lock);
5665 error = -EEXIST;
5666 goto abort;
5667 }
5668 spin_unlock(&all_mddevs_lock);
5669 }
5670 if (name && dev)
5671 /* An array created with an explicit name and device number
5672 * (via new_array) stays allocated until explicitly stopped.
5673 */
5674 mddev->hold_active = UNTIL_STOP;
5675
5676 error = -ENOMEM;
5677 disk = blk_alloc_disk(NUMA_NO_NODE);
5678 if (!disk)
5679 goto abort;
5680
5681 disk->major = MAJOR(mddev->unit);
5682 disk->first_minor = unit << shift;
5683 disk->minors = 1 << shift;
5684 if (name)
5685 strcpy(disk->disk_name, name);
5686 else if (partitioned)
5687 sprintf(disk->disk_name, "md_d%d", unit);
5688 else
5689 sprintf(disk->disk_name, "md%d", unit);
5690 disk->fops = &md_fops;
5691 disk->private_data = mddev;
5692
5693 mddev->queue = disk->queue;
5694 blk_set_stacking_limits(&mddev->queue->limits);
5695 blk_queue_write_cache(mddev->queue, true, true);
5696 /* Allow extended partitions.  This makes the
5697 * 'mdp' device redundant, but we can't really
5698 * remove it now.
5699 */
5700 disk->flags |= GENHD_FL_EXT_DEVT;
5701 disk->events |= DISK_EVENT_MEDIA_CHANGE;
5702 mddev->gendisk = disk;
5703 add_disk(disk);
5704
5705 error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md");
5706 if (error) {
5707 /* This isn't possible, but as kobject_init_and_add is marked
5708 * __must_check, we must do something with the result
5709 */
5710 pr_debug("md: cannot register %s/md - name in use\n",
5711 disk->disk_name);
5712 error = 0;
5713 }
5714 if (mddev->kobj.sd &&
5715 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
5716 pr_debug("pointless warning\n");
5717 abort:
5718 mutex_unlock(&disks_mutex);
5719 if (!error && mddev->kobj.sd) {
5720 kobject_uevent(&mddev->kobj, KOBJ_ADD);
5721 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
5722 mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
5723 }
5724 mddev_put(mddev);
5725 return error;
5726}
5727
5728static void md_probe(dev_t dev)
5729{
5730 if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
5731 return;
5732 if (create_on_open)
5733 md_alloc(dev, NULL);
5734}
5735
5736static int add_named_array(const char *val, const struct kernel_param *kp)
5737{
5738 /*
5739 * val must be "md_*" or "mdNNN".
5740 * For "md_*" we allocate an array with a large free minor number, and
5741 * set the name to val.  val must not already be an active name.
5742 * For "mdNNN" we allocate an array with the minor number NNN
5743 * which must not already be in use.
5744 */
5745 int len = strlen(val);
5746 char buf[DISK_NAME_LEN];
5747 unsigned long devnum;
5748
5749 while (len && val[len-1] == '\n')
5750 len--;
5751 if (len >= DISK_NAME_LEN)
5752 return -E2BIG;
5753 strlcpy(buf, val, len+1);
5754 if (strncmp(buf, "md_", 3) == 0)
5755 return md_alloc(0, buf);
5756 if (strncmp(buf, "md", 2) == 0 &&
5757 isdigit(buf[2]) &&
5758 kstrtoul(buf+2, 10, &devnum) == 0 &&
5759 devnum <= MINORMASK)
5760 return md_alloc(MKDEV(MD_MAJOR, devnum), NULL);
5761
5762 return -EINVAL;
5763}
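/*
 * Usage sketch: add_named_array() is wired up elsewhere in this file as
 * the "new_array" module parameter, so arrays can be pre-created with:
 *   echo md_home > /sys/module/md_mod/parameters/new_array   # named array
 *   echo md127 > /sys/module/md_mod/parameters/new_array     # numbered array
 */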
5764
5765static void md_safemode_timeout(struct timer_list *t)
5766{
5767 struct mddev *mddev = from_timer(mddev, t, safemode_timer);
5768
5769 mddev->safemode = 1;
5770 if (mddev->external)
5771 sysfs_notify_dirent_safe(mddev->sysfs_state);
5772
5773 md_wakeup_thread(mddev->thread);
5774}
5775
5776static int start_dirty_degraded;
5777
5778int md_run(struct mddev *mddev)
5779{
5780 int err;
5781 struct md_rdev *rdev;
5782 struct md_personality *pers;
5783
5784 if (list_empty(&mddev->disks))
5785 /* cannot run an array with no devices.. */
5786 return -EINVAL;
5787
5788 if (mddev->pers)
5789 return -EBUSY;
5790
5791 if (mddev->sysfs_active)
5792 return -EBUSY;
5793
5794 /*
5795 * Analyze all RAID superblock(s)
5796 */
5797 if (!mddev->raid_disks) {
5798 if (!mddev->persistent)
5799 return -EINVAL;
5800 err = analyze_sbs(mddev);
5801 if (err)
5802 return -EINVAL;
5803 }
5804
5805 if (mddev->level != LEVEL_NONE)
5806 request_module("md-level-%d", mddev->level);
5807 else if (mddev->clevel[0])
5808 request_module("md-%s", mddev->clevel);
5809
5810 /*
5811 * Drop all container device buffers, from now on
5812 * the only valid external interface is through the md
5813 * device.
5814 */
5815 mddev->has_superblocks = false;
5816 rdev_for_each(rdev, mddev) {
5817 if (test_bit(Faulty, &rdev->flags))
5818 continue;
5819 sync_blockdev(rdev->bdev);
5820 invalidate_bdev(rdev->bdev);
5821 if (mddev->ro != 1 && rdev_read_only(rdev)) {
5822 mddev->ro = 1;
5823 if (mddev->gendisk)
5824 set_disk_ro(mddev->gendisk, 1);
5825 }
5826
5827 if (rdev->sb_page)
5828 mddev->has_superblocks = true;
5829
5830 /* perform some consistency tests on the device.
5831 * We don't want the data to overlap the metadata,
5832 * Internal Bitmap issues have been handled elsewhere.
5833 */
5834 if (rdev->meta_bdev) {
5835 ;
5836 } else if (rdev->data_offset < rdev->sb_start) {
5837 if (mddev->dev_sectors &&
5838 rdev->data_offset + mddev->dev_sectors
5839 > rdev->sb_start) {
5840 pr_warn("md: %s: data overlaps metadata\n",
5841 mdname(mddev));
5842 return -EINVAL;
5843 }
5844 } else {
5845 if (rdev->sb_start + rdev->sb_size/512
5846 > rdev->data_offset) {
5847 pr_warn("md: %s: metadata overlaps data\n",
5848 mdname(mddev));
5849 return -EINVAL;
5850 }
5851 }
5852 sysfs_notify_dirent_safe(rdev->sysfs_state);
5853 }
5854
5855 if (!bioset_initialized(&mddev->bio_set)) {
5856 err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5857 if (err)
5858 return err;
5859 }
5860 if (!bioset_initialized(&mddev->sync_set)) {
5861 err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5862 if (err)
5863 goto exit_bio_set;
5864 }
5865 if (mddev->level != 1 && mddev->level != 10 &&
5866 !bioset_initialized(&mddev->io_acct_set)) {
5867 err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
5868 offsetof(struct md_io_acct, bio_clone), 0);
5869 if (err)
5870 goto exit_sync_set;
5871 }
5872
5873 spin_lock(&pers_lock);
5874 pers = find_pers(mddev->level, mddev->clevel);
5875 if (!pers || !try_module_get(pers->owner)) {
5876 spin_unlock(&pers_lock);
5877 if (mddev->level != LEVEL_NONE)
5878 pr_warn("md: personality for level %d is not loaded!\n",
5879 mddev->level);
5880 else
5881 pr_warn("md: personality for level %s is not loaded!\n",
5882 mddev->clevel);
5883 err = -EINVAL;
5884 goto abort;
5885 }
5886 spin_unlock(&pers_lock);
5887 if (mddev->level != pers->level) {
5888 mddev->level = pers->level;
5889 mddev->new_level = pers->level;
5890 }
5891 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5892
5893 if (mddev->reshape_position != MaxSector &&
5894 pers->start_reshape == NULL) {
5895 /* This personality cannot handle reshaping... */
5896 module_put(pers->owner);
5897 err = -EINVAL;
5898 goto abort;
5899 }
5900
5901 if (pers->sync_request) {
5902 /* Warn if this is a potentially silly
5903 * configuration.
5904 */
5905 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5906 struct md_rdev *rdev2;
5907 int warned = 0;
5908
5909 rdev_for_each(rdev, mddev)
5910 rdev_for_each(rdev2, mddev) {
5911 if (rdev < rdev2 &&
5912 rdev->bdev->bd_disk ==
5913 rdev2->bdev->bd_disk) {
5914 pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
5915 mdname(mddev),
5916 bdevname(rdev->bdev,b),
5917 bdevname(rdev2->bdev,b2));
5918 warned = 1;
5919 }
5920 }
5921
5922 if (warned)
5923 pr_warn("True protection against single-disk failure might be compromised.\n");
5924 }
5925
5926 mddev->recovery = 0;
5927
5928 mddev->resync_max_sectors = mddev->dev_sectors;
5929
5930 mddev->ok_start_degraded = start_dirty_degraded;
5931
5932 if (start_readonly && mddev->ro == 0)
5933 mddev->ro = 2;
5934
5935 err = pers->run(mddev);
5936 if (err)
5937 pr_warn("md: pers->run() failed ...\n");
5938 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
5939 WARN_ONCE(!mddev->external_size,
5940 "%s: default size too small, but 'external_size' not in effect?\n",
5941 __func__);
5942 pr_warn("md: invalid array_size %llu > default size %llu\n",
5943 (unsigned long long)mddev->array_sectors / 2,
5944 (unsigned long long)pers->size(mddev, 0, 0) / 2);
5945 err = -EINVAL;
5946 }
5947 if (err == 0 && pers->sync_request &&
5948 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5949 struct bitmap *bitmap;
5950
5951 bitmap = md_bitmap_create(mddev, -1);
5952 if (IS_ERR(bitmap)) {
5953 err = PTR_ERR(bitmap);
5954 pr_warn("%s: failed to create bitmap (%d)\n",
5955 mdname(mddev), err);
5956 } else
5957 mddev->bitmap = bitmap;
5958
5959 }
5960 if (err)
5961 goto bitmap_abort;
5962
5963 if (mddev->bitmap_info.max_write_behind > 0) {
5964 bool create_pool = false;
5965
5966 rdev_for_each(rdev, mddev) {
5967 if (test_bit(WriteMostly, &rdev->flags) &&
5968 rdev_init_serial(rdev))
5969 create_pool = true;
5970 }
5971 if (create_pool && mddev->serial_info_pool == NULL) {
5972 mddev->serial_info_pool =
5973 mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
5974 sizeof(struct serial_info));
5975 if (!mddev->serial_info_pool) {
5976 err = -ENOMEM;
5977 goto bitmap_abort;
5978 }
5979 }
5980 }
5981
5982 if (mddev->queue) {
5983 bool nonrot = true;
5984
5985 rdev_for_each(rdev, mddev) {
5986 if (rdev->raid_disk >= 0 &&
5987 !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
5988 nonrot = false;
5989 break;
5990 }
5991 }
5992 if (mddev->degraded)
5993 nonrot = false;
5994 if (nonrot)
5995 blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
5996 else
5997 blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
5998 blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
5999 }
6000 if (pers->sync_request) {
6001 if (mddev->kobj.sd &&
6002 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
6003 pr_warn("md: cannot register extra attributes for %s\n",
6004 mdname(mddev));
6005 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
6006 mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
6007 mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
6008 } else if (mddev->ro == 2)
6009 mddev->ro = 0;
6010
6011 atomic_set(&mddev->max_corr_read_errors,
6012 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
6013 mddev->safemode = 0;
6014 if (mddev_is_clustered(mddev))
6015 mddev->safemode_delay = 0;
6016 else
6017 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
6018 mddev->in_sync = 1;
6019 smp_wmb();
6020 spin_lock(&mddev->lock);
6021 mddev->pers = pers;
6022 spin_unlock(&mddev->lock);
6023 rdev_for_each(rdev, mddev)
6024 if (rdev->raid_disk >= 0)
6025 sysfs_link_rdev(mddev, rdev);
6026
6027 if (mddev->degraded && !mddev->ro)
6028 /* This ensures that recovering status is reported immediately
6029 * via sysfs - until a lack of spares is confirmed.
6030 */
6031 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6032 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6033
6034 if (mddev->sb_flags)
6035 md_update_sb(mddev, 0);
6036
6037 md_new_event(mddev);
6038 return 0;
6039
6040bitmap_abort:
6041 mddev_detach(mddev);
6042 if (mddev->private)
6043 pers->free(mddev, mddev->private);
6044 mddev->private = NULL;
6045 module_put(pers->owner);
6046 md_bitmap_destroy(mddev);
6047abort:
6048 if (mddev->level != 1 && mddev->level != 10)
6049 bioset_exit(&mddev->io_acct_set);
6050exit_sync_set:
6051 bioset_exit(&mddev->sync_set);
6052exit_bio_set:
6053 bioset_exit(&mddev->bio_set);
6054 return err;
6055}
6056EXPORT_SYMBOL_GPL(md_run);
6057
6058int do_md_run(struct mddev *mddev)
6059{
6060 int err;
6061
6062 set_bit(MD_NOT_READY, &mddev->flags);
6063 err = md_run(mddev);
6064 if (err)
6065 goto out;
6066 err = md_bitmap_load(mddev);
6067 if (err) {
6068 md_bitmap_destroy(mddev);
6069 goto out;
6070 }
6071
6072 if (mddev_is_clustered(mddev))
6073 md_allow_write(mddev);
6074
6075 /* run start up tasks that require md_thread */
6076 md_start(mddev);
6077
6078 md_wakeup_thread(mddev->thread);
6079 md_wakeup_thread(mddev->sync_thread);
6080
6081 set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
6082 clear_bit(MD_NOT_READY, &mddev->flags);
6083 mddev->changed = 1;
6084 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
6085 sysfs_notify_dirent_safe(mddev->sysfs_state);
6086 sysfs_notify_dirent_safe(mddev->sysfs_action);
6087 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
6088out:
6089 clear_bit(MD_NOT_READY, &mddev->flags);
6090 return err;
6091}
6092
6093int md_start(struct mddev *mddev)
6094{
6095 int ret = 0;
6096
6097 if (mddev->pers->start) {
6098 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6099 md_wakeup_thread(mddev->thread);
6100 ret = mddev->pers->start(mddev);
6101 clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
6102 md_wakeup_thread(mddev->sync_thread);
6103 }
6104 return ret;
6105}
6106EXPORT_SYMBOL_GPL(md_start);
6107
6108static int restart_array(struct mddev *mddev)
6109{
6110 struct gendisk *disk = mddev->gendisk;
6111 struct md_rdev *rdev;
6112 bool has_journal = false;
6113 bool has_readonly = false;
6114
6115 /* Complain if it has no devices */
6116 if (list_empty(&mddev->disks))
6117 return -ENXIO;
6118 if (!mddev->pers)
6119 return -EINVAL;
6120 if (!mddev->ro)
6121 return -EBUSY;
6122
6123 rcu_read_lock();
6124 rdev_for_each_rcu(rdev, mddev) {
6125 if (test_bit(Journal, &rdev->flags) &&
6126 !test_bit(Faulty, &rdev->flags))
6127 has_journal = true;
6128 if (rdev_read_only(rdev))
6129 has_readonly = true;
6130 }
6131 rcu_read_unlock();
6132 if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !has_journal)
6133 /* Don't restart rw with journal missing/faulty */
6134 return -EINVAL;
6135 if (has_readonly)
6136 return -EROFS;
6137
6138 mddev->safemode = 0;
6139 mddev->ro = 0;
6140 set_disk_ro(disk, 0);
6141 pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
6142
6143 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6144 md_wakeup_thread(mddev->thread);
6145 md_wakeup_thread(mddev->sync_thread);
6146 sysfs_notify_dirent_safe(mddev->sysfs_state);
6147 return 0;
6148}
6149
6150static void md_clean(struct mddev *mddev)
6151{
6152 mddev->array_sectors = 0;
6153 mddev->external_size = 0;
6154 mddev->dev_sectors = 0;
6155 mddev->raid_disks = 0;
6156 mddev->recovery_cp = 0;
6157 mddev->resync_min = 0;
6158 mddev->resync_max = MaxSector;
6159 mddev->reshape_position = MaxSector;
6160 mddev->external = 0;
6161 mddev->persistent = 0;
6162 mddev->level = LEVEL_NONE;
6163 mddev->clevel[0] = 0;
6164 mddev->flags = 0;
6165 mddev->sb_flags = 0;
6166 mddev->ro = 0;
6167 mddev->metadata_type[0] = 0;
6168 mddev->chunk_sectors = 0;
6169 mddev->ctime = mddev->utime = 0;
6170 mddev->layout = 0;
6171 mddev->max_disks = 0;
6172 mddev->events = 0;
6173 mddev->can_decrease_events = 0;
6174 mddev->delta_disks = 0;
6175 mddev->reshape_backwards = 0;
6176 mddev->new_level = LEVEL_NONE;
6177 mddev->new_layout = 0;
6178 mddev->new_chunk_sectors = 0;
6179 mddev->curr_resync = 0;
6180 atomic64_set(&mddev->resync_mismatches, 0);
6181 mddev->suspend_lo = mddev->suspend_hi = 0;
6182 mddev->sync_speed_min = mddev->sync_speed_max = 0;
6183 mddev->recovery = 0;
6184 mddev->in_sync = 0;
6185 mddev->changed = 0;
6186 mddev->degraded = 0;
6187 mddev->safemode = 0;
6188 mddev->private = NULL;
6189 mddev->cluster_info = NULL;
6190 mddev->bitmap_info.offset = 0;
6191 mddev->bitmap_info.default_offset = 0;
6192 mddev->bitmap_info.default_space = 0;
6193 mddev->bitmap_info.chunksize = 0;
6194 mddev->bitmap_info.daemon_sleep = 0;
6195 mddev->bitmap_info.max_write_behind = 0;
6196 mddev->bitmap_info.nodes = 0;
6197}
6198
6199static void __md_stop_writes(struct mddev *mddev)
6200{
6201 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6202 if (work_pending(&mddev->del_work))
6203 flush_workqueue(md_misc_wq);
6204 if (mddev->sync_thread) {
6205 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6206 md_reap_sync_thread(mddev);
6207 }
6208
6209 del_timer_sync(&mddev->safemode_timer);
6210
6211 if (mddev->pers && mddev->pers->quiesce) {
6212 mddev->pers->quiesce(mddev, 1);
6213 mddev->pers->quiesce(mddev, 0);
6214 }
6215 md_bitmap_flush(mddev);
6216
6217 if (mddev->ro == 0 &&
6218 ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
6219 mddev->sb_flags)) {
6220 /* mark array as shutdown cleanly */
6221 if (!mddev_is_clustered(mddev))
6222 mddev->in_sync = 1;
6223 md_update_sb(mddev, 1);
6224 }
6225
6226 mddev->serialize_policy = 0;
6227 mddev_destroy_serial_pool(mddev, NULL, true);
6228}
6229
6230void md_stop_writes(struct mddev *mddev)
6231{
6232 mddev_lock_nointr(mddev);
6233 __md_stop_writes(mddev);
6234 mddev_unlock(mddev);
6235}
6236EXPORT_SYMBOL_GPL(md_stop_writes);
6237
6238static void mddev_detach(struct mddev *mddev)
6239{
6240 md_bitmap_wait_behind_writes(mddev);
6241 if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
6242 mddev->pers->quiesce(mddev, 1);
6243 mddev->pers->quiesce(mddev, 0);
6244 }
6245 md_unregister_thread(&mddev->thread);
6246 if (mddev->queue)
6247 blk_sync_queue(mddev->queue);
6248}
6249
6250static void __md_stop(struct mddev *mddev)
6251{
6252 struct md_personality *pers = mddev->pers;
6253 md_bitmap_destroy(mddev);
6254 mddev_detach(mddev);
6255
6256 if (mddev->event_work.func)
6257 flush_workqueue(md_misc_wq);
6258 spin_lock(&mddev->lock);
6259 mddev->pers = NULL;
6260 spin_unlock(&mddev->lock);
6261 pers->free(mddev, mddev->private);
6262 mddev->private = NULL;
6263 if (pers->sync_request && mddev->to_remove == NULL)
6264 mddev->to_remove = &md_redundancy_group;
6265 module_put(pers->owner);
6266 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6267}
6268
6269void md_stop(struct mddev *mddev)
6270{
6271 /* stop the array and free any attached data structures.
6272 * This is called from dm-raid
6273 */
6274 __md_stop(mddev);
6275 bioset_exit(&mddev->bio_set);
6276 bioset_exit(&mddev->sync_set);
6277 if (mddev->level != 1 && mddev->level != 10)
6278 bioset_exit(&mddev->io_acct_set);
6279}
6280
6281EXPORT_SYMBOL_GPL(md_stop);
6282
6283static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
6284{
6285 int err = 0;
6286 int did_freeze = 0;
6287
6288 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6289 did_freeze = 1;
6290 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6291 md_wakeup_thread(mddev->thread);
6292 }
6293 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6294 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6295 if (mddev->sync_thread)
6296 /* Thread might be blocked waiting for metadata update
6297 * which will now never happen */
6298 wake_up_process(mddev->sync_thread->tsk);
6299
6300 if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
6301 return -EBUSY;
6302 mddev_unlock(mddev);
6303 wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
6304 &mddev->recovery));
6305 wait_event(mddev->sb_wait,
6306 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
6307 mddev_lock_nointr(mddev);
6308
6309 mutex_lock(&mddev->open_mutex);
6310 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6311 mddev->sync_thread ||
6312 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6313 pr_warn("md: %s still in use.\n",mdname(mddev));
6314 if (did_freeze) {
6315 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6316 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6317 md_wakeup_thread(mddev->thread);
6318 }
6319 err = -EBUSY;
6320 goto out;
6321 }
6322 if (mddev->pers) {
6323 __md_stop_writes(mddev);
6324
6325 err = -ENXIO;
6326 if (mddev->ro==1)
6327 goto out;
6328 mddev->ro = 1;
6329 set_disk_ro(mddev->gendisk, 1);
6330 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6331 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6332 md_wakeup_thread(mddev->thread);
6333 sysfs_notify_dirent_safe(mddev->sysfs_state);
6334 err = 0;
6335 }
6336out:
6337 mutex_unlock(&mddev->open_mutex);
6338 return err;
6339}
6340
6341/* mode:
6342 *   0 - completely stop and dis-assemble array
6343 *   2 - stop but do not disassemble array
6344 */
6345static int do_md_stop(struct mddev *mddev, int mode,
6346 struct block_device *bdev)
6347{
6348 struct gendisk *disk = mddev->gendisk;
6349 struct md_rdev *rdev;
6350 int did_freeze = 0;
6351
6352 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
6353 did_freeze = 1;
6354 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6355 md_wakeup_thread(mddev->thread);
6356 }
6357 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
6358 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6359 if (mddev->sync_thread)
6360 /* Thread might be blocked waiting for metadata update
6361 * which will now never happen */
6362 wake_up_process(mddev->sync_thread->tsk);
6363
6364 mddev_unlock(mddev);
6365 wait_event(resync_wait, (mddev->sync_thread == NULL &&
6366 !test_bit(MD_RECOVERY_RUNNING,
6367 &mddev->recovery)));
6368 mddev_lock_nointr(mddev);
6369
6370 mutex_lock(&mddev->open_mutex);
6371 if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
6372 mddev->sysfs_active ||
6373 mddev->sync_thread ||
6374 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
6375 pr_warn("md: %s still in use.\n",mdname(mddev));
6376 mutex_unlock(&mddev->open_mutex);
6377 if (did_freeze) {
6378 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6379 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6380 md_wakeup_thread(mddev->thread);
6381 }
6382 return -EBUSY;
6383 }
6384 if (mddev->pers) {
6385 if (mddev->ro)
6386 set_disk_ro(disk, 0);
6387
6388 __md_stop_writes(mddev);
6389 __md_stop(mddev);
6390
6391 /* tell userspace to handle 'inactive' */
6392 sysfs_notify_dirent_safe(mddev->sysfs_state);
6393
6394 rdev_for_each(rdev, mddev)
6395 if (rdev->raid_disk >= 0)
6396 sysfs_unlink_rdev(mddev, rdev);
6397
6398 set_capacity_and_notify(disk, 0);
6399 mutex_unlock(&mddev->open_mutex);
6400 mddev->changed = 1;
6401
6402 if (mddev->ro)
6403 mddev->ro = 0;
6404 } else
6405 mutex_unlock(&mddev->open_mutex);
6406 /*
6407 * Free resources if final stop
6408 */
6409 if (mode == 0) {
6410 pr_info("md: %s stopped.\n", mdname(mddev));
6411
6412 if (mddev->bitmap_info.file) {
6413 struct file *f = mddev->bitmap_info.file;
6414 spin_lock(&mddev->lock);
6415 mddev->bitmap_info.file = NULL;
6416 spin_unlock(&mddev->lock);
6417 fput(f);
6418 }
6419 mddev->bitmap_info.offset = 0;
6420
6421 export_array(mddev);
6422
6423 md_clean(mddev);
6424 if (mddev->hold_active == UNTIL_STOP)
6425 mddev->hold_active = 0;
6426 }
6427 md_new_event(mddev);
6428 sysfs_notify_dirent_safe(mddev->sysfs_state);
6429 return 0;
6430}
6431
6432#ifndef MODULE
6433static void autorun_array(struct mddev *mddev)
6434{
6435 struct md_rdev *rdev;
6436 int err;
6437
6438 if (list_empty(&mddev->disks))
6439 return;
6440
6441 pr_info("md: running: ");
6442
6443 rdev_for_each(rdev, mddev) {
6444 char b[BDEVNAME_SIZE];
6445 pr_cont("<%s>", bdevname(rdev->bdev,b));
6446 }
6447 pr_cont("\n");
6448
6449 err = do_md_run(mddev);
6450 if (err) {
6451 pr_warn("md: do_md_run() returned %d\n", err);
6452 do_md_stop(mddev, 0, NULL);
6453 }
6454}
6455
6456/*
6457 * lets try to run arrays based on all disks that have arrived
6458 * until now. (those are in pending_raid_disks)
6459 *
6460 * the method: pick the first pending disk, collect all disks with
6461 * the same UUID, remove all from the pending list and put them into
6462 * the 'same_array' list. Then order this list based on superblock
6463 * update time (freshest comes first), kick out 'old' disks and
6464 * compare superblocks. If everything's fine then run it.
6465 *
6466 * If "unit" is allocated, then bump its reference count
6467 */
6468static void autorun_devices(int part)
6469{
6470 struct md_rdev *rdev0, *rdev, *tmp;
6471 struct mddev *mddev;
6472 char b[BDEVNAME_SIZE];
6473
6474 pr_info("md: autorun ...\n");
6475 while (!list_empty(&pending_raid_disks)) {
6476 int unit;
6477 dev_t dev;
6478 LIST_HEAD(candidates);
6479 rdev0 = list_entry(pending_raid_disks.next,
6480 struct md_rdev, same_set);
6481
6482 pr_debug("md: considering %s ...\n", bdevname(rdev0->bdev,b));
6483 INIT_LIST_HEAD(&candidates);
6484 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
6485 if (super_90_load(rdev, rdev0, 0) >= 0) {
6486 pr_debug("md: adding %s ...\n",
6487 bdevname(rdev->bdev,b));
6488 list_move(&rdev->same_set, &candidates);
6489 }
6490 /*
6491 * now we have a set of devices, with all of them having
6492 * mostly sane superblocks. It's time to allocate the
6493 * mddev.
6494 */
6495 if (part) {
6496 dev = MKDEV(mdp_major,
6497 rdev0->preferred_minor << MdpMinorShift);
6498 unit = MINOR(dev) >> MdpMinorShift;
6499 } else {
6500 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
6501 unit = MINOR(dev);
6502 }
6503 if (rdev0->preferred_minor != unit) {
6504 pr_warn("md: unit number in %s is bad: %d\n",
6505 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
6506 break;
6507 }
6508
6509 md_probe(dev);
6510 mddev = mddev_find(dev);
6511 if (!mddev)
6512 break;
6513
6514 if (mddev_lock(mddev))
6515 pr_warn("md: %s locked, cannot run\n", mdname(mddev));
6516 else if (mddev->raid_disks || mddev->major_version
6517 || !list_empty(&mddev->disks)) {
6518 pr_warn("md: %s already running, cannot run %s\n",
6519 mdname(mddev), bdevname(rdev0->bdev,b));
6520 mddev_unlock(mddev);
6521 } else {
6522 pr_debug("md: created %s\n", mdname(mddev));
6523 mddev->persistent = 1;
6524 rdev_for_each_list(rdev, tmp, &candidates) {
6525 list_del_init(&rdev->same_set);
6526 if (bind_rdev_to_array(rdev, mddev))
6527 export_rdev(rdev);
6528 }
6529 autorun_array(mddev);
6530 mddev_unlock(mddev);
6531 }
6532 /* on success, candidates will be empty, on error
6533 * it won't...
6534 */
6535 rdev_for_each_list(rdev, tmp, &candidates) {
6536 list_del_init(&rdev->same_set);
6537 export_rdev(rdev);
6538 }
6539 mddev_put(mddev);
6540 }
6541 pr_info("md: ... autorun DONE.\n");
6542}
6543#endif
6544
6545static int get_version(void __user *arg)
6546{
6547 mdu_version_t ver;
6548
6549 ver.major = MD_MAJOR_VERSION;
6550 ver.minor = MD_MINOR_VERSION;
6551 ver.patchlevel = MD_PATCHLEVEL_VERSION;
6552
6553 if (copy_to_user(arg, &ver, sizeof(ver)))
6554 return -EFAULT;
6555
6556 return 0;
6557}
6558
6559static int get_array_info(struct mddev *mddev, void __user *arg)
6560{
6561 mdu_array_info_t info;
6562 int nr,working,insync,failed,spare;
6563 struct md_rdev *rdev;
6564
6565 nr = working = insync = failed = spare = 0;
6566 rcu_read_lock();
6567 rdev_for_each_rcu(rdev, mddev) {
6568 nr++;
6569 if (test_bit(Faulty, &rdev->flags))
6570 failed++;
6571 else {
6572 working++;
6573 if (test_bit(In_sync, &rdev->flags))
6574 insync++;
6575 else if (test_bit(Journal, &rdev->flags))
6576 /* TODO: add journal count to md_u.h */
6577 ;
6578 else
6579 spare++;
6580 }
6581 }
6582 rcu_read_unlock();
6583
6584 info.major_version = mddev->major_version;
6585 info.minor_version = mddev->minor_version;
6586 info.patch_version = MD_PATCHLEVEL_VERSION;
6587 info.ctime = clamp_t(time64_t, mddev->ctime, 0, U32_MAX);
6588 info.level = mddev->level;
6589 info.size = mddev->dev_sectors / 2;
6590 if (info.size != mddev->dev_sectors / 2)
6591 info.size = -1;
6592 info.nr_disks = nr;
6593 info.raid_disks = mddev->raid_disks;
6594 info.md_minor = mddev->md_minor;
6595 info.not_persistent= !mddev->persistent;
6596
6597 info.utime = clamp_t(time64_t, mddev->utime, 0, U32_MAX);
6598 info.state = 0;
6599 if (mddev->in_sync)
6600 info.state = (1<<MD_SB_CLEAN);
6601 if (mddev->bitmap && mddev->bitmap_info.offset)
6602 info.state |= (1<<MD_SB_BITMAP_PRESENT);
6603 if (mddev_is_clustered(mddev))
6604 info.state |= (1<<MD_SB_CLUSTERED);
6605 info.active_disks = insync;
6606 info.working_disks = working;
6607 info.failed_disks = failed;
6608 info.spare_disks = spare;
6609
6610 info.layout = mddev->layout;
6611 info.chunk_size = mddev->chunk_sectors << 9;
6612
6613 if (copy_to_user(arg, &info, sizeof(info)))
6614 return -EFAULT;
6615
6616 return 0;
6617}
6618
6619static int get_bitmap_file(struct mddev *mddev, void __user * arg)
6620{
6621 mdu_bitmap_file_t *file = NULL;
6622 char *ptr;
6623 int err;
6624
6625 file = kzalloc(sizeof(*file), GFP_NOIO);
6626 if (!file)
6627 return -ENOMEM;
6628
6629 err = 0;
6630 spin_lock(&mddev->lock);
6631
6632 if (mddev->bitmap_info.file) {
6633 ptr = file_path(mddev->bitmap_info.file, file->pathname,
6634 sizeof(file->pathname));
6635 if (IS_ERR(ptr))
6636 err = PTR_ERR(ptr);
6637 else
6638 memmove(file->pathname, ptr,
6639 sizeof(file->pathname)-(ptr-file->pathname));
6640 }
6641 spin_unlock(&mddev->lock);
6642
6643 if (err == 0 &&
6644 copy_to_user(arg, file, sizeof(*file)))
6645 err = -EFAULT;
6646
6647 kfree(file);
6648 return err;
6649}
6650
6651static int get_disk_info(struct mddev *mddev, void __user * arg)
6652{
6653 mdu_disk_info_t info;
6654 struct md_rdev *rdev;
6655
6656 if (copy_from_user(&info, arg, sizeof(info)))
6657 return -EFAULT;
6658
6659 rcu_read_lock();
6660 rdev = md_find_rdev_nr_rcu(mddev, info.number);
6661 if (rdev) {
6662 info.major = MAJOR(rdev->bdev->bd_dev);
6663 info.minor = MINOR(rdev->bdev->bd_dev);
6664 info.raid_disk = rdev->raid_disk;
6665 info.state = 0;
6666 if (test_bit(Faulty, &rdev->flags))
6667 info.state |= (1<<MD_DISK_FAULTY);
6668 else if (test_bit(In_sync, &rdev->flags)) {
6669 info.state |= (1<<MD_DISK_ACTIVE);
6670 info.state |= (1<<MD_DISK_SYNC);
6671 }
6672 if (test_bit(Journal, &rdev->flags))
6673 info.state |= (1<<MD_DISK_JOURNAL);
6674 if (test_bit(WriteMostly, &rdev->flags))
6675 info.state |= (1<<MD_DISK_WRITEMOSTLY);
6676 if (test_bit(FailFast, &rdev->flags))
6677 info.state |= (1<<MD_DISK_FAILFAST);
6678 } else {
6679 info.major = info.minor = 0;
6680 info.raid_disk = -1;
6681 info.state = (1<<MD_DISK_REMOVED);
6682 }
6683 rcu_read_unlock();
6684
6685 if (copy_to_user(arg, &info, sizeof(info)))
6686 return -EFAULT;
6687
6688 return 0;
6689}
6690
6691int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
6692{
6693 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
6694 struct md_rdev *rdev;
6695 dev_t dev = MKDEV(info->major,info->minor);
6696
6697 if (mddev_is_clustered(mddev) &&
6698 !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
6699 pr_warn("%s: Cannot add to clustered mddev.\n",
6700 mdname(mddev));
6701 return -EINVAL;
6702 }
6703
6704 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
6705 return -EOVERFLOW;
6706
6707 if (!mddev->raid_disks) {
6708 int err;
6709
6710 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
6711 if (IS_ERR(rdev)) {
6712 pr_warn("md: md_import_device returned %ld\n",
6713 PTR_ERR(rdev));
6714 return PTR_ERR(rdev);
6715 }
6716 if (!list_empty(&mddev->disks)) {
6717 struct md_rdev *rdev0
6718 = list_entry(mddev->disks.next,
6719 struct md_rdev, same_set);
6720 err = super_types[mddev->major_version]
6721 .load_super(rdev, rdev0, mddev->minor_version);
6722 if (err < 0) {
6723 pr_warn("md: %s has different UUID to %s\n",
6724 bdevname(rdev->bdev,b),
6725 bdevname(rdev0->bdev,b2));
6726 export_rdev(rdev);
6727 return -EINVAL;
6728 }
6729 }
6730 err = bind_rdev_to_array(rdev, mddev);
6731 if (err)
6732 export_rdev(rdev);
6733 return err;
6734 }
6735
6736
6737
6738
6739
6740
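	/*
	 * md_add_new_disk can also be used once the array is running to
	 * add "hot spares": the device must already carry a superblock
	 * that the personality can validate against the array.
	 */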
6741 if (mddev->pers) {
6742 int err;
6743 if (!mddev->pers->hot_add_disk) {
6744 pr_warn("%s: personality does not support diskops!\n",
6745 mdname(mddev));
6746 return -EINVAL;
6747 }
6748 if (mddev->persistent)
6749 rdev = md_import_device(dev, mddev->major_version,
6750 mddev->minor_version);
6751 else
6752 rdev = md_import_device(dev, -1, -1);
6753 if (IS_ERR(rdev)) {
6754 pr_warn("md: md_import_device returned %ld\n",
6755 PTR_ERR(rdev));
6756 return PTR_ERR(rdev);
6757 }
6758
6759 if (!mddev->persistent) {
6760 if (info->state & (1<<MD_DISK_SYNC) &&
6761 info->raid_disk < mddev->raid_disks) {
6762 rdev->raid_disk = info->raid_disk;
6763 set_bit(In_sync, &rdev->flags);
6764 clear_bit(Bitmap_sync, &rdev->flags);
6765 } else
6766 rdev->raid_disk = -1;
6767 rdev->saved_raid_disk = rdev->raid_disk;
6768 } else
6769 super_types[mddev->major_version].
6770 validate_super(mddev, rdev);
6771 if ((info->state & (1<<MD_DISK_SYNC)) &&
6772 rdev->raid_disk != info->raid_disk) {
6773
6774
6775
6776 export_rdev(rdev);
6777 return -EINVAL;
6778 }
6779
6780 clear_bit(In_sync, &rdev->flags);
6781 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6782 set_bit(WriteMostly, &rdev->flags);
6783 else
6784 clear_bit(WriteMostly, &rdev->flags);
6785 if (info->state & (1<<MD_DISK_FAILFAST))
6786 set_bit(FailFast, &rdev->flags);
6787 else
6788 clear_bit(FailFast, &rdev->flags);
6789
6790 if (info->state & (1<<MD_DISK_JOURNAL)) {
6791 struct md_rdev *rdev2;
6792 bool has_journal = false;
6793
6794
6795 rdev_for_each(rdev2, mddev) {
6796 if (test_bit(Journal, &rdev2->flags)) {
6797 has_journal = true;
6798 break;
6799 }
6800 }
6801 if (has_journal || mddev->bitmap) {
6802 export_rdev(rdev);
6803 return -EBUSY;
6804 }
6805 set_bit(Journal, &rdev->flags);
6806 }
6807
6808
6809
6810 if (mddev_is_clustered(mddev)) {
6811 if (info->state & (1 << MD_DISK_CANDIDATE))
6812 set_bit(Candidate, &rdev->flags);
6813 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
6814
6815 err = md_cluster_ops->add_new_disk(mddev, rdev);
6816 if (err) {
6817 export_rdev(rdev);
6818 return err;
6819 }
6820 }
6821 }
6822
6823 rdev->raid_disk = -1;
6824 err = bind_rdev_to_array(rdev, mddev);
6825
6826 if (err)
6827 export_rdev(rdev);
6828
6829 if (mddev_is_clustered(mddev)) {
6830 if (info->state & (1 << MD_DISK_CANDIDATE)) {
6831 if (!err) {
6832 err = md_cluster_ops->new_disk_ack(mddev,
6833 err == 0);
6834 if (err)
6835 md_kick_rdev_from_array(rdev);
6836 }
6837 } else {
6838 if (err)
6839 md_cluster_ops->add_new_disk_cancel(mddev);
6840 else
6841 err = add_bound_rdev(rdev);
6842 }
6843
6844 } else if (!err)
6845 err = add_bound_rdev(rdev);
6846
6847 return err;
6848 }
6849
6850
6851
6852
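	/*
	 * Otherwise the array is not running yet; building it up device by
	 * device without validating superblocks is only supported for
	 * version-0 (0.90) metadata.
	 */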
6853 if (mddev->major_version != 0) {
6854 pr_warn("%s: ADD_NEW_DISK not supported\n", mdname(mddev));
6855 return -EINVAL;
6856 }
6857
6858 if (!(info->state & (1<<MD_DISK_FAULTY))) {
6859 int err;
6860 rdev = md_import_device(dev, -1, 0);
6861 if (IS_ERR(rdev)) {
6862 pr_warn("md: error, md_import_device() returned %ld\n",
6863 PTR_ERR(rdev));
6864 return PTR_ERR(rdev);
6865 }
6866 rdev->desc_nr = info->number;
6867 if (info->raid_disk < mddev->raid_disks)
6868 rdev->raid_disk = info->raid_disk;
6869 else
6870 rdev->raid_disk = -1;
6871
6872 if (rdev->raid_disk < mddev->raid_disks)
6873 if (info->state & (1<<MD_DISK_SYNC))
6874 set_bit(In_sync, &rdev->flags);
6875
6876 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
6877 set_bit(WriteMostly, &rdev->flags);
6878 if (info->state & (1<<MD_DISK_FAILFAST))
6879 set_bit(FailFast, &rdev->flags);
6880
6881 if (!mddev->persistent) {
6882 pr_debug("md: nonpersistent superblock ...\n");
6883 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6884 } else
6885 rdev->sb_start = calc_dev_sboffset(rdev);
6886 rdev->sectors = rdev->sb_start;
6887
6888 err = bind_rdev_to_array(rdev, mddev);
6889 if (err) {
6890 export_rdev(rdev);
6891 return err;
6892 }
6893 }
6894
6895 return 0;
6896}
6897
6898static int hot_remove_disk(struct mddev *mddev, dev_t dev)
6899{
6900 char b[BDEVNAME_SIZE];
6901 struct md_rdev *rdev;
6902
6903 if (!mddev->pers)
6904 return -ENODEV;
6905
6906 rdev = find_rdev(mddev, dev);
6907 if (!rdev)
6908 return -ENXIO;
6909
6910 if (rdev->raid_disk < 0)
6911 goto kick_rdev;
6912
6913 clear_bit(Blocked, &rdev->flags);
6914 remove_and_add_spares(mddev, rdev);
6915
6916 if (rdev->raid_disk >= 0)
6917 goto busy;
6918
6919kick_rdev:
6920 if (mddev_is_clustered(mddev)) {
6921 if (md_cluster_ops->remove_disk(mddev, rdev))
6922 goto busy;
6923 }
6924
6925 md_kick_rdev_from_array(rdev);
6926 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6927 if (mddev->thread)
6928 md_wakeup_thread(mddev->thread);
6929 else
6930 md_update_sb(mddev, 1);
6931 md_new_event(mddev);
6932
6933 return 0;
6934busy:
6935 pr_debug("md: cannot remove active disk %s from %s ...\n",
6936 bdevname(rdev->bdev,b), mdname(mddev));
6937 return -EBUSY;
6938}
6939
6940static int hot_add_disk(struct mddev *mddev, dev_t dev)
6941{
6942 char b[BDEVNAME_SIZE];
6943 int err;
6944 struct md_rdev *rdev;
6945
6946 if (!mddev->pers)
6947 return -ENODEV;
6948
6949 if (mddev->major_version != 0) {
6950 pr_warn("%s: HOT_ADD may only be used with version-0 superblocks.\n",
6951 mdname(mddev));
6952 return -EINVAL;
6953 }
6954 if (!mddev->pers->hot_add_disk) {
6955 pr_warn("%s: personality does not support diskops!\n",
6956 mdname(mddev));
6957 return -EINVAL;
6958 }
6959
6960 rdev = md_import_device(dev, -1, 0);
6961 if (IS_ERR(rdev)) {
6962 pr_warn("md: error, md_import_device() returned %ld\n",
6963 PTR_ERR(rdev));
6964 return -EINVAL;
6965 }
6966
6967 if (mddev->persistent)
6968 rdev->sb_start = calc_dev_sboffset(rdev);
6969 else
6970 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
6971
6972 rdev->sectors = rdev->sb_start;
6973
6974 if (test_bit(Faulty, &rdev->flags)) {
6975 pr_warn("md: cannot hot-add faulty %s disk to %s!\n",
6976 bdevname(rdev->bdev,b), mdname(mddev));
6977 err = -EINVAL;
6978 goto abort_export;
6979 }
6980
6981 clear_bit(In_sync, &rdev->flags);
6982 rdev->desc_nr = -1;
6983 rdev->saved_raid_disk = -1;
6984 err = bind_rdev_to_array(rdev, mddev);
6985 if (err)
6986 goto abort_export;
6987
6988
6989
6990
6991
6992
6993 rdev->raid_disk = -1;
6994
6995 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
6996 if (!mddev->thread)
6997 md_update_sb(mddev, 1);
6998
6999
7000
7001
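	/*
	 * Kick recovery so the new spare is picked up immediately if the
	 * array is degraded.
	 */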
7002 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7003 md_wakeup_thread(mddev->thread);
7004 md_new_event(mddev);
7005 return 0;
7006
7007abort_export:
7008 export_rdev(rdev);
7009 return err;
7010}
7011
7012static int set_bitmap_file(struct mddev *mddev, int fd)
7013{
7014 int err = 0;
7015
7016 if (mddev->pers) {
7017 if (!mddev->pers->quiesce || !mddev->thread)
7018 return -EBUSY;
7019 if (mddev->recovery || mddev->sync_thread)
7020 return -EBUSY;
7021
7022 }
7023
7024 if (fd >= 0) {
7025 struct inode *inode;
7026 struct file *f;
7027
7028 if (mddev->bitmap || mddev->bitmap_info.file)
7029 return -EEXIST;
7030 f = fget(fd);
7031
7032 if (f == NULL) {
7033 pr_warn("%s: error: failed to get bitmap file\n",
7034 mdname(mddev));
7035 return -EBADF;
7036 }
7037
7038 inode = f->f_mapping->host;
7039 if (!S_ISREG(inode->i_mode)) {
7040 pr_warn("%s: error: bitmap file must be a regular file\n",
7041 mdname(mddev));
7042 err = -EBADF;
7043 } else if (!(f->f_mode & FMODE_WRITE)) {
7044 pr_warn("%s: error: bitmap file must be opened for writing\n",
7045 mdname(mddev));
7046 err = -EBADF;
7047 } else if (atomic_read(&inode->i_writecount) != 1) {
7048 pr_warn("%s: error: bitmap file is already in use\n",
7049 mdname(mddev));
7050 err = -EBUSY;
7051 }
7052 if (err) {
7053 fput(f);
7054 return err;
7055 }
7056 mddev->bitmap_info.file = f;
7057 mddev->bitmap_info.offset = 0;
7058 } else if (mddev->bitmap == NULL)
7059 return -ENOENT;
7060 err = 0;
7061 if (mddev->pers) {
7062 if (fd >= 0) {
7063 struct bitmap *bitmap;
7064
7065 bitmap = md_bitmap_create(mddev, -1);
7066 mddev_suspend(mddev);
7067 if (!IS_ERR(bitmap)) {
7068 mddev->bitmap = bitmap;
7069 err = md_bitmap_load(mddev);
7070 } else
7071 err = PTR_ERR(bitmap);
7072 if (err) {
7073 md_bitmap_destroy(mddev);
7074 fd = -1;
7075 }
7076 mddev_resume(mddev);
7077 } else if (fd < 0) {
7078 mddev_suspend(mddev);
7079 md_bitmap_destroy(mddev);
7080 mddev_resume(mddev);
7081 }
7082 }
7083 if (fd < 0) {
7084 struct file *f = mddev->bitmap_info.file;
7085 if (f) {
7086 spin_lock(&mddev->lock);
7087 mddev->bitmap_info.file = NULL;
7088 spin_unlock(&mddev->lock);
7089 fput(f);
7090 }
7091 }
7092
7093 return err;
7094}
7095
7096
7097
7098
7099
7100
7101
7102
7103
7104
7105
7106
7107
7108
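/*
 * md_set_array_info() is used in two ways:
 *  - with raid_disks == 0 it only records which superblock version to
 *    expect, ready for a subsequent assemble of pre-existing devices;
 *  - with raid_disks > 0 it describes a brand new array (level, size,
 *    layout, chunk size, ...), which uses 0.90-style limits when
 *    persistent.
 */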
7109int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info)
7110{
7111 if (info->raid_disks == 0) {
7112
7113 if (info->major_version < 0 ||
7114 info->major_version >= ARRAY_SIZE(super_types) ||
7115 super_types[info->major_version].name == NULL) {
7116
7117 pr_warn("md: superblock version %d not known\n",
7118 info->major_version);
7119 return -EINVAL;
7120 }
7121 mddev->major_version = info->major_version;
7122 mddev->minor_version = info->minor_version;
7123 mddev->patch_version = info->patch_version;
7124 mddev->persistent = !info->not_persistent;
7125
7126
7127
7128 mddev->ctime = ktime_get_real_seconds();
7129 return 0;
7130 }
7131 mddev->major_version = MD_MAJOR_VERSION;
7132 mddev->minor_version = MD_MINOR_VERSION;
7133 mddev->patch_version = MD_PATCHLEVEL_VERSION;
7134 mddev->ctime = ktime_get_real_seconds();
7135
7136 mddev->level = info->level;
7137 mddev->clevel[0] = 0;
7138 mddev->dev_sectors = 2 * (sector_t)info->size;
7139 mddev->raid_disks = info->raid_disks;
7140
7141
7142
7143 if (info->state & (1<<MD_SB_CLEAN))
7144 mddev->recovery_cp = MaxSector;
7145 else
7146 mddev->recovery_cp = 0;
7147 mddev->persistent = ! info->not_persistent;
7148 mddev->external = 0;
7149
7150 mddev->layout = info->layout;
7151 if (mddev->level == 0)
7152
7153 mddev->layout = -1;
7154 mddev->chunk_sectors = info->chunk_size >> 9;
7155
7156 if (mddev->persistent) {
7157 mddev->max_disks = MD_SB_DISKS;
7158 mddev->flags = 0;
7159 mddev->sb_flags = 0;
7160 }
7161 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
7162
7163 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
7164 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
7165 mddev->bitmap_info.offset = 0;
7166
7167 mddev->reshape_position = MaxSector;
7168
7169
7170
7171
7172 get_random_bytes(mddev->uuid, 16);
7173
7174 mddev->new_level = mddev->level;
7175 mddev->new_chunk_sectors = mddev->chunk_sectors;
7176 mddev->new_layout = mddev->layout;
7177 mddev->delta_disks = 0;
7178 mddev->reshape_backwards = 0;
7179
7180 return 0;
7181}
7182
7183void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
7184{
7185 lockdep_assert_held(&mddev->reconfig_mutex);
7186
7187 if (mddev->external_size)
7188 return;
7189
7190 mddev->array_sectors = array_sectors;
7191}
7192EXPORT_SYMBOL(md_set_array_sectors);
7193
7194static int update_size(struct mddev *mddev, sector_t num_sectors)
7195{
7196 struct md_rdev *rdev;
7197 int rv;
7198 int fit = (num_sectors == 0);
7199 sector_t old_dev_sectors = mddev->dev_sectors;
7200
7201 if (mddev->pers->resize == NULL)
7202 return -EINVAL;
7203
7204
7205
7206
7207
7208
7209
7210
7211
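	/*
	 * num_sectors is the per-device size to use (0 means "as large as
	 * fits on every member").  Resizing is refused while a resync or
	 * reshape is running, or when the array is read-only.
	 */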
7212 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7213 mddev->sync_thread)
7214 return -EBUSY;
7215 if (mddev->ro)
7216 return -EROFS;
7217
7218 rdev_for_each(rdev, mddev) {
7219 sector_t avail = rdev->sectors;
7220
7221 if (fit && (num_sectors == 0 || num_sectors > avail))
7222 num_sectors = avail;
7223 if (avail < num_sectors)
7224 return -ENOSPC;
7225 }
7226 rv = mddev->pers->resize(mddev, num_sectors);
7227 if (!rv) {
7228 if (mddev_is_clustered(mddev))
7229 md_cluster_ops->update_size(mddev, old_dev_sectors);
7230 else if (mddev->queue) {
7231 set_capacity_and_notify(mddev->gendisk,
7232 mddev->array_sectors);
7233 }
7234 }
7235 return rv;
7236}
7237
7238static int update_raid_disks(struct mddev *mddev, int raid_disks)
7239{
7240 int rv;
7241 struct md_rdev *rdev;
7242
7243 if (mddev->pers->check_reshape == NULL)
7244 return -EINVAL;
7245 if (mddev->ro)
7246 return -EROFS;
7247 if (raid_disks <= 0 ||
7248 (mddev->max_disks && raid_disks >= mddev->max_disks))
7249 return -EINVAL;
7250 if (mddev->sync_thread ||
7251 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
7252 test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
7253 mddev->reshape_position != MaxSector)
7254 return -EBUSY;
7255
7256 rdev_for_each(rdev, mddev) {
7257 if (mddev->raid_disks < raid_disks &&
7258 rdev->data_offset < rdev->new_data_offset)
7259 return -EINVAL;
7260 if (mddev->raid_disks > raid_disks &&
7261 rdev->data_offset > rdev->new_data_offset)
7262 return -EINVAL;
7263 }
7264
7265 mddev->delta_disks = raid_disks - mddev->raid_disks;
7266 if (mddev->delta_disks < 0)
7267 mddev->reshape_backwards = 1;
7268 else if (mddev->delta_disks > 0)
7269 mddev->reshape_backwards = 0;
7270
7271 rv = mddev->pers->check_reshape(mddev);
7272 if (rv < 0) {
7273 mddev->delta_disks = 0;
7274 mddev->reshape_backwards = 0;
7275 }
7276 return rv;
7277}
7278
7279
7280
7281
7282
7283
7284
7285
7286
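/*
 * update_array_info() changes the configuration of an active array.
 * Immutable fields (versions, ctime, level, persistence, chunk size)
 * must match the current values, and only one of size, raid_disks,
 * layout or bitmap-presence may be changed per call.
 */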
7287static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
7288{
7289 int rv = 0;
7290 int cnt = 0;
7291 int state = 0;
7292
7293
7294 if (mddev->bitmap && mddev->bitmap_info.offset)
7295 state |= (1 << MD_SB_BITMAP_PRESENT);
7296
7297 if (mddev->major_version != info->major_version ||
7298 mddev->minor_version != info->minor_version ||
7299
7300 mddev->ctime != info->ctime ||
7301 mddev->level != info->level ||
7302
7303 mddev->persistent != !info->not_persistent ||
7304 mddev->chunk_sectors != info->chunk_size >> 9 ||
7305
7306 ((state^info->state) & 0xfffffe00)
7307 )
7308 return -EINVAL;
7309
7310 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7311 cnt++;
7312 if (mddev->raid_disks != info->raid_disks)
7313 cnt++;
7314 if (mddev->layout != info->layout)
7315 cnt++;
7316 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
7317 cnt++;
7318 if (cnt == 0)
7319 return 0;
7320 if (cnt > 1)
7321 return -EINVAL;
7322
7323 if (mddev->layout != info->layout) {
7324
7325
7326
7327
7328 if (mddev->pers->check_reshape == NULL)
7329 return -EINVAL;
7330 else {
7331 mddev->new_layout = info->layout;
7332 rv = mddev->pers->check_reshape(mddev);
7333 if (rv)
7334 mddev->new_layout = mddev->layout;
7335 return rv;
7336 }
7337 }
7338 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
7339 rv = update_size(mddev, (sector_t)info->size * 2);
7340
7341 if (mddev->raid_disks != info->raid_disks)
7342 rv = update_raid_disks(mddev, info->raid_disks);
7343
7344 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
7345 if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
7346 rv = -EINVAL;
7347 goto err;
7348 }
7349 if (mddev->recovery || mddev->sync_thread) {
7350 rv = -EBUSY;
7351 goto err;
7352 }
7353 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
7354 struct bitmap *bitmap;
7355
7356 if (mddev->bitmap) {
7357 rv = -EEXIST;
7358 goto err;
7359 }
7360 if (mddev->bitmap_info.default_offset == 0) {
7361 rv = -EINVAL;
7362 goto err;
7363 }
7364 mddev->bitmap_info.offset =
7365 mddev->bitmap_info.default_offset;
7366 mddev->bitmap_info.space =
7367 mddev->bitmap_info.default_space;
7368 bitmap = md_bitmap_create(mddev, -1);
7369 mddev_suspend(mddev);
7370 if (!IS_ERR(bitmap)) {
7371 mddev->bitmap = bitmap;
7372 rv = md_bitmap_load(mddev);
7373 } else
7374 rv = PTR_ERR(bitmap);
7375 if (rv)
7376 md_bitmap_destroy(mddev);
7377 mddev_resume(mddev);
7378 } else {
7379
7380 if (!mddev->bitmap) {
7381 rv = -ENOENT;
7382 goto err;
7383 }
7384 if (mddev->bitmap->storage.file) {
7385 rv = -EINVAL;
7386 goto err;
7387 }
7388 if (mddev->bitmap_info.nodes) {
7389
7390 if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
7391 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
7392 rv = -EPERM;
7393 md_cluster_ops->unlock_all_bitmaps(mddev);
7394 goto err;
7395 }
7396
7397 mddev->bitmap_info.nodes = 0;
7398 md_cluster_ops->leave(mddev);
7399 module_put(md_cluster_mod);
7400 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
7401 }
7402 mddev_suspend(mddev);
7403 md_bitmap_destroy(mddev);
7404 mddev_resume(mddev);
7405 mddev->bitmap_info.offset = 0;
7406 }
7407 }
7408 md_update_sb(mddev, 1);
7409 return rv;
7410err:
7411 return rv;
7412}
7413
7414static int set_disk_faulty(struct mddev *mddev, dev_t dev)
7415{
7416 struct md_rdev *rdev;
7417 int err = 0;
7418
7419 if (mddev->pers == NULL)
7420 return -ENODEV;
7421
7422 rcu_read_lock();
7423 rdev = md_find_rdev_rcu(mddev, dev);
7424 if (!rdev)
7425 err = -ENODEV;
7426 else {
7427 md_error(mddev, rdev);
7428 if (!test_bit(Faulty, &rdev->flags))
7429 err = -EBUSY;
7430 }
7431 rcu_read_unlock();
7432 return err;
7433}
7434
7435
7436
7437
7438
7439
7440
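/*
 * Report a fake CHS geometry (2 heads, 4 sectors per track) so that
 * tools expecting a disk geometry get something consistent with the
 * array size.
 */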
7441static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
7442{
7443 struct mddev *mddev = bdev->bd_disk->private_data;
7444
7445 geo->heads = 2;
7446 geo->sectors = 4;
7447 geo->cylinders = mddev->array_sectors / 8;
7448 return 0;
7449}
7450
7451static inline bool md_ioctl_valid(unsigned int cmd)
7452{
7453 switch (cmd) {
7454 case ADD_NEW_DISK:
7455 case GET_ARRAY_INFO:
7456 case GET_BITMAP_FILE:
7457 case GET_DISK_INFO:
7458 case HOT_ADD_DISK:
7459 case HOT_REMOVE_DISK:
7460 case RAID_VERSION:
7461 case RESTART_ARRAY_RW:
7462 case RUN_ARRAY:
7463 case SET_ARRAY_INFO:
7464 case SET_BITMAP_FILE:
7465 case SET_DISK_FAULTY:
7466 case STOP_ARRAY:
7467 case STOP_ARRAY_RO:
7468 case CLUSTERED_DISK_NACK:
7469 return true;
7470 default:
7471 return false;
7472 }
7473}
7474
7475static int md_ioctl(struct block_device *bdev, fmode_t mode,
7476 unsigned int cmd, unsigned long arg)
7477{
7478 int err = 0;
7479 void __user *argp = (void __user *)arg;
7480 struct mddev *mddev = NULL;
7481 bool did_set_md_closing = false;
7482
7483 if (!md_ioctl_valid(cmd))
7484 return -ENOTTY;
7485
7486 switch (cmd) {
7487 case RAID_VERSION:
7488 case GET_ARRAY_INFO:
7489 case GET_DISK_INFO:
7490 break;
7491 default:
7492 if (!capable(CAP_SYS_ADMIN))
7493 return -EACCES;
7494 }
7495
7496
7497
7498
7499
7500 switch (cmd) {
7501 case RAID_VERSION:
7502 err = get_version(argp);
7503 goto out;
7504 default:;
7505 }
7506
7507
7508
7509
7510
7511 mddev = bdev->bd_disk->private_data;
7512
7513 if (!mddev) {
7514 BUG();
7515 goto out;
7516 }
7517
7518
7519 switch (cmd) {
7520 case GET_ARRAY_INFO:
7521 if (!mddev->raid_disks && !mddev->external)
7522 err = -ENODEV;
7523 else
7524 err = get_array_info(mddev, argp);
7525 goto out;
7526
7527 case GET_DISK_INFO:
7528 if (!mddev->raid_disks && !mddev->external)
7529 err = -ENODEV;
7530 else
7531 err = get_disk_info(mddev, argp);
7532 goto out;
7533
7534 case SET_DISK_FAULTY:
7535 err = set_disk_faulty(mddev, new_decode_dev(arg));
7536 goto out;
7537
7538 case GET_BITMAP_FILE:
7539 err = get_bitmap_file(mddev, argp);
7540 goto out;
7541
7542 }
7543
7544 if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
7545 flush_rdev_wq(mddev);
7546
7547 if (cmd == HOT_REMOVE_DISK)
7548
7549 wait_event_interruptible_timeout(mddev->sb_wait,
7550 !test_bit(MD_RECOVERY_NEEDED,
7551 &mddev->recovery),
7552 msecs_to_jiffies(5000));
7553 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
7554
7555
7556
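		/*
		 * Refuse to stop the array if someone else still has it
		 * open; otherwise mark it as closing so new opens fail,
		 * and flush the block device before stopping.
		 */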
7557 mutex_lock(&mddev->open_mutex);
7558 if (mddev->pers && atomic_read(&mddev->openers) > 1) {
7559 mutex_unlock(&mddev->open_mutex);
7560 err = -EBUSY;
7561 goto out;
7562 }
7563 if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
7564 mutex_unlock(&mddev->open_mutex);
7565 err = -EBUSY;
7566 goto out;
7567 }
7568 did_set_md_closing = true;
7569 mutex_unlock(&mddev->open_mutex);
7570 sync_blockdev(bdev);
7571 }
7572 err = mddev_lock(mddev);
7573 if (err) {
7574 pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
7575 err, cmd);
7576 goto out;
7577 }
7578
7579 if (cmd == SET_ARRAY_INFO) {
7580 mdu_array_info_t info;
7581 if (!arg)
7582 memset(&info, 0, sizeof(info));
7583 else if (copy_from_user(&info, argp, sizeof(info))) {
7584 err = -EFAULT;
7585 goto unlock;
7586 }
7587 if (mddev->pers) {
7588 err = update_array_info(mddev, &info);
7589 if (err) {
7590 pr_warn("md: couldn't update array info. %d\n", err);
7591 goto unlock;
7592 }
7593 goto unlock;
7594 }
7595 if (!list_empty(&mddev->disks)) {
7596 pr_warn("md: array %s already has disks!\n", mdname(mddev));
7597 err = -EBUSY;
7598 goto unlock;
7599 }
7600 if (mddev->raid_disks) {
7601 pr_warn("md: array %s already initialised!\n", mdname(mddev));
7602 err = -EBUSY;
7603 goto unlock;
7604 }
7605 err = md_set_array_info(mddev, &info);
7606 if (err) {
7607 pr_warn("md: couldn't set array info. %d\n", err);
7608 goto unlock;
7609 }
7610 goto unlock;
7611 }
7612
7613
7614
7615
7616
7617
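	/*
	 * If the array is not yet initialised, only the commands that set
	 * it up or tear it down make sense; everything else gets -ENODEV.
	 */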
7618 if ((!mddev->raid_disks && !mddev->external)
7619 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
7620 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
7621 && cmd != GET_BITMAP_FILE) {
7622 err = -ENODEV;
7623 goto unlock;
7624 }
7625
7626
7627
7628
7629 switch (cmd) {
7630 case RESTART_ARRAY_RW:
7631 err = restart_array(mddev);
7632 goto unlock;
7633
7634 case STOP_ARRAY:
7635 err = do_md_stop(mddev, 0, bdev);
7636 goto unlock;
7637
7638 case STOP_ARRAY_RO:
7639 err = md_set_readonly(mddev, bdev);
7640 goto unlock;
7641
7642 case HOT_REMOVE_DISK:
7643 err = hot_remove_disk(mddev, new_decode_dev(arg));
7644 goto unlock;
7645
7646 case ADD_NEW_DISK:
7647
7648
7649
7650
7651 if (mddev->pers) {
7652 mdu_disk_info_t info;
7653 if (copy_from_user(&info, argp, sizeof(info)))
7654 err = -EFAULT;
7655 else if (!(info.state & (1<<MD_DISK_SYNC)))
7656
7657 break;
7658 else
7659 err = md_add_new_disk(mddev, &info);
7660 goto unlock;
7661 }
7662 break;
7663 }
7664
7665
7666
7667
7668
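	/*
	 * An auto-read-only array (ro == 2) is switched back to read-write
	 * before commands that may modify it; a truly read-only array
	 * rejects them with -EROFS.
	 */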
7669 if (mddev->ro && mddev->pers) {
7670 if (mddev->ro == 2) {
7671 mddev->ro = 0;
7672 sysfs_notify_dirent_safe(mddev->sysfs_state);
7673 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7674
7675
7676
7677
7678 if (test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) {
7679 mddev_unlock(mddev);
7680 wait_event(mddev->sb_wait,
7681 !test_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags) &&
7682 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
7683 mddev_lock_nointr(mddev);
7684 }
7685 } else {
7686 err = -EROFS;
7687 goto unlock;
7688 }
7689 }
7690
7691 switch (cmd) {
7692 case ADD_NEW_DISK:
7693 {
7694 mdu_disk_info_t info;
7695 if (copy_from_user(&info, argp, sizeof(info)))
7696 err = -EFAULT;
7697 else
7698 err = md_add_new_disk(mddev, &info);
7699 goto unlock;
7700 }
7701
7702 case CLUSTERED_DISK_NACK:
7703 if (mddev_is_clustered(mddev))
7704 md_cluster_ops->new_disk_ack(mddev, false);
7705 else
7706 err = -EINVAL;
7707 goto unlock;
7708
7709 case HOT_ADD_DISK:
7710 err = hot_add_disk(mddev, new_decode_dev(arg));
7711 goto unlock;
7712
7713 case RUN_ARRAY:
7714 err = do_md_run(mddev);
7715 goto unlock;
7716
7717 case SET_BITMAP_FILE:
7718 err = set_bitmap_file(mddev, (int)arg);
7719 goto unlock;
7720
7721 default:
7722 err = -EINVAL;
7723 goto unlock;
7724 }
7725
7726unlock:
7727 if (mddev->hold_active == UNTIL_IOCTL &&
7728 err != -EINVAL)
7729 mddev->hold_active = 0;
7730 mddev_unlock(mddev);
7731out:
7732 if (did_set_md_closing)
7733 clear_bit(MD_CLOSING, &mddev->flags);
7734 return err;
7735}
7736#ifdef CONFIG_COMPAT
7737static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
7738 unsigned int cmd, unsigned long arg)
7739{
7740 switch (cmd) {
7741 case HOT_REMOVE_DISK:
7742 case HOT_ADD_DISK:
7743 case SET_DISK_FAULTY:
7744 case SET_BITMAP_FILE:
7745
7746 break;
7747 default:
7748 arg = (unsigned long)compat_ptr(arg);
7749 break;
7750 }
7751
7752 return md_ioctl(bdev, mode, cmd, arg);
7753}
7754#endif
7755
7756static int md_set_read_only(struct block_device *bdev, bool ro)
7757{
7758 struct mddev *mddev = bdev->bd_disk->private_data;
7759 int err;
7760
7761 err = mddev_lock(mddev);
7762 if (err)
7763 return err;
7764
7765 if (!mddev->raid_disks && !mddev->external) {
7766 err = -ENODEV;
7767 goto out_unlock;
7768 }
7769
7770
7771
7772
7773
7774 if (!ro && mddev->ro == 1 && mddev->pers) {
7775 err = restart_array(mddev);
7776 if (err)
7777 goto out_unlock;
7778 mddev->ro = 2;
7779 }
7780
7781out_unlock:
7782 mddev_unlock(mddev);
7783 return err;
7784}
7785
7786static int md_open(struct block_device *bdev, fmode_t mode)
7787{
7788
7789
7790
7791
7792 struct mddev *mddev = mddev_find(bdev->bd_dev);
7793 int err;
7794
7795 if (!mddev)
7796 return -ENODEV;
7797
7798 if (mddev->gendisk != bdev->bd_disk) {
7799
7800
7801
7802 mddev_put(mddev);
7803
7804 if (work_pending(&mddev->del_work))
7805 flush_workqueue(md_misc_wq);
7806 return -EBUSY;
7807 }
7808 BUG_ON(mddev != bdev->bd_disk->private_data);
7809
7810 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
7811 goto out;
7812
7813 if (test_bit(MD_CLOSING, &mddev->flags)) {
7814 mutex_unlock(&mddev->open_mutex);
7815 err = -ENODEV;
7816 goto out;
7817 }
7818
7819 err = 0;
7820 atomic_inc(&mddev->openers);
7821 mutex_unlock(&mddev->open_mutex);
7822
7823 bdev_check_media_change(bdev);
7824 out:
7825 if (err)
7826 mddev_put(mddev);
7827 return err;
7828}
7829
7830static void md_release(struct gendisk *disk, fmode_t mode)
7831{
7832 struct mddev *mddev = disk->private_data;
7833
7834 BUG_ON(!mddev);
7835 atomic_dec(&mddev->openers);
7836 mddev_put(mddev);
7837}
7838
7839static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
7840{
7841 struct mddev *mddev = disk->private_data;
7842 unsigned int ret = 0;
7843
7844 if (mddev->changed)
7845 ret = DISK_EVENT_MEDIA_CHANGE;
7846 mddev->changed = 0;
7847 return ret;
7848}
7849
7850const struct block_device_operations md_fops =
7851{
7852 .owner = THIS_MODULE,
7853 .submit_bio = md_submit_bio,
7854 .open = md_open,
7855 .release = md_release,
7856 .ioctl = md_ioctl,
7857#ifdef CONFIG_COMPAT
7858 .compat_ioctl = md_compat_ioctl,
7859#endif
7860 .getgeo = md_getgeo,
7861 .check_events = md_check_events,
7862 .set_read_only = md_set_read_only,
7863};
7864
7865static int md_thread(void *arg)
7866{
7867 struct md_thread *thread = arg;
7868
7869
7870
7871
7872
7873
7874
7875
7876
7877
7878
7879
7880
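	/*
	 * The per-array thread loops until it is told to stop, sleeping on
	 * its waitqueue and calling thread->run() whenever THREAD_WAKEUP is
	 * set or the timeout expires.  SIGKILL is allowed so the thread can
	 * be interrupted while waiting.
	 */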
7881 allow_signal(SIGKILL);
7882 while (!kthread_should_stop()) {
7883
7884
7885
7886
7887
7888
7889 if (signal_pending(current))
7890 flush_signals(current);
7891
7892 wait_event_interruptible_timeout
7893 (thread->wqueue,
7894 test_bit(THREAD_WAKEUP, &thread->flags)
7895 || kthread_should_stop() || kthread_should_park(),
7896 thread->timeout);
7897
7898 clear_bit(THREAD_WAKEUP, &thread->flags);
7899 if (kthread_should_park())
7900 kthread_parkme();
7901 if (!kthread_should_stop())
7902 thread->run(thread);
7903 }
7904
7905 return 0;
7906}
7907
7908void md_wakeup_thread(struct md_thread *thread)
7909{
7910 if (thread) {
7911 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
7912 set_bit(THREAD_WAKEUP, &thread->flags);
7913 wake_up(&thread->wqueue);
7914 }
7915}
7916EXPORT_SYMBOL(md_wakeup_thread);
7917
7918struct md_thread *md_register_thread(void (*run) (struct md_thread *),
7919 struct mddev *mddev, const char *name)
7920{
7921 struct md_thread *thread;
7922
7923 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
7924 if (!thread)
7925 return NULL;
7926
7927 init_waitqueue_head(&thread->wqueue);
7928
7929 thread->run = run;
7930 thread->mddev = mddev;
7931 thread->timeout = MAX_SCHEDULE_TIMEOUT;
7932 thread->tsk = kthread_run(md_thread, thread,
7933 "%s_%s",
7934 mdname(thread->mddev),
7935 name);
7936 if (IS_ERR(thread->tsk)) {
7937 kfree(thread);
7938 return NULL;
7939 }
7940 return thread;
7941}
7942EXPORT_SYMBOL(md_register_thread);
7943
7944void md_unregister_thread(struct md_thread **threadp)
7945{
7946 struct md_thread *thread = *threadp;
7947 if (!thread)
7948 return;
7949 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
7950
7951
7952
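	/*
	 * Clear *threadp under pers_lock so that concurrent wakers taking
	 * the same lock never see a thread that is about to be freed.
	 */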
7953 spin_lock(&pers_lock);
7954 *threadp = NULL;
7955 spin_unlock(&pers_lock);
7956
7957 kthread_stop(thread->tsk);
7958 kfree(thread);
7959}
7960EXPORT_SYMBOL(md_unregister_thread);
7961
7962void md_error(struct mddev *mddev, struct md_rdev *rdev)
7963{
7964 if (!rdev || test_bit(Faulty, &rdev->flags))
7965 return;
7966
7967 if (!mddev->pers || !mddev->pers->error_handler)
7968 return;
7969 mddev->pers->error_handler(mddev,rdev);
7970 if (mddev->degraded)
7971 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7972 sysfs_notify_dirent_safe(rdev->sysfs_state);
7973 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7974 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7975 md_wakeup_thread(mddev->thread);
7976 if (mddev->event_work.func)
7977 queue_work(md_misc_wq, &mddev->event_work);
7978 md_new_event(mddev);
7979}
7980EXPORT_SYMBOL(md_error);
7981
7982
7983
7984static void status_unused(struct seq_file *seq)
7985{
7986 int i = 0;
7987 struct md_rdev *rdev;
7988
7989 seq_printf(seq, "unused devices: ");
7990
7991 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
7992 char b[BDEVNAME_SIZE];
7993 i++;
7994 seq_printf(seq, "%s ",
7995 bdevname(rdev->bdev,b));
7996 }
7997 if (!i)
7998 seq_printf(seq, "<none>");
7999
8000 seq_printf(seq, "\n");
8001}
8002
8003static int status_resync(struct seq_file *seq, struct mddev *mddev)
8004{
8005 sector_t max_sectors, resync, res;
8006 unsigned long dt, db = 0;
8007 sector_t rt, curr_mark_cnt, resync_mark_cnt;
8008 int scale, recovery_active;
8009 unsigned int per_milli;
8010
8011 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8012 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8013 max_sectors = mddev->resync_max_sectors;
8014 else
8015 max_sectors = mddev->dev_sectors;
8016
8017 resync = mddev->curr_resync;
8018 if (resync <= 3) {
8019 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
8020
8021 resync = max_sectors;
8022 } else if (resync > max_sectors)
8023 resync = max_sectors;
8024 else
8025 resync -= atomic_read(&mddev->recovery_active);
8026
8027 if (resync == 0) {
8028 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
8029 struct md_rdev *rdev;
8030
8031 rdev_for_each(rdev, mddev)
8032 if (rdev->raid_disk >= 0 &&
8033 !test_bit(Faulty, &rdev->flags) &&
8034 rdev->recovery_offset != MaxSector &&
8035 rdev->recovery_offset) {
8036 seq_printf(seq, "\trecover=REMOTE");
8037 return 1;
8038 }
8039 if (mddev->reshape_position != MaxSector)
8040 seq_printf(seq, "\treshape=REMOTE");
8041 else
8042 seq_printf(seq, "\tresync=REMOTE");
8043 return 1;
8044 }
8045 if (mddev->recovery_cp < MaxSector) {
8046 seq_printf(seq, "\tresync=PENDING");
8047 return 1;
8048 }
8049 return 0;
8050 }
8051 if (resync < 3) {
8052 seq_printf(seq, "\tresync=DELAYED");
8053 return 1;
8054 }
8055
8056 WARN_ON(max_sectors == 0);
8057
8058
8059
8060
8061
8062 scale = 10;
8063 if (sizeof(sector_t) > sizeof(unsigned long)) {
8064 while ( max_sectors/2 > (1ULL<<(scale+32)))
8065 scale++;
8066 }
8067 res = (resync>>scale)*1000;
8068 sector_div(res, (u32)((max_sectors>>scale)+1));
8069
8070 per_milli = res;
8071 {
8072 int i, x = per_milli/50, y = 20-x;
8073 seq_printf(seq, "[");
8074 for (i = 0; i < x; i++)
8075 seq_printf(seq, "=");
8076 seq_printf(seq, ">");
8077 for (i = 0; i < y; i++)
8078 seq_printf(seq, ".");
8079 seq_printf(seq, "] ");
8080 }
8081 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
8082 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
8083 "reshape" :
8084 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
8085 "check" :
8086 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
8087 "resync" : "recovery"))),
8088 per_milli/10, per_milli % 10,
8089 (unsigned long long) resync/2,
8090 (unsigned long long) max_sectors/2);
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
8103
8104
8105
8106
8107
8108
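	/*
	 * Estimate the remaining time: dt is the seconds since the last
	 * speed mark, db the sectors completed in that window (excluding
	 * I/O still in flight), and rt scales the remaining sectors by
	 * dt/db.  db/32 is used as the divisor and the result shifted back
	 * by 5 afterwards, to keep the 64-bit division in range.
	 */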
8109 dt = ((jiffies - mddev->resync_mark) / HZ);
8110 if (!dt) dt++;
8111
8112 curr_mark_cnt = mddev->curr_mark_cnt;
8113 recovery_active = atomic_read(&mddev->recovery_active);
8114 resync_mark_cnt = mddev->resync_mark_cnt;
8115
8116 if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
8117 db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
8118
8119 rt = max_sectors - resync;
8120 rt = div64_u64(rt, db/32+1);
8121 rt *= dt;
8122 rt >>= 5;
8123
8124 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
8125 ((unsigned long)rt % 60)/6);
8126
8127 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
8128 return 1;
8129}
8130
8131static void *md_seq_start(struct seq_file *seq, loff_t *pos)
8132{
8133 struct list_head *tmp;
8134 loff_t l = *pos;
8135 struct mddev *mddev;
8136
8137 if (l == 0x10000) {
8138 ++*pos;
8139 return (void *)2;
8140 }
8141 if (l > 0x10000)
8142 return NULL;
8143 if (!l--)
8144
8145 return (void*)1;
8146
8147 spin_lock(&all_mddevs_lock);
8148 list_for_each(tmp,&all_mddevs)
8149 if (!l--) {
8150 mddev = list_entry(tmp, struct mddev, all_mddevs);
8151 mddev_get(mddev);
8152 spin_unlock(&all_mddevs_lock);
8153 return mddev;
8154 }
8155 spin_unlock(&all_mddevs_lock);
8156 if (!l--)
8157 return (void*)2;
8158 return NULL;
8159}
8160
8161static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
8162{
8163 struct list_head *tmp;
8164 struct mddev *next_mddev, *mddev = v;
8165
8166 ++*pos;
8167 if (v == (void*)2)
8168 return NULL;
8169
8170 spin_lock(&all_mddevs_lock);
8171 if (v == (void*)1)
8172 tmp = all_mddevs.next;
8173 else
8174 tmp = mddev->all_mddevs.next;
8175 if (tmp != &all_mddevs)
8176 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
8177 else {
8178 next_mddev = (void*)2;
8179 *pos = 0x10000;
8180 }
8181 spin_unlock(&all_mddevs_lock);
8182
8183 if (v != (void*)1)
8184 mddev_put(mddev);
8185 return next_mddev;
8186
8187}
8188
8189static void md_seq_stop(struct seq_file *seq, void *v)
8190{
8191 struct mddev *mddev = v;
8192
8193 if (mddev && v != (void*)1 && v != (void*)2)
8194 mddev_put(mddev);
8195}
8196
8197static int md_seq_show(struct seq_file *seq, void *v)
8198{
8199 struct mddev *mddev = v;
8200 sector_t sectors;
8201 struct md_rdev *rdev;
8202
8203 if (v == (void*)1) {
8204 struct md_personality *pers;
8205 seq_printf(seq, "Personalities : ");
8206 spin_lock(&pers_lock);
8207 list_for_each_entry(pers, &pers_list, list)
8208 seq_printf(seq, "[%s] ", pers->name);
8209
8210 spin_unlock(&pers_lock);
8211 seq_printf(seq, "\n");
8212 seq->poll_event = atomic_read(&md_event_count);
8213 return 0;
8214 }
8215 if (v == (void*)2) {
8216 status_unused(seq);
8217 return 0;
8218 }
8219
8220 spin_lock(&mddev->lock);
8221 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
8222 seq_printf(seq, "%s : %sactive", mdname(mddev),
8223 mddev->pers ? "" : "in");
8224 if (mddev->pers) {
8225 if (mddev->ro==1)
8226 seq_printf(seq, " (read-only)");
8227 if (mddev->ro==2)
8228 seq_printf(seq, " (auto-read-only)");
8229 seq_printf(seq, " %s", mddev->pers->name);
8230 }
8231
8232 sectors = 0;
8233 rcu_read_lock();
8234 rdev_for_each_rcu(rdev, mddev) {
8235 char b[BDEVNAME_SIZE];
8236 seq_printf(seq, " %s[%d]",
8237 bdevname(rdev->bdev,b), rdev->desc_nr);
8238 if (test_bit(WriteMostly, &rdev->flags))
8239 seq_printf(seq, "(W)");
8240 if (test_bit(Journal, &rdev->flags))
8241 seq_printf(seq, "(J)");
8242 if (test_bit(Faulty, &rdev->flags)) {
8243 seq_printf(seq, "(F)");
8244 continue;
8245 }
8246 if (rdev->raid_disk < 0)
8247 seq_printf(seq, "(S)");
8248 if (test_bit(Replacement, &rdev->flags))
8249 seq_printf(seq, "(R)");
8250 sectors += rdev->sectors;
8251 }
8252 rcu_read_unlock();
8253
8254 if (!list_empty(&mddev->disks)) {
8255 if (mddev->pers)
8256 seq_printf(seq, "\n %llu blocks",
8257 (unsigned long long)
8258 mddev->array_sectors / 2);
8259 else
8260 seq_printf(seq, "\n %llu blocks",
8261 (unsigned long long)sectors / 2);
8262 }
8263 if (mddev->persistent) {
8264 if (mddev->major_version != 0 ||
8265 mddev->minor_version != 90) {
8266 seq_printf(seq," super %d.%d",
8267 mddev->major_version,
8268 mddev->minor_version);
8269 }
8270 } else if (mddev->external)
8271 seq_printf(seq, " super external:%s",
8272 mddev->metadata_type);
8273 else
8274 seq_printf(seq, " super non-persistent");
8275
8276 if (mddev->pers) {
8277 mddev->pers->status(seq, mddev);
8278 seq_printf(seq, "\n ");
8279 if (mddev->pers->sync_request) {
8280 if (status_resync(seq, mddev))
8281 seq_printf(seq, "\n ");
8282 }
8283 } else
8284 seq_printf(seq, "\n ");
8285
8286 md_bitmap_status(seq, mddev->bitmap);
8287
8288 seq_printf(seq, "\n");
8289 }
8290 spin_unlock(&mddev->lock);
8291
8292 return 0;
8293}
8294
8295static const struct seq_operations md_seq_ops = {
8296 .start = md_seq_start,
8297 .next = md_seq_next,
8298 .stop = md_seq_stop,
8299 .show = md_seq_show,
8300};
8301
8302static int md_seq_open(struct inode *inode, struct file *file)
8303{
8304 struct seq_file *seq;
8305 int error;
8306
8307 error = seq_open(file, &md_seq_ops);
8308 if (error)
8309 return error;
8310
8311 seq = file->private_data;
8312 seq->poll_event = atomic_read(&md_event_count);
8313 return error;
8314}
8315
8316static int md_unloading;
8317static __poll_t mdstat_poll(struct file *filp, poll_table *wait)
8318{
8319 struct seq_file *seq = filp->private_data;
8320 __poll_t mask;
8321
8322 if (md_unloading)
8323 return EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
8324 poll_wait(filp, &md_event_waiters, wait);
8325
8326
8327 mask = EPOLLIN | EPOLLRDNORM;
8328
8329 if (seq->poll_event != atomic_read(&md_event_count))
8330 mask |= EPOLLERR | EPOLLPRI;
8331 return mask;
8332}
8333
8334static const struct proc_ops mdstat_proc_ops = {
8335 .proc_open = md_seq_open,
8336 .proc_read = seq_read,
8337 .proc_lseek = seq_lseek,
8338 .proc_release = seq_release,
8339 .proc_poll = mdstat_poll,
8340};
8341
8342int register_md_personality(struct md_personality *p)
8343{
8344 pr_debug("md: %s personality registered for level %d\n",
8345 p->name, p->level);
8346 spin_lock(&pers_lock);
8347 list_add_tail(&p->list, &pers_list);
8348 spin_unlock(&pers_lock);
8349 return 0;
8350}
8351EXPORT_SYMBOL(register_md_personality);
8352
8353int unregister_md_personality(struct md_personality *p)
8354{
8355 pr_debug("md: %s personality unregistered\n", p->name);
8356 spin_lock(&pers_lock);
8357 list_del_init(&p->list);
8358 spin_unlock(&pers_lock);
8359 return 0;
8360}
8361EXPORT_SYMBOL(unregister_md_personality);
8362
8363int register_md_cluster_operations(struct md_cluster_operations *ops,
8364 struct module *module)
8365{
8366 int ret = 0;
8367 spin_lock(&pers_lock);
8368 if (md_cluster_ops != NULL)
8369 ret = -EALREADY;
8370 else {
8371 md_cluster_ops = ops;
8372 md_cluster_mod = module;
8373 }
8374 spin_unlock(&pers_lock);
8375 return ret;
8376}
8377EXPORT_SYMBOL(register_md_cluster_operations);
8378
8379int unregister_md_cluster_operations(void)
8380{
8381 spin_lock(&pers_lock);
8382 md_cluster_ops = NULL;
8383 spin_unlock(&pers_lock);
8384 return 0;
8385}
8386EXPORT_SYMBOL(unregister_md_cluster_operations);
8387
8388int md_setup_cluster(struct mddev *mddev, int nodes)
8389{
8390 int ret;
8391 if (!md_cluster_ops)
8392 request_module("md-cluster");
8393 spin_lock(&pers_lock);
8394
8395 if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
8396 pr_warn("can't find md-cluster module or get its reference.\n");
8397 spin_unlock(&pers_lock);
8398 return -ENOENT;
8399 }
8400 spin_unlock(&pers_lock);
8401
8402 ret = md_cluster_ops->join(mddev, nodes);
8403 if (!ret)
8404 mddev->safemode_delay = 0;
8405 return ret;
8406}
8407
8408void md_cluster_stop(struct mddev *mddev)
8409{
8410 if (!md_cluster_ops)
8411 return;
8412 md_cluster_ops->leave(mddev);
8413 module_put(md_cluster_mod);
8414}
8415
8416static int is_mddev_idle(struct mddev *mddev, int init)
8417{
8418 struct md_rdev *rdev;
8419 int idle;
8420 int curr_events;
8421
8422 idle = 1;
8423 rcu_read_lock();
8424 rdev_for_each_rcu(rdev, mddev) {
8425 struct gendisk *disk = rdev->bdev->bd_disk;
8426 curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
8427 atomic_read(&disk->sync_io);
8428
8429
8430
8431
8432
8433
8434
8435
8436
8437
8438
8439
8440
8441
8442
8443
8444
8445
8446
8447
8448
8449
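		/*
		 * Compare the device's total sector count against the
		 * resync I/O it has issued (disk->sync_io); if non-resync
		 * traffic grew by more than 64 sectors since the last
		 * check, the array is not idle and resync is throttled
		 * back towards speed_min.
		 */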
8450 if (init || curr_events - rdev->last_events > 64) {
8451 rdev->last_events = curr_events;
8452 idle = 0;
8453 }
8454 }
8455 rcu_read_unlock();
8456 return idle;
8457}
8458
8459void md_done_sync(struct mddev *mddev, int blocks, int ok)
8460{
8461
8462 atomic_sub(blocks, &mddev->recovery_active);
8463 wake_up(&mddev->recovery_wait);
8464 if (!ok) {
8465 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8466 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
8467 md_wakeup_thread(mddev->thread);
8468
8469 }
8470}
8471EXPORT_SYMBOL(md_done_sync);
8472
8473
8474
8475
8476
8477
8478
8479
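/*
 * md_write_start() must be called by a personality before it starts a
 * normal (non-resync) write.  It marks the array dirty, scheduling a
 * superblock update if needed, and waits for that update to complete;
 * it returns false if the array was suspended before the write could
 * be recorded.
 */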
8480bool md_write_start(struct mddev *mddev, struct bio *bi)
8481{
8482 int did_change = 0;
8483
8484 if (bio_data_dir(bi) != WRITE)
8485 return true;
8486
8487 BUG_ON(mddev->ro == 1);
8488 if (mddev->ro == 2) {
8489
8490 mddev->ro = 0;
8491 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8492 md_wakeup_thread(mddev->thread);
8493 md_wakeup_thread(mddev->sync_thread);
8494 did_change = 1;
8495 }
8496 rcu_read_lock();
8497 percpu_ref_get(&mddev->writes_pending);
8498 smp_mb();
8499 if (mddev->safemode == 1)
8500 mddev->safemode = 0;
8501
8502 if (mddev->in_sync || mddev->sync_checkers) {
8503 spin_lock(&mddev->lock);
8504 if (mddev->in_sync) {
8505 mddev->in_sync = 0;
8506 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8507 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8508 md_wakeup_thread(mddev->thread);
8509 did_change = 1;
8510 }
8511 spin_unlock(&mddev->lock);
8512 }
8513 rcu_read_unlock();
8514 if (did_change)
8515 sysfs_notify_dirent_safe(mddev->sysfs_state);
8516 if (!mddev->has_superblocks)
8517 return true;
8518 wait_event(mddev->sb_wait,
8519 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
8520 mddev->suspended);
8521 if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
8522 percpu_ref_put(&mddev->writes_pending);
8523 return false;
8524 }
8525 return true;
8526}
8527EXPORT_SYMBOL(md_write_start);
8528
8529
8530
8531
8532
8533
8534
8535
8536
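/*
 * md_write_inc() accounts an additional bio that is part of a write
 * already covered by md_write_start(); it only bumps writes_pending,
 * which md_write_end() later drops.
 */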
8537void md_write_inc(struct mddev *mddev, struct bio *bi)
8538{
8539 if (bio_data_dir(bi) != WRITE)
8540 return;
8541 WARN_ON_ONCE(mddev->in_sync || mddev->ro);
8542 percpu_ref_get(&mddev->writes_pending);
8543}
8544EXPORT_SYMBOL(md_write_inc);
8545
8546void md_write_end(struct mddev *mddev)
8547{
8548 percpu_ref_put(&mddev->writes_pending);
8549
8550 if (mddev->safemode == 2)
8551 md_wakeup_thread(mddev->thread);
8552 else if (mddev->safemode_delay)
8553
8554
8555
8556 mod_timer(&mddev->safemode_timer,
8557 roundup(jiffies, mddev->safemode_delay) +
8558 mddev->safemode_delay);
8559}
8560
8561EXPORT_SYMBOL(md_write_end);
8562
8563
8564void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
8565 struct bio *bio, sector_t start, sector_t size)
8566{
8567 struct bio *discard_bio = NULL;
8568
8569 if (__blkdev_issue_discard(rdev->bdev, start, size, GFP_NOIO, 0,
8570 &discard_bio) || !discard_bio)
8571 return;
8572
8573 bio_chain(discard_bio, bio);
8574 bio_clone_blkg_association(discard_bio, bio);
8575 if (mddev->gendisk)
8576 trace_block_bio_remap(discard_bio,
8577 disk_devt(mddev->gendisk),
8578 bio->bi_iter.bi_sector);
8579 submit_bio_noacct(discard_bio);
8580}
8581EXPORT_SYMBOL_GPL(md_submit_discard_bio);
8582
8583static void md_end_io_acct(struct bio *bio)
8584{
8585 struct md_io_acct *md_io_acct = bio->bi_private;
8586 struct bio *orig_bio = md_io_acct->orig_bio;
8587
8588 orig_bio->bi_status = bio->bi_status;
8589
8590 bio_end_io_acct(orig_bio, md_io_acct->start_time);
8591 bio_put(bio);
8592 bio_endio(orig_bio);
8593}
8594
8595
8596
8597
8598
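/*
 * Wrap a bio in a clone so I/O statistics are charged to the original
 * bio; the clone's completion handler finishes the accounting and then
 * completes the original bio.
 */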
8599void md_account_bio(struct mddev *mddev, struct bio **bio)
8600{
8601 struct md_io_acct *md_io_acct;
8602 struct bio *clone;
8603
8604 if (!blk_queue_io_stat((*bio)->bi_bdev->bd_disk->queue))
8605 return;
8606
8607 clone = bio_clone_fast(*bio, GFP_NOIO, &mddev->io_acct_set);
8608 md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
8609 md_io_acct->orig_bio = *bio;
8610 md_io_acct->start_time = bio_start_io_acct(*bio);
8611
8612 clone->bi_end_io = md_end_io_acct;
8613 clone->bi_private = md_io_acct;
8614 *bio = clone;
8615}
8616EXPORT_SYMBOL_GPL(md_account_bio);
8617
8618
8619
8620
8621
8622
8623
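/*
 * md_allow_write() marks the array as active so that subsequent writes
 * do not block on a superblock update; callers use it before operations
 * that must not wait on pending metadata writes.
 */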
8624void md_allow_write(struct mddev *mddev)
8625{
8626 if (!mddev->pers)
8627 return;
8628 if (mddev->ro)
8629 return;
8630 if (!mddev->pers->sync_request)
8631 return;
8632
8633 spin_lock(&mddev->lock);
8634 if (mddev->in_sync) {
8635 mddev->in_sync = 0;
8636 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8637 set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
8638 if (mddev->safemode_delay &&
8639 mddev->safemode == 0)
8640 mddev->safemode = 1;
8641 spin_unlock(&mddev->lock);
8642 md_update_sb(mddev, 0);
8643 sysfs_notify_dirent_safe(mddev->sysfs_state);
8644
8645 wait_event(mddev->sb_wait,
8646 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
8647 } else
8648 spin_unlock(&mddev->lock);
8649}
8650EXPORT_SYMBOL_GPL(md_allow_write);
8651
8652#define SYNC_MARKS 10
8653#define SYNC_MARK_STEP (3*HZ)
8654#define UPDATE_FREQUENCY (5*60*HZ)
8655void md_do_sync(struct md_thread *thread)
8656{
8657 struct mddev *mddev = thread->mddev;
8658 struct mddev *mddev2;
8659 unsigned int currspeed = 0, window;
8660 sector_t max_sectors,j, io_sectors, recovery_done;
8661 unsigned long mark[SYNC_MARKS];
8662 unsigned long update_time;
8663 sector_t mark_cnt[SYNC_MARKS];
8664 int last_mark,m;
8665 struct list_head *tmp;
8666 sector_t last_check;
8667 int skipped = 0;
8668 struct md_rdev *rdev;
8669 char *desc, *action = NULL;
8670 struct blk_plug plug;
8671 int ret;
8672
8673
8674 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
8675 test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
8676 return;
8677 if (mddev->ro) {
8678 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8679 return;
8680 }
8681
8682 if (mddev_is_clustered(mddev)) {
8683 ret = md_cluster_ops->resync_start(mddev);
8684 if (ret)
8685 goto skip;
8686
8687 set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
8688 if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
8689 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
8690 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
8691 && ((unsigned long long)mddev->curr_resync_completed
8692 < (unsigned long long)mddev->resync_max_sectors))
8693 goto skip;
8694 }
8695
8696 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8697 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
8698 desc = "data-check";
8699 action = "check";
8700 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
8701 desc = "requested-resync";
8702 action = "repair";
8703 } else
8704 desc = "resync";
8705 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
8706 desc = "reshape";
8707 else
8708 desc = "recovery";
8709
8710 mddev->last_sync_action = action ?: desc;
8711
8712
8713
8714
8715
8716
8717
8718
8719
8720
8721
8722
8723
8724
8725
8726
8727
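	/*
	 * curr_resync is overloaded while we negotiate with other arrays
	 * that share devices: 2 means "checking for conflicts", 1 means
	 * "yielded to a conflicting resync", and larger values are the
	 * actual resync position.  Only one of two conflicting resyncs is
	 * allowed to proceed at a time.
	 */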
8728 do {
8729 int mddev2_minor = -1;
8730 mddev->curr_resync = 2;
8731
8732 try_again:
8733 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8734 goto skip;
8735 for_each_mddev(mddev2, tmp) {
8736 if (mddev2 == mddev)
8737 continue;
8738 if (!mddev->parallel_resync
8739 && mddev2->curr_resync
8740 && match_mddev_units(mddev, mddev2)) {
8741 DEFINE_WAIT(wq);
8742 if (mddev < mddev2 && mddev->curr_resync == 2) {
8743
8744 mddev->curr_resync = 1;
8745 wake_up(&resync_wait);
8746 }
8747 if (mddev > mddev2 && mddev->curr_resync == 1)
8748
8749
8750
8751 continue;
8752
8753
8754
8755
8756 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
8757 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8758 mddev2->curr_resync >= mddev->curr_resync) {
8759 if (mddev2_minor != mddev2->md_minor) {
8760 mddev2_minor = mddev2->md_minor;
8761 pr_info("md: delaying %s of %s until %s has finished (they share one or more physical units)\n",
8762 desc, mdname(mddev),
8763 mdname(mddev2));
8764 }
8765 mddev_put(mddev2);
8766 if (signal_pending(current))
8767 flush_signals(current);
8768 schedule();
8769 finish_wait(&resync_wait, &wq);
8770 goto try_again;
8771 }
8772 finish_wait(&resync_wait, &wq);
8773 }
8774 }
8775 } while (mddev->curr_resync < 2);
8776
8777 j = 0;
8778 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
8779
8780
8781
8782 max_sectors = mddev->resync_max_sectors;
8783 atomic64_set(&mddev->resync_mismatches, 0);
8784
8785 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
8786 j = mddev->resync_min;
8787 else if (!mddev->bitmap)
8788 j = mddev->recovery_cp;
8789
8790 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
8791 max_sectors = mddev->resync_max_sectors;
8792
8793
8794
8795
8796
8797 if (mddev_is_clustered(mddev) &&
8798 mddev->reshape_position != MaxSector)
8799 j = mddev->reshape_position;
8800 } else {
8801
8802 max_sectors = mddev->dev_sectors;
8803 j = MaxSector;
8804 rcu_read_lock();
8805 rdev_for_each_rcu(rdev, mddev)
8806 if (rdev->raid_disk >= 0 &&
8807 !test_bit(Journal, &rdev->flags) &&
8808 !test_bit(Faulty, &rdev->flags) &&
8809 !test_bit(In_sync, &rdev->flags) &&
8810 rdev->recovery_offset < j)
8811 j = rdev->recovery_offset;
8812 rcu_read_unlock();
8813
8814
8815
8816
8817
8818
8819
8820
8821
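		/*
		 * With a bitmap, briefly quiesce the array so that writes
		 * issued before the spare was added have completed before
		 * recovery starts consulting the bitmap.
		 */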
8822 if (mddev->bitmap) {
8823 mddev->pers->quiesce(mddev, 1);
8824 mddev->pers->quiesce(mddev, 0);
8825 }
8826 }
8827
8828 pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
8829 pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
8830 pr_debug("md: using maximum available idle IO bandwidth (but not more than %d KB/sec) for %s.\n",
8831 speed_max(mddev), desc);
8832
8833 is_mddev_idle(mddev, 1);
8834
8835 io_sectors = 0;
8836 for (m = 0; m < SYNC_MARKS; m++) {
8837 mark[m] = jiffies;
8838 mark_cnt[m] = io_sectors;
8839 }
8840 last_mark = 0;
8841 mddev->resync_mark = mark[last_mark];
8842 mddev->resync_mark_cnt = mark_cnt[last_mark];
8843
8844
8845
8846
8847 window = 32 * (PAGE_SIZE / 512);
8848 pr_debug("md: using %dk window, over a total of %lluk.\n",
8849 window/2, (unsigned long long)max_sectors/2);
8850
8851 atomic_set(&mddev->recovery_active, 0);
8852 last_check = 0;
8853
8854 if (j>2) {
8855 pr_debug("md: resuming %s of %s from checkpoint.\n",
8856 desc, mdname(mddev));
8857 mddev->curr_resync = j;
8858 } else
8859 mddev->curr_resync = 3;
8860 mddev->curr_resync_completed = j;
8861 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8862 md_new_event(mddev);
8863 update_time = jiffies;
8864
8865 blk_start_plug(&plug);
8866 while (j < max_sectors) {
8867 sector_t sectors;
8868
8869 skipped = 0;
8870
8871 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8872 ((mddev->curr_resync > mddev->curr_resync_completed &&
8873 (mddev->curr_resync - mddev->curr_resync_completed)
8874 > (max_sectors >> 4)) ||
8875 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
8876 (j - mddev->curr_resync_completed)*2
8877 >= mddev->resync_max - mddev->curr_resync_completed ||
8878 mddev->curr_resync_completed > mddev->resync_max
8879 )) {
8880
8881 wait_event(mddev->recovery_wait,
8882 atomic_read(&mddev->recovery_active) == 0);
8883 mddev->curr_resync_completed = j;
8884 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
8885 j > mddev->recovery_cp)
8886 mddev->recovery_cp = j;
8887 update_time = jiffies;
8888 set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
8889 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8890 }
8891
8892 while (j >= mddev->resync_max &&
8893 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
8894
8895
8896
8897
8898 flush_signals(current);
8899 wait_event_interruptible(mddev->recovery_wait,
8900 mddev->resync_max > j
8901 || test_bit(MD_RECOVERY_INTR,
8902 &mddev->recovery));
8903 }
8904
8905 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8906 break;
8907
8908 sectors = mddev->pers->sync_request(mddev, j, &skipped);
8909 if (sectors == 0) {
8910 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
8911 break;
8912 }
8913
8914 if (!skipped) {
8915 io_sectors += sectors;
8916 atomic_add(sectors, &mddev->recovery_active);
8917 }
8918
8919 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8920 break;
8921
8922 j += sectors;
8923 if (j > max_sectors)
8924
8925 j = max_sectors;
8926 if (j > 2)
8927 mddev->curr_resync = j;
8928 mddev->curr_mark_cnt = io_sectors;
8929 if (last_check == 0)
8930
8931
8932
8933 md_new_event(mddev);
8934
8935 if (last_check + window > io_sectors || j == max_sectors)
8936 continue;
8937
8938 last_check = io_sectors;
8939 repeat:
8940 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
8941
8942 int next = (last_mark+1) % SYNC_MARKS;
8943
8944 mddev->resync_mark = mark[next];
8945 mddev->resync_mark_cnt = mark_cnt[next];
8946 mark[next] = jiffies;
8947 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
8948 last_mark = next;
8949 }
8950
8951 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
8952 break;
8953
8954
8955
8956
8957
8958
8959
8960
8961
8962 cond_resched();
8963
8964 recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
8965 currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
8966 /((jiffies-mddev->resync_mark)/HZ +1) +1;
8967
8968 if (currspeed > speed_min(mddev)) {
8969 if (currspeed > speed_max(mddev)) {
8970 msleep(500);
8971 goto repeat;
8972 }
8973 if (!is_mddev_idle(mddev, 0)) {
8974
8975
8976
8977
8978 wait_event(mddev->recovery_wait,
8979 !atomic_read(&mddev->recovery_active));
8980 }
8981 }
8982 }
8983 pr_info("md: %s: %s %s.\n",mdname(mddev), desc,
8984 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
8985 ? "interrupted" : "done");
8986
8987
8988
8989 blk_finish_plug(&plug);
8990 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
8991
8992 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
8993 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
8994 mddev->curr_resync > 3) {
8995 mddev->curr_resync_completed = mddev->curr_resync;
8996 sysfs_notify_dirent_safe(mddev->sysfs_completed);
8997 }
8998 mddev->pers->sync_request(mddev, max_sectors, &skipped);
8999
9000 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
9001 mddev->curr_resync > 3) {
9002 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
9003 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9004 if (mddev->curr_resync >= mddev->recovery_cp) {
9005 pr_debug("md: checkpointing %s of %s.\n",
9006 desc, mdname(mddev));
9007 if (test_bit(MD_RECOVERY_ERROR,
9008 &mddev->recovery))
9009 mddev->recovery_cp =
9010 mddev->curr_resync_completed;
9011 else
9012 mddev->recovery_cp =
9013 mddev->curr_resync;
9014 }
9015 } else
9016 mddev->recovery_cp = MaxSector;
9017 } else {
9018 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
9019 mddev->curr_resync = MaxSector;
9020 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9021 test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
9022 rcu_read_lock();
9023 rdev_for_each_rcu(rdev, mddev)
9024 if (rdev->raid_disk >= 0 &&
9025 mddev->delta_disks >= 0 &&
9026 !test_bit(Journal, &rdev->flags) &&
9027 !test_bit(Faulty, &rdev->flags) &&
9028 !test_bit(In_sync, &rdev->flags) &&
9029 rdev->recovery_offset < mddev->curr_resync)
9030 rdev->recovery_offset = mddev->curr_resync;
9031 rcu_read_unlock();
9032 }
9033 }
9034 }
9035 skip:
9036
9037
9038
9039 set_mask_bits(&mddev->sb_flags, 0,
9040 BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
9041
9042 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9043 !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9044 mddev->delta_disks > 0 &&
9045 mddev->pers->finish_reshape &&
9046 mddev->pers->size &&
9047 mddev->queue) {
9048 mddev_lock_nointr(mddev);
9049 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
9050 mddev_unlock(mddev);
9051 if (!mddev_is_clustered(mddev))
9052 set_capacity_and_notify(mddev->gendisk,
9053 mddev->array_sectors);
9054 }
9055
9056 spin_lock(&mddev->lock);
9057 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
9058
9059 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9060 mddev->resync_min = 0;
9061 mddev->resync_max = MaxSector;
9062 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
9063 mddev->resync_min = mddev->curr_resync_completed;
9064 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
9065 mddev->curr_resync = 0;
9066 spin_unlock(&mddev->lock);
9067
9068 wake_up(&resync_wait);
9069 md_wakeup_thread(mddev->thread);
9070 return;
9071}
9072EXPORT_SYMBOL_GPL(md_do_sync);
9073
9074static int remove_and_add_spares(struct mddev *mddev,
9075 struct md_rdev *this)
9076{
9077 struct md_rdev *rdev;
9078 int spares = 0;
9079 int removed = 0;
9080 bool remove_some = false;
9081
9082 if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
9083
9084 return 0;
9085
9086 rdev_for_each(rdev, mddev) {
9087 if ((this == NULL || rdev == this) &&
9088 rdev->raid_disk >= 0 &&
9089 !test_bit(Blocked, &rdev->flags) &&
9090 test_bit(Faulty, &rdev->flags) &&
9091 atomic_read(&rdev->nr_pending) == 0) {
9092
9093
9094
9095
9096
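/*
 * Faulty, non-Blocked devices with nr_pending == 0 never get
 * nr_pending incremented and never get Faulty cleared or Blocked
 * set, so a single synchronize_rcu() after this loop covers them all.
 */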
9097 remove_some = true;
9098 set_bit(RemoveSynchronized, &rdev->flags);
9099 }
9100 }
9101
9102 if (remove_some)
9103 synchronize_rcu();
9104 rdev_for_each(rdev, mddev) {
9105 if ((this == NULL || rdev == this) &&
9106 rdev->raid_disk >= 0 &&
9107 !test_bit(Blocked, &rdev->flags) &&
9108 ((test_bit(RemoveSynchronized, &rdev->flags) ||
9109 (!test_bit(In_sync, &rdev->flags) &&
9110 !test_bit(Journal, &rdev->flags))) &&
9111 atomic_read(&rdev->nr_pending) == 0)) {
9112 if (mddev->pers->hot_remove_disk(
9113 mddev, rdev) == 0) {
9114 sysfs_unlink_rdev(mddev, rdev);
9115 rdev->saved_raid_disk = rdev->raid_disk;
9116 rdev->raid_disk = -1;
9117 removed++;
9118 }
9119 }
9120 if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
9121 clear_bit(RemoveSynchronized, &rdev->flags);
9122 }
9123
9124 if (removed && mddev->kobj.sd)
9125 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9126
9127 if (this && removed)
9128 goto no_add;
9129
9130 rdev_for_each(rdev, mddev) {
9131 if (this && this != rdev)
9132 continue;
9133 if (test_bit(Candidate, &rdev->flags))
9134 continue;
9135 if (rdev->raid_disk >= 0 &&
9136 !test_bit(In_sync, &rdev->flags) &&
9137 !test_bit(Journal, &rdev->flags) &&
9138 !test_bit(Faulty, &rdev->flags))
9139 spares++;
9140 if (rdev->raid_disk >= 0)
9141 continue;
9142 if (test_bit(Faulty, &rdev->flags))
9143 continue;
9144 if (!test_bit(Journal, &rdev->flags)) {
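/*
 * On a read-only array only re-add a device that still has a valid
 * saved slot and does not need a full bitmap-based resync.
 */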
9145 if (mddev->ro &&
9146 !(rdev->saved_raid_disk >= 0 &&
9147 !test_bit(Bitmap_sync, &rdev->flags)))
9148 continue;
9149
9150 rdev->recovery_offset = 0;
9151 }
9152 if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
9153
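/* Failure to create the sysfs link here is not fatal. */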
9154 sysfs_link_rdev(mddev, rdev);
9155 if (!test_bit(Journal, &rdev->flags))
9156 spares++;
9157 md_new_event(mddev);
9158 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9159 }
9160 }
9161no_add:
9162 if (removed)
9163 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9164 return spares;
9165}
9166
9167static void md_start_sync(struct work_struct *ws)
9168{
9169 struct mddev *mddev = container_of(ws, struct mddev, del_work);
9170
9171 mddev->sync_thread = md_register_thread(md_do_sync,
9172 mddev,
9173 "resync");
9174 if (!mddev->sync_thread) {
9175 pr_warn("%s: could not start resync thread...\n",
9176 mdname(mddev));
9177
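/* Leave the spares where they are; it shouldn't hurt. */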
9178 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9179 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9180 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9181 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9182 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9183 wake_up(&resync_wait);
9184 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9185 &mddev->recovery))
9186 if (mddev->sysfs_action)
9187 sysfs_notify_dirent_safe(mddev->sysfs_action);
9188 } else
9189 md_wakeup_thread(mddev->sync_thread);
9190 sysfs_notify_dirent_safe(mddev->sysfs_action);
9191 md_new_event(mddev);
9192}
9193
9194
9195
9196
9197
9198
9199
9200
9201
9202
9203
9204
9205
9206
9207
9208
9209
9210
9211
9212
9213
9214
9215
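/*
 * This routine is regularly called by all per-raid-array threads to
 * deal with generic issues like resync and super-block updates.
 * Raid personalities that don't have a thread (linear/raid0) do not
 * need this as they never do any recovery or update the superblock.
 *
 * It does not do any resync itself, but rather "forks" off other threads
 * to do that as needed.
 * When resync is needed, MD_RECOVERY_RUNNING is set in "->recovery" and a
 * thread is created at ->sync_thread. When that thread finishes it sets
 * MD_RECOVERY_DONE and wakes this thread, which reaps it and finishes up.
 * This routine also removes any faulty devices (with nr_pending == 0).
 *
 * The overall approach is:
 *  1/ if the superblock needs updating, update it;
 *  2/ if recovery is already running, leave it alone;
 *  3/ if a finished sync thread exists, reap it;
 *  4/ otherwise remove failed devices, add spares as appropriate and,
 *     if anything needs doing, queue md_start_sync() to register a new
 *     sync thread.
 */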
9216void md_check_recovery(struct mddev *mddev)
9217{
9218 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
9219
9220
9221
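/*
 * A superblock update is needed while the array is suspended;
 * the thread that called mddev_suspend() holds reconfig_mutex for us.
 */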
9222 set_bit(MD_UPDATING_SB, &mddev->flags);
9223 smp_mb__after_atomic();
9224 if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
9225 md_update_sb(mddev, 0);
9226 clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
9227 wake_up(&mddev->sb_wait);
9228 }
9229
9230 if (mddev->suspended)
9231 return;
9232
9233 if (mddev->bitmap)
9234 md_bitmap_daemon_work(mddev);
9235
9236 if (signal_pending(current)) {
9237 if (mddev->pers->sync_request && !mddev->external) {
9238 pr_debug("md: %s in immediate safe mode\n",
9239 mdname(mddev));
9240 mddev->safemode = 2;
9241 }
9242 flush_signals(current);
9243 }
9244
9245 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
9246 return;
9247 if (!(
9248 (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) ||
9249 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9250 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
9251 (mddev->external == 0 && mddev->safemode == 1) ||
9252 (mddev->safemode == 2
9253 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
9254 ))
9255 return;
9256
9257 if (mddev_trylock(mddev)) {
9258 int spares = 0;
9259 bool try_set_sync = mddev->safemode != 0;
9260
9261 if (!mddev->external && mddev->safemode == 1)
9262 mddev->safemode = 0;
9263
9264 if (mddev->ro) {
9265 struct md_rdev *rdev;
9266 if (!mddev->external && mddev->in_sync)
9267
9268
9269
9270
9271
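/*
 * The 'Blocked' flag is not needed, as failed devices will be
 * recorded if the array is switched to read/write. Leave
 * 'external' alone as it can't be safely cleared without taking
 * a superblock lock.
 */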
9272 rdev_for_each(rdev, mddev)
9273 clear_bit(Blocked, &rdev->flags);
9274
9275
9276
9277
9278
9279
9280
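/*
 * On a read-only array we can:
 *  - remove failed devices
 *  - add already-in_sync devices if the array itself is in-sync.
 * As we only add devices that are already in-sync, we can
 * activate the spares immediately.
 */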
9281 remove_and_add_spares(mddev, NULL);
9282
9283
9284
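/*
 * There is no sync thread, but we still need to call
 * ->spare_active and clear saved_raid_disk.
 */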
9285 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
9286 md_reap_sync_thread(mddev);
9287 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9288 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9289 clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
9290 goto unlock;
9291 }
9292
9293 if (mddev_is_clustered(mddev)) {
9294 struct md_rdev *rdev, *tmp;
9295
9296
9297
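/* Kick out any device that another node has asked to remove. */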
9298 rdev_for_each_safe(rdev, tmp, mddev) {
9299 if (test_and_clear_bit(ClusterRemove, &rdev->flags) &&
9300 rdev->raid_disk < 0)
9301 md_kick_rdev_from_array(rdev);
9302 }
9303 }
9304
9305 if (try_set_sync && !mddev->external && !mddev->in_sync) {
9306 spin_lock(&mddev->lock);
9307 set_in_sync(mddev);
9308 spin_unlock(&mddev->lock);
9309 }
9310
9311 if (mddev->sb_flags)
9312 md_update_sb(mddev, 0);
9313
9314 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
9315 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
9316
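/* A resync/recovery is still in progress. */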
9317 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9318 goto unlock;
9319 }
9320 if (mddev->sync_thread) {
9321 md_reap_sync_thread(mddev);
9322 goto unlock;
9323 }
9324
9325
9326
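/*
 * Set RUNNING before clearing NEEDED to avoid any transients
 * in the value of "sync_action".
 */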
9327 mddev->curr_resync_completed = 0;
9328 spin_lock(&mddev->lock);
9329 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9330 spin_unlock(&mddev->lock);
9331
9332
9333
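/* Clear some bits that don't mean anything but might be left set. */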
9334 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
9335 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9336
9337 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
9338 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
9339 goto not_running;
9340
9341
9342
9343
9344
9345
9346
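/*
 * No recovery is running: remove any failed drives, then add
 * spares if possible. Spares are also removed and re-added to
 * allow the personality to fail the re-add.
 */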
9347 if (mddev->reshape_position != MaxSector) {
9348 if (mddev->pers->check_reshape == NULL ||
9349 mddev->pers->check_reshape(mddev) != 0)
9350
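/* Cannot proceed with the reshape. */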
9351 goto not_running;
9352 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9353 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9354 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
9355 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9356 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9357 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9358 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9359 } else if (mddev->recovery_cp < MaxSector) {
9360 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9361 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
9362 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
9363
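/* Nothing to be done. */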
9364 goto not_running;
9365
9366 if (mddev->pers->sync_request) {
9367 if (spares) {
9368
9369
9370
9371
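/*
 * We are adding one or more devices to an array which may have
 * its bitmap stored on all devices, so make sure all bitmap
 * pages get written out.
 */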
9372 md_bitmap_write_all(mddev->bitmap);
9373 }
9374 INIT_WORK(&mddev->del_work, md_start_sync);
9375 queue_work(md_misc_wq, &mddev->del_work);
9376 goto unlock;
9377 }
9378 not_running:
9379 if (!mddev->sync_thread) {
9380 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9381 wake_up(&resync_wait);
9382 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
9383 &mddev->recovery))
9384 if (mddev->sysfs_action)
9385 sysfs_notify_dirent_safe(mddev->sysfs_action);
9386 }
9387 unlock:
9388 wake_up(&mddev->sb_wait);
9389 mddev_unlock(mddev);
9390 }
9391}
9392EXPORT_SYMBOL(md_check_recovery);
9393
9394void md_reap_sync_thread(struct mddev *mddev)
9395{
9396 struct md_rdev *rdev;
9397 sector_t old_dev_sectors = mddev->dev_sectors;
9398 bool is_reshaped = false;
9399
9400
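/* The resync has finished; collect the result. */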
9401 md_unregister_thread(&mddev->sync_thread);
9402 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
9403 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
9404 mddev->degraded != mddev->raid_disks) {
9405
9406
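/* Success: activate any spares. */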
9407 if (mddev->pers->spare_active(mddev)) {
9408 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9409 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
9410 }
9411 }
9412 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
9413 mddev->pers->finish_reshape) {
9414 mddev->pers->finish_reshape(mddev);
9415 if (mddev_is_clustered(mddev))
9416 is_reshaped = true;
9417 }
9418
9419
9420
9421
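/*
 * If the array is no longer degraded, any saved_raid_disk
 * information must be scrapped.
 */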
9422 if (!mddev->degraded)
9423 rdev_for_each(rdev, mddev)
9424 rdev->saved_raid_disk = -1;
9425
9426 md_update_sb(mddev, 1);
9427
9428
9429
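/*
 * MD_SB_CHANGE_PENDING should have been cleared by md_update_sb above,
 * so resync_finish can be called here if MD_CLUSTER_RESYNC_LOCKED was
 * set by clustered raid.
 */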
9430 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
9431 md_cluster_ops->resync_finish(mddev);
9432 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
9433 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
9434 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
9435 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
9436 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
9437 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
9438
9439
9440
9441
9442
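/*
 * Call update_size here because sync_size could have been changed
 * by md_update_sb, and MD_RECOVERY_RESHAPE is now cleared, so it is
 * time to propagate the size across the cluster.
 */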
9443 if (mddev_is_clustered(mddev) && is_reshaped
9444 && !test_bit(MD_CLOSING, &mddev->flags))
9445 md_cluster_ops->update_size(mddev, old_dev_sectors);
9446 wake_up(&resync_wait);
9447
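/* Flag recovery needed just to double check. */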
9448 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9449 sysfs_notify_dirent_safe(mddev->sysfs_action);
9450 md_new_event(mddev);
9451 if (mddev->event_work.func)
9452 queue_work(md_misc_wq, &mddev->event_work);
9453}
9454EXPORT_SYMBOL(md_reap_sync_thread);
9455
9456void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
9457{
9458 sysfs_notify_dirent_safe(rdev->sysfs_state);
9459 wait_event_timeout(rdev->blocked_wait,
9460 !test_bit(Blocked, &rdev->flags) &&
9461 !test_bit(BlockedBadBlocks, &rdev->flags),
9462 msecs_to_jiffies(5000));
9463 rdev_dec_pending(rdev, mddev);
9464}
9465EXPORT_SYMBOL(md_wait_for_blocked_rdev);
9466
9467void md_finish_reshape(struct mddev *mddev)
9468{
9469
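/* Called by the personality module when a reshape completes. */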
9470 struct md_rdev *rdev;
9471
9472 rdev_for_each(rdev, mddev) {
9473 if (rdev->data_offset > rdev->new_data_offset)
9474 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
9475 else
9476 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
9477 rdev->data_offset = rdev->new_data_offset;
9478 }
9479}
9480EXPORT_SYMBOL(md_finish_reshape);
9481
9482
9483
9484
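/* Bad block management. */

/* Returns 1 on success, 0 on failure. */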
9485int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9486 int is_new)
9487{
9488 struct mddev *mddev = rdev->mddev;
9489 int rv;
9490 if (is_new)
9491 s += rdev->new_data_offset;
9492 else
9493 s += rdev->data_offset;
9494 rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
9495 if (rv == 0) {
9496
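/* Make sure the new bad blocks get written out promptly. */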
9497 if (test_bit(ExternalBbl, &rdev->flags))
9498 sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
9499 sysfs_notify_dirent_safe(rdev->sysfs_state);
9500 set_mask_bits(&mddev->sb_flags, 0,
9501 BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
9502 md_wakeup_thread(rdev->mddev->thread);
9503 return 1;
9504 } else
9505 return 0;
9506}
9507EXPORT_SYMBOL_GPL(rdev_set_badblocks);
9508
9509int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
9510 int is_new)
9511{
9512 int rv;
9513 if (is_new)
9514 s += rdev->new_data_offset;
9515 else
9516 s += rdev->data_offset;
9517 rv = badblocks_clear(&rdev->badblocks, s, sectors);
9518 if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
9519 sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
9520 return rv;
9521}
9522EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
9523
9524static int md_notify_reboot(struct notifier_block *this,
9525 unsigned long code, void *x)
9526{
9527 struct list_head *tmp;
9528 struct mddev *mddev;
9529 int need_delay = 0;
9530
9531 for_each_mddev(mddev, tmp) {
9532 if (mddev_trylock(mddev)) {
9533 if (mddev->pers)
9534 __md_stop_writes(mddev);
9535 if (mddev->persistent)
9536 mddev->safemode = 2;
9537 mddev_unlock(mddev);
9538 }
9539 need_delay = 1;
9540 }
9541
9542
9543
9544
9545
9546
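/*
 * Certain more exotic SCSI devices are known to be volatile with
 * respect to too-early system reboots. The right place to handle
 * this is the individual driver, but we still delay here to give
 * pending writes a chance to reach the devices.
 */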
9547 if (need_delay)
9548 mdelay(1000);
9549
9550 return NOTIFY_DONE;
9551}
9552
9553static struct notifier_block md_notifier = {
9554 .notifier_call = md_notify_reboot,
9555 .next = NULL,
9556 .priority = INT_MAX,
9557};
9558
9559static void md_geninit(void)
9560{
9561 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
9562
9563 proc_create("mdstat", S_IRUGO, NULL, &mdstat_proc_ops);
9564}
9565
9566static int __init md_init(void)
9567{
9568 int ret = -ENOMEM;
9569
9570 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
9571 if (!md_wq)
9572 goto err_wq;
9573
9574 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
9575 if (!md_misc_wq)
9576 goto err_misc_wq;
9577
9578 md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
9579 if (!md_rdev_misc_wq)
9580 goto err_rdev_misc_wq;
9581
9582 ret = __register_blkdev(MD_MAJOR, "md", md_probe);
9583 if (ret < 0)
9584 goto err_md;
9585
9586 ret = __register_blkdev(0, "mdp", md_probe);
9587 if (ret < 0)
9588 goto err_mdp;
9589 mdp_major = ret;
9590
9591 register_reboot_notifier(&md_notifier);
9592 raid_table_header = register_sysctl_table(raid_root_table);
9593
9594 md_geninit();
9595 return 0;
9596
9597err_mdp:
9598 unregister_blkdev(MD_MAJOR, "md");
9599err_md:
9600 destroy_workqueue(md_rdev_misc_wq);
9601err_rdev_misc_wq:
9602 destroy_workqueue(md_misc_wq);
9603err_misc_wq:
9604 destroy_workqueue(md_wq);
9605err_wq:
9606 return ret;
9607}
9608
9609static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
9610{
9611 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
9612 struct md_rdev *rdev2, *tmp;
9613 int role, ret;
9614 char b[BDEVNAME_SIZE];
9615
9616
9617
9618
9619
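/*
 * If the size was changed on another node, we need to resize
 * here as well.
 */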
9620 if (mddev->dev_sectors != le64_to_cpu(sb->size)) {
9621 ret = mddev->pers->resize(mddev, le64_to_cpu(sb->size));
9622 if (ret)
9623 pr_info("md-cluster: resize failed\n");
9624 else
9625 md_bitmap_update_sb(mddev->bitmap);
9626 }
9627
9628
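/* Check for a change of roles in the active devices. */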
9629 rdev_for_each_safe(rdev2, tmp, mddev) {
9630 if (test_bit(Faulty, &rdev2->flags))
9631 continue;
9632
9633
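/* Check if the role changed. */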
9634 role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
9635
9636 if (test_bit(Candidate, &rdev2->flags)) {
9637 if (role == 0xfffe) {
9638 pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev, b));
9639 md_kick_rdev_from_array(rdev2);
9640 continue;
9641 } else
9642 clear_bit(Candidate, &rdev2->flags);
9644 }
9645
9646 if (role != rdev2->raid_disk) {
9647
9648
9649
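/*
 * A local spare was activated on another node, unless a reshape
 * is currently in progress.
 */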
9650 if (rdev2->raid_disk == -1 && role != 0xffff &&
9651 !(le32_to_cpu(sb->feature_map) &
9652 MD_FEATURE_RESHAPE_ACTIVE)) {
9653 rdev2->saved_raid_disk = role;
9654 ret = remove_and_add_spares(mddev, rdev2);
9655 pr_info("Activated spare: %s\n",
9656 bdevname(rdev2->bdev, b));
9657
9658
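/*
 * Wake up mddev->thread so the array can resync with the
 * newly activated disk.
 */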
9659 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
9660 md_wakeup_thread(mddev->thread);
9661 }
9662
9663
9664
9665
9666
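/*
 * The device is faulty: do the minimum needed to mark the disk
 * as faulty here; the recovery is performed by the node that
 * initiated the error.
 */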
9667 if ((role == 0xfffe) || (role == 0xfffd)) {
9668 md_error(mddev, rdev2);
9669 clear_bit(Blocked, &rdev2->flags);
9670 }
9671 }
9672 }
9673
9674 if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
9675 ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
9676 if (ret)
9677 pr_warn("md: updating array disks failed. %d\n", ret);
9678 }
9679
9680
9681
9682
9683
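/*
 * mddev->delta_disks was already updated by update_raid_disks,
 * so now is the time to check for a reshape.
 */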
9684 if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9685 (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9686
9687
9688
9689
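/*
 * A reshape is happening on the remote node, so update
 * reshape_position and start the reshape here as well.
 */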
9690 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
9691 if (mddev->pers->update_reshape_pos)
9692 mddev->pers->update_reshape_pos(mddev);
9693 if (mddev->pers->start_reshape)
9694 mddev->pers->start_reshape(mddev);
9695 } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
9696 mddev->reshape_position != MaxSector &&
9697 !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
9698
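/* The reshape has just finished on another node. */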
9699 mddev->reshape_position = MaxSector;
9700 if (mddev->pers->update_reshape_pos)
9701 mddev->pers->update_reshape_pos(mddev);
9702 }
9703
9704
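/* Finally bring the event count up to date. */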
9705 mddev->events = le64_to_cpu(sb->events);
9706}
9707
9708static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
9709{
9710 int err;
9711 struct page *swapout = rdev->sb_page;
9712 struct mdp_superblock_1 *sb;
9713
9714
9715
9716
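/*
 * Keep the old sb page in 'swapout' so it can be restored if
 * reloading the superblock fails.
 */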
9717 rdev->sb_page = NULL;
9718 err = alloc_disk_sb(rdev);
9719 if (err == 0) {
9720 ClearPageUptodate(rdev->sb_page);
9721 rdev->sb_loaded = 0;
9722 err = super_types[mddev->major_version].
9723 load_super(rdev, NULL, mddev->minor_version);
9724 }
9725 if (err < 0) {
9726 pr_warn("%s: %d Could not reload rdev(%d) err: %d. Restoring old values\n",
9727 __func__, __LINE__, rdev->desc_nr, err);
9728 if (rdev->sb_page)
9729 put_page(rdev->sb_page);
9730 rdev->sb_page = swapout;
9731 rdev->sb_loaded = 1;
9732 return err;
9733 }
9734
9735 sb = page_address(rdev->sb_page);
9736
9737
9738
9739
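/* Pick up the recovery offset recorded by the other node, if any. */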
9740 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET))
9741 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
9742
9743
9744
9745
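/*
 * The other node finished recovery: call spare_active to set the
 * device In_sync and update mddev->degraded.
 */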
9746 if (rdev->recovery_offset == MaxSector &&
9747 !test_bit(In_sync, &rdev->flags) &&
9748 mddev->pers->spare_active(mddev))
9749 sysfs_notify_dirent_safe(mddev->sysfs_degraded);
9750
9751 put_page(swapout);
9752 return 0;
9753}
9754
9755void md_reload_sb(struct mddev *mddev, int nr)
9756{
9757 struct md_rdev *rdev;
9758 int err;
9759
9760
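/* Find the rdev with the requested number. */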
9761 rdev_for_each_rcu(rdev, mddev) {
9762 if (rdev->desc_nr == nr)
9763 break;
9764 }
9765
9766 if (!rdev || rdev->desc_nr != nr) {
9767 pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
9768 return;
9769 }
9770
9771 err = read_rdev(mddev, rdev);
9772 if (err < 0)
9773 return;
9774
9775 check_sb_changes(mddev, rdev);
9776
9777
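/* Re-read the superblock of every non-faulty device. */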
9778 rdev_for_each_rcu(rdev, mddev) {
9779 if (!test_bit(Faulty, &rdev->flags))
9780 read_rdev(mddev, rdev);
9781 }
9782}
9783EXPORT_SYMBOL(md_reload_sb);
9784
9785#ifndef MODULE
9786
9787
9788
9789
9790
9791
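/*
 * Searches all registered partitions for autorun RAID arrays
 * at boot time.
 */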
9792static DEFINE_MUTEX(detected_devices_mutex);
9793static LIST_HEAD(all_detected_devices);
9794struct detected_devices_node {
9795 struct list_head list;
9796 dev_t dev;
9797};
9798
9799void md_autodetect_dev(dev_t dev)
9800{
9801 struct detected_devices_node *node_detected_dev;
9802
9803 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
9804 if (node_detected_dev) {
9805 node_detected_dev->dev = dev;
9806 mutex_lock(&detected_devices_mutex);
9807 list_add_tail(&node_detected_dev->list, &all_detected_devices);
9808 mutex_unlock(&detected_devices_mutex);
9809 }
9810}
9811
9812void md_autostart_arrays(int part)
9813{
9814 struct md_rdev *rdev;
9815 struct detected_devices_node *node_detected_dev;
9816 dev_t dev;
9817 int i_scanned, i_passed;
9818
9819 i_scanned = 0;
9820 i_passed = 0;
9821
9822 pr_info("md: Autodetecting RAID arrays.\n");
9823
9824 mutex_lock(&detected_devices_mutex);
9825 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
9826 i_scanned++;
9827 node_detected_dev = list_entry(all_detected_devices.next,
9828 struct detected_devices_node, list);
9829 list_del(&node_detected_dev->list);
9830 dev = node_detected_dev->dev;
9831 kfree(node_detected_dev);
9832 mutex_unlock(&detected_devices_mutex);
9833 rdev = md_import_device(dev, 0, 90);
9834 mutex_lock(&detected_devices_mutex);
9835 if (IS_ERR(rdev))
9836 continue;
9837
9838 if (test_bit(Faulty, &rdev->flags))
9839 continue;
9840
9841 set_bit(AutoDetected, &rdev->flags);
9842 list_add(&rdev->same_set, &pending_raid_disks);
9843 i_passed++;
9844 }
9845 mutex_unlock(&detected_devices_mutex);
9846
9847 pr_debug("md: Scanned %d and added %d devices.\n", i_scanned, i_passed);
9848
9849 autorun_devices(part);
9850}
9851
9852#endif
9853
9854static __exit void md_exit(void)
9855{
9856 struct mddev *mddev;
9857 struct list_head *tmp;
9858 int delay = 1;
9859
9860 unregister_blkdev(MD_MAJOR, "md");
9861 unregister_blkdev(mdp_major, "mdp");
9862 unregister_reboot_notifier(&md_notifier);
9863 unregister_sysctl_table(raid_table_header);
9864
9865
9866
9867
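/*
 * We cannot unload the module while some process is waiting for
 * us in select() or poll(), so wake those waiters up first.
 */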
9868 md_unloading = 1;
9869 while (waitqueue_active(&md_event_waiters)) {
9870
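/* Not safe to leave yet. */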
9871 wake_up(&md_event_waiters);
9872 msleep(delay);
9873 delay += delay;
9874 }
9875 remove_proc_entry("mdstat", NULL);
9876
9877 for_each_mddev(mddev, tmp) {
9878 export_array(mddev);
9879 mddev->ctime = 0;
9880 mddev->hold_active = 0;
9881
9882
9883
9884
9885
9886
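/*
 * for_each_mddev() drops its reference at the end of each
 * iteration; with the array exported and hold_active cleared,
 * the mddev can then be released.
 */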
9887 }
9888 destroy_workqueue(md_rdev_misc_wq);
9889 destroy_workqueue(md_misc_wq);
9890 destroy_workqueue(md_wq);
9891}
9892
9893subsys_initcall(md_init);
9894module_exit(md_exit)
9895
9896static int get_ro(char *buffer, const struct kernel_param *kp)
9897{
9898 return sprintf(buffer, "%d\n", start_readonly);
9899}
9900static int set_ro(const char *val, const struct kernel_param *kp)
9901{
9902 return kstrtouint(val, 10, (unsigned int *)&start_readonly);
9903}
9904
9905module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
9906module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
9907module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
9908module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
9909
9910MODULE_LICENSE("GPL");
9911MODULE_DESCRIPTION("MD RAID framework");
9912MODULE_ALIAS("md");
9913MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
9914