/*
 * md.c : Multiple Devices driver for Linux
 */

35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
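/*
 * pers_list holds the registered md personalities and is protected by
 * pers_lock.  pers_lock is also used to keep mddev->thread stable when
 * it must be woken without the reconfig mutex held (see mddev_unlock()).
 */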
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75static int remove_and_add_spares(struct mddev *mddev,
76 struct md_rdev *this);
77
78#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
79
80
81
82
83
84
85#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
86
87
88
89
90
91
92
93
94
95
96
97
98
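/*
 * Resync/recovery speed limits in KB/sec: speed_limit_min is the rate
 * guaranteed even when the array is busy, speed_limit_max caps the rate
 * used when it is otherwise idle.  Both can be tuned through
 * /proc/sys/dev/raid/speed_limit_{min,max}; a non-zero per-array
 * sync_speed_min/max (see speed_min()/speed_max() below) overrides the
 * system-wide value.
 */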
99static int sysctl_speed_limit_min = 1000;
100static int sysctl_speed_limit_max = 200000;
101static inline int speed_min(struct mddev *mddev)
102{
103 return mddev->sync_speed_min ?
104 mddev->sync_speed_min : sysctl_speed_limit_min;
105}
106
107static inline int speed_max(struct mddev *mddev)
108{
109 return mddev->sync_speed_max ?
110 mddev->sync_speed_max : sysctl_speed_limit_max;
111}
112
113static struct ctl_table_header *raid_table_header;
114
115static struct ctl_table raid_table[] = {
116 {
117 .procname = "speed_limit_min",
118 .data = &sysctl_speed_limit_min,
119 .maxlen = sizeof(int),
120 .mode = S_IRUGO|S_IWUSR,
121 .proc_handler = proc_dointvec,
122 },
123 {
124 .procname = "speed_limit_max",
125 .data = &sysctl_speed_limit_max,
126 .maxlen = sizeof(int),
127 .mode = S_IRUGO|S_IWUSR,
128 .proc_handler = proc_dointvec,
129 },
130 { }
131};
132
133static struct ctl_table raid_dir_table[] = {
134 {
135 .procname = "raid",
136 .maxlen = 0,
137 .mode = S_IRUGO|S_IXUGO,
138 .child = raid_table,
139 },
140 { }
141};
142
143static struct ctl_table raid_root_table[] = {
144 {
145 .procname = "dev",
146 .maxlen = 0,
147 .mode = 0555,
148 .child = raid_dir_table,
149 },
150 { }
151};
152
153static const struct block_device_operations md_fops;
154
155static int start_readonly;
156
157
158
159
160
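/*
 * Allocate a bio from the array's private bio_set when one exists, so
 * md I/O does not have to compete for the global bio pool; fall back to
 * plain bio_alloc() for arrays without a bio_set.
 */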
161struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
162 struct mddev *mddev)
163{
164 struct bio *b;
165
166 if (!mddev || !mddev->bio_set)
167 return bio_alloc(gfp_mask, nr_iovecs);
168
169 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
170 if (!b)
171 return NULL;
172 return b;
173}
174EXPORT_SYMBOL_GPL(bio_alloc_mddev);
175
176struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
177 struct mddev *mddev)
178{
179 if (!mddev || !mddev->bio_set)
180 return bio_clone(bio, gfp_mask);
181
182 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
183}
184EXPORT_SYMBOL_GPL(bio_clone_mddev);
185
186
187
188
189
190
191
192
193
194
195
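/*
 * A system-wide 'event count' is incremented on every interesting md
 * event (array start/stop, device added/removed/failed, ...).  Readers
 * of /proc/mdstat can poll/select on it to learn when something has
 * changed.
 */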
196static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
197static atomic_t md_event_count;
198void md_new_event(struct mddev *mddev)
199{
200 atomic_inc(&md_event_count);
201 wake_up(&md_event_waiters);
202}
203EXPORT_SYMBOL_GPL(md_new_event);
204
205
206
207
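/*
 * Variant of md_new_event() that is safe to call from interrupt
 * context.
 */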
208static void md_new_event_inintr(struct mddev *mddev)
209{
210 atomic_inc(&md_event_count);
211 wake_up(&md_event_waiters);
212}
213
214
215
216
217
218static LIST_HEAD(all_mddevs);
219static DEFINE_SPINLOCK(all_mddevs_lock);
220
221
222
223
224
225
226
227
228
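/*
 * Iterate over all mddevs in the system.  all_mddevs_lock is held while
 * walking the list and a reference is taken on the current mddev before
 * the lock is dropped; code that breaks out of the loop owns that
 * reference and must mddev_put() it.
 */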
229#define for_each_mddev(_mddev,_tmp) \
230 \
231 for (({ spin_lock(&all_mddevs_lock); \
232 _tmp = all_mddevs.next; \
233 _mddev = NULL;}); \
234 ({ if (_tmp != &all_mddevs) \
235 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
236 spin_unlock(&all_mddevs_lock); \
237 if (_mddev) mddev_put(_mddev); \
238 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
239 _tmp != &all_mddevs;}); \
240 ({ spin_lock(&all_mddevs_lock); \
241 _tmp = _tmp->next;}) \
242 )
243
244
245
246
247
248
249
250
251
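/*
 * All I/O to an md device arrives here rather than going straight to
 * the personality's make_request, so that requests can be held back
 * while the array is suspended and so per-array statistics can be
 * accounted.
 */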
252static void md_make_request(struct request_queue *q, struct bio *bio)
253{
254 const int rw = bio_data_dir(bio);
255 struct mddev *mddev = q->queuedata;
256 int cpu;
257 unsigned int sectors;
258
259 if (mddev == NULL || mddev->pers == NULL
260 || !mddev->ready) {
261 bio_io_error(bio);
262 return;
263 }
264 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
265 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
266 return;
267 }
268 smp_rmb();
269 rcu_read_lock();
270 if (mddev->suspended) {
271 DEFINE_WAIT(__wait);
272 for (;;) {
273 prepare_to_wait(&mddev->sb_wait, &__wait,
274 TASK_UNINTERRUPTIBLE);
275 if (!mddev->suspended)
276 break;
277 rcu_read_unlock();
278 schedule();
279 rcu_read_lock();
280 }
281 finish_wait(&mddev->sb_wait, &__wait);
282 }
283 atomic_inc(&mddev->active_io);
284 rcu_read_unlock();
285
286
287
288
289
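 /*
  * Record the request size now: the bio may already have been freed
  * when make_request returns, so it cannot be used for the statistics
  * below.
  */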
290 sectors = bio_sectors(bio);
291 mddev->pers->make_request(mddev, bio);
292
293 cpu = part_stat_lock();
294 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
295 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
296 part_stat_unlock();
297
298 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
299 wake_up(&mddev->sb_wait);
300}
301
302
303
304
305
306
307
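/*
 * mddev_suspend() makes sure no new requests are submitted to the
 * device and that all requests already submitted have completed, then
 * asks the personality to quiesce.  mddev_resume() reverses this and
 * kicks off any pending recovery work.
 */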
308void mddev_suspend(struct mddev *mddev)
309{
310 BUG_ON(mddev->suspended);
311 mddev->suspended = 1;
312 synchronize_rcu();
313 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
314 mddev->pers->quiesce(mddev, 1);
315
316 del_timer_sync(&mddev->safemode_timer);
317}
318EXPORT_SYMBOL_GPL(mddev_suspend);
319
320void mddev_resume(struct mddev *mddev)
321{
322 mddev->suspended = 0;
323 wake_up(&mddev->sb_wait);
324 mddev->pers->quiesce(mddev, 0);
325
326 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
327 md_wakeup_thread(mddev->thread);
328 md_wakeup_thread(mddev->sync_thread);
329}
330EXPORT_SYMBOL_GPL(mddev_resume);
331
332int mddev_congested(struct mddev *mddev, int bits)
333{
334 return mddev->suspended;
335}
336EXPORT_SYMBOL(mddev_congested);
337
338
339
340
341
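/*
 * Generic flush handling for md: a flush request is cloned to every
 * active, non-faulty member device, and only when all of those complete
 * is the original bio (with the flush flag cleared) handed to the
 * personality.
 */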
342static void md_end_flush(struct bio *bio, int err)
343{
344 struct md_rdev *rdev = bio->bi_private;
345 struct mddev *mddev = rdev->mddev;
346
347 rdev_dec_pending(rdev, mddev);
348
349 if (atomic_dec_and_test(&mddev->flush_pending)) {
350
351 queue_work(md_wq, &mddev->flush_work);
352 }
353 bio_put(bio);
354}
355
356static void md_submit_flush_data(struct work_struct *ws);
357
358static void submit_flushes(struct work_struct *ws)
359{
360 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
361 struct md_rdev *rdev;
362
363 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
364 atomic_set(&mddev->flush_pending, 1);
365 rcu_read_lock();
366 rdev_for_each_rcu(rdev, mddev)
367 if (rdev->raid_disk >= 0 &&
368 !test_bit(Faulty, &rdev->flags)) {
369
370
371
372
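 /*
  * Take two references on the rdev: one is dropped when the flush
  * bio completes (md_end_flush), the other once rcu_read_lock has
  * been re-taken below.
  */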
373 struct bio *bi;
374 atomic_inc(&rdev->nr_pending);
375 atomic_inc(&rdev->nr_pending);
376 rcu_read_unlock();
377 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
378 bi->bi_end_io = md_end_flush;
379 bi->bi_private = rdev;
380 bi->bi_bdev = rdev->bdev;
381 atomic_inc(&mddev->flush_pending);
382 submit_bio(WRITE_FLUSH, bi);
383 rcu_read_lock();
384 rdev_dec_pending(rdev, mddev);
385 }
386 rcu_read_unlock();
387 if (atomic_dec_and_test(&mddev->flush_pending))
388 queue_work(md_wq, &mddev->flush_work);
389}
390
391static void md_submit_flush_data(struct work_struct *ws)
392{
393 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
394 struct bio *bio = mddev->flush_bio;
395
396 if (bio->bi_iter.bi_size == 0)
 /* an empty flush request - nothing more to do */
398 bio_endio(bio, 0);
399 else {
400 bio->bi_rw &= ~REQ_FLUSH;
401 mddev->pers->make_request(mddev, bio);
402 }
403
404 mddev->flush_bio = NULL;
405 wake_up(&mddev->sb_wait);
406}
407
408void md_flush_request(struct mddev *mddev, struct bio *bio)
409{
410 spin_lock_irq(&mddev->write_lock);
411 wait_event_lock_irq(mddev->sb_wait,
412 !mddev->flush_bio,
413 mddev->write_lock);
414 mddev->flush_bio = bio;
415 spin_unlock_irq(&mddev->write_lock);
416
417 INIT_WORK(&mddev->flush_work, submit_flushes);
418 queue_work(md_wq, &mddev->flush_work);
419}
420EXPORT_SYMBOL(md_flush_request);
421
422void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
423{
424 struct mddev *mddev = cb->data;
425 md_wakeup_thread(mddev->thread);
426 kfree(cb);
427}
428EXPORT_SYMBOL(md_unplug);
429
430static inline struct mddev *mddev_get(struct mddev *mddev)
431{
432 atomic_inc(&mddev->active);
433 return mddev;
434}
435
436static void mddev_delayed_delete(struct work_struct *ws);
437
438static void mddev_put(struct mddev *mddev)
439{
440 struct bio_set *bs = NULL;
441
442 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
443 return;
444 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
445 mddev->ctime == 0 && !mddev->hold_active) {
446
447
448 list_del_init(&mddev->all_mddevs);
449 bs = mddev->bio_set;
450 mddev->bio_set = NULL;
451 if (mddev->gendisk) {
452
453
454
455
456
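 /*
  * The gendisk created by a probe needs to be torn down.  Queue the
  * work while still holding all_mddevs_lock so that a later
  * flush_workqueue() is guaranteed to wait for it.
  */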
457 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
458 queue_work(md_misc_wq, &mddev->del_work);
459 } else
460 kfree(mddev);
461 }
462 spin_unlock(&all_mddevs_lock);
463 if (bs)
464 bioset_free(bs);
465}
466
467void mddev_init(struct mddev *mddev)
468{
469 mutex_init(&mddev->open_mutex);
470 mutex_init(&mddev->reconfig_mutex);
471 mutex_init(&mddev->bitmap_info.mutex);
472 INIT_LIST_HEAD(&mddev->disks);
473 INIT_LIST_HEAD(&mddev->all_mddevs);
474 init_timer(&mddev->safemode_timer);
475 atomic_set(&mddev->active, 1);
476 atomic_set(&mddev->openers, 0);
477 atomic_set(&mddev->active_io, 0);
478 spin_lock_init(&mddev->write_lock);
479 atomic_set(&mddev->flush_pending, 0);
480 init_waitqueue_head(&mddev->sb_wait);
481 init_waitqueue_head(&mddev->recovery_wait);
482 mddev->reshape_position = MaxSector;
483 mddev->reshape_backwards = 0;
484 mddev->last_sync_action = "none";
485 mddev->resync_min = 0;
486 mddev->resync_max = MaxSector;
487 mddev->level = LEVEL_NONE;
488}
489EXPORT_SYMBOL_GPL(mddev_init);
490
491static struct mddev * mddev_find(dev_t unit)
492{
493 struct mddev *mddev, *new = NULL;
494
495 if (unit && MAJOR(unit) != MD_MAJOR)
496 unit &= ~((1<<MdpMinorShift)-1);
497
498 retry:
499 spin_lock(&all_mddevs_lock);
500
501 if (unit) {
502 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
503 if (mddev->unit == unit) {
504 mddev_get(mddev);
505 spin_unlock(&all_mddevs_lock);
506 kfree(new);
507 return mddev;
508 }
509
510 if (new) {
511 list_add(&new->all_mddevs, &all_mddevs);
512 spin_unlock(&all_mddevs_lock);
513 new->hold_active = UNTIL_IOCTL;
514 return new;
515 }
516 } else if (new) {
517
518 static int next_minor = 512;
519 int start = next_minor;
520 int is_free = 0;
521 int dev = 0;
522 while (!is_free) {
523 dev = MKDEV(MD_MAJOR, next_minor);
524 next_minor++;
525 if (next_minor > MINORMASK)
526 next_minor = 0;
527 if (next_minor == start) {
528
529 spin_unlock(&all_mddevs_lock);
530 kfree(new);
531 return NULL;
532 }
533
534 is_free = 1;
535 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
536 if (mddev->unit == dev) {
537 is_free = 0;
538 break;
539 }
540 }
541 new->unit = dev;
542 new->md_minor = MINOR(dev);
543 new->hold_active = UNTIL_STOP;
544 list_add(&new->all_mddevs, &all_mddevs);
545 spin_unlock(&all_mddevs_lock);
546 return new;
547 }
548 spin_unlock(&all_mddevs_lock);
549
550 new = kzalloc(sizeof(*new), GFP_KERNEL);
551 if (!new)
552 return NULL;
553
554 new->unit = unit;
555 if (MAJOR(unit) == MD_MAJOR)
556 new->md_minor = MINOR(unit);
557 else
558 new->md_minor = MINOR(unit) >> MdpMinorShift;
559
560 mddev_init(new);
561
562 goto retry;
563}
564
565static inline int __must_check mddev_lock(struct mddev * mddev)
566{
567 return mutex_lock_interruptible(&mddev->reconfig_mutex);
568}
569
570
571
572
573static inline void mddev_lock_nointr(struct mddev * mddev)
574{
575 mutex_lock(&mddev->reconfig_mutex);
576}
577
578static inline int mddev_is_locked(struct mddev *mddev)
579{
580 return mutex_is_locked(&mddev->reconfig_mutex);
581}
582
583static inline int mddev_trylock(struct mddev * mddev)
584{
585 return mutex_trylock(&mddev->reconfig_mutex);
586}
587
588static struct attribute_group md_redundancy_group;
589
590static void mddev_unlock(struct mddev * mddev)
591{
592 if (mddev->to_remove) {
593
594
595
596
597
598
599
600
601
602
603
604
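 /*
  * Sysfs attribute groups cannot be removed while reconfig_mutex is
  * held: an access to one of the files may itself try to take the
  * mutex, which would deadlock.  So note the group, set sysfs_active,
  * drop the mutex and only then remove the group; anything that might
  * change the sysfs namespace meanwhile checks sysfs_active and backs
  * off.
  */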
605 struct attribute_group *to_remove = mddev->to_remove;
606 mddev->to_remove = NULL;
607 mddev->sysfs_active = 1;
608 mutex_unlock(&mddev->reconfig_mutex);
609
610 if (mddev->kobj.sd) {
611 if (to_remove != &md_redundancy_group)
612 sysfs_remove_group(&mddev->kobj, to_remove);
613 if (mddev->pers == NULL ||
614 mddev->pers->sync_request == NULL) {
615 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
616 if (mddev->sysfs_action)
617 sysfs_put(mddev->sysfs_action);
618 mddev->sysfs_action = NULL;
619 }
620 }
621 mddev->sysfs_active = 0;
622 } else
623 mutex_unlock(&mddev->reconfig_mutex);
624
625
626
627
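 /*
  * The reconfig mutex has been dropped, so take pers_lock to make sure
  * mddev->thread cannot disappear while it is being woken.
  */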
628 spin_lock(&pers_lock);
629 md_wakeup_thread(mddev->thread);
630 spin_unlock(&pers_lock);
631}
632
633static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
634{
635 struct md_rdev *rdev;
636
637 rdev_for_each(rdev, mddev)
638 if (rdev->desc_nr == nr)
639 return rdev;
640
641 return NULL;
642}
643
644static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
645{
646 struct md_rdev *rdev;
647
648 rdev_for_each_rcu(rdev, mddev)
649 if (rdev->desc_nr == nr)
650 return rdev;
651
652 return NULL;
653}
654
655static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
656{
657 struct md_rdev *rdev;
658
659 rdev_for_each(rdev, mddev)
660 if (rdev->bdev->bd_dev == dev)
661 return rdev;
662
663 return NULL;
664}
665
666static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
667{
668 struct md_rdev *rdev;
669
670 rdev_for_each_rcu(rdev, mddev)
671 if (rdev->bdev->bd_dev == dev)
672 return rdev;
673
674 return NULL;
675}
676
677static struct md_personality *find_pers(int level, char *clevel)
678{
679 struct md_personality *pers;
680 list_for_each_entry(pers, &pers_list, list) {
681 if (level != LEVEL_NONE && pers->level == level)
682 return pers;
683 if (strcmp(pers->name, clevel)==0)
684 return pers;
685 }
686 return NULL;
687}
688
689
690static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
691{
692 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
693 return MD_NEW_SIZE_SECTORS(num_sectors);
694}
695
696static int alloc_disk_sb(struct md_rdev * rdev)
697{
698 if (rdev->sb_page)
699 MD_BUG();
700
701 rdev->sb_page = alloc_page(GFP_KERNEL);
702 if (!rdev->sb_page) {
703 printk(KERN_ALERT "md: out of memory.\n");
704 return -ENOMEM;
705 }
706
707 return 0;
708}
709
710void md_rdev_clear(struct md_rdev *rdev)
711{
712 if (rdev->sb_page) {
713 put_page(rdev->sb_page);
714 rdev->sb_loaded = 0;
715 rdev->sb_page = NULL;
716 rdev->sb_start = 0;
717 rdev->sectors = 0;
718 }
719 if (rdev->bb_page) {
720 put_page(rdev->bb_page);
721 rdev->bb_page = NULL;
722 }
723 kfree(rdev->badblocks.page);
724 rdev->badblocks.page = NULL;
725}
726EXPORT_SYMBOL_GPL(md_rdev_clear);
727
728static void super_written(struct bio *bio, int error)
729{
730 struct md_rdev *rdev = bio->bi_private;
731 struct mddev *mddev = rdev->mddev;
732
733 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
734 printk("md: super_written gets error=%d, uptodate=%d\n",
735 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
736 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
737 md_error(mddev, rdev);
738 }
739
740 if (atomic_dec_and_test(&mddev->pending_writes))
741 wake_up(&mddev->sb_wait);
742 bio_put(bio);
743}
744
745void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
746 sector_t sector, int size, struct page *page)
747{
748
749
750
751
752
753
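 /*
  * Write the first 'size' bytes of 'page' to 'sector' of rdev.
  * mddev->pending_writes is incremented here and dropped in
  * super_written(), which wakes sb_wait when it reaches zero and calls
  * md_error() if the write failed.
  */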
754 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
755
756 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
757 bio->bi_iter.bi_sector = sector;
758 bio_add_page(bio, page, size, 0);
759 bio->bi_private = rdev;
760 bio->bi_end_io = super_written;
761
762 atomic_inc(&mddev->pending_writes);
763 submit_bio(WRITE_FLUSH_FUA, bio);
764}
765
766void md_super_wait(struct mddev *mddev)
767{
768
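 /* wait for all superblock writes that were scheduled to complete */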
769 DEFINE_WAIT(wq);
770 for(;;) {
771 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
772 if (atomic_read(&mddev->pending_writes)==0)
773 break;
774 schedule();
775 }
776 finish_wait(&mddev->sb_wait, &wq);
777}
778
779int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
780 struct page *page, int rw, bool metadata_op)
781{
782 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
783 int ret;
784
785 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
786 rdev->meta_bdev : rdev->bdev;
787 if (metadata_op)
788 bio->bi_iter.bi_sector = sector + rdev->sb_start;
789 else if (rdev->mddev->reshape_position != MaxSector &&
790 (rdev->mddev->reshape_backwards ==
791 (sector >= rdev->mddev->reshape_position)))
792 bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
793 else
794 bio->bi_iter.bi_sector = sector + rdev->data_offset;
795 bio_add_page(bio, page, size, 0);
796 submit_bio_wait(rw, bio);
797
798 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
799 bio_put(bio);
800 return ret;
801}
802EXPORT_SYMBOL_GPL(sync_page_io);
803
804static int read_disk_sb(struct md_rdev * rdev, int size)
805{
806 char b[BDEVNAME_SIZE];
807 if (!rdev->sb_page) {
808 MD_BUG();
809 return -EINVAL;
810 }
811 if (rdev->sb_loaded)
812 return 0;
813
814
815 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
816 goto fail;
817 rdev->sb_loaded = 1;
818 return 0;
819
820fail:
821 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
822 bdevname(rdev->bdev,b));
823 return -EINVAL;
824}
825
826static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
827{
828 return sb1->set_uuid0 == sb2->set_uuid0 &&
829 sb1->set_uuid1 == sb2->set_uuid1 &&
830 sb1->set_uuid2 == sb2->set_uuid2 &&
831 sb1->set_uuid3 == sb2->set_uuid3;
832}
833
834static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
835{
836 int ret;
837 mdp_super_t *tmp1, *tmp2;
838
839 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
840 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
841
842 if (!tmp1 || !tmp2) {
843 ret = 0;
844 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
845 goto abort;
846 }
847
848 *tmp1 = *sb1;
849 *tmp2 = *sb2;
850
851
852
853
854 tmp1->nr_disks = 0;
855 tmp2->nr_disks = 0;
856
857 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
858abort:
859 kfree(tmp1);
860 kfree(tmp2);
861 return ret;
862}
863
864
865static u32 md_csum_fold(u32 csum)
866{
867 csum = (csum & 0xffff) + (csum >> 16);
868 return (csum & 0xffff) + (csum >> 16);
869}
870
871static unsigned int calc_sb_csum(mdp_super_t * sb)
872{
873 u64 newcsum = 0;
874 u32 *sb32 = (u32*)sb;
875 int i;
876 unsigned int disk_csum, csum;
877
878 disk_csum = sb->sb_csum;
879 sb->sb_csum = 0;
880
881 for (i = 0; i < MD_SB_BYTES/4 ; i++)
882 newcsum += sb32[i];
883 csum = (newcsum & 0xffffffff) + (newcsum>>32);
884
885
886#ifdef CONFIG_ALPHA
887
888
889
890
891
892
893
894
895 sb->sb_csum = md_csum_fold(disk_csum);
896#else
897 sb->sb_csum = disk_csum;
898#endif
899 return csum;
900}
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
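/*
 * Superblock handling.  md supports several on-disk metadata formats
 * through a common set of operations, one struct super_type per format:
 *
 *   load_super(rdev, refdev, minor_version)
 *      read and sanity-check the superblock on rdev, comparing it
 *      against refdev when refdev is not NULL.  Returns 0 when rdev is
 *      compatible with refdev, 1 when it is compatible and newer (so it
 *      should become the new reference), or a negative errno such as
 *      -EINVAL on failure.
 *   validate_super(mddev, rdev)
 *      check that rdev is acceptable for mddev and import its fields
 *      into mddev/rdev state.
 *   sync_super(mddev, rdev)
 *      refresh rdev's in-memory superblock image from current mddev
 *      state.
 *   rdev_size_change(rdev, num_sectors)
 *      change the usable size of rdev, relocating the superblock when
 *      necessary, and return the new size (0 on failure).
 *   allow_new_offset(rdev, new_offset)
 *      report whether the data on rdev may be moved to new_offset.
 */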
933struct super_type {
934 char *name;
935 struct module *owner;
936 int (*load_super)(struct md_rdev *rdev,
937 struct md_rdev *refdev,
938 int minor_version);
939 int (*validate_super)(struct mddev *mddev,
940 struct md_rdev *rdev);
941 void (*sync_super)(struct mddev *mddev,
942 struct md_rdev *rdev);
943 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
944 sector_t num_sectors);
945 int (*allow_new_offset)(struct md_rdev *rdev,
946 unsigned long long new_offset);
947};
948
949
950
951
952
953
954
955
956
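/*
 * Check that the given mddev has no bitmap.  Personalities that do not
 * support bitmaps call this from their run method; it logs an error and
 * returns non-zero if a bitmap is configured, 0 otherwise.
 */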
957int md_check_no_bitmap(struct mddev *mddev)
958{
959 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
960 return 0;
961 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
962 mdname(mddev), mddev->pers->name);
963 return 1;
964}
965EXPORT_SYMBOL(md_check_no_bitmap);
966
967
968
969
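/*
 * load_super for 0.90.0
 */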
970static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
971{
972 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
973 mdp_super_t *sb;
974 int ret;
975
976
977
978
979
980
981
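 /*
  * Calculate the position of the superblock (in 512 byte sectors):
  * it lives at the end of the device.
  */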
982 rdev->sb_start = calc_dev_sboffset(rdev);
983
984 ret = read_disk_sb(rdev, MD_SB_BYTES);
985 if (ret) return ret;
986
987 ret = -EINVAL;
988
989 bdevname(rdev->bdev, b);
990 sb = page_address(rdev->sb_page);
991
992 if (sb->md_magic != MD_SB_MAGIC) {
993 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
994 b);
995 goto abort;
996 }
997
998 if (sb->major_version != 0 ||
999 sb->minor_version < 90 ||
1000 sb->minor_version > 91) {
1001 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1002 sb->major_version, sb->minor_version,
1003 b);
1004 goto abort;
1005 }
1006
1007 if (sb->raid_disks <= 0)
1008 goto abort;
1009
1010 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1011 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1012 b);
1013 goto abort;
1014 }
1015
1016 rdev->preferred_minor = sb->md_minor;
1017 rdev->data_offset = 0;
1018 rdev->new_data_offset = 0;
1019 rdev->sb_size = MD_SB_BYTES;
1020 rdev->badblocks.shift = -1;
1021
1022 if (sb->level == LEVEL_MULTIPATH)
1023 rdev->desc_nr = -1;
1024 else
1025 rdev->desc_nr = sb->this_disk.number;
1026
1027 if (!refdev) {
1028 ret = 1;
1029 } else {
1030 __u64 ev1, ev2;
1031 mdp_super_t *refsb = page_address(refdev->sb_page);
1032 if (!uuid_equal(refsb, sb)) {
1033 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1034 b, bdevname(refdev->bdev,b2));
1035 goto abort;
1036 }
1037 if (!sb_equal(refsb, sb)) {
1038 printk(KERN_WARNING "md: %s has same UUID"
1039 " but different superblock to %s\n",
1040 b, bdevname(refdev->bdev, b2));
1041 goto abort;
1042 }
1043 ev1 = md_event(sb);
1044 ev2 = md_event(refsb);
1045 if (ev1 > ev2)
1046 ret = 1;
1047 else
1048 ret = 0;
1049 }
1050 rdev->sectors = rdev->sb_start;
1051
1052
1053
1054
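 /*
  * Limit to 4TB, as 0.90 metadata cannot record more than that
  * (not needed for Linear and RAID0, whose metadata does not record
  * the device size).
  */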
1055 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1056 rdev->sectors = (2ULL << 32) - 2;
1057
1058 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
 /* the device is smaller than the array size recorded in the superblock */
1060 ret = -EINVAL;
1061
1062 abort:
1063 return ret;
1064}
1065
1066
1067
1068
1069static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1070{
1071 mdp_disk_t *desc;
1072 mdp_super_t *sb = page_address(rdev->sb_page);
1073 __u64 ev1 = md_event(sb);
1074
1075 rdev->raid_disk = -1;
1076 clear_bit(Faulty, &rdev->flags);
1077 clear_bit(In_sync, &rdev->flags);
1078 clear_bit(Bitmap_sync, &rdev->flags);
1079 clear_bit(WriteMostly, &rdev->flags);
1080
1081 if (mddev->raid_disks == 0) {
1082 mddev->major_version = 0;
1083 mddev->minor_version = sb->minor_version;
1084 mddev->patch_version = sb->patch_version;
1085 mddev->external = 0;
1086 mddev->chunk_sectors = sb->chunk_size >> 9;
1087 mddev->ctime = sb->ctime;
1088 mddev->utime = sb->utime;
1089 mddev->level = sb->level;
1090 mddev->clevel[0] = 0;
1091 mddev->layout = sb->layout;
1092 mddev->raid_disks = sb->raid_disks;
1093 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1094 mddev->events = ev1;
1095 mddev->bitmap_info.offset = 0;
1096 mddev->bitmap_info.space = 0;
1097
1098 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1099 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1100 mddev->reshape_backwards = 0;
1101
1102 if (mddev->minor_version >= 91) {
1103 mddev->reshape_position = sb->reshape_position;
1104 mddev->delta_disks = sb->delta_disks;
1105 mddev->new_level = sb->new_level;
1106 mddev->new_layout = sb->new_layout;
1107 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1108 if (mddev->delta_disks < 0)
1109 mddev->reshape_backwards = 1;
1110 } else {
1111 mddev->reshape_position = MaxSector;
1112 mddev->delta_disks = 0;
1113 mddev->new_level = mddev->level;
1114 mddev->new_layout = mddev->layout;
1115 mddev->new_chunk_sectors = mddev->chunk_sectors;
1116 }
1117
1118 if (sb->state & (1<<MD_SB_CLEAN))
1119 mddev->recovery_cp = MaxSector;
1120 else {
1121 if (sb->events_hi == sb->cp_events_hi &&
1122 sb->events_lo == sb->cp_events_lo) {
1123 mddev->recovery_cp = sb->recovery_cp;
1124 } else
1125 mddev->recovery_cp = 0;
1126 }
1127
1128 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1129 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1130 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1131 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1132
1133 mddev->max_disks = MD_SB_DISKS;
1134
1135 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1136 mddev->bitmap_info.file == NULL) {
1137 mddev->bitmap_info.offset =
1138 mddev->bitmap_info.default_offset;
1139 mddev->bitmap_info.space =
1140 mddev->bitmap_info.default_space;
1141 }
1142
1143 } else if (mddev->pers == NULL) {
1144
1145
1146 ++ev1;
1147 if (sb->disks[rdev->desc_nr].state & (
1148 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1149 if (ev1 < mddev->events)
1150 return -EINVAL;
1151 } else if (mddev->bitmap) {
1152
1153
1154
1155 if (ev1 < mddev->bitmap->events_cleared)
1156 return 0;
1157 if (ev1 < mddev->events)
1158 set_bit(Bitmap_sync, &rdev->flags);
1159 } else {
1160 if (ev1 < mddev->events)
1161
1162 return 0;
1163 }
1164
1165 if (mddev->level != LEVEL_MULTIPATH) {
1166 desc = sb->disks + rdev->desc_nr;
1167
1168 if (desc->state & (1<<MD_DISK_FAULTY))
1169 set_bit(Faulty, &rdev->flags);
 else if (desc->state & (1<<MD_DISK_SYNC)) {
1172 set_bit(In_sync, &rdev->flags);
1173 rdev->raid_disk = desc->raid_disk;
1174 rdev->saved_raid_disk = desc->raid_disk;
1175 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1176
1177
1178
1179 if (mddev->minor_version >= 91) {
1180 rdev->recovery_offset = 0;
1181 rdev->raid_disk = desc->raid_disk;
1182 }
1183 }
1184 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1185 set_bit(WriteMostly, &rdev->flags);
1186 } else
1187 set_bit(In_sync, &rdev->flags);
1188 return 0;
1189}
1190
1191
1192
1193
1194static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1195{
1196 mdp_super_t *sb;
1197 struct md_rdev *rdev2;
1198 int next_spare = mddev->raid_disks;
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
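 /*
  * Make rdev->sb match mddev data:
  *
  * 1/ zero out disks
  * 2/ add info for each disk, keeping track of the highest desc_nr
  *    (next_spare)
  * 3/ any empty disks < next_spare become removed
  *
  * disks[0] is initialised to REMOVED because we cannot be sure from
  * the other fields whether it has been set up or not.
  */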
1211 int i;
1212 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1213
1214 rdev->sb_size = MD_SB_BYTES;
1215
1216 sb = page_address(rdev->sb_page);
1217
1218 memset(sb, 0, sizeof(*sb));
1219
1220 sb->md_magic = MD_SB_MAGIC;
1221 sb->major_version = mddev->major_version;
1222 sb->patch_version = mddev->patch_version;
1223 sb->gvalid_words = 0;
1224 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1225 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1226 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1227 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1228
1229 sb->ctime = mddev->ctime;
1230 sb->level = mddev->level;
1231 sb->size = mddev->dev_sectors / 2;
1232 sb->raid_disks = mddev->raid_disks;
1233 sb->md_minor = mddev->md_minor;
1234 sb->not_persistent = 0;
1235 sb->utime = mddev->utime;
1236 sb->state = 0;
1237 sb->events_hi = (mddev->events>>32);
1238 sb->events_lo = (u32)mddev->events;
1239
1240 if (mddev->reshape_position == MaxSector)
1241 sb->minor_version = 90;
1242 else {
1243 sb->minor_version = 91;
1244 sb->reshape_position = mddev->reshape_position;
1245 sb->new_level = mddev->new_level;
1246 sb->delta_disks = mddev->delta_disks;
1247 sb->new_layout = mddev->new_layout;
1248 sb->new_chunk = mddev->new_chunk_sectors << 9;
1249 }
1250 mddev->minor_version = sb->minor_version;
1251 if (mddev->in_sync)
1252 {
1253 sb->recovery_cp = mddev->recovery_cp;
1254 sb->cp_events_hi = (mddev->events>>32);
1255 sb->cp_events_lo = (u32)mddev->events;
1256 if (mddev->recovery_cp == MaxSector)
1257 sb->state = (1<< MD_SB_CLEAN);
1258 } else
1259 sb->recovery_cp = 0;
1260
1261 sb->layout = mddev->layout;
1262 sb->chunk_size = mddev->chunk_sectors << 9;
1263
1264 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1265 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1266
1267 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1268 rdev_for_each(rdev2, mddev) {
1269 mdp_disk_t *d;
1270 int desc_nr;
1271 int is_active = test_bit(In_sync, &rdev2->flags);
1272
1273 if (rdev2->raid_disk >= 0 &&
1274 sb->minor_version >= 91)
1275
1276
1277
1278
1279 is_active = 1;
1280 if (rdev2->raid_disk < 0 ||
1281 test_bit(Faulty, &rdev2->flags))
1282 is_active = 0;
1283 if (is_active)
1284 desc_nr = rdev2->raid_disk;
1285 else
1286 desc_nr = next_spare++;
1287 rdev2->desc_nr = desc_nr;
1288 d = &sb->disks[rdev2->desc_nr];
1289 nr_disks++;
1290 d->number = rdev2->desc_nr;
1291 d->major = MAJOR(rdev2->bdev->bd_dev);
1292 d->minor = MINOR(rdev2->bdev->bd_dev);
1293 if (is_active)
1294 d->raid_disk = rdev2->raid_disk;
1295 else
1296 d->raid_disk = rdev2->desc_nr;
1297 if (test_bit(Faulty, &rdev2->flags))
1298 d->state = (1<<MD_DISK_FAULTY);
1299 else if (is_active) {
1300 d->state = (1<<MD_DISK_ACTIVE);
1301 if (test_bit(In_sync, &rdev2->flags))
1302 d->state |= (1<<MD_DISK_SYNC);
1303 active++;
1304 working++;
1305 } else {
1306 d->state = 0;
1307 spare++;
1308 working++;
1309 }
1310 if (test_bit(WriteMostly, &rdev2->flags))
1311 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1312 }
1313
1314 for (i=0 ; i < mddev->raid_disks ; i++) {
1315 mdp_disk_t *d = &sb->disks[i];
1316 if (d->state == 0 && d->number == 0) {
1317 d->number = i;
1318 d->raid_disk = i;
1319 d->state = (1<<MD_DISK_REMOVED);
1320 d->state |= (1<<MD_DISK_FAULTY);
1321 failed++;
1322 }
1323 }
1324 sb->nr_disks = nr_disks;
1325 sb->active_disks = active;
1326 sb->working_disks = working;
1327 sb->failed_disks = failed;
1328 sb->spare_disks = spare;
1329
1330 sb->this_disk = sb->disks[rdev->desc_nr];
1331 sb->sb_csum = calc_sb_csum(sb);
1332}
1333
1334
1335
1336
1337static unsigned long long
1338super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1339{
1340 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1341 return 0;
1342 if (rdev->mddev->bitmap_info.offset)
1343 return 0;
1344 rdev->sb_start = calc_dev_sboffset(rdev);
1345 if (!num_sectors || num_sectors > rdev->sb_start)
1346 num_sectors = rdev->sb_start;
1347
1348
1349
1350 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1351 num_sectors = (2ULL << 32) - 2;
1352 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1353 rdev->sb_page);
1354 md_super_wait(rdev->mddev);
1355 return num_sectors;
1356}
1357
1358static int
1359super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1360{
1361
1362 return new_offset == 0;
1363}
1364
1365
1366
1367
1368
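/*
 * version 1 superblock
 */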
1369static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1370{
1371 __le32 disk_csum;
1372 u32 csum;
1373 unsigned long long newcsum;
1374 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1375 __le32 *isuper = (__le32*)sb;
1376
1377 disk_csum = sb->sb_csum;
1378 sb->sb_csum = 0;
1379 newcsum = 0;
1380 for (; size >= 4; size -= 4)
1381 newcsum += le32_to_cpu(*isuper++);
1382
1383 if (size == 2)
1384 newcsum += le16_to_cpu(*(__le16*) isuper);
1385
1386 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1387 sb->sb_csum = disk_csum;
1388 return cpu_to_le32(csum);
1389}
1390
1391static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1392 int acknowledged);
1393static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1394{
1395 struct mdp_superblock_1 *sb;
1396 int ret;
1397 sector_t sb_start;
1398 sector_t sectors;
1399 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1400 int bmask;
1401
1402
1403
1404
1405
1406
1407
1408
1409
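 /*
  * Calculate the position of the superblock in 512 byte sectors.
  * It is always aligned to a 4K boundary and, depending on
  * minor_version, it can be:
  * 0: At least 8K, but less than 12K, from the end of the device
  * 1: At the start of the device
  * 2: 4K from the start of the device
  */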
1410 switch(minor_version) {
1411 case 0:
1412 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1413 sb_start -= 8*2;
1414 sb_start &= ~(sector_t)(4*2-1);
1415 break;
1416 case 1:
1417 sb_start = 0;
1418 break;
1419 case 2:
1420 sb_start = 8;
1421 break;
1422 default:
1423 return -EINVAL;
1424 }
1425 rdev->sb_start = sb_start;
1426
1427
1428
1429
1430 ret = read_disk_sb(rdev, 4096);
1431 if (ret) return ret;
1432
1433
1434 sb = page_address(rdev->sb_page);
1435
1436 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1437 sb->major_version != cpu_to_le32(1) ||
1438 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1439 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1440 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1441 return -EINVAL;
1442
1443 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1444 printk("md: invalid superblock checksum on %s\n",
1445 bdevname(rdev->bdev,b));
1446 return -EINVAL;
1447 }
1448 if (le64_to_cpu(sb->data_size) < 10) {
1449 printk("md: data_size too small on %s\n",
1450 bdevname(rdev->bdev,b));
1451 return -EINVAL;
1452 }
1453 if (sb->pad0 ||
1454 sb->pad3[0] ||
1455 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
 /* Some padding is non-zero - might be a new feature */
1457 return -EINVAL;
1458
1459 rdev->preferred_minor = 0xffff;
1460 rdev->data_offset = le64_to_cpu(sb->data_offset);
1461 rdev->new_data_offset = rdev->data_offset;
1462 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1463 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1464 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1465 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1466
1467 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1468 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1469 if (rdev->sb_size & bmask)
1470 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1471
1472 if (minor_version
1473 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1474 return -EINVAL;
1475 if (minor_version
1476 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1477 return -EINVAL;
1478
1479 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1480 rdev->desc_nr = -1;
1481 else
1482 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1483
1484 if (!rdev->bb_page) {
1485 rdev->bb_page = alloc_page(GFP_KERNEL);
1486 if (!rdev->bb_page)
1487 return -ENOMEM;
1488 }
1489 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1490 rdev->badblocks.count == 0) {
1491
1492
1493
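 /*
  * Load the bad block log, which is currently limited to a single
  * page.
  */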
1494 s32 offset;
1495 sector_t bb_sector;
1496 u64 *bbp;
1497 int i;
1498 int sectors = le16_to_cpu(sb->bblog_size);
1499 if (sectors > (PAGE_SIZE / 512))
1500 return -EINVAL;
1501 offset = le32_to_cpu(sb->bblog_offset);
1502 if (offset == 0)
1503 return -EINVAL;
1504 bb_sector = (long long)offset;
1505 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1506 rdev->bb_page, READ, true))
1507 return -EIO;
1508 bbp = (u64 *)page_address(rdev->bb_page);
1509 rdev->badblocks.shift = sb->bblog_shift;
1510 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1511 u64 bb = le64_to_cpu(*bbp);
1512 int count = bb & (0x3ff);
1513 u64 sector = bb >> 10;
1514 sector <<= sb->bblog_shift;
1515 count <<= sb->bblog_shift;
1516 if (bb + 1 == 0)
1517 break;
1518 if (md_set_badblocks(&rdev->badblocks,
1519 sector, count, 1) == 0)
1520 return -EINVAL;
1521 }
1522 } else if (sb->bblog_offset != 0)
1523 rdev->badblocks.shift = 0;
1524
1525 if (!refdev) {
1526 ret = 1;
1527 } else {
1528 __u64 ev1, ev2;
1529 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1530
1531 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1532 sb->level != refsb->level ||
1533 sb->layout != refsb->layout ||
1534 sb->chunksize != refsb->chunksize) {
1535 printk(KERN_WARNING "md: %s has strangely different"
1536 " superblock to %s\n",
1537 bdevname(rdev->bdev,b),
1538 bdevname(refdev->bdev,b2));
1539 return -EINVAL;
1540 }
1541 ev1 = le64_to_cpu(sb->events);
1542 ev2 = le64_to_cpu(refsb->events);
1543
1544 if (ev1 > ev2)
1545 ret = 1;
1546 else
1547 ret = 0;
1548 }
1549 if (minor_version) {
1550 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1551 sectors -= rdev->data_offset;
1552 } else
1553 sectors = rdev->sb_start;
1554 if (sectors < le64_to_cpu(sb->data_size))
1555 return -EINVAL;
1556 rdev->sectors = le64_to_cpu(sb->data_size);
1557 return ret;
1558}
1559
1560static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1561{
1562 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1563 __u64 ev1 = le64_to_cpu(sb->events);
1564
1565 rdev->raid_disk = -1;
1566 clear_bit(Faulty, &rdev->flags);
1567 clear_bit(In_sync, &rdev->flags);
1568 clear_bit(Bitmap_sync, &rdev->flags);
1569 clear_bit(WriteMostly, &rdev->flags);
1570
1571 if (mddev->raid_disks == 0) {
1572 mddev->major_version = 1;
1573 mddev->patch_version = 0;
1574 mddev->external = 0;
1575 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1576 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1577 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1578 mddev->level = le32_to_cpu(sb->level);
1579 mddev->clevel[0] = 0;
1580 mddev->layout = le32_to_cpu(sb->layout);
1581 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1582 mddev->dev_sectors = le64_to_cpu(sb->size);
1583 mddev->events = ev1;
1584 mddev->bitmap_info.offset = 0;
1585 mddev->bitmap_info.space = 0;
1586
1587
1588
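 /*
  * Default bitmap location is 1K after the superblock, using 3K -
  * a total of 4K.
  */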
1589 mddev->bitmap_info.default_offset = 1024 >> 9;
1590 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1591 mddev->reshape_backwards = 0;
1592
1593 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1594 memcpy(mddev->uuid, sb->set_uuid, 16);
1595
1596 mddev->max_disks = (4096-256)/2;
1597
1598 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1599 mddev->bitmap_info.file == NULL) {
1600 mddev->bitmap_info.offset =
1601 (__s32)le32_to_cpu(sb->bitmap_offset);
1602
1603
1604
1605
1606
1607 if (mddev->minor_version > 0)
1608 mddev->bitmap_info.space = 0;
1609 else if (mddev->bitmap_info.offset > 0)
1610 mddev->bitmap_info.space =
1611 8 - mddev->bitmap_info.offset;
1612 else
1613 mddev->bitmap_info.space =
1614 -mddev->bitmap_info.offset;
1615 }
1616
1617 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1618 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1619 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1620 mddev->new_level = le32_to_cpu(sb->new_level);
1621 mddev->new_layout = le32_to_cpu(sb->new_layout);
1622 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1623 if (mddev->delta_disks < 0 ||
1624 (mddev->delta_disks == 0 &&
1625 (le32_to_cpu(sb->feature_map)
1626 & MD_FEATURE_RESHAPE_BACKWARDS)))
1627 mddev->reshape_backwards = 1;
1628 } else {
1629 mddev->reshape_position = MaxSector;
1630 mddev->delta_disks = 0;
1631 mddev->new_level = mddev->level;
1632 mddev->new_layout = mddev->layout;
1633 mddev->new_chunk_sectors = mddev->chunk_sectors;
1634 }
1635
1636 } else if (mddev->pers == NULL) {
1637
1638
1639 ++ev1;
1640 if (rdev->desc_nr >= 0 &&
1641 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1642 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1643 if (ev1 < mddev->events)
1644 return -EINVAL;
1645 } else if (mddev->bitmap) {
1646
1647
1648
1649 if (ev1 < mddev->bitmap->events_cleared)
1650 return 0;
1651 if (ev1 < mddev->events)
1652 set_bit(Bitmap_sync, &rdev->flags);
1653 } else {
1654 if (ev1 < mddev->events)
1655
1656 return 0;
1657 }
1658 if (mddev->level != LEVEL_MULTIPATH) {
1659 int role;
1660 if (rdev->desc_nr < 0 ||
1661 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1662 role = 0xffff;
1663 rdev->desc_nr = -1;
1664 } else
1665 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1666 switch(role) {
1667 case 0xffff:
1668 break;
1669 case 0xfffe:
1670 set_bit(Faulty, &rdev->flags);
1671 break;
1672 default:
1673 rdev->saved_raid_disk = role;
1674 if ((le32_to_cpu(sb->feature_map) &
1675 MD_FEATURE_RECOVERY_OFFSET)) {
1676 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1677 if (!(le32_to_cpu(sb->feature_map) &
1678 MD_FEATURE_RECOVERY_BITMAP))
1679 rdev->saved_raid_disk = -1;
1680 } else
1681 set_bit(In_sync, &rdev->flags);
1682 rdev->raid_disk = role;
1683 break;
1684 }
1685 if (sb->devflags & WriteMostly1)
1686 set_bit(WriteMostly, &rdev->flags);
1687 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1688 set_bit(Replacement, &rdev->flags);
1689 } else
1690 set_bit(In_sync, &rdev->flags);
1691
1692 return 0;
1693}
1694
1695static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1696{
1697 struct mdp_superblock_1 *sb;
1698 struct md_rdev *rdev2;
1699 int max_dev, i;
1700
1701
1702 sb = page_address(rdev->sb_page);
1703
1704 sb->feature_map = 0;
1705 sb->pad0 = 0;
1706 sb->recovery_offset = cpu_to_le64(0);
1707 memset(sb->pad3, 0, sizeof(sb->pad3));
1708
1709 sb->utime = cpu_to_le64((__u64)mddev->utime);
1710 sb->events = cpu_to_le64(mddev->events);
1711 if (mddev->in_sync)
1712 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1713 else
1714 sb->resync_offset = cpu_to_le64(0);
1715
1716 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1717
1718 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1719 sb->size = cpu_to_le64(mddev->dev_sectors);
1720 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1721 sb->level = cpu_to_le32(mddev->level);
1722 sb->layout = cpu_to_le32(mddev->layout);
1723
1724 if (test_bit(WriteMostly, &rdev->flags))
1725 sb->devflags |= WriteMostly1;
1726 else
1727 sb->devflags &= ~WriteMostly1;
1728 sb->data_offset = cpu_to_le64(rdev->data_offset);
1729 sb->data_size = cpu_to_le64(rdev->sectors);
1730
1731 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1732 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1733 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1734 }
1735
1736 if (rdev->raid_disk >= 0 &&
1737 !test_bit(In_sync, &rdev->flags)) {
1738 sb->feature_map |=
1739 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1740 sb->recovery_offset =
1741 cpu_to_le64(rdev->recovery_offset);
1742 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1743 sb->feature_map |=
1744 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1745 }
1746 if (test_bit(Replacement, &rdev->flags))
1747 sb->feature_map |=
1748 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1749
1750 if (mddev->reshape_position != MaxSector) {
1751 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1752 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1753 sb->new_layout = cpu_to_le32(mddev->new_layout);
1754 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1755 sb->new_level = cpu_to_le32(mddev->new_level);
1756 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1757 if (mddev->delta_disks == 0 &&
1758 mddev->reshape_backwards)
1759 sb->feature_map
1760 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1761 if (rdev->new_data_offset != rdev->data_offset) {
1762 sb->feature_map
1763 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1764 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1765 - rdev->data_offset));
1766 }
1767 }
1768
1769 if (rdev->badblocks.count == 0)
1770 ;
1771 else if (sb->bblog_offset == 0)
 /* Cannot record bad blocks on this device */
1773 md_error(mddev, rdev);
1774 else {
1775 struct badblocks *bb = &rdev->badblocks;
1776 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1777 u64 *p = bb->page;
1778 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1779 if (bb->changed) {
1780 unsigned seq;
1781
1782retry:
1783 seq = read_seqbegin(&bb->lock);
1784
1785 memset(bbp, 0xff, PAGE_SIZE);
1786
1787 for (i = 0 ; i < bb->count ; i++) {
1788 u64 internal_bb = p[i];
1789 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1790 | BB_LEN(internal_bb));
1791 bbp[i] = cpu_to_le64(store_bb);
1792 }
1793 bb->changed = 0;
1794 if (read_seqretry(&bb->lock, seq))
1795 goto retry;
1796
1797 bb->sector = (rdev->sb_start +
1798 (int)le32_to_cpu(sb->bblog_offset));
1799 bb->size = le16_to_cpu(sb->bblog_size);
1800 }
1801 }
1802
1803 max_dev = 0;
1804 rdev_for_each(rdev2, mddev)
1805 if (rdev2->desc_nr+1 > max_dev)
1806 max_dev = rdev2->desc_nr+1;
1807
1808 if (max_dev > le32_to_cpu(sb->max_dev)) {
1809 int bmask;
1810 sb->max_dev = cpu_to_le32(max_dev);
1811 rdev->sb_size = max_dev * 2 + 256;
1812 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1813 if (rdev->sb_size & bmask)
1814 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1815 } else
1816 max_dev = le32_to_cpu(sb->max_dev);
1817
1818 for (i=0; i<max_dev;i++)
1819 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1820
1821 rdev_for_each(rdev2, mddev) {
1822 i = rdev2->desc_nr;
1823 if (test_bit(Faulty, &rdev2->flags))
1824 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1825 else if (test_bit(In_sync, &rdev2->flags))
1826 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1827 else if (rdev2->raid_disk >= 0)
1828 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1829 else
1830 sb->dev_roles[i] = cpu_to_le16(0xffff);
1831 }
1832
1833 sb->sb_csum = calc_sb_1_csum(sb);
1834}
1835
1836static unsigned long long
1837super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1838{
1839 struct mdp_superblock_1 *sb;
1840 sector_t max_sectors;
1841 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1842 return 0;
1843 if (rdev->data_offset != rdev->new_data_offset)
1844 return 0;
1845 if (rdev->sb_start < rdev->data_offset) {
1846
1847 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1848 max_sectors -= rdev->data_offset;
1849 if (!num_sectors || num_sectors > max_sectors)
1850 num_sectors = max_sectors;
1851 } else if (rdev->mddev->bitmap_info.offset) {
1852
1853 return 0;
1854 } else {
1855
1856 sector_t sb_start;
1857 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1858 sb_start &= ~(sector_t)(4*2 - 1);
1859 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1860 if (!num_sectors || num_sectors > max_sectors)
1861 num_sectors = max_sectors;
1862 rdev->sb_start = sb_start;
1863 }
1864 sb = page_address(rdev->sb_page);
1865 sb->data_size = cpu_to_le64(num_sectors);
1866 sb->super_offset = rdev->sb_start;
1867 sb->sb_csum = calc_sb_1_csum(sb);
1868 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1869 rdev->sb_page);
1870 md_super_wait(rdev->mddev);
1871 return num_sectors;
1872
1873}
1874
1875static int
1876super_1_allow_new_offset(struct md_rdev *rdev,
1877 unsigned long long new_offset)
1878{
1879
1880 struct bitmap *bitmap;
1881 if (new_offset >= rdev->data_offset)
1882 return 1;
1883
1884
1885
1886 if (rdev->mddev->minor_version == 0)
1887 return 1;
1888
1889
1890
1891
1892
1893
1894
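 /*
  * Otherwise make sure the reduced data offset still leaves room for
  * the superblock, the bad block log and any internal bitmap.
  */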
1895 if (rdev->sb_start + (32+4)*2 > new_offset)
1896 return 0;
1897 bitmap = rdev->mddev->bitmap;
1898 if (bitmap && !rdev->mddev->bitmap_info.file &&
1899 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1900 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1901 return 0;
1902 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1903 return 0;
1904
1905 return 1;
1906}
1907
1908static struct super_type super_types[] = {
1909 [0] = {
1910 .name = "0.90.0",
1911 .owner = THIS_MODULE,
1912 .load_super = super_90_load,
1913 .validate_super = super_90_validate,
1914 .sync_super = super_90_sync,
1915 .rdev_size_change = super_90_rdev_size_change,
1916 .allow_new_offset = super_90_allow_new_offset,
1917 },
1918 [1] = {
1919 .name = "md-1",
1920 .owner = THIS_MODULE,
1921 .load_super = super_1_load,
1922 .validate_super = super_1_validate,
1923 .sync_super = super_1_sync,
1924 .rdev_size_change = super_1_rdev_size_change,
1925 .allow_new_offset = super_1_allow_new_offset,
1926 },
1927};
1928
1929static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1930{
1931 if (mddev->sync_super) {
1932 mddev->sync_super(mddev, rdev);
1933 return;
1934 }
1935
1936 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1937
1938 super_types[mddev->major_version].sync_super(mddev, rdev);
1939}
1940
1941static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1942{
1943 struct md_rdev *rdev, *rdev2;
1944
1945 rcu_read_lock();
1946 rdev_for_each_rcu(rdev, mddev1)
1947 rdev_for_each_rcu(rdev2, mddev2)
1948 if (rdev->bdev->bd_contains ==
1949 rdev2->bdev->bd_contains) {
1950 rcu_read_unlock();
1951 return 1;
1952 }
1953 rcu_read_unlock();
1954 return 0;
1955}
1956
1957static LIST_HEAD(pending_raid_disks);
1958
1959
1960
1961
1962
1963
1964
1965
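/*
 * Try to register a data integrity profile for the array: this only
 * succeeds if every active member device advertises a compatible
 * profile.
 */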
1966int md_integrity_register(struct mddev *mddev)
1967{
1968 struct md_rdev *rdev, *reference = NULL;
1969
1970 if (list_empty(&mddev->disks))
1971 return 0;
1972 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
1973 return 0;
1974 rdev_for_each(rdev, mddev) {
1975
1976 if (test_bit(Faulty, &rdev->flags))
1977 continue;
1978 if (rdev->raid_disk < 0)
1979 continue;
1980 if (!reference) {
1981
1982 reference = rdev;
1983 continue;
1984 }
1985
1986 if (blk_integrity_compare(reference->bdev->bd_disk,
1987 rdev->bdev->bd_disk) < 0)
1988 return -EINVAL;
1989 }
1990 if (!reference || !bdev_get_integrity(reference->bdev))
1991 return 0;
1992
1993
1994
1995
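 /*
  * All component devices are integrity capable and have matching
  * profiles; register the common profile for the md device.
  */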
1996 if (blk_integrity_register(mddev->gendisk,
1997 bdev_get_integrity(reference->bdev)) != 0) {
1998 printk(KERN_ERR "md: failed to register integrity for %s\n",
1999 mdname(mddev));
2000 return -EINVAL;
2001 }
2002 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2003 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2004 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2005 mdname(mddev));
2006 return -EINVAL;
2007 }
2008 return 0;
2009}
2010EXPORT_SYMBOL(md_integrity_register);
2011
2012
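/* Disable data integrity if a non-capable/non-matching disk is being added */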
2013void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2014{
2015 struct blk_integrity *bi_rdev;
2016 struct blk_integrity *bi_mddev;
2017
2018 if (!mddev->gendisk)
2019 return;
2020
2021 bi_rdev = bdev_get_integrity(rdev->bdev);
2022 bi_mddev = blk_get_integrity(mddev->gendisk);
2023
2024 if (!bi_mddev)
2025 return;
2026 if (rdev->raid_disk < 0)
2027 return;
2028 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2029 rdev->bdev->bd_disk) >= 0)
2030 return;
2031 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2032 blk_integrity_unregister(mddev->gendisk);
2033}
2034EXPORT_SYMBOL(md_integrity_add_rdev);
2035
2036static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2037{
2038 char b[BDEVNAME_SIZE];
2039 struct kobject *ko;
2040 char *s;
2041 int err;
2042
2043 if (rdev->mddev) {
2044 MD_BUG();
2045 return -EINVAL;
2046 }
2047
2048
2049 if (find_rdev(mddev, rdev->bdev->bd_dev))
2050 return -EEXIST;
2051
2052
2053 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2054 rdev->sectors < mddev->dev_sectors)) {
2055 if (mddev->pers) {
2056
2057
2058
2059
2060 if (mddev->level > 0)
2061 return -ENOSPC;
2062 } else
2063 mddev->dev_sectors = rdev->sectors;
2064 }
2065
2066
2067
2068
2069
2070 if (rdev->desc_nr < 0) {
2071 int choice = 0;
2072 if (mddev->pers) choice = mddev->raid_disks;
2073 while (find_rdev_nr(mddev, choice))
2074 choice++;
2075 rdev->desc_nr = choice;
2076 } else {
2077 if (find_rdev_nr(mddev, rdev->desc_nr))
2078 return -EBUSY;
2079 }
2080 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2081 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2082 mdname(mddev), mddev->max_disks);
2083 return -EBUSY;
2084 }
2085 bdevname(rdev->bdev,b);
2086 while ( (s=strchr(b, '/')) != NULL)
2087 *s = '!';
2088
2089 rdev->mddev = mddev;
2090 printk(KERN_INFO "md: bind<%s>\n", b);
2091
2092 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2093 goto fail;
2094
2095 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2096 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2097 ;
2098 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2099
2100 list_add_rcu(&rdev->same_set, &mddev->disks);
2101 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2102
2103
2104 mddev->recovery_disabled++;
2105
2106 return 0;
2107
2108 fail:
2109 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2110 b, mdname(mddev));
2111 return err;
2112}
2113
2114static void md_delayed_delete(struct work_struct *ws)
2115{
2116 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2117 kobject_del(&rdev->kobj);
2118 kobject_put(&rdev->kobj);
2119}
2120
2121static void unbind_rdev_from_array(struct md_rdev * rdev)
2122{
2123 char b[BDEVNAME_SIZE];
2124 if (!rdev->mddev) {
2125 MD_BUG();
2126 return;
2127 }
2128 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2129 list_del_rcu(&rdev->same_set);
2130 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2131 rdev->mddev = NULL;
2132 sysfs_remove_link(&rdev->kobj, "block");
2133 sysfs_put(rdev->sysfs_state);
2134 rdev->sysfs_state = NULL;
2135 rdev->badblocks.count = 0;
2136
2137
2138
2139
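 /*
  * The kobject removal must be delayed: doing it here could deadlock
  * against a write of 'remove' to the rdev's "state" sysfs file, and
  * RCU readers may still be walking the disk list.
  */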
2140 synchronize_rcu();
2141 INIT_WORK(&rdev->del_work, md_delayed_delete);
2142 kobject_get(&rdev->kobj);
2143 queue_work(md_misc_wq, &rdev->del_work);
2144}
2145
2146
2147
2148
2149
2150
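/*
 * Prevent the device from being mounted, repartitioned or otherwise
 * reused by a RAID array (or any other kernel subsystem) by claiming
 * it exclusively.
 */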
2151static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2152{
2153 int err = 0;
2154 struct block_device *bdev;
2155 char b[BDEVNAME_SIZE];
2156
2157 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2158 shared ? (struct md_rdev *)lock_rdev : rdev);
2159 if (IS_ERR(bdev)) {
2160 printk(KERN_ERR "md: could not open %s.\n",
2161 __bdevname(dev, b));
2162 return PTR_ERR(bdev);
2163 }
2164 rdev->bdev = bdev;
2165 return err;
2166}
2167
2168static void unlock_rdev(struct md_rdev *rdev)
2169{
2170 struct block_device *bdev = rdev->bdev;
2171 rdev->bdev = NULL;
2172 if (!bdev)
2173 MD_BUG();
2174 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2175}
2176
2177void md_autodetect_dev(dev_t dev);
2178
2179static void export_rdev(struct md_rdev * rdev)
2180{
2181 char b[BDEVNAME_SIZE];
2182 printk(KERN_INFO "md: export_rdev(%s)\n",
2183 bdevname(rdev->bdev,b));
2184 if (rdev->mddev)
2185 MD_BUG();
2186 md_rdev_clear(rdev);
2187#ifndef MODULE
2188 if (test_bit(AutoDetected, &rdev->flags))
2189 md_autodetect_dev(rdev->bdev->bd_dev);
2190#endif
2191 unlock_rdev(rdev);
2192 kobject_put(&rdev->kobj);
2193}
2194
2195static void kick_rdev_from_array(struct md_rdev * rdev)
2196{
2197 unbind_rdev_from_array(rdev);
2198 export_rdev(rdev);
2199}
2200
2201static void export_array(struct mddev *mddev)
2202{
2203 struct md_rdev *rdev, *tmp;
2204
2205 rdev_for_each_safe(rdev, tmp, mddev) {
2206 if (!rdev->mddev) {
2207 MD_BUG();
2208 continue;
2209 }
2210 kick_rdev_from_array(rdev);
2211 }
2212 if (!list_empty(&mddev->disks))
2213 MD_BUG();
2214 mddev->raid_disks = 0;
2215 mddev->major_version = 0;
2216}
2217
2218static void print_desc(mdp_disk_t *desc)
2219{
2220 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2221 desc->major,desc->minor,desc->raid_disk,desc->state);
2222}
2223
2224static void print_sb_90(mdp_super_t *sb)
2225{
2226 int i;
2227
2228 printk(KERN_INFO
2229 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2230 sb->major_version, sb->minor_version, sb->patch_version,
2231 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2232 sb->ctime);
2233 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2234 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2235 sb->md_minor, sb->layout, sb->chunk_size);
2236 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2237 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2238 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2239 sb->failed_disks, sb->spare_disks,
2240 sb->sb_csum, (unsigned long)sb->events_lo);
2241
2242 printk(KERN_INFO);
2243 for (i = 0; i < MD_SB_DISKS; i++) {
2244 mdp_disk_t *desc;
2245
2246 desc = sb->disks + i;
2247 if (desc->number || desc->major || desc->minor ||
2248 desc->raid_disk || (desc->state && (desc->state != 4))) {
2249 printk(" D %2d: ", i);
2250 print_desc(desc);
2251 }
2252 }
2253 printk(KERN_INFO "md: THIS: ");
2254 print_desc(&sb->this_disk);
2255}
2256
2257static void print_sb_1(struct mdp_superblock_1 *sb)
2258{
2259 __u8 *uuid;
2260
2261 uuid = sb->set_uuid;
2262 printk(KERN_INFO
2263 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2264 "md: Name: \"%s\" CT:%llu\n",
2265 le32_to_cpu(sb->major_version),
2266 le32_to_cpu(sb->feature_map),
2267 uuid,
2268 sb->set_name,
2269 (unsigned long long)le64_to_cpu(sb->ctime)
2270 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2271
2272 uuid = sb->device_uuid;
2273 printk(KERN_INFO
2274 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2275 " RO:%llu\n"
2276 "md: Dev:%08x UUID: %pU\n"
2277 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2278 "md: (MaxDev:%u) \n",
2279 le32_to_cpu(sb->level),
2280 (unsigned long long)le64_to_cpu(sb->size),
2281 le32_to_cpu(sb->raid_disks),
2282 le32_to_cpu(sb->layout),
2283 le32_to_cpu(sb->chunksize),
2284 (unsigned long long)le64_to_cpu(sb->data_offset),
2285 (unsigned long long)le64_to_cpu(sb->data_size),
2286 (unsigned long long)le64_to_cpu(sb->super_offset),
2287 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2288 le32_to_cpu(sb->dev_number),
2289 uuid,
2290 sb->devflags,
2291 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2292 (unsigned long long)le64_to_cpu(sb->events),
2293 (unsigned long long)le64_to_cpu(sb->resync_offset),
2294 le32_to_cpu(sb->sb_csum),
2295 le32_to_cpu(sb->max_dev)
2296 );
2297}
2298
2299static void print_rdev(struct md_rdev *rdev, int major_version)
2300{
2301 char b[BDEVNAME_SIZE];
2302 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2303 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2304 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2305 rdev->desc_nr);
2306 if (rdev->sb_loaded) {
2307 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2308 switch (major_version) {
2309 case 0:
2310 print_sb_90(page_address(rdev->sb_page));
2311 break;
2312 case 1:
2313 print_sb_1(page_address(rdev->sb_page));
2314 break;
2315 }
2316 } else
2317 printk(KERN_INFO "md: no rdev superblock!\n");
2318}
2319
2320static void md_print_devices(void)
2321{
2322 struct list_head *tmp;
2323 struct md_rdev *rdev;
2324 struct mddev *mddev;
2325 char b[BDEVNAME_SIZE];
2326
2327 printk("\n");
2328 printk("md: **********************************\n");
2329 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2330 printk("md: **********************************\n");
2331 for_each_mddev(mddev, tmp) {
2332
2333 if (mddev->bitmap)
2334 bitmap_print_sb(mddev->bitmap);
2335 else
2336 printk("%s: ", mdname(mddev));
2337 rdev_for_each(rdev, mddev)
2338 printk("<%s>", bdevname(rdev->bdev,b));
2339 printk("\n");
2340
2341 rdev_for_each(rdev, mddev)
2342 print_rdev(rdev, mddev->major_version);
2343 }
2344 printk("md: **********************************\n");
2345 printk("\n");
2346}
2347
2348
2349static void sync_sbs(struct mddev * mddev, int nospares)
2350{
	/* Update each superblock (in-memory image), but
	 * if we are allowed to, skip spares which already
	 * have the right event counter, or have one earlier
	 * (which would mean they aren't being marked as dirty
	 * with the rest of the array)
	 */
2357 struct md_rdev *rdev;
2358 rdev_for_each(rdev, mddev) {
2359 if (rdev->sb_events == mddev->events ||
2360 (nospares &&
2361 rdev->raid_disk < 0 &&
2362 rdev->sb_events+1 == mddev->events)) {
			/* Don't update this superblock */
2364 rdev->sb_loaded = 2;
2365 } else {
2366 sync_super(mddev, rdev);
2367 rdev->sb_loaded = 1;
2368 }
2369 }
2370}
2371
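/*
 * Write the in-memory superblocks out to every non-faulty member device,
 * bumping the event count first.  If the array changes while the writes
 * are in flight (MD_CHANGE_DEVS is set again), the update is repeated.
 */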
2372static void md_update_sb(struct mddev * mddev, int force_change)
2373{
2374 struct md_rdev *rdev;
2375 int sync_req;
2376 int nospares = 0;
2377 int any_badblocks_changed = 0;
2378
2379 if (mddev->ro) {
2380 if (force_change)
2381 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2382 return;
2383 }
2384repeat:
	/* First make sure individual recovery_offsets are correct */
2386 rdev_for_each(rdev, mddev) {
2387 if (rdev->raid_disk >= 0 &&
2388 mddev->delta_disks >= 0 &&
2389 !test_bit(In_sync, &rdev->flags) &&
2390 mddev->curr_resync_completed > rdev->recovery_offset)
2391 rdev->recovery_offset = mddev->curr_resync_completed;
2392
2393 }
2394 if (!mddev->persistent) {
2395 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2396 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2397 if (!mddev->external) {
2398 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2399 rdev_for_each(rdev, mddev) {
2400 if (rdev->badblocks.changed) {
2401 rdev->badblocks.changed = 0;
2402 md_ack_all_badblocks(&rdev->badblocks);
2403 md_error(mddev, rdev);
2404 }
2405 clear_bit(Blocked, &rdev->flags);
2406 clear_bit(BlockedBadBlocks, &rdev->flags);
2407 wake_up(&rdev->blocked_wait);
2408 }
2409 }
2410 wake_up(&mddev->sb_wait);
2411 return;
2412 }
2413
2414 spin_lock_irq(&mddev->write_lock);
2415
2416 mddev->utime = get_seconds();
2417
2418 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2419 force_change = 1;
2420 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/* just a clean<->dirty transition, possibly leave spares alone,
		 * though if events isn't the right even/odd, we will have to do
		 * spares after all
		 */
2425 nospares = 1;
2426 if (force_change)
2427 nospares = 0;
2428 if (mddev->degraded)
		/* If the array is degraded, then skipping spares is both
		 * dangerous and fairly pointless.
		 * Dangerous because a device that was removed from the array
		 * might have an event count that still looks up-to-date,
		 * so it can be re-added without a resync.
		 * Pointless because if there are any spares to skip,
		 * then a recovery will happen and soon that array won't
		 * be degraded any more and the spare can go back to sleep then.
		 */
2438 nospares = 0;
2439
2440 sync_req = mddev->in_sync;
2441
	/* If this is just a dirty<->clean transition, and the array is clean
	 * and 'events' is odd, we can roll back to the previous clean state */
2444 if (nospares
2445 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2446 && mddev->can_decrease_events
2447 && mddev->events != 1) {
2448 mddev->events--;
2449 mddev->can_decrease_events = 0;
2450 } else {
		/* otherwise we have to go forward and increase */
2452 mddev->events ++;
2453 mddev->can_decrease_events = nospares;
2454 }
2455
2456 if (!mddev->events) {
		/*
		 * oops, this 64-bit counter should never wrap.
		 * Either we are in around ~1 trillion A.C., assuming
		 * 1 reboot per second, or we have a bug:
		 */
2462 MD_BUG();
2463 mddev->events --;
2464 }
2465
2466 rdev_for_each(rdev, mddev) {
2467 if (rdev->badblocks.changed)
2468 any_badblocks_changed++;
2469 if (test_bit(Faulty, &rdev->flags))
2470 set_bit(FaultRecorded, &rdev->flags);
2471 }
2472
2473 sync_sbs(mddev, nospares);
2474 spin_unlock_irq(&mddev->write_lock);
2475
2476 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2477 mdname(mddev), mddev->in_sync);
2478
2479 bitmap_update_sb(mddev->bitmap);
2480 rdev_for_each(rdev, mddev) {
2481 char b[BDEVNAME_SIZE];
2482
2483 if (rdev->sb_loaded != 1)
2484 continue;
2485
2486 if (!test_bit(Faulty, &rdev->flags)) {
2487 md_super_write(mddev,rdev,
2488 rdev->sb_start, rdev->sb_size,
2489 rdev->sb_page);
2490 pr_debug("md: (write) %s's sb offset: %llu\n",
2491 bdevname(rdev->bdev, b),
2492 (unsigned long long)rdev->sb_start);
2493 rdev->sb_events = mddev->events;
2494 if (rdev->badblocks.size) {
2495 md_super_write(mddev, rdev,
2496 rdev->badblocks.sector,
2497 rdev->badblocks.size << 9,
2498 rdev->bb_page);
2499 rdev->badblocks.size = 0;
2500 }
2501
2502 } else
2503 pr_debug("md: %s (skipping faulty)\n",
2504 bdevname(rdev->bdev, b));
2505
2506 if (mddev->level == LEVEL_MULTIPATH)
			/* only need to write one superblock... */
2508 break;
2509 }
2510 md_super_wait(mddev);
	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
2512
2513 spin_lock_irq(&mddev->write_lock);
2514 if (mddev->in_sync != sync_req ||
2515 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* have to write it out again */
2517 spin_unlock_irq(&mddev->write_lock);
2518 goto repeat;
2519 }
2520 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2521 spin_unlock_irq(&mddev->write_lock);
2522 wake_up(&mddev->sb_wait);
2523 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2524 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2525
2526 rdev_for_each(rdev, mddev) {
2527 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2528 clear_bit(Blocked, &rdev->flags);
2529
2530 if (any_badblocks_changed)
2531 md_ack_all_badblocks(&rdev->badblocks);
2532 clear_bit(BlockedBadBlocks, &rdev->flags);
2533 wake_up(&rdev->blocked_wait);
2534 }
2535}
2536
/* words written to sysfs files may, or may not, be \n terminated.
 * We want to accept them either way; cmd_match handles that.
 */
2540static int cmd_match(const char *cmd, const char *str)
2541{
	/* See if cmd, written into a sysfs file, matches
	 * str.  They must either be the same, or cmd can
	 * have a trailing newline
	 */
2546 while (*cmd && *str && *cmd == *str) {
2547 cmd++;
2548 str++;
2549 }
2550 if (*cmd == '\n')
2551 cmd++;
2552 if (*str || *cmd)
2553 return 0;
2554 return 1;
2555}
2556
2557struct rdev_sysfs_entry {
2558 struct attribute attr;
2559 ssize_t (*show)(struct md_rdev *, char *);
2560 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2561};
2562
2563static ssize_t
2564state_show(struct md_rdev *rdev, char *page)
2565{
2566 char *sep = "";
2567 size_t len = 0;
2568
2569 if (test_bit(Faulty, &rdev->flags) ||
2570 rdev->badblocks.unacked_exist) {
2571 len+= sprintf(page+len, "%sfaulty",sep);
2572 sep = ",";
2573 }
2574 if (test_bit(In_sync, &rdev->flags)) {
2575 len += sprintf(page+len, "%sin_sync",sep);
2576 sep = ",";
2577 }
2578 if (test_bit(WriteMostly, &rdev->flags)) {
2579 len += sprintf(page+len, "%swrite_mostly",sep);
2580 sep = ",";
2581 }
2582 if (test_bit(Blocked, &rdev->flags) ||
2583 (rdev->badblocks.unacked_exist
2584 && !test_bit(Faulty, &rdev->flags))) {
2585 len += sprintf(page+len, "%sblocked", sep);
2586 sep = ",";
2587 }
2588 if (!test_bit(Faulty, &rdev->flags) &&
2589 !test_bit(In_sync, &rdev->flags)) {
2590 len += sprintf(page+len, "%sspare", sep);
2591 sep = ",";
2592 }
2593 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2594 len += sprintf(page+len, "%swrite_error", sep);
2595 sep = ",";
2596 }
2597 if (test_bit(WantReplacement, &rdev->flags)) {
2598 len += sprintf(page+len, "%swant_replacement", sep);
2599 sep = ",";
2600 }
2601 if (test_bit(Replacement, &rdev->flags)) {
2602 len += sprintf(page+len, "%sreplacement", sep);
2603 sep = ",";
2604 }
2605
2606 return len+sprintf(page+len, "\n");
2607}
2608
2609static ssize_t
2610state_store(struct md_rdev *rdev, const char *buf, size_t len)
2611{
	/* can write
	 *  faulty  - simulates an error on the device
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flag
	 *  -blocked - clears the Blocked flag and possibly simulates an error
	 *  insync - sets In_sync providing device isn't active
	 *  -insync - clears In_sync for a device with a slot assigned,
	 *            so that it gets rebuilt based on bitmap
	 *  write_error - sets WriteErrorSeen
	 *  -write_error - clears WriteErrorSeen
	 */
2625 int err = -EINVAL;
2626 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2627 md_error(rdev->mddev, rdev);
2628 if (test_bit(Faulty, &rdev->flags))
2629 err = 0;
2630 else
2631 err = -EBUSY;
2632 } else if (cmd_match(buf, "remove")) {
2633 if (rdev->raid_disk >= 0)
2634 err = -EBUSY;
2635 else {
2636 struct mddev *mddev = rdev->mddev;
2637 kick_rdev_from_array(rdev);
2638 if (mddev->pers)
2639 md_update_sb(mddev, 1);
2640 md_new_event(mddev);
2641 err = 0;
2642 }
2643 } else if (cmd_match(buf, "writemostly")) {
2644 set_bit(WriteMostly, &rdev->flags);
2645 err = 0;
2646 } else if (cmd_match(buf, "-writemostly")) {
2647 clear_bit(WriteMostly, &rdev->flags);
2648 err = 0;
2649 } else if (cmd_match(buf, "blocked")) {
2650 set_bit(Blocked, &rdev->flags);
2651 err = 0;
2652 } else if (cmd_match(buf, "-blocked")) {
2653 if (!test_bit(Faulty, &rdev->flags) &&
2654 rdev->badblocks.unacked_exist) {
			/* metadata handlers that don't understand badblocks
			 * cannot cope, so fail the whole device instead
			 */
2658 md_error(rdev->mddev, rdev);
2659 }
2660 clear_bit(Blocked, &rdev->flags);
2661 clear_bit(BlockedBadBlocks, &rdev->flags);
2662 wake_up(&rdev->blocked_wait);
2663 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2664 md_wakeup_thread(rdev->mddev->thread);
2665
2666 err = 0;
2667 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2668 set_bit(In_sync, &rdev->flags);
2669 err = 0;
2670 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) {
2671 clear_bit(In_sync, &rdev->flags);
2672 rdev->saved_raid_disk = rdev->raid_disk;
2673 rdev->raid_disk = -1;
2674 err = 0;
2675 } else if (cmd_match(buf, "write_error")) {
2676 set_bit(WriteErrorSeen, &rdev->flags);
2677 err = 0;
2678 } else if (cmd_match(buf, "-write_error")) {
2679 clear_bit(WriteErrorSeen, &rdev->flags);
2680 err = 0;
2681 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-spare device that is not a replacement can
		 * become want_replacement at any time, but we then need to
		 * check if recovery is needed.
		 */
2686 if (rdev->raid_disk >= 0 &&
2687 !test_bit(Replacement, &rdev->flags))
2688 set_bit(WantReplacement, &rdev->flags);
2689 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2690 md_wakeup_thread(rdev->mddev->thread);
2691 err = 0;
2692 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing 'want_replacement' is always allowed.
		 * Once replacement starts it is too late though.
		 */
2696 err = 0;
2697 clear_bit(WantReplacement, &rdev->flags);
2698 } else if (cmd_match(buf, "replacement")) {
		/* Can only set a device as a replacement when array has not
		 * yet been started.  Once running, replacement is automatic
		 * from spares, or by assigning 'slot'.
		 */
2703 if (rdev->mddev->pers)
2704 err = -EBUSY;
2705 else {
2706 set_bit(Replacement, &rdev->flags);
2707 err = 0;
2708 }
2709 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, can only clear Replacement before start */
2711 if (rdev->mddev->pers)
2712 err = -EBUSY;
2713 else {
2714 clear_bit(Replacement, &rdev->flags);
2715 err = 0;
2716 }
2717 }
2718 if (!err)
2719 sysfs_notify_dirent_safe(rdev->sysfs_state);
2720 return err ? err : len;
2721}
2722static struct rdev_sysfs_entry rdev_state =
2723__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2724
2725static ssize_t
2726errors_show(struct md_rdev *rdev, char *page)
2727{
2728 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2729}
2730
2731static ssize_t
2732errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2733{
2734 char *e;
2735 unsigned long n = simple_strtoul(buf, &e, 10);
2736 if (*buf && (*e == 0 || *e == '\n')) {
2737 atomic_set(&rdev->corrected_errors, n);
2738 return len;
2739 }
2740 return -EINVAL;
2741}
2742static struct rdev_sysfs_entry rdev_errors =
2743__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2744
2745static ssize_t
2746slot_show(struct md_rdev *rdev, char *page)
2747{
2748 if (rdev->raid_disk < 0)
2749 return sprintf(page, "none\n");
2750 else
2751 return sprintf(page, "%d\n", rdev->raid_disk);
2752}
2753
2754static ssize_t
2755slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2756{
2757 char *e;
2758 int err;
2759 int slot = simple_strtoul(buf, &e, 10);
2760 if (strncmp(buf, "none", 4)==0)
2761 slot = -1;
2762 else if (e==buf || (*e && *e!= '\n'))
2763 return -EINVAL;
2764 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' on an active array requires also
		 * updating the 'rd%d' link, and communicating
		 * with the personality with ->hot_*_disk.
		 * For now we only support removing
		 * failed/spare devices.  This normally happens automatically,
		 * but not when the metadata is externally managed.
		 */
2772 if (rdev->raid_disk == -1)
2773 return -EEXIST;
		/* personality does all needed checks */
2775 if (rdev->mddev->pers->hot_remove_disk == NULL)
2776 return -EINVAL;
2777 clear_bit(Blocked, &rdev->flags);
2778 remove_and_add_spares(rdev->mddev, rdev);
2779 if (rdev->raid_disk >= 0)
2780 return -EBUSY;
2781 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2782 md_wakeup_thread(rdev->mddev->thread);
2783 } else if (rdev->mddev->pers) {
		/* Activating a spare .. or possibly reactivating
		 * if we ever get bitmaps working here.
		 */
2787
2788 if (rdev->raid_disk != -1)
2789 return -EBUSY;
2790
2791 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2792 return -EBUSY;
2793
2794 if (rdev->mddev->pers->hot_add_disk == NULL)
2795 return -EINVAL;
2796
2797 if (slot >= rdev->mddev->raid_disks &&
2798 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2799 return -ENOSPC;
2800
2801 rdev->raid_disk = slot;
2802 if (test_bit(In_sync, &rdev->flags))
2803 rdev->saved_raid_disk = slot;
2804 else
2805 rdev->saved_raid_disk = -1;
2806 clear_bit(In_sync, &rdev->flags);
2807 clear_bit(Bitmap_sync, &rdev->flags);
2808 err = rdev->mddev->pers->
2809 hot_add_disk(rdev->mddev, rdev);
2810 if (err) {
2811 rdev->raid_disk = -1;
2812 return err;
2813 } else
2814 sysfs_notify_dirent_safe(rdev->sysfs_state);
2815 if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
2818 } else {
2819 if (slot >= rdev->mddev->raid_disks &&
2820 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2821 return -ENOSPC;
2822 rdev->raid_disk = slot;
		/* assume it is working */
2824 clear_bit(Faulty, &rdev->flags);
2825 clear_bit(WriteMostly, &rdev->flags);
2826 set_bit(In_sync, &rdev->flags);
2827 sysfs_notify_dirent_safe(rdev->sysfs_state);
2828 }
2829 return len;
2830}
2831
2832
2833static struct rdev_sysfs_entry rdev_slot =
2834__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2835
2836static ssize_t
2837offset_show(struct md_rdev *rdev, char *page)
2838{
2839 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2840}
2841
2842static ssize_t
2843offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2844{
2845 unsigned long long offset;
2846 if (kstrtoull(buf, 10, &offset) < 0)
2847 return -EINVAL;
2848 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2849 return -EBUSY;
2850 if (rdev->sectors && rdev->mddev->external)
		/* Must set offset before size, so overlap checks
		 * can be sane */
2853 return -EBUSY;
2854 rdev->data_offset = offset;
2855 rdev->new_data_offset = offset;
2856 return len;
2857}
2858
2859static struct rdev_sysfs_entry rdev_offset =
2860__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2861
2862static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2863{
2864 return sprintf(page, "%llu\n",
2865 (unsigned long long)rdev->new_data_offset);
2866}
2867
2868static ssize_t new_offset_store(struct md_rdev *rdev,
2869 const char *buf, size_t len)
2870{
2871 unsigned long long new_offset;
2872 struct mddev *mddev = rdev->mddev;
2873
2874 if (kstrtoull(buf, 10, &new_offset) < 0)
2875 return -EINVAL;
2876
2877 if (mddev->sync_thread)
2878 return -EBUSY;
2879 if (new_offset == rdev->data_offset)
		/* reset is always permitted */
2881 ;
2882 else if (new_offset > rdev->data_offset) {
		/* must not push array size beyond rdev_sectors */
2884 if (new_offset - rdev->data_offset
2885 + mddev->dev_sectors > rdev->sectors)
2886 return -E2BIG;
2887 }
	/* Metadata worries about other space details. */

	/* decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
2893 if (new_offset < rdev->data_offset &&
2894 mddev->reshape_backwards)
2895 return -EINVAL;
	/* Increasing offset is inconsistent with forwards
	 * reshape.  reshape_direction should be set to
	 * 'backwards' first.
	 */
2900 if (new_offset > rdev->data_offset &&
2901 !mddev->reshape_backwards)
2902 return -EINVAL;
2903
2904 if (mddev->pers && mddev->persistent &&
2905 !super_types[mddev->major_version]
2906 .allow_new_offset(rdev, new_offset))
2907 return -E2BIG;
2908 rdev->new_data_offset = new_offset;
2909 if (new_offset > rdev->data_offset)
2910 mddev->reshape_backwards = 1;
2911 else if (new_offset < rdev->data_offset)
2912 mddev->reshape_backwards = 0;
2913
2914 return len;
2915}
2916static struct rdev_sysfs_entry rdev_new_offset =
2917__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2918
2919static ssize_t
2920rdev_size_show(struct md_rdev *rdev, char *page)
2921{
2922 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2923}
2924
2925static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2926{
	/* return 1 if the ranges [s1, s1+l1) and [s2, s2+l2) overlap */
2928 if (s1+l1 <= s2)
2929 return 0;
2930 if (s2+l2 <= s1)
2931 return 0;
2932 return 1;
2933}
2934
2935static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2936{
2937 unsigned long long blocks;
2938 sector_t new;
2939
2940 if (kstrtoull(buf, 10, &blocks) < 0)
2941 return -EINVAL;
2942
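	/* a block count with the top bit set would overflow when doubled */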
2943 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2944 return -EINVAL;
2945
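	/* the doubled value must also fit in sector_t without truncation */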
2946 new = blocks * 2;
2947 if (new != blocks * 2)
2948 return -EINVAL;
2949
2950 *sectors = new;
2951 return 0;
2952}
2953
2954static ssize_t
2955rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2956{
2957 struct mddev *my_mddev = rdev->mddev;
2958 sector_t oldsectors = rdev->sectors;
2959 sector_t sectors;
2960
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2962 return -EINVAL;
2963 if (rdev->data_offset != rdev->new_data_offset)
2964 return -EINVAL;
2965 if (my_mddev->pers && rdev->raid_disk >= 0) {
2966 if (my_mddev->persistent) {
2967 sectors = super_types[my_mddev->major_version].
2968 rdev_size_change(rdev, sectors);
2969 if (!sectors)
2970 return -EBUSY;
2971 } else if (!sectors)
2972 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2973 rdev->data_offset;
2974 if (!my_mddev->pers->resize)
			/* Cannot change size for RAID0 or Linear etc */
2976 return -EINVAL;
2977 }
2978 if (sectors < my_mddev->dev_sectors)
2979 return -EINVAL;
2980
2981 rdev->sectors = sectors;
2982 if (sectors > oldsectors && my_mddev->external) {
		/* The size is being increased on an externally managed
		 * array.  Make sure no other rdev that shares this bdev
		 * overlaps the newly claimed range; we drop our own lock
		 * while walking the other arrays.
		 */
2988 struct mddev *mddev;
2989 int overlap = 0;
2990 struct list_head *tmp;
2991
2992 mddev_unlock(my_mddev);
2993 for_each_mddev(mddev, tmp) {
2994 struct md_rdev *rdev2;
2995
2996 mddev_lock_nointr(mddev);
2997 rdev_for_each(rdev2, mddev)
2998 if (rdev->bdev == rdev2->bdev &&
2999 rdev != rdev2 &&
3000 overlaps(rdev->data_offset, rdev->sectors,
3001 rdev2->data_offset,
3002 rdev2->sectors)) {
3003 overlap = 1;
3004 break;
3005 }
3006 mddev_unlock(mddev);
3007 if (overlap) {
3008 mddev_put(mddev);
3009 break;
3010 }
3011 }
3012 mddev_lock_nointr(my_mddev);
3013 if (overlap) {
			/* The new size overlaps another rdev on the same
			 * device, so back out the change.  oldsectors is
			 * known to be safe, and userspace is trusted not
			 * to race with itself here.
			 */
3020 rdev->sectors = oldsectors;
3021 return -EBUSY;
3022 }
3023 }
3024 return len;
3025}
3026
3027static struct rdev_sysfs_entry rdev_size =
3028__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3029
3030
3031static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3032{
3033 unsigned long long recovery_start = rdev->recovery_offset;
3034
3035 if (test_bit(In_sync, &rdev->flags) ||
3036 recovery_start == MaxSector)
3037 return sprintf(page, "none\n");
3038
3039 return sprintf(page, "%llu\n", recovery_start);
3040}
3041
3042static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3043{
3044 unsigned long long recovery_start;
3045
3046 if (cmd_match(buf, "none"))
3047 recovery_start = MaxSector;
3048 else if (kstrtoull(buf, 10, &recovery_start))
3049 return -EINVAL;
3050
3051 if (rdev->mddev->pers &&
3052 rdev->raid_disk >= 0)
3053 return -EBUSY;
3054
3055 rdev->recovery_offset = recovery_start;
3056 if (recovery_start == MaxSector)
3057 set_bit(In_sync, &rdev->flags);
3058 else
3059 clear_bit(In_sync, &rdev->flags);
3060 return len;
3061}
3062
3063static struct rdev_sysfs_entry rdev_recovery_start =
3064__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3065
3066
3067static ssize_t
3068badblocks_show(struct badblocks *bb, char *page, int unack);
3069static ssize_t
3070badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3071
3072static ssize_t bb_show(struct md_rdev *rdev, char *page)
3073{
3074 return badblocks_show(&rdev->badblocks, page, 0);
3075}
3076static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3077{
3078 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3079
3080 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3081 wake_up(&rdev->blocked_wait);
3082 return rv;
3083}
3084static struct rdev_sysfs_entry rdev_bad_blocks =
3085__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3086
3087
3088static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3089{
3090 return badblocks_show(&rdev->badblocks, page, 1);
3091}
3092static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3093{
3094 return badblocks_store(&rdev->badblocks, page, len, 1);
3095}
3096static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3097__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3098
3099static struct attribute *rdev_default_attrs[] = {
3100 &rdev_state.attr,
3101 &rdev_errors.attr,
3102 &rdev_slot.attr,
3103 &rdev_offset.attr,
3104 &rdev_new_offset.attr,
3105 &rdev_size.attr,
3106 &rdev_recovery_start.attr,
3107 &rdev_bad_blocks.attr,
3108 &rdev_unack_bad_blocks.attr,
3109 NULL,
3110};
3111static ssize_t
3112rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3113{
3114 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3115 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3116 struct mddev *mddev = rdev->mddev;
3117 ssize_t rv;
3118
3119 if (!entry->show)
3120 return -EIO;
3121
3122 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3123 if (!rv) {
3124 if (rdev->mddev == NULL)
3125 rv = -EBUSY;
3126 else
3127 rv = entry->show(rdev, page);
3128 mddev_unlock(mddev);
3129 }
3130 return rv;
3131}
3132
3133static ssize_t
3134rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3135 const char *page, size_t length)
3136{
3137 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3138 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3139 ssize_t rv;
3140 struct mddev *mddev = rdev->mddev;
3141
3142 if (!entry->store)
3143 return -EIO;
3144 if (!capable(CAP_SYS_ADMIN))
3145 return -EACCES;
3146 rv = mddev ? mddev_lock(mddev): -EBUSY;
3147 if (!rv) {
3148 if (rdev->mddev == NULL)
3149 rv = -EBUSY;
3150 else
3151 rv = entry->store(rdev, page, length);
3152 mddev_unlock(mddev);
3153 }
3154 return rv;
3155}
3156
3157static void rdev_free(struct kobject *ko)
3158{
3159 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3160 kfree(rdev);
3161}
3162static const struct sysfs_ops rdev_sysfs_ops = {
3163 .show = rdev_attr_show,
3164 .store = rdev_attr_store,
3165};
3166static struct kobj_type rdev_ktype = {
3167 .release = rdev_free,
3168 .sysfs_ops = &rdev_sysfs_ops,
3169 .default_attrs = rdev_default_attrs,
3170};
3171
3172int md_rdev_init(struct md_rdev *rdev)
3173{
3174 rdev->desc_nr = -1;
3175 rdev->saved_raid_disk = -1;
3176 rdev->raid_disk = -1;
3177 rdev->flags = 0;
3178 rdev->data_offset = 0;
3179 rdev->new_data_offset = 0;
3180 rdev->sb_events = 0;
3181 rdev->last_read_error.tv_sec = 0;
3182 rdev->last_read_error.tv_nsec = 0;
3183 rdev->sb_loaded = 0;
3184 rdev->bb_page = NULL;
3185 atomic_set(&rdev->nr_pending, 0);
3186 atomic_set(&rdev->read_errors, 0);
3187 atomic_set(&rdev->corrected_errors, 0);
3188
3189 INIT_LIST_HEAD(&rdev->same_set);
3190 init_waitqueue_head(&rdev->blocked_wait);
3191
	/* Reserve space for the bad block list up front,
	 * even on arrays that may never use it.
	 */
3196 rdev->badblocks.count = 0;
3197 rdev->badblocks.shift = -1;
3198 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3199 seqlock_init(&rdev->badblocks.lock);
3200 if (rdev->badblocks.page == NULL)
3201 return -ENOMEM;
3202
3203 return 0;
3204}
3205EXPORT_SYMBOL_GPL(md_rdev_init);
/*
 * Import a device. If 'super_format' >= 0, then sanity check the superblock
 *
 * mark the device faulty if:
 *
 *   - the device is nonexistent (zero size)
 *   - the device has no valid superblock
 *
 * a faulty rdev _never_ has rdev->sb set.
 */
3216static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3217{
3218 char b[BDEVNAME_SIZE];
3219 int err;
3220 struct md_rdev *rdev;
3221 sector_t size;
3222
3223 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3224 if (!rdev) {
3225 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3226 return ERR_PTR(-ENOMEM);
3227 }
3228
3229 err = md_rdev_init(rdev);
3230 if (err)
3231 goto abort_free;
3232 err = alloc_disk_sb(rdev);
3233 if (err)
3234 goto abort_free;
3235
3236 err = lock_rdev(rdev, newdev, super_format == -2);
3237 if (err)
3238 goto abort_free;
3239
3240 kobject_init(&rdev->kobj, &rdev_ktype);
3241
3242 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3243 if (!size) {
3244 printk(KERN_WARNING
3245 "md: %s has zero or unknown size, marking faulty!\n",
3246 bdevname(rdev->bdev,b));
3247 err = -EINVAL;
3248 goto abort_free;
3249 }
3250
3251 if (super_format >= 0) {
3252 err = super_types[super_format].
3253 load_super(rdev, NULL, super_minor);
3254 if (err == -EINVAL) {
3255 printk(KERN_WARNING
3256 "md: %s does not have a valid v%d.%d "
3257 "superblock, not importing!\n",
3258 bdevname(rdev->bdev,b),
3259 super_format, super_minor);
3260 goto abort_free;
3261 }
3262 if (err < 0) {
3263 printk(KERN_WARNING
3264 "md: could not read %s's sb, not importing!\n",
3265 bdevname(rdev->bdev,b));
3266 goto abort_free;
3267 }
3268 }
3269
3270 return rdev;
3271
3272abort_free:
3273 if (rdev->bdev)
3274 unlock_rdev(rdev);
3275 md_rdev_clear(rdev);
3276 kfree(rdev);
3277 return ERR_PTR(err);
3278}
3279
3280
3281
/*
 * Check a full RAID array for plausibility
 */
3285static void analyze_sbs(struct mddev * mddev)
3286{
3287 int i;
3288 struct md_rdev *rdev, *freshest, *tmp;
3289 char b[BDEVNAME_SIZE];
3290
3291 freshest = NULL;
3292 rdev_for_each_safe(rdev, tmp, mddev)
3293 switch (super_types[mddev->major_version].
3294 load_super(rdev, freshest, mddev->minor_version)) {
3295 case 1:
3296 freshest = rdev;
3297 break;
3298 case 0:
3299 break;
3300 default:
3301 printk( KERN_ERR \
3302 "md: fatal superblock inconsistency in %s"
3303 " -- removing from array\n",
3304 bdevname(rdev->bdev,b));
3305 kick_rdev_from_array(rdev);
3306 }
3307
3308
3309 super_types[mddev->major_version].
3310 validate_super(mddev, freshest);
3311
3312 i = 0;
3313 rdev_for_each_safe(rdev, tmp, mddev) {
3314 if (mddev->max_disks &&
3315 (rdev->desc_nr >= mddev->max_disks ||
3316 i > mddev->max_disks)) {
3317 printk(KERN_WARNING
3318 "md: %s: %s: only %d devices permitted\n",
3319 mdname(mddev), bdevname(rdev->bdev, b),
3320 mddev->max_disks);
3321 kick_rdev_from_array(rdev);
3322 continue;
3323 }
3324 if (rdev != freshest)
3325 if (super_types[mddev->major_version].
3326 validate_super(mddev, rdev)) {
3327 printk(KERN_WARNING "md: kicking non-fresh %s"
3328 " from array!\n",
3329 bdevname(rdev->bdev,b));
3330 kick_rdev_from_array(rdev);
3331 continue;
3332 }
3333 if (mddev->level == LEVEL_MULTIPATH) {
3334 rdev->desc_nr = i++;
3335 rdev->raid_disk = rdev->desc_nr;
3336 set_bit(In_sync, &rdev->flags);
3337 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3338 rdev->raid_disk = -1;
3339 clear_bit(In_sync, &rdev->flags);
3340 }
3341 }
3342}
3343
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds.
 * However we internally use a much smaller unit such as
 * milliseconds or jiffies.
 * This function takes a decimal number with a possible fractional
 * component, and produces an integer which is the result of
 * multiplying that number by 10^'scale',
 * all without any floating-point arithmetic.
 */
3354int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3355{
3356 unsigned long result = 0;
3357 long decimals = -1;
3358 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3359 if (*cp == '.')
3360 decimals = 0;
3361 else if (decimals < scale) {
3362 unsigned int value;
3363 value = *cp - '0';
3364 result = result * 10 + value;
3365 if (decimals >= 0)
3366 decimals++;
3367 }
3368 cp++;
3369 }
3370 if (*cp == '\n')
3371 cp++;
3372 if (*cp)
3373 return -EINVAL;
3374 if (decimals < 0)
3375 decimals = 0;
3376 while (decimals < scale) {
3377 result *= 10;
3378 decimals ++;
3379 }
3380 *res = result;
3381 return 0;
3382}
3383
3384
3385static void md_safemode_timeout(unsigned long data);
3386
3387static ssize_t
3388safe_delay_show(struct mddev *mddev, char *page)
3389{
3390 int msec = (mddev->safemode_delay*1000)/HZ;
3391 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3392}
3393static ssize_t
3394safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3395{
3396 unsigned long msec;
3397
3398 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3399 return -EINVAL;
3400 if (msec == 0)
3401 mddev->safemode_delay = 0;
3402 else {
3403 unsigned long old_delay = mddev->safemode_delay;
3404 mddev->safemode_delay = (msec*HZ)/1000;
3405 if (mddev->safemode_delay == 0)
3406 mddev->safemode_delay = 1;
3407 if (mddev->safemode_delay < old_delay || old_delay == 0)
3408 md_safemode_timeout((unsigned long)mddev);
3409 }
3410 return len;
3411}
3412static struct md_sysfs_entry md_safe_delay =
3413__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3414
3415static ssize_t
3416level_show(struct mddev *mddev, char *page)
3417{
3418 struct md_personality *p = mddev->pers;
3419 if (p)
3420 return sprintf(page, "%s\n", p->name);
3421 else if (mddev->clevel[0])
3422 return sprintf(page, "%s\n", mddev->clevel);
3423 else if (mddev->level != LEVEL_NONE)
3424 return sprintf(page, "%d\n", mddev->level);
3425 else
3426 return 0;
3427}
3428
3429static ssize_t
3430level_store(struct mddev *mddev, const char *buf, size_t len)
3431{
3432 char clevel[16];
3433 ssize_t rv = len;
3434 struct md_personality *pers;
3435 long level;
3436 void *priv;
3437 struct md_rdev *rdev;
3438
3439 if (mddev->pers == NULL) {
3440 if (len == 0)
3441 return 0;
3442 if (len >= sizeof(mddev->clevel))
3443 return -ENOSPC;
3444 strncpy(mddev->clevel, buf, len);
3445 if (mddev->clevel[len-1] == '\n')
3446 len--;
3447 mddev->clevel[len] = 0;
3448 mddev->level = LEVEL_NONE;
3449 return rv;
3450 }
3451 if (mddev->ro)
3452 return -EROFS;
3453
	/* request to change the personality.  Need to ensure:
	 *  - the array is not engaged in resync/recovery/reshape
	 *  - old personality can be suspended
	 *  - new personality will access other array.
	 */
3460 if (mddev->sync_thread ||
3461 mddev->reshape_position != MaxSector ||
3462 mddev->sysfs_active)
3463 return -EBUSY;
3464
3465 if (!mddev->pers->quiesce) {
3466 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3467 mdname(mddev), mddev->pers->name);
3468 return -EINVAL;
3469 }
3470
	/* Now find the new personality */
3472 if (len == 0 || len >= sizeof(clevel))
3473 return -EINVAL;
3474 strncpy(clevel, buf, len);
3475 if (clevel[len-1] == '\n')
3476 len--;
3477 clevel[len] = 0;
3478 if (kstrtol(clevel, 10, &level))
3479 level = LEVEL_NONE;
3480
3481 if (request_module("md-%s", clevel) != 0)
3482 request_module("md-level-%s", clevel);
3483 spin_lock(&pers_lock);
3484 pers = find_pers(level, clevel);
3485 if (!pers || !try_module_get(pers->owner)) {
3486 spin_unlock(&pers_lock);
3487 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3488 return -EINVAL;
3489 }
3490 spin_unlock(&pers_lock);
3491
3492 if (pers == mddev->pers) {
		/* Nothing to do! */
3494 module_put(pers->owner);
3495 return rv;
3496 }
3497 if (!pers->takeover) {
3498 module_put(pers->owner);
3499 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3500 mdname(mddev), clevel);
3501 return -EINVAL;
3502 }
3503
3504 rdev_for_each(rdev, mddev)
3505 rdev->new_raid_disk = rdev->raid_disk;
3506
	/* ->takeover must set new_* and/or delta_disks
	 * if it succeeds, and may set them when it fails.
	 */
3510 priv = pers->takeover(mddev);
3511 if (IS_ERR(priv)) {
3512 mddev->new_level = mddev->level;
3513 mddev->new_layout = mddev->layout;
3514 mddev->new_chunk_sectors = mddev->chunk_sectors;
3515 mddev->raid_disks -= mddev->delta_disks;
3516 mddev->delta_disks = 0;
3517 mddev->reshape_backwards = 0;
3518 module_put(pers->owner);
3519 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3520 mdname(mddev), clevel);
3521 return PTR_ERR(priv);
3522 }
3523
	/* Looks like we have a winner */
3525 mddev_suspend(mddev);
3526 mddev->pers->stop(mddev);
3527
3528 if (mddev->pers->sync_request == NULL &&
3529 pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
3531 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3532 printk(KERN_WARNING
3533 "md: cannot register extra attributes for %s\n",
3534 mdname(mddev));
3535 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3536 }
3537 if (mddev->pers->sync_request != NULL &&
3538 pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
3540 if (mddev->to_remove == NULL)
3541 mddev->to_remove = &md_redundancy_group;
3542 }
3543
3544 if (mddev->pers->sync_request == NULL &&
3545 mddev->external) {
		/* We are converting from a no-redundancy array to a
		 * redundancy array, and metadata is managed externally,
		 * so we have to be sure writes don't block while waiting
		 * for a metadata update that external management has not
		 * yet been started to handle.
		 */
3553 mddev->in_sync = 0;
3554 mddev->safemode_delay = 0;
3555 mddev->safemode = 0;
3556 }
3557
3558 rdev_for_each(rdev, mddev) {
3559 if (rdev->raid_disk < 0)
3560 continue;
3561 if (rdev->new_raid_disk >= mddev->raid_disks)
3562 rdev->new_raid_disk = -1;
3563 if (rdev->new_raid_disk == rdev->raid_disk)
3564 continue;
3565 sysfs_unlink_rdev(mddev, rdev);
3566 }
3567 rdev_for_each(rdev, mddev) {
3568 if (rdev->raid_disk < 0)
3569 continue;
3570 if (rdev->new_raid_disk == rdev->raid_disk)
3571 continue;
3572 rdev->raid_disk = rdev->new_raid_disk;
3573 if (rdev->raid_disk < 0)
3574 clear_bit(In_sync, &rdev->flags);
3575 else {
3576 if (sysfs_link_rdev(mddev, rdev))
3577 printk(KERN_WARNING "md: cannot register rd%d"
3578 " for %s after level change\n",
3579 rdev->raid_disk, mdname(mddev));
3580 }
3581 }
3582
3583 module_put(mddev->pers->owner);
3584 mddev->pers = pers;
3585 mddev->private = priv;
3586 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3587 mddev->level = mddev->new_level;
3588 mddev->layout = mddev->new_layout;
3589 mddev->chunk_sectors = mddev->new_chunk_sectors;
3590 mddev->delta_disks = 0;
3591 mddev->reshape_backwards = 0;
3592 mddev->degraded = 0;
3593 if (mddev->pers->sync_request == NULL) {
		/* this is now an array without redundancy, so
		 * it must always be in_sync
		 */
3597 mddev->in_sync = 1;
3598 del_timer_sync(&mddev->safemode_timer);
3599 }
3600 blk_set_stacking_limits(&mddev->queue->limits);
3601 pers->run(mddev);
3602 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3603 mddev_resume(mddev);
3604 if (!mddev->thread)
3605 md_update_sb(mddev, 1);
3606 sysfs_notify(&mddev->kobj, NULL, "level");
3607 md_new_event(mddev);
3608 return rv;
3609}
3610
3611static struct md_sysfs_entry md_level =
3612__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3613
3614
3615static ssize_t
3616layout_show(struct mddev *mddev, char *page)
3617{
	/* just a number, not meaningful for all levels */
3619 if (mddev->reshape_position != MaxSector &&
3620 mddev->layout != mddev->new_layout)
3621 return sprintf(page, "%d (%d)\n",
3622 mddev->new_layout, mddev->layout);
3623 return sprintf(page, "%d\n", mddev->layout);
3624}
3625
3626static ssize_t
3627layout_store(struct mddev *mddev, const char *buf, size_t len)
3628{
3629 char *e;
3630 unsigned long n = simple_strtoul(buf, &e, 10);
3631
3632 if (!*buf || (*e && *e != '\n'))
3633 return -EINVAL;
3634
3635 if (mddev->pers) {
3636 int err;
3637 if (mddev->pers->check_reshape == NULL)
3638 return -EBUSY;
3639 if (mddev->ro)
3640 return -EROFS;
3641 mddev->new_layout = n;
3642 err = mddev->pers->check_reshape(mddev);
3643 if (err) {
3644 mddev->new_layout = mddev->layout;
3645 return err;
3646 }
3647 } else {
3648 mddev->new_layout = n;
3649 if (mddev->reshape_position == MaxSector)
3650 mddev->layout = n;
3651 }
3652 return len;
3653}
3654static struct md_sysfs_entry md_layout =
3655__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3656
3657
3658static ssize_t
3659raid_disks_show(struct mddev *mddev, char *page)
3660{
3661 if (mddev->raid_disks == 0)
3662 return 0;
3663 if (mddev->reshape_position != MaxSector &&
3664 mddev->delta_disks != 0)
3665 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3666 mddev->raid_disks - mddev->delta_disks);
3667 return sprintf(page, "%d\n", mddev->raid_disks);
3668}
3669
3670static int update_raid_disks(struct mddev *mddev, int raid_disks);
3671
3672static ssize_t
3673raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3674{
3675 char *e;
3676 int rv = 0;
3677 unsigned long n = simple_strtoul(buf, &e, 10);
3678
3679 if (!*buf || (*e && *e != '\n'))
3680 return -EINVAL;
3681
3682 if (mddev->pers)
3683 rv = update_raid_disks(mddev, n);
3684 else if (mddev->reshape_position != MaxSector) {
3685 struct md_rdev *rdev;
3686 int olddisks = mddev->raid_disks - mddev->delta_disks;
3687
3688 rdev_for_each(rdev, mddev) {
3689 if (olddisks < n &&
3690 rdev->data_offset < rdev->new_data_offset)
3691 return -EINVAL;
3692 if (olddisks > n &&
3693 rdev->data_offset > rdev->new_data_offset)
3694 return -EINVAL;
3695 }
3696 mddev->delta_disks = n - olddisks;
3697 mddev->raid_disks = n;
3698 mddev->reshape_backwards = (mddev->delta_disks < 0);
3699 } else
3700 mddev->raid_disks = n;
3701 return rv ? rv : len;
3702}
3703static struct md_sysfs_entry md_raid_disks =
3704__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3705
3706static ssize_t
3707chunk_size_show(struct mddev *mddev, char *page)
3708{
3709 if (mddev->reshape_position != MaxSector &&
3710 mddev->chunk_sectors != mddev->new_chunk_sectors)
3711 return sprintf(page, "%d (%d)\n",
3712 mddev->new_chunk_sectors << 9,
3713 mddev->chunk_sectors << 9);
3714 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3715}
3716
3717static ssize_t
3718chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3719{
3720 char *e;
3721 unsigned long n = simple_strtoul(buf, &e, 10);
3722
3723 if (!*buf || (*e && *e != '\n'))
3724 return -EINVAL;
3725
3726 if (mddev->pers) {
3727 int err;
3728 if (mddev->pers->check_reshape == NULL)
3729 return -EBUSY;
3730 if (mddev->ro)
3731 return -EROFS;
3732 mddev->new_chunk_sectors = n >> 9;
3733 err = mddev->pers->check_reshape(mddev);
3734 if (err) {
3735 mddev->new_chunk_sectors = mddev->chunk_sectors;
3736 return err;
3737 }
3738 } else {
3739 mddev->new_chunk_sectors = n >> 9;
3740 if (mddev->reshape_position == MaxSector)
3741 mddev->chunk_sectors = n >> 9;
3742 }
3743 return len;
3744}
3745static struct md_sysfs_entry md_chunk_size =
3746__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3747
3748static ssize_t
3749resync_start_show(struct mddev *mddev, char *page)
3750{
3751 if (mddev->recovery_cp == MaxSector)
3752 return sprintf(page, "none\n");
3753 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3754}
3755
3756static ssize_t
3757resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3758{
3759 char *e;
3760 unsigned long long n = simple_strtoull(buf, &e, 10);
3761
3762 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3763 return -EBUSY;
3764 if (cmd_match(buf, "none"))
3765 n = MaxSector;
3766 else if (!*buf || (*e && *e != '\n'))
3767 return -EINVAL;
3768
3769 mddev->recovery_cp = n;
3770 if (mddev->pers)
3771 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3772 return len;
3773}
3774static struct md_sysfs_entry md_resync_start =
3775__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3776
/*
 * The array state can be:
 *
 *  clear
 *     No devices, no size, no level
 *     Equivalent to STOP_ARRAY ioctl
 *  inactive
 *     May have some settings, but array is not active
 *        all IO results in error
 *     When written, doesn't tear down array, but just stops it
 *  suspended (not supported yet)
 *     All IO requests will block. The array can be reconfigured.
 *     Writing this, if accepted, will block until array is quiescent
 *  readonly
 *     no resync can happen.  no superblocks get written.
 *     write requests fail
 *  read-auto
 *     like readonly, but behaves like 'clean' on a write request.
 *
 *  clean - no pending writes, but otherwise active.
 *     When written to inactive array, starts without resync
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active'.
 *       if not known, block and switch to write-pending
 *     If written to an active array that has pending writes, then fails.
 *  active
 *     fully active: IO and resync can be happening.
 *     When written to inactive array, starts with resync
 *
 *  write-pending
 *     clean, but writes are blocked waiting for 'active' to be written.
 *
 *  active-idle
 *     like active, but no writes have been seen for a while (100msec).
 *
 */
3813enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3814 write_pending, active_idle, bad_word};
3815static char *array_states[] = {
3816 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3817 "write-pending", "active-idle", NULL };
3818
3819static int match_word(const char *word, char **list)
3820{
3821 int n;
3822 for (n=0; list[n]; n++)
3823 if (cmd_match(word, list[n]))
3824 break;
3825 return n;
3826}
3827
3828static ssize_t
3829array_state_show(struct mddev *mddev, char *page)
3830{
3831 enum array_state st = inactive;
3832
3833 if (mddev->pers)
3834 switch(mddev->ro) {
3835 case 1:
3836 st = readonly;
3837 break;
3838 case 2:
3839 st = read_auto;
3840 break;
3841 case 0:
3842 if (mddev->in_sync)
3843 st = clean;
3844 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3845 st = write_pending;
3846 else if (mddev->safemode)
3847 st = active_idle;
3848 else
3849 st = active;
3850 }
3851 else {
3852 if (list_empty(&mddev->disks) &&
3853 mddev->raid_disks == 0 &&
3854 mddev->dev_sectors == 0)
3855 st = clear;
3856 else
3857 st = inactive;
3858 }
3859 return sprintf(page, "%s\n", array_states[st]);
3860}
3861
3862static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3863static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3864static int do_md_run(struct mddev * mddev);
3865static int restart_array(struct mddev *mddev);
3866
3867static ssize_t
3868array_state_store(struct mddev *mddev, const char *buf, size_t len)
3869{
3870 int err = -EINVAL;
3871 enum array_state st = match_word(buf, array_states);
3872 switch(st) {
3873 case bad_word:
3874 break;
3875 case clear:
		/* stopping an active array */
3877 err = do_md_stop(mddev, 0, NULL);
3878 break;
3879 case inactive:
		/* stopping an active array */
3881 if (mddev->pers)
3882 err = do_md_stop(mddev, 2, NULL);
3883 else
3884 err = 0;
3885 break;
3886 case suspended:
3887 break;
3888 case readonly:
3889 if (mddev->pers)
3890 err = md_set_readonly(mddev, NULL);
3891 else {
3892 mddev->ro = 1;
3893 set_disk_ro(mddev->gendisk, 1);
3894 err = do_md_run(mddev);
3895 }
3896 break;
3897 case read_auto:
3898 if (mddev->pers) {
3899 if (mddev->ro == 0)
3900 err = md_set_readonly(mddev, NULL);
3901 else if (mddev->ro == 1)
3902 err = restart_array(mddev);
3903 if (err == 0) {
3904 mddev->ro = 2;
3905 set_disk_ro(mddev->gendisk, 0);
3906 }
3907 } else {
3908 mddev->ro = 2;
3909 err = do_md_run(mddev);
3910 }
3911 break;
3912 case clean:
3913 if (mddev->pers) {
3914 restart_array(mddev);
3915 spin_lock_irq(&mddev->write_lock);
3916 if (atomic_read(&mddev->writes_pending) == 0) {
3917 if (mddev->in_sync == 0) {
3918 mddev->in_sync = 1;
3919 if (mddev->safemode == 1)
3920 mddev->safemode = 0;
3921 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3922 }
3923 err = 0;
3924 } else
3925 err = -EBUSY;
3926 spin_unlock_irq(&mddev->write_lock);
3927 } else
3928 err = -EINVAL;
3929 break;
3930 case active:
3931 if (mddev->pers) {
3932 restart_array(mddev);
3933 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3934 wake_up(&mddev->sb_wait);
3935 err = 0;
3936 } else {
3937 mddev->ro = 0;
3938 set_disk_ro(mddev->gendisk, 0);
3939 err = do_md_run(mddev);
3940 }
3941 break;
3942 case write_pending:
3943 case active_idle:
		/* these cannot be set directly */
3945 break;
3946 }
3947 if (err)
3948 return err;
3949 else {
3950 if (mddev->hold_active == UNTIL_IOCTL)
3951 mddev->hold_active = 0;
3952 sysfs_notify_dirent_safe(mddev->sysfs_state);
3953 return len;
3954 }
3955}
3956static struct md_sysfs_entry md_array_state =
3957__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3958
3959static ssize_t
3960max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3961 return sprintf(page, "%d\n",
3962 atomic_read(&mddev->max_corr_read_errors));
3963}
3964
3965static ssize_t
3966max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3967{
3968 char *e;
3969 unsigned long n = simple_strtoul(buf, &e, 10);
3970
3971 if (*buf && (*e == 0 || *e == '\n')) {
3972 atomic_set(&mddev->max_corr_read_errors, n);
3973 return len;
3974 }
3975 return -EINVAL;
3976}
3977
3978static struct md_sysfs_entry max_corr_read_errors =
3979__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3980 max_corrected_read_errors_store);
3981
3982static ssize_t
3983null_show(struct mddev *mddev, char *page)
3984{
3985 return -EINVAL;
3986}
3987
3988static ssize_t
3989new_dev_store(struct mddev *mddev, const char *buf, size_t len)
3990{
	/* buf must be %d:%d\n? giving major and minor numbers */
	/* The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise it, and then add it to the array.
	 * If the array is degraded, we try to add the new device to
	 * the array, without the superblock loaded.
	 */
3998 char *e;
3999 int major = simple_strtoul(buf, &e, 10);
4000 int minor;
4001 dev_t dev;
4002 struct md_rdev *rdev;
4003 int err;
4004
4005 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4006 return -EINVAL;
4007 minor = simple_strtoul(e+1, &e, 10);
4008 if (*e && *e != '\n')
4009 return -EINVAL;
4010 dev = MKDEV(major, minor);
4011 if (major != MAJOR(dev) ||
4012 minor != MINOR(dev))
4013 return -EOVERFLOW;
4014
4015
4016 if (mddev->persistent) {
4017 rdev = md_import_device(dev, mddev->major_version,
4018 mddev->minor_version);
4019 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4020 struct md_rdev *rdev0
4021 = list_entry(mddev->disks.next,
4022 struct md_rdev, same_set);
4023 err = super_types[mddev->major_version]
4024 .load_super(rdev, rdev0, mddev->minor_version);
4025 if (err < 0)
4026 goto out;
4027 }
4028 } else if (mddev->external)
4029 rdev = md_import_device(dev, -2, -1);
4030 else
4031 rdev = md_import_device(dev, -1, -1);
4032
4033 if (IS_ERR(rdev))
4034 return PTR_ERR(rdev);
4035 err = bind_rdev_to_array(rdev, mddev);
4036 out:
4037 if (err)
4038 export_rdev(rdev);
4039 return err ? err : len;
4040}
4041
4042static struct md_sysfs_entry md_new_device =
4043__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4044
4045static ssize_t
4046bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4047{
4048 char *end;
4049 unsigned long chunk, end_chunk;
4050
4051 if (!mddev->bitmap)
4052 goto out;
4053
4054 while (*buf) {
4055 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4056 if (buf == end) break;
4057 if (*end == '-') {
4058 buf = end + 1;
4059 end_chunk = simple_strtoul(buf, &end, 0);
4060 if (buf == end) break;
4061 }
4062 if (*end && !isspace(*end)) break;
4063 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4064 buf = skip_spaces(end);
4065 }
4066 bitmap_unplug(mddev->bitmap);
4067out:
4068 return len;
4069}
4070
4071static struct md_sysfs_entry md_bitmap =
4072__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4073
4074static ssize_t
4075size_show(struct mddev *mddev, char *page)
4076{
4077 return sprintf(page, "%llu\n",
4078 (unsigned long long)mddev->dev_sectors / 2);
4079}
4080
4081static int update_size(struct mddev *mddev, sector_t num_sectors);
4082
4083static ssize_t
4084size_store(struct mddev *mddev, const char *buf, size_t len)
4085{
	/* If array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If array is active, we can try an on-line resize
	 */
4090 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4092
4093 if (err < 0)
4094 return err;
4095 if (mddev->pers) {
4096 err = update_size(mddev, sectors);
4097 md_update_sb(mddev, 1);
4098 } else {
4099 if (mddev->dev_sectors == 0 ||
4100 mddev->dev_sectors > sectors)
4101 mddev->dev_sectors = sectors;
4102 else
4103 err = -ENOSPC;
4104 }
4105 return err ? err : len;
4106}
4107
4108static struct md_sysfs_entry md_size =
4109__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4110
4111
/* Metadata version:
 * This is one of
 *   'none' for arrays with no metadata (good luck...)
 *   'external' for arrays with externally managed metadata,
 * or N.M for internally known formats
 */
4118static ssize_t
4119metadata_show(struct mddev *mddev, char *page)
4120{
4121 if (mddev->persistent)
4122 return sprintf(page, "%d.%d\n",
4123 mddev->major_version, mddev->minor_version);
4124 else if (mddev->external)
4125 return sprintf(page, "external:%s\n", mddev->metadata_type);
4126 else
4127 return sprintf(page, "none\n");
4128}
4129
4130static ssize_t
4131metadata_store(struct mddev *mddev, const char *buf, size_t len)
4132{
4133 int major, minor;
4134 char *e;
4135
	/* Changing the details of 'external' metadata is
	 * always permitted.  Otherwise there must be
	 * no devices attached to the array. */
4139 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4140 ;
4141 else if (!list_empty(&mddev->disks))
4142 return -EBUSY;
4143
4144 if (cmd_match(buf, "none")) {
4145 mddev->persistent = 0;
4146 mddev->external = 0;
4147 mddev->major_version = 0;
4148 mddev->minor_version = 90;
4149 return len;
4150 }
4151 if (strncmp(buf, "external:", 9) == 0) {
4152 size_t namelen = len-9;
4153 if (namelen >= sizeof(mddev->metadata_type))
4154 namelen = sizeof(mddev->metadata_type)-1;
4155 strncpy(mddev->metadata_type, buf+9, namelen);
4156 mddev->metadata_type[namelen] = 0;
4157 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4158 mddev->metadata_type[--namelen] = 0;
4159 mddev->persistent = 0;
4160 mddev->external = 1;
4161 mddev->major_version = 0;
4162 mddev->minor_version = 90;
4163 return len;
4164 }
4165 major = simple_strtoul(buf, &e, 10);
4166 if (e==buf || *e != '.')
4167 return -EINVAL;
4168 buf = e+1;
4169 minor = simple_strtoul(buf, &e, 10);
4170 if (e==buf || (*e && *e != '\n') )
4171 return -EINVAL;
4172 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4173 return -ENOENT;
4174 mddev->major_version = major;
4175 mddev->minor_version = minor;
4176 mddev->persistent = 1;
4177 mddev->external = 0;
4178 return len;
4179}
4180
4181static struct md_sysfs_entry md_metadata =
4182__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4183
4184static ssize_t
4185action_show(struct mddev *mddev, char *page)
4186{
4187 char *type = "idle";
4188 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4189 type = "frozen";
4190 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4191 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4192 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4193 type = "reshape";
4194 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4195 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4196 type = "resync";
4197 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4198 type = "check";
4199 else
4200 type = "repair";
4201 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4202 type = "recover";
4203 }
4204 return sprintf(page, "%s\n", type);
4205}
4206
4207static ssize_t
4208action_store(struct mddev *mddev, const char *page, size_t len)
4209{
4210 if (!mddev->pers || !mddev->pers->sync_request)
4211 return -EINVAL;
4212
4213 if (cmd_match(page, "frozen"))
4214 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4215 else
4216 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4217
4218 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4219 if (mddev->sync_thread) {
4220 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4221 md_reap_sync_thread(mddev);
4222 }
4223 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4224 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4225 return -EBUSY;
4226 else if (cmd_match(page, "resync"))
4227 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4228 else if (cmd_match(page, "recover")) {
4229 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4230 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4231 } else if (cmd_match(page, "reshape")) {
4232 int err;
4233 if (mddev->pers->start_reshape == NULL)
4234 return -EINVAL;
4235 err = mddev->pers->start_reshape(mddev);
4236 if (err)
4237 return err;
4238 sysfs_notify(&mddev->kobj, NULL, "degraded");
4239 } else {
4240 if (cmd_match(page, "check"))
4241 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4242 else if (!cmd_match(page, "repair"))
4243 return -EINVAL;
4244 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4245 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4246 }
4247 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode
		 */
4251 mddev->ro = 0;
4252 md_wakeup_thread(mddev->sync_thread);
4253 }
4254 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4255 md_wakeup_thread(mddev->thread);
4256 sysfs_notify_dirent_safe(mddev->sysfs_action);
4257 return len;
4258}
4259
4260static struct md_sysfs_entry md_scan_mode =
4261__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4262
4263static ssize_t
4264last_sync_action_show(struct mddev *mddev, char *page)
4265{
4266 return sprintf(page, "%s\n", mddev->last_sync_action);
4267}
4268
4269static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4270
4271static ssize_t
4272mismatch_cnt_show(struct mddev *mddev, char *page)
4273{
4274 return sprintf(page, "%llu\n",
4275 (unsigned long long)
4276 atomic64_read(&mddev->resync_mismatches));
4277}
4278
4279static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4280
4281static ssize_t
4282sync_min_show(struct mddev *mddev, char *page)
4283{
4284 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4285 mddev->sync_speed_min ? "local": "system");
4286}
4287
4288static ssize_t
4289sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4290{
4291 int min;
4292 char *e;
4293 if (strncmp(buf, "system", 6)==0) {
4294 mddev->sync_speed_min = 0;
4295 return len;
4296 }
4297 min = simple_strtoul(buf, &e, 10);
4298 if (buf == e || (*e && *e != '\n') || min <= 0)
4299 return -EINVAL;
4300 mddev->sync_speed_min = min;
4301 return len;
4302}
4303
4304static struct md_sysfs_entry md_sync_min =
4305__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4306
4307static ssize_t
4308sync_max_show(struct mddev *mddev, char *page)
4309{
4310 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4311 mddev->sync_speed_max ? "local": "system");
4312}
4313
4314static ssize_t
4315sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4316{
4317 int max;
4318 char *e;
4319 if (strncmp(buf, "system", 6)==0) {
4320 mddev->sync_speed_max = 0;
4321 return len;
4322 }
4323 max = simple_strtoul(buf, &e, 10);
4324 if (buf == e || (*e && *e != '\n') || max <= 0)
4325 return -EINVAL;
4326 mddev->sync_speed_max = max;
4327 return len;
4328}
4329
4330static struct md_sysfs_entry md_sync_max =
4331__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4332
4333static ssize_t
4334degraded_show(struct mddev *mddev, char *page)
4335{
4336 return sprintf(page, "%d\n", mddev->degraded);
4337}
4338static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4339
4340static ssize_t
4341sync_force_parallel_show(struct mddev *mddev, char *page)
4342{
4343 return sprintf(page, "%d\n", mddev->parallel_resync);
4344}
4345
4346static ssize_t
4347sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4348{
4349 long n;
4350
4351 if (kstrtol(buf, 10, &n))
4352 return -EINVAL;
4353
4354 if (n != 0 && n != 1)
4355 return -EINVAL;
4356
4357 mddev->parallel_resync = n;
4358
4359 if (mddev->sync_thread)
4360 wake_up(&resync_wait);
4361
4362 return len;
4363}
4364
/* force parallel resync, even with shared block devices */
4366static struct md_sysfs_entry md_sync_force_parallel =
4367__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4368 sync_force_parallel_show, sync_force_parallel_store);
4369
4370static ssize_t
4371sync_speed_show(struct mddev *mddev, char *page)
4372{
4373 unsigned long resync, dt, db;
4374 if (mddev->curr_resync == 0)
4375 return sprintf(page, "none\n");
4376 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4377 dt = (jiffies - mddev->resync_mark) / HZ;
4378 if (!dt) dt++;
4379 db = resync - mddev->resync_mark_cnt;
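	/* db sectors completed in dt seconds; divide by 2 to report KiB/sec */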
4380 return sprintf(page, "%lu\n", db/dt/2);
4381}
4382
4383static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4384
4385static ssize_t
4386sync_completed_show(struct mddev *mddev, char *page)
4387{
4388 unsigned long long max_sectors, resync;
4389
4390 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4391 return sprintf(page, "none\n");
4392
4393 if (mddev->curr_resync == 1 ||
4394 mddev->curr_resync == 2)
4395 return sprintf(page, "delayed\n");
4396
4397 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4398 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4399 max_sectors = mddev->resync_max_sectors;
4400 else
4401 max_sectors = mddev->dev_sectors;
4402
4403 resync = mddev->curr_resync_completed;
4404 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4405}
4406
4407static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4408
4409static ssize_t
4410min_sync_show(struct mddev *mddev, char *page)
4411{
4412 return sprintf(page, "%llu\n",
4413 (unsigned long long)mddev->resync_min);
4414}
4415static ssize_t
4416min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4417{
4418 unsigned long long min;
4419 if (kstrtoull(buf, 10, &min))
4420 return -EINVAL;
4421 if (min > mddev->resync_max)
4422 return -EINVAL;
4423 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4424 return -EBUSY;
4425
	/* Must be a multiple of chunk_size */
4427 if (mddev->chunk_sectors) {
4428 sector_t temp = min;
4429 if (sector_div(temp, mddev->chunk_sectors))
4430 return -EINVAL;
4431 }
4432 mddev->resync_min = min;
4433
4434 return len;
4435}
4436
4437static struct md_sysfs_entry md_min_sync =
4438__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4439
4440static ssize_t
4441max_sync_show(struct mddev *mddev, char *page)
4442{
4443 if (mddev->resync_max == MaxSector)
4444 return sprintf(page, "max\n");
4445 else
4446 return sprintf(page, "%llu\n",
4447 (unsigned long long)mddev->resync_max);
4448}
4449static ssize_t
4450max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4451{
4452 if (strncmp(buf, "max", 3) == 0)
4453 mddev->resync_max = MaxSector;
4454 else {
4455 unsigned long long max;
4456 if (kstrtoull(buf, 10, &max))
4457 return -EINVAL;
4458 if (max < mddev->resync_min)
4459 return -EINVAL;
4460 if (max < mddev->resync_max &&
4461 mddev->ro == 0 &&
4462 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4463 return -EBUSY;
4464
		/* Must be a multiple of chunk_size */
4466 if (mddev->chunk_sectors) {
4467 sector_t temp = max;
4468 if (sector_div(temp, mddev->chunk_sectors))
4469 return -EINVAL;
4470 }
4471 mddev->resync_max = max;
4472 }
4473 wake_up(&mddev->recovery_wait);
4474 return len;
4475}
4476
4477static struct md_sysfs_entry md_max_sync =
4478__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4479
4480static ssize_t
4481suspend_lo_show(struct mddev *mddev, char *page)
4482{
4483 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4484}
4485
4486static ssize_t
4487suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4488{
4489 char *e;
4490 unsigned long long new = simple_strtoull(buf, &e, 10);
4491 unsigned long long old = mddev->suspend_lo;
4492
4493 if (mddev->pers == NULL ||
4494 mddev->pers->quiesce == NULL)
4495 return -EINVAL;
4496 if (buf == e || (*e && *e != '\n'))
4497 return -EINVAL;
4498
4499 mddev->suspend_lo = new;
4500 if (new >= old)
4501
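		/* Shrinking the suspended region */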
4502 mddev->pers->quiesce(mddev, 2);
4503 else {
4504
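		/* Expanding the suspended region: quiesce and resume so
		 * requests already inside the old region drain first.
		 */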
4505 mddev->pers->quiesce(mddev, 1);
4506 mddev->pers->quiesce(mddev, 0);
4507 }
4508 return len;
4509}
4510static struct md_sysfs_entry md_suspend_lo =
4511__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4512
4513
4514static ssize_t
4515suspend_hi_show(struct mddev *mddev, char *page)
4516{
4517 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4518}
4519
4520static ssize_t
4521suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4522{
4523 char *e;
4524 unsigned long long new = simple_strtoull(buf, &e, 10);
4525 unsigned long long old = mddev->suspend_hi;
4526
4527 if (mddev->pers == NULL ||
4528 mddev->pers->quiesce == NULL)
4529 return -EINVAL;
4530 if (buf == e || (*e && *e != '\n'))
4531 return -EINVAL;
4532
4533 mddev->suspend_hi = new;
4534 if (new <= old)
4535
4536 mddev->pers->quiesce(mddev, 2);
4537 else {
4538
4539 mddev->pers->quiesce(mddev, 1);
4540 mddev->pers->quiesce(mddev, 0);
4541 }
4542 return len;
4543}
4544static struct md_sysfs_entry md_suspend_hi =
4545__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4546
4547static ssize_t
4548reshape_position_show(struct mddev *mddev, char *page)
4549{
4550 if (mddev->reshape_position != MaxSector)
4551 return sprintf(page, "%llu\n",
4552 (unsigned long long)mddev->reshape_position);
4553 strcpy(page, "none\n");
4554 return 5;
4555}
4556
4557static ssize_t
4558reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4559{
4560 struct md_rdev *rdev;
4561 char *e;
4562 unsigned long long new = simple_strtoull(buf, &e, 10);
4563 if (mddev->pers)
4564 return -EBUSY;
4565 if (buf == e || (*e && *e != '\n'))
4566 return -EINVAL;
4567 mddev->reshape_position = new;
4568 mddev->delta_disks = 0;
4569 mddev->reshape_backwards = 0;
4570 mddev->new_level = mddev->level;
4571 mddev->new_layout = mddev->layout;
4572 mddev->new_chunk_sectors = mddev->chunk_sectors;
4573 rdev_for_each(rdev, mddev)
4574 rdev->new_data_offset = rdev->data_offset;
4575 return len;
4576}
4577
4578static struct md_sysfs_entry md_reshape_position =
4579__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4580 reshape_position_store);
4581
4582static ssize_t
4583reshape_direction_show(struct mddev *mddev, char *page)
4584{
4585 return sprintf(page, "%s\n",
4586 mddev->reshape_backwards ? "backwards" : "forwards");
4587}
4588
4589static ssize_t
4590reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4591{
4592 int backwards = 0;
4593 if (cmd_match(buf, "forwards"))
4594 backwards = 0;
4595 else if (cmd_match(buf, "backwards"))
4596 backwards = 1;
4597 else
4598 return -EINVAL;
4599 if (mddev->reshape_backwards == backwards)
4600 return len;
4601
4602
4603 if (mddev->delta_disks)
4604 return -EBUSY;
4605
4606 if (mddev->persistent &&
4607 mddev->major_version == 0)
4608 return -EINVAL;
4609
4610 mddev->reshape_backwards = backwards;
4611 return len;
4612}
4613
4614static struct md_sysfs_entry md_reshape_direction =
4615__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4616 reshape_direction_store);
4617
4618static ssize_t
4619array_size_show(struct mddev *mddev, char *page)
4620{
4621 if (mddev->external_size)
4622 return sprintf(page, "%llu\n",
4623 (unsigned long long)mddev->array_sectors/2);
4624 else
4625 return sprintf(page, "default\n");
4626}
4627
4628static ssize_t
4629array_size_store(struct mddev *mddev, const char *buf, size_t len)
4630{
4631 sector_t sectors;
4632
4633 if (strncmp(buf, "default", 7) == 0) {
4634 if (mddev->pers)
4635 sectors = mddev->pers->size(mddev, 0, 0);
4636 else
4637 sectors = mddev->array_sectors;
4638
4639 mddev->external_size = 0;
4640 } else {
4641 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4642 return -EINVAL;
4643 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4644 return -E2BIG;
4645
4646 mddev->external_size = 1;
4647 }
4648
4649 mddev->array_sectors = sectors;
4650 if (mddev->pers) {
4651 set_capacity(mddev->gendisk, mddev->array_sectors);
4652 revalidate_disk(mddev->gendisk);
4653 }
4654 return len;
4655}
4656
4657static struct md_sysfs_entry md_array_size =
4658__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4659 array_size_store);
4660
4661static struct attribute *md_default_attrs[] = {
4662 &md_level.attr,
4663 &md_layout.attr,
4664 &md_raid_disks.attr,
4665 &md_chunk_size.attr,
4666 &md_size.attr,
4667 &md_resync_start.attr,
4668 &md_metadata.attr,
4669 &md_new_device.attr,
4670 &md_safe_delay.attr,
4671 &md_array_state.attr,
4672 &md_reshape_position.attr,
4673 &md_reshape_direction.attr,
4674 &md_array_size.attr,
4675 &max_corr_read_errors.attr,
4676 NULL,
4677};
4678
4679static struct attribute *md_redundancy_attrs[] = {
4680 &md_scan_mode.attr,
4681 &md_last_scan_mode.attr,
4682 &md_mismatches.attr,
4683 &md_sync_min.attr,
4684 &md_sync_max.attr,
4685 &md_sync_speed.attr,
4686 &md_sync_force_parallel.attr,
4687 &md_sync_completed.attr,
4688 &md_min_sync.attr,
4689 &md_max_sync.attr,
4690 &md_suspend_lo.attr,
4691 &md_suspend_hi.attr,
4692 &md_bitmap.attr,
4693 &md_degraded.attr,
4694 NULL,
4695};
4696static struct attribute_group md_redundancy_group = {
4697 .name = NULL,
4698 .attrs = md_redundancy_attrs,
4699};
4700
4701
4702static ssize_t
4703md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4704{
4705 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4706 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4707 ssize_t rv;
4708
4709 if (!entry->show)
4710 return -EIO;
4711 spin_lock(&all_mddevs_lock);
4712 if (list_empty(&mddev->all_mddevs)) {
4713 spin_unlock(&all_mddevs_lock);
4714 return -EBUSY;
4715 }
4716 mddev_get(mddev);
4717 spin_unlock(&all_mddevs_lock);
4718
4719 rv = mddev_lock(mddev);
4720 if (!rv) {
4721 rv = entry->show(mddev, page);
4722 mddev_unlock(mddev);
4723 }
4724 mddev_put(mddev);
4725 return rv;
4726}
4727
4728static ssize_t
4729md_attr_store(struct kobject *kobj, struct attribute *attr,
4730 const char *page, size_t length)
4731{
4732 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4733 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4734 ssize_t rv;
4735
4736 if (!entry->store)
4737 return -EIO;
4738 if (!capable(CAP_SYS_ADMIN))
4739 return -EACCES;
4740 spin_lock(&all_mddevs_lock);
4741 if (list_empty(&mddev->all_mddevs)) {
4742 spin_unlock(&all_mddevs_lock);
4743 return -EBUSY;
4744 }
4745 mddev_get(mddev);
4746 spin_unlock(&all_mddevs_lock);
4747 if (entry->store == new_dev_store)
4748 flush_workqueue(md_misc_wq);
4749 rv = mddev_lock(mddev);
4750 if (!rv) {
4751 rv = entry->store(mddev, page, length);
4752 mddev_unlock(mddev);
4753 }
4754 mddev_put(mddev);
4755 return rv;
4756}
4757
4758static void md_free(struct kobject *ko)
4759{
4760 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4761
4762 if (mddev->sysfs_state)
4763 sysfs_put(mddev->sysfs_state);
4764
4765 if (mddev->gendisk) {
4766 del_gendisk(mddev->gendisk);
4767 put_disk(mddev->gendisk);
4768 }
4769 if (mddev->queue)
4770 blk_cleanup_queue(mddev->queue);
4771
4772 kfree(mddev);
4773}
4774
4775static const struct sysfs_ops md_sysfs_ops = {
4776 .show = md_attr_show,
4777 .store = md_attr_store,
4778};
4779static struct kobj_type md_ktype = {
4780 .release = md_free,
4781 .sysfs_ops = &md_sysfs_ops,
4782 .default_attrs = md_default_attrs,
4783};
4784
4785int mdp_major = 0;
4786
4787static void mddev_delayed_delete(struct work_struct *ws)
4788{
4789 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4790
4791 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4792 kobject_del(&mddev->kobj);
4793 kobject_put(&mddev->kobj);
4794}
4795
4796static int md_alloc(dev_t dev, char *name)
4797{
4798 static DEFINE_MUTEX(disks_mutex);
4799 struct mddev *mddev = mddev_find(dev);
4800 struct gendisk *disk;
4801 int partitioned;
4802 int shift;
4803 int unit;
4804 int error;
4805
4806 if (!mddev)
4807 return -ENODEV;
4808
4809 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4810 shift = partitioned ? MdpMinorShift : 0;
4811 unit = MINOR(mddev->unit) >> shift;
4812
4813
4814
4815
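	/* Wait for any previous instance of this device to be completely
	 * removed (mddev_delayed_delete runs on md_misc_wq).
	 */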
4816 flush_workqueue(md_misc_wq);
4817
4818 mutex_lock(&disks_mutex);
4819 error = -EEXIST;
4820 if (mddev->gendisk)
4821 goto abort;
4822
4823 if (name) {
4824
4825
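		/* Need to ensure that 'name' is not a duplicate */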
4826 struct mddev *mddev2;
4827 spin_lock(&all_mddevs_lock);
4828
4829 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4830 if (mddev2->gendisk &&
4831 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4832 spin_unlock(&all_mddevs_lock);
4833 goto abort;
4834 }
4835 spin_unlock(&all_mddevs_lock);
4836 }
4837
4838 error = -ENOMEM;
4839 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4840 if (!mddev->queue)
4841 goto abort;
4842 mddev->queue->queuedata = mddev;
4843
4844 blk_queue_make_request(mddev->queue, md_make_request);
4845 blk_set_stacking_limits(&mddev->queue->limits);
4846
4847 disk = alloc_disk(1 << shift);
4848 if (!disk) {
4849 blk_cleanup_queue(mddev->queue);
4850 mddev->queue = NULL;
4851 goto abort;
4852 }
4853 disk->major = MAJOR(mddev->unit);
4854 disk->first_minor = unit << shift;
4855 if (name)
4856 strcpy(disk->disk_name, name);
4857 else if (partitioned)
4858 sprintf(disk->disk_name, "md_d%d", unit);
4859 else
4860 sprintf(disk->disk_name, "md%d", unit);
4861 disk->fops = &md_fops;
4862 disk->private_data = mddev;
4863 disk->queue = mddev->queue;
4864 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4865
4866
4867
4868
4869 disk->flags |= GENHD_FL_EXT_DEVT;
4870 mddev->gendisk = disk;
4871
4872
4873
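	/* As soon as add_disk() is called another thread could open the
	 * device, so hold open_mutex until the kobject setup is done.
	 */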
4874 mutex_lock(&mddev->open_mutex);
4875 add_disk(disk);
4876
4877 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4878 &disk_to_dev(disk)->kobj, "%s", "md");
4879 if (error) {
4880
4881
4882
4883 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4884 disk->disk_name);
4885 error = 0;
4886 }
4887 if (mddev->kobj.sd &&
4888 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4889 printk(KERN_DEBUG "pointless warning\n");
4890 mutex_unlock(&mddev->open_mutex);
4891 abort:
4892 mutex_unlock(&disks_mutex);
4893 if (!error && mddev->kobj.sd) {
4894 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4895 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4896 }
4897 mddev_put(mddev);
4898 return error;
4899}
4900
4901static struct kobject *md_probe(dev_t dev, int *part, void *data)
4902{
4903 md_alloc(dev, NULL);
4904 return NULL;
4905}
4906
4907static int add_named_array(const char *val, struct kernel_param *kp)
4908{
4909
4910
4911
4912
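	/* 'val' must look like "md_<name>"; trailing newlines are stripped
	 * and the result must fit in DISK_NAME_LEN.
	 */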
4913 int len = strlen(val);
4914 char buf[DISK_NAME_LEN];
4915
4916 while (len && val[len-1] == '\n')
4917 len--;
4918 if (len >= DISK_NAME_LEN)
4919 return -E2BIG;
4920 strlcpy(buf, val, len+1);
4921 if (strncmp(buf, "md_", 3) != 0)
4922 return -EINVAL;
4923 return md_alloc(0, buf);
4924}
4925
4926static void md_safemode_timeout(unsigned long data)
4927{
4928 struct mddev *mddev = (struct mddev *) data;
4929
4930 if (!atomic_read(&mddev->writes_pending)) {
4931 mddev->safemode = 1;
4932 if (mddev->external)
4933 sysfs_notify_dirent_safe(mddev->sysfs_state);
4934 }
4935 md_wakeup_thread(mddev->thread);
4936}
4937
4938static int start_dirty_degraded;
4939
4940int md_run(struct mddev *mddev)
4941{
4942 int err;
4943 struct md_rdev *rdev;
4944 struct md_personality *pers;
4945
4946 if (list_empty(&mddev->disks))
4947
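		/* cannot run an array with no devices */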
4948 return -EINVAL;
4949
4950 if (mddev->pers)
4951 return -EBUSY;
4952
4953 if (mddev->sysfs_active)
4954 return -EBUSY;
4955
4956
4957
4958
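	/*
	 * Analyze the RAID superblocks if the array has not yet been
	 * configured (only possible with persistent metadata).
	 */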
4959 if (!mddev->raid_disks) {
4960 if (!mddev->persistent)
4961 return -EINVAL;
4962 analyze_sbs(mddev);
4963 }
4964
4965 if (mddev->level != LEVEL_NONE)
4966 request_module("md-level-%d", mddev->level);
4967 else if (mddev->clevel[0])
4968 request_module("md-%s", mddev->clevel);
4969
4970
4971
4972
4973
4974
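	/*
	 * Drop all container device buffers: from now on the only valid
	 * external interface is through the md device.
	 */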
4975 rdev_for_each(rdev, mddev) {
4976 if (test_bit(Faulty, &rdev->flags))
4977 continue;
4978 sync_blockdev(rdev->bdev);
4979 invalidate_bdev(rdev->bdev);
4980
4981
4982
4983
4984
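		/* Sanity check: data and metadata must not overlap on any
		 * member device (separate meta_bdev devices are exempt).
		 */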
4985 if (rdev->meta_bdev) {
4986 ;
4987 } else if (rdev->data_offset < rdev->sb_start) {
4988 if (mddev->dev_sectors &&
4989 rdev->data_offset + mddev->dev_sectors
4990 > rdev->sb_start) {
4991 printk("md: %s: data overlaps metadata\n",
4992 mdname(mddev));
4993 return -EINVAL;
4994 }
4995 } else {
4996 if (rdev->sb_start + rdev->sb_size/512
4997 > rdev->data_offset) {
4998 printk("md: %s: metadata overlaps data\n",
4999 mdname(mddev));
5000 return -EINVAL;
5001 }
5002 }
5003 sysfs_notify_dirent_safe(rdev->sysfs_state);
5004 }
5005
5006 if (mddev->bio_set == NULL)
5007 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5008
5009 spin_lock(&pers_lock);
5010 pers = find_pers(mddev->level, mddev->clevel);
5011 if (!pers || !try_module_get(pers->owner)) {
5012 spin_unlock(&pers_lock);
5013 if (mddev->level != LEVEL_NONE)
5014 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5015 mddev->level);
5016 else
5017 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5018 mddev->clevel);
5019 return -EINVAL;
5020 }
5021 mddev->pers = pers;
5022 spin_unlock(&pers_lock);
5023 if (mddev->level != pers->level) {
5024 mddev->level = pers->level;
5025 mddev->new_level = pers->level;
5026 }
5027 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5028
5029 if (mddev->reshape_position != MaxSector &&
5030 pers->start_reshape == NULL) {
5031
5032 mddev->pers = NULL;
5033 module_put(pers->owner);
5034 return -EINVAL;
5035 }
5036
5037 if (pers->sync_request) {
5038
5039
5040
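		/* Warn if two member devices appear to sit on the same
		 * physical disk: redundancy would then be compromised.
		 */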
5041 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5042 struct md_rdev *rdev2;
5043 int warned = 0;
5044
5045 rdev_for_each(rdev, mddev)
5046 rdev_for_each(rdev2, mddev) {
5047 if (rdev < rdev2 &&
5048 rdev->bdev->bd_contains ==
5049 rdev2->bdev->bd_contains) {
5050 printk(KERN_WARNING
5051 "%s: WARNING: %s appears to be"
5052 " on the same physical disk as"
5053 " %s.\n",
5054 mdname(mddev),
5055 bdevname(rdev->bdev,b),
5056 bdevname(rdev2->bdev,b2));
5057 warned = 1;
5058 }
5059 }
5060
5061 if (warned)
5062 printk(KERN_WARNING
5063 "True protection against single-disk"
5064 " failure might be compromised.\n");
5065 }
5066
5067 mddev->recovery = 0;
5068
5069 mddev->resync_max_sectors = mddev->dev_sectors;
5070
5071 mddev->ok_start_degraded = start_dirty_degraded;
5072
5073 if (start_readonly && mddev->ro == 0)
5074 mddev->ro = 2;
5075
5076 err = mddev->pers->run(mddev);
5077 if (err)
5078 printk(KERN_ERR "md: pers->run() failed ...\n");
5079 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5080 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5081 " but 'external_size' not in effect?\n", __func__);
5082 printk(KERN_ERR
5083 "md: invalid array_size %llu > default size %llu\n",
5084 (unsigned long long)mddev->array_sectors / 2,
5085 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5086 err = -EINVAL;
5087 mddev->pers->stop(mddev);
5088 }
5089 if (err == 0 && mddev->pers->sync_request &&
5090 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5091 err = bitmap_create(mddev);
5092 if (err) {
5093 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5094 mdname(mddev), err);
5095 mddev->pers->stop(mddev);
5096 }
5097 }
5098 if (err) {
5099 module_put(mddev->pers->owner);
5100 mddev->pers = NULL;
5101 bitmap_destroy(mddev);
5102 return err;
5103 }
5104 if (mddev->pers->sync_request) {
5105 if (mddev->kobj.sd &&
5106 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5107 printk(KERN_WARNING
5108 "md: cannot register extra attributes for %s\n",
5109 mdname(mddev));
5110 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5111 } else if (mddev->ro == 2)
5112 mddev->ro = 0;
5113
5114 atomic_set(&mddev->writes_pending,0);
5115 atomic_set(&mddev->max_corr_read_errors,
5116 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5117 mddev->safemode = 0;
5118 mddev->safemode_timer.function = md_safemode_timeout;
5119 mddev->safemode_timer.data = (unsigned long) mddev;
5120 mddev->safemode_delay = (200 * HZ)/1000 +1;
5121 mddev->in_sync = 1;
5122 smp_wmb();
5123 mddev->ready = 1;
5124 rdev_for_each(rdev, mddev)
5125 if (rdev->raid_disk >= 0)
5126 if (sysfs_link_rdev(mddev, rdev))
5127 ;
5128
5129 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5130
5131 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5132 md_update_sb(mddev, 0);
5133
5134 md_new_event(mddev);
5135 sysfs_notify_dirent_safe(mddev->sysfs_state);
5136 sysfs_notify_dirent_safe(mddev->sysfs_action);
5137 sysfs_notify(&mddev->kobj, NULL, "degraded");
5138 return 0;
5139}
5140EXPORT_SYMBOL_GPL(md_run);
5141
5142static int do_md_run(struct mddev *mddev)
5143{
5144 int err;
5145
5146 err = md_run(mddev);
5147 if (err)
5148 goto out;
5149 err = bitmap_load(mddev);
5150 if (err) {
5151 bitmap_destroy(mddev);
5152 goto out;
5153 }
5154
5155 md_wakeup_thread(mddev->thread);
5156 md_wakeup_thread(mddev->sync_thread);
5157
5158 set_capacity(mddev->gendisk, mddev->array_sectors);
5159 revalidate_disk(mddev->gendisk);
5160 mddev->changed = 1;
5161 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5162out:
5163 return err;
5164}
5165
5166static int restart_array(struct mddev *mddev)
5167{
5168 struct gendisk *disk = mddev->gendisk;
5169
5170
5171 if (list_empty(&mddev->disks))
5172 return -ENXIO;
5173 if (!mddev->pers)
5174 return -EINVAL;
5175 if (!mddev->ro)
5176 return -EBUSY;
5177 mddev->safemode = 0;
5178 mddev->ro = 0;
5179 set_disk_ro(disk, 0);
5180 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5181 mdname(mddev));
5182
5183 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5184 md_wakeup_thread(mddev->thread);
5185 md_wakeup_thread(mddev->sync_thread);
5186 sysfs_notify_dirent_safe(mddev->sysfs_state);
5187 return 0;
5188}
5189
5190static void md_clean(struct mddev *mddev)
5191{
5192 mddev->array_sectors = 0;
5193 mddev->external_size = 0;
5194 mddev->dev_sectors = 0;
5195 mddev->raid_disks = 0;
5196 mddev->recovery_cp = 0;
5197 mddev->resync_min = 0;
5198 mddev->resync_max = MaxSector;
5199 mddev->reshape_position = MaxSector;
5200 mddev->external = 0;
5201 mddev->persistent = 0;
5202 mddev->level = LEVEL_NONE;
5203 mddev->clevel[0] = 0;
5204 mddev->flags = 0;
5205 mddev->ro = 0;
5206 mddev->metadata_type[0] = 0;
5207 mddev->chunk_sectors = 0;
5208 mddev->ctime = mddev->utime = 0;
5209 mddev->layout = 0;
5210 mddev->max_disks = 0;
5211 mddev->events = 0;
5212 mddev->can_decrease_events = 0;
5213 mddev->delta_disks = 0;
5214 mddev->reshape_backwards = 0;
5215 mddev->new_level = LEVEL_NONE;
5216 mddev->new_layout = 0;
5217 mddev->new_chunk_sectors = 0;
5218 mddev->curr_resync = 0;
5219 atomic64_set(&mddev->resync_mismatches, 0);
5220 mddev->suspend_lo = mddev->suspend_hi = 0;
5221 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5222 mddev->recovery = 0;
5223 mddev->in_sync = 0;
5224 mddev->changed = 0;
5225 mddev->degraded = 0;
5226 mddev->safemode = 0;
5227 mddev->merge_check_needed = 0;
5228 mddev->bitmap_info.offset = 0;
5229 mddev->bitmap_info.default_offset = 0;
5230 mddev->bitmap_info.default_space = 0;
5231 mddev->bitmap_info.chunksize = 0;
5232 mddev->bitmap_info.daemon_sleep = 0;
5233 mddev->bitmap_info.max_write_behind = 0;
5234}
5235
5236static void __md_stop_writes(struct mddev *mddev)
5237{
5238 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5239 if (mddev->sync_thread) {
5240 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5241 md_reap_sync_thread(mddev);
5242 }
5243
5244 del_timer_sync(&mddev->safemode_timer);
5245
5246 bitmap_flush(mddev);
5247 md_super_wait(mddev);
5248
5249 if (mddev->ro == 0 &&
5250 (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5251
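		/* mark the array clean and write the superblocks out */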
5252 mddev->in_sync = 1;
5253 md_update_sb(mddev, 1);
5254 }
5255}
5256
5257void md_stop_writes(struct mddev *mddev)
5258{
5259 mddev_lock_nointr(mddev);
5260 __md_stop_writes(mddev);
5261 mddev_unlock(mddev);
5262}
5263EXPORT_SYMBOL_GPL(md_stop_writes);
5264
5265static void __md_stop(struct mddev *mddev)
5266{
5267 mddev->ready = 0;
5268 mddev->pers->stop(mddev);
5269 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5270 mddev->to_remove = &md_redundancy_group;
5271 module_put(mddev->pers->owner);
5272 mddev->pers = NULL;
5273 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5274}
5275
5276void md_stop(struct mddev *mddev)
5277{
5278
5279
5280
5281 __md_stop(mddev);
5282 bitmap_destroy(mddev);
5283 if (mddev->bio_set)
5284 bioset_free(mddev->bio_set);
5285}
5286
5287EXPORT_SYMBOL_GPL(md_stop);
5288
5289static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5290{
5291 int err = 0;
5292 int did_freeze = 0;
5293
5294 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5295 did_freeze = 1;
5296 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5297 md_wakeup_thread(mddev->thread);
5298 }
5299 if (mddev->sync_thread) {
5300 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5301
5302
5303 wake_up_process(mddev->sync_thread->tsk);
5304 }
5305 mddev_unlock(mddev);
5306 wait_event(resync_wait, mddev->sync_thread == NULL);
5307 mddev_lock_nointr(mddev);
5308
5309 mutex_lock(&mddev->open_mutex);
5310 if (atomic_read(&mddev->openers) > !!bdev ||
5311 mddev->sync_thread ||
5312 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5313 printk("md: %s still in use.\n",mdname(mddev));
5314 if (did_freeze) {
5315 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5316 md_wakeup_thread(mddev->thread);
5317 }
5318 err = -EBUSY;
5319 goto out;
5320 }
5321 if (mddev->pers) {
5322 __md_stop_writes(mddev);
5323
5324 err = -ENXIO;
5325 if (mddev->ro==1)
5326 goto out;
5327 mddev->ro = 1;
5328 set_disk_ro(mddev->gendisk, 1);
5329 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5330 sysfs_notify_dirent_safe(mddev->sysfs_state);
5331 err = 0;
5332 }
5333out:
5334 mutex_unlock(&mddev->open_mutex);
5335 return err;
5336}
5337
5338
5339
5340
5341
5342static int do_md_stop(struct mddev * mddev, int mode,
5343 struct block_device *bdev)
5344{
5345 struct gendisk *disk = mddev->gendisk;
5346 struct md_rdev *rdev;
5347 int did_freeze = 0;
5348
5349 if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
5350 did_freeze = 1;
5351 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5352 md_wakeup_thread(mddev->thread);
5353 }
5354 if (mddev->sync_thread) {
5355 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5356
5357
5358 wake_up_process(mddev->sync_thread->tsk);
5359 }
5360 mddev_unlock(mddev);
5361 wait_event(resync_wait, mddev->sync_thread == NULL);
5362 mddev_lock_nointr(mddev);
5363
5364 mutex_lock(&mddev->open_mutex);
5365 if (atomic_read(&mddev->openers) > !!bdev ||
5366 mddev->sysfs_active ||
5367 mddev->sync_thread ||
5368 (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
5369 printk("md: %s still in use.\n",mdname(mddev));
5370 mutex_unlock(&mddev->open_mutex);
5371 if (did_freeze) {
5372 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5373 md_wakeup_thread(mddev->thread);
5374 }
5375 return -EBUSY;
5376 }
5377 if (mddev->pers) {
5378 if (mddev->ro)
5379 set_disk_ro(disk, 0);
5380
5381 __md_stop_writes(mddev);
5382 __md_stop(mddev);
5383 mddev->queue->merge_bvec_fn = NULL;
5384 mddev->queue->backing_dev_info.congested_fn = NULL;
5385
5386
5387 sysfs_notify_dirent_safe(mddev->sysfs_state);
5388
5389 rdev_for_each(rdev, mddev)
5390 if (rdev->raid_disk >= 0)
5391 sysfs_unlink_rdev(mddev, rdev);
5392
5393 set_capacity(disk, 0);
5394 mutex_unlock(&mddev->open_mutex);
5395 mddev->changed = 1;
5396 revalidate_disk(disk);
5397
5398 if (mddev->ro)
5399 mddev->ro = 0;
5400 } else
5401 mutex_unlock(&mddev->open_mutex);
5402
5403
5404
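	/*
	 * Free resources if this is the final stop (mode == 0).
	 */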
5405 if (mode == 0) {
5406 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5407
5408 bitmap_destroy(mddev);
5409 if (mddev->bitmap_info.file) {
5410 fput(mddev->bitmap_info.file);
5411 mddev->bitmap_info.file = NULL;
5412 }
5413 mddev->bitmap_info.offset = 0;
5414
5415 export_array(mddev);
5416
5417 md_clean(mddev);
5418 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5419 if (mddev->hold_active == UNTIL_STOP)
5420 mddev->hold_active = 0;
5421 }
5422 blk_integrity_unregister(disk);
5423 md_new_event(mddev);
5424 sysfs_notify_dirent_safe(mddev->sysfs_state);
5425 return 0;
5426}
5427
5428#ifndef MODULE
5429static void autorun_array(struct mddev *mddev)
5430{
5431 struct md_rdev *rdev;
5432 int err;
5433
5434 if (list_empty(&mddev->disks))
5435 return;
5436
5437 printk(KERN_INFO "md: running: ");
5438
5439 rdev_for_each(rdev, mddev) {
5440 char b[BDEVNAME_SIZE];
5441 printk("<%s>", bdevname(rdev->bdev,b));
5442 }
5443 printk("\n");
5444
5445 err = do_md_run(mddev);
5446 if (err) {
5447 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5448 do_md_stop(mddev, 0, NULL);
5449 }
5450}
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463
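/*
 * Try to run arrays based on all the disks collected so far (those on
 * pending_raid_disks): take the first pending device, gather every
 * device whose 0.90 superblock matches it, derive the unit number from
 * the superblock's preferred_minor, then bind the candidates to a
 * freshly allocated mddev and run the array.  Any device left over is
 * exported again.
 */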
5464static void autorun_devices(int part)
5465{
5466 struct md_rdev *rdev0, *rdev, *tmp;
5467 struct mddev *mddev;
5468 char b[BDEVNAME_SIZE];
5469
5470 printk(KERN_INFO "md: autorun ...\n");
5471 while (!list_empty(&pending_raid_disks)) {
5472 int unit;
5473 dev_t dev;
5474 LIST_HEAD(candidates);
5475 rdev0 = list_entry(pending_raid_disks.next,
5476 struct md_rdev, same_set);
5477
5478 printk(KERN_INFO "md: considering %s ...\n",
5479 bdevname(rdev0->bdev,b));
5480 INIT_LIST_HEAD(&candidates);
5481 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5482 if (super_90_load(rdev, rdev0, 0) >= 0) {
5483 printk(KERN_INFO "md: adding %s ...\n",
5484 bdevname(rdev->bdev,b));
5485 list_move(&rdev->same_set, &candidates);
5486 }
5487
5488
5489
5490
5491
5492 if (part) {
5493 dev = MKDEV(mdp_major,
5494 rdev0->preferred_minor << MdpMinorShift);
5495 unit = MINOR(dev) >> MdpMinorShift;
5496 } else {
5497 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5498 unit = MINOR(dev);
5499 }
5500 if (rdev0->preferred_minor != unit) {
5501 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5502 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5503 break;
5504 }
5505
5506 md_probe(dev, NULL, NULL);
5507 mddev = mddev_find(dev);
5508 if (!mddev || !mddev->gendisk) {
5509 if (mddev)
5510 mddev_put(mddev);
5511 printk(KERN_ERR
5512 "md: cannot allocate memory for md drive.\n");
5513 break;
5514 }
5515 if (mddev_lock(mddev))
5516 printk(KERN_WARNING "md: %s locked, cannot run\n",
5517 mdname(mddev));
5518 else if (mddev->raid_disks || mddev->major_version
5519 || !list_empty(&mddev->disks)) {
5520 printk(KERN_WARNING
5521 "md: %s already running, cannot run %s\n",
5522 mdname(mddev), bdevname(rdev0->bdev,b));
5523 mddev_unlock(mddev);
5524 } else {
5525 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5526 mddev->persistent = 1;
5527 rdev_for_each_list(rdev, tmp, &candidates) {
5528 list_del_init(&rdev->same_set);
5529 if (bind_rdev_to_array(rdev, mddev))
5530 export_rdev(rdev);
5531 }
5532 autorun_array(mddev);
5533 mddev_unlock(mddev);
5534 }
5535
5536
5537
5538 rdev_for_each_list(rdev, tmp, &candidates) {
5539 list_del_init(&rdev->same_set);
5540 export_rdev(rdev);
5541 }
5542 mddev_put(mddev);
5543 }
5544 printk(KERN_INFO "md: ... autorun DONE.\n");
5545}
5546#endif
5547
5548static int get_version(void __user * arg)
5549{
5550 mdu_version_t ver;
5551
5552 ver.major = MD_MAJOR_VERSION;
5553 ver.minor = MD_MINOR_VERSION;
5554 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5555
5556 if (copy_to_user(arg, &ver, sizeof(ver)))
5557 return -EFAULT;
5558
5559 return 0;
5560}
5561
5562static int get_array_info(struct mddev * mddev, void __user * arg)
5563{
5564 mdu_array_info_t info;
5565 int nr,working,insync,failed,spare;
5566 struct md_rdev *rdev;
5567
5568 nr = working = insync = failed = spare = 0;
5569 rcu_read_lock();
5570 rdev_for_each_rcu(rdev, mddev) {
5571 nr++;
5572 if (test_bit(Faulty, &rdev->flags))
5573 failed++;
5574 else {
5575 working++;
5576 if (test_bit(In_sync, &rdev->flags))
5577 insync++;
5578 else
5579 spare++;
5580 }
5581 }
5582 rcu_read_unlock();
5583
5584 info.major_version = mddev->major_version;
5585 info.minor_version = mddev->minor_version;
5586 info.patch_version = MD_PATCHLEVEL_VERSION;
5587 info.ctime = mddev->ctime;
5588 info.level = mddev->level;
5589 info.size = mddev->dev_sectors / 2;
5590 if (info.size != mddev->dev_sectors / 2)
5591 info.size = -1;
5592 info.nr_disks = nr;
5593 info.raid_disks = mddev->raid_disks;
5594 info.md_minor = mddev->md_minor;
5595 info.not_persistent= !mddev->persistent;
5596
5597 info.utime = mddev->utime;
5598 info.state = 0;
5599 if (mddev->in_sync)
5600 info.state = (1<<MD_SB_CLEAN);
5601 if (mddev->bitmap && mddev->bitmap_info.offset)
5602 info.state |= (1<<MD_SB_BITMAP_PRESENT);
5603 info.active_disks = insync;
5604 info.working_disks = working;
5605 info.failed_disks = failed;
5606 info.spare_disks = spare;
5607
5608 info.layout = mddev->layout;
5609 info.chunk_size = mddev->chunk_sectors << 9;
5610
5611 if (copy_to_user(arg, &info, sizeof(info)))
5612 return -EFAULT;
5613
5614 return 0;
5615}
5616
5617static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5618{
5619 mdu_bitmap_file_t *file = NULL;
5620 char *ptr, *buf = NULL;
5621 int err = -ENOMEM;
5622
5623 file = kmalloc(sizeof(*file), GFP_NOIO);
5624
5625 if (!file)
5626 goto out;
5627
5628
5629 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5630 file->pathname[0] = '\0';
5631 goto copy_out;
5632 }
5633
5634 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5635 if (!buf)
5636 goto out;
5637
5638 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5639 buf, sizeof(file->pathname));
5640 if (IS_ERR(ptr))
5641 goto out;
5642
5643 strcpy(file->pathname, ptr);
5644
5645copy_out:
5646 err = 0;
5647 if (copy_to_user(arg, file, sizeof(*file)))
5648 err = -EFAULT;
5649out:
5650 kfree(buf);
5651 kfree(file);
5652 return err;
5653}
5654
5655static int get_disk_info(struct mddev * mddev, void __user * arg)
5656{
5657 mdu_disk_info_t info;
5658 struct md_rdev *rdev;
5659
5660 if (copy_from_user(&info, arg, sizeof(info)))
5661 return -EFAULT;
5662
5663 rcu_read_lock();
5664 rdev = find_rdev_nr_rcu(mddev, info.number);
5665 if (rdev) {
5666 info.major = MAJOR(rdev->bdev->bd_dev);
5667 info.minor = MINOR(rdev->bdev->bd_dev);
5668 info.raid_disk = rdev->raid_disk;
5669 info.state = 0;
5670 if (test_bit(Faulty, &rdev->flags))
5671 info.state |= (1<<MD_DISK_FAULTY);
5672 else if (test_bit(In_sync, &rdev->flags)) {
5673 info.state |= (1<<MD_DISK_ACTIVE);
5674 info.state |= (1<<MD_DISK_SYNC);
5675 }
5676 if (test_bit(WriteMostly, &rdev->flags))
5677 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5678 } else {
5679 info.major = info.minor = 0;
5680 info.raid_disk = -1;
5681 info.state = (1<<MD_DISK_REMOVED);
5682 }
5683 rcu_read_unlock();
5684
5685 if (copy_to_user(arg, &info, sizeof(info)))
5686 return -EFAULT;
5687
5688 return 0;
5689}
5690
5691static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5692{
5693 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5694 struct md_rdev *rdev;
5695 dev_t dev = MKDEV(info->major,info->minor);
5696
5697 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5698 return -EOVERFLOW;
5699
5700 if (!mddev->raid_disks) {
5701 int err;
5702
5703 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5704 if (IS_ERR(rdev)) {
5705 printk(KERN_WARNING
5706 "md: md_import_device returned %ld\n",
5707 PTR_ERR(rdev));
5708 return PTR_ERR(rdev);
5709 }
5710 if (!list_empty(&mddev->disks)) {
5711 struct md_rdev *rdev0
5712 = list_entry(mddev->disks.next,
5713 struct md_rdev, same_set);
5714 err = super_types[mddev->major_version]
5715 .load_super(rdev, rdev0, mddev->minor_version);
5716 if (err < 0) {
5717 printk(KERN_WARNING
5718 "md: %s has different UUID to %s\n",
5719 bdevname(rdev->bdev,b),
5720 bdevname(rdev0->bdev,b2));
5721 export_rdev(rdev);
5722 return -EINVAL;
5723 }
5724 }
5725 err = bind_rdev_to_array(rdev, mddev);
5726 if (err)
5727 export_rdev(rdev);
5728 return err;
5729 }
5730
5731
5732
5733
5734
5735
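	/*
	 * With a running array, add_new_disk is used to hot-add spares
	 * or to re-add previously failed members.
	 */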
5736 if (mddev->pers) {
5737 int err;
5738 if (!mddev->pers->hot_add_disk) {
5739 printk(KERN_WARNING
5740 "%s: personality does not support diskops!\n",
5741 mdname(mddev));
5742 return -EINVAL;
5743 }
5744 if (mddev->persistent)
5745 rdev = md_import_device(dev, mddev->major_version,
5746 mddev->minor_version);
5747 else
5748 rdev = md_import_device(dev, -1, -1);
5749 if (IS_ERR(rdev)) {
5750 printk(KERN_WARNING
5751 "md: md_import_device returned %ld\n",
5752 PTR_ERR(rdev));
5753 return PTR_ERR(rdev);
5754 }
5755
5756 if (!mddev->persistent) {
5757 if (info->state & (1<<MD_DISK_SYNC) &&
5758 info->raid_disk < mddev->raid_disks) {
5759 rdev->raid_disk = info->raid_disk;
5760 set_bit(In_sync, &rdev->flags);
5761 clear_bit(Bitmap_sync, &rdev->flags);
5762 } else
5763 rdev->raid_disk = -1;
5764 rdev->saved_raid_disk = rdev->raid_disk;
5765 } else
5766 super_types[mddev->major_version].
5767 validate_super(mddev, rdev);
5768 if ((info->state & (1<<MD_DISK_SYNC)) &&
5769 rdev->raid_disk != info->raid_disk) {
5770
5771
5772
5773 export_rdev(rdev);
5774 return -EINVAL;
5775 }
5776
5777 clear_bit(In_sync, &rdev->flags);
5778 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5779 set_bit(WriteMostly, &rdev->flags);
5780 else
5781 clear_bit(WriteMostly, &rdev->flags);
5782
5783 rdev->raid_disk = -1;
5784 err = bind_rdev_to_array(rdev, mddev);
5785 if (!err && !mddev->pers->hot_remove_disk) {
5786
5787
5788
5789
5790 super_types[mddev->major_version].
5791 validate_super(mddev, rdev);
5792 err = mddev->pers->hot_add_disk(mddev, rdev);
5793 if (err)
5794 unbind_rdev_from_array(rdev);
5795 }
5796 if (err)
5797 export_rdev(rdev);
5798 else
5799 sysfs_notify_dirent_safe(rdev->sysfs_state);
5800
5801 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5802 if (mddev->degraded)
5803 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5804 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5805 if (!err)
5806 md_new_event(mddev);
5807 md_wakeup_thread(mddev->thread);
5808 return err;
5809 }
5810
5811
5812
5813
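	/* otherwise, add_new_disk is only allowed for arrays with
	 * major_version==0 superblocks
	 */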
5814 if (mddev->major_version != 0) {
5815 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5816 mdname(mddev));
5817 return -EINVAL;
5818 }
5819
5820 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5821 int err;
5822 rdev = md_import_device(dev, -1, 0);
5823 if (IS_ERR(rdev)) {
5824 printk(KERN_WARNING
5825 "md: error, md_import_device() returned %ld\n",
5826 PTR_ERR(rdev));
5827 return PTR_ERR(rdev);
5828 }
5829 rdev->desc_nr = info->number;
5830 if (info->raid_disk < mddev->raid_disks)
5831 rdev->raid_disk = info->raid_disk;
5832 else
5833 rdev->raid_disk = -1;
5834
5835 if (rdev->raid_disk < mddev->raid_disks)
5836 if (info->state & (1<<MD_DISK_SYNC))
5837 set_bit(In_sync, &rdev->flags);
5838
5839 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5840 set_bit(WriteMostly, &rdev->flags);
5841
5842 if (!mddev->persistent) {
5843 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5844 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5845 } else
5846 rdev->sb_start = calc_dev_sboffset(rdev);
5847 rdev->sectors = rdev->sb_start;
5848
5849 err = bind_rdev_to_array(rdev, mddev);
5850 if (err) {
5851 export_rdev(rdev);
5852 return err;
5853 }
5854 }
5855
5856 return 0;
5857}
5858
5859static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5860{
5861 char b[BDEVNAME_SIZE];
5862 struct md_rdev *rdev;
5863
5864 rdev = find_rdev(mddev, dev);
5865 if (!rdev)
5866 return -ENXIO;
5867
5868 clear_bit(Blocked, &rdev->flags);
5869 remove_and_add_spares(mddev, rdev);
5870
5871 if (rdev->raid_disk >= 0)
5872 goto busy;
5873
5874 kick_rdev_from_array(rdev);
5875 md_update_sb(mddev, 1);
5876 md_new_event(mddev);
5877
5878 return 0;
5879busy:
5880 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5881 bdevname(rdev->bdev,b), mdname(mddev));
5882 return -EBUSY;
5883}
5884
5885static int hot_add_disk(struct mddev * mddev, dev_t dev)
5886{
5887 char b[BDEVNAME_SIZE];
5888 int err;
5889 struct md_rdev *rdev;
5890
5891 if (!mddev->pers)
5892 return -ENODEV;
5893
5894 if (mddev->major_version != 0) {
5895 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5896 " version-0 superblocks.\n",
5897 mdname(mddev));
5898 return -EINVAL;
5899 }
5900 if (!mddev->pers->hot_add_disk) {
5901 printk(KERN_WARNING
5902 "%s: personality does not support diskops!\n",
5903 mdname(mddev));
5904 return -EINVAL;
5905 }
5906
5907 rdev = md_import_device(dev, -1, 0);
5908 if (IS_ERR(rdev)) {
5909 printk(KERN_WARNING
5910 "md: error, md_import_device() returned %ld\n",
5911 PTR_ERR(rdev));
5912 return -EINVAL;
5913 }
5914
5915 if (mddev->persistent)
5916 rdev->sb_start = calc_dev_sboffset(rdev);
5917 else
5918 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5919
5920 rdev->sectors = rdev->sb_start;
5921
5922 if (test_bit(Faulty, &rdev->flags)) {
5923 printk(KERN_WARNING
5924 "md: can not hot-add faulty %s disk to %s!\n",
5925 bdevname(rdev->bdev,b), mdname(mddev));
5926 err = -EINVAL;
5927 goto abort_export;
5928 }
5929 clear_bit(In_sync, &rdev->flags);
5930 rdev->desc_nr = -1;
5931 rdev->saved_raid_disk = -1;
5932 err = bind_rdev_to_array(rdev, mddev);
5933 if (err)
5934 goto abort_export;
5935
5936
5937
5938
5939
5940
5941 rdev->raid_disk = -1;
5942
5943 md_update_sb(mddev, 1);
5944
5945
5946
5947
5948
5949 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5950 md_wakeup_thread(mddev->thread);
5951 md_new_event(mddev);
5952 return 0;
5953
5954abort_export:
5955 export_rdev(rdev);
5956 return err;
5957}
5958
5959static int set_bitmap_file(struct mddev *mddev, int fd)
5960{
5961 int err = 0;
5962
5963 if (mddev->pers) {
5964 if (!mddev->pers->quiesce)
5965 return -EBUSY;
5966 if (mddev->recovery || mddev->sync_thread)
5967 return -EBUSY;
5968
5969 }
5970
5971
5972 if (fd >= 0) {
5973 struct inode *inode;
5974 if (mddev->bitmap)
5975 return -EEXIST;
5976 mddev->bitmap_info.file = fget(fd);
5977
5978 if (mddev->bitmap_info.file == NULL) {
5979 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5980 mdname(mddev));
5981 return -EBADF;
5982 }
5983
5984 inode = mddev->bitmap_info.file->f_mapping->host;
5985 if (!S_ISREG(inode->i_mode)) {
5986 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
5987 mdname(mddev));
5988 err = -EBADF;
5989 } else if (!(mddev->bitmap_info.file->f_mode & FMODE_WRITE)) {
5990 printk(KERN_ERR "%s: error: bitmap file must be opened for write\n",
5991 mdname(mddev));
5992 err = -EBADF;
5993 } else if (atomic_read(&inode->i_writecount) != 1) {
5994 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5995 mdname(mddev));
5996 err = -EBUSY;
5997 }
5998 if (err) {
5999 fput(mddev->bitmap_info.file);
6000 mddev->bitmap_info.file = NULL;
6001 return err;
6002 }
6003 mddev->bitmap_info.offset = 0;
6004 } else if (mddev->bitmap == NULL)
6005 return -ENOENT;
6006 err = 0;
6007 if (mddev->pers) {
6008 mddev->pers->quiesce(mddev, 1);
6009 if (fd >= 0) {
6010 err = bitmap_create(mddev);
6011 if (!err)
6012 err = bitmap_load(mddev);
6013 }
6014 if (fd < 0 || err) {
6015 bitmap_destroy(mddev);
6016 fd = -1;
6017 }
6018 mddev->pers->quiesce(mddev, 0);
6019 }
6020 if (fd < 0) {
6021 if (mddev->bitmap_info.file)
6022 fput(mddev->bitmap_info.file);
6023 mddev->bitmap_info.file = NULL;
6024 }
6025
6026 return err;
6027}
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
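/*
 * set_array_info is used in two ways.
 * When creating a new array, raid_disks is > 0 and, together with
 * level, size, not_persistent, layout and chunk_size, determines the
 * shape of the array, which always gets a 0.90.0 superblock.
 * When assembling an existing array, raid_disks is 0 and the
 * major/minor/patch version fields say which superblock format to
 * look for on the member devices.
 */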
6042static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6043{
6044
6045 if (info->raid_disks == 0) {
6046
6047 if (info->major_version < 0 ||
6048 info->major_version >= ARRAY_SIZE(super_types) ||
6049 super_types[info->major_version].name == NULL) {
6050
6051 printk(KERN_INFO
6052 "md: superblock version %d not known\n",
6053 info->major_version);
6054 return -EINVAL;
6055 }
6056 mddev->major_version = info->major_version;
6057 mddev->minor_version = info->minor_version;
6058 mddev->patch_version = info->patch_version;
6059 mddev->persistent = !info->not_persistent;
6060
6061
6062
6063 mddev->ctime = get_seconds();
6064 return 0;
6065 }
6066 mddev->major_version = MD_MAJOR_VERSION;
6067 mddev->minor_version = MD_MINOR_VERSION;
6068 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6069 mddev->ctime = get_seconds();
6070
6071 mddev->level = info->level;
6072 mddev->clevel[0] = 0;
6073 mddev->dev_sectors = 2 * (sector_t)info->size;
6074 mddev->raid_disks = info->raid_disks;
6075
6076
6077
6078 if (info->state & (1<<MD_SB_CLEAN))
6079 mddev->recovery_cp = MaxSector;
6080 else
6081 mddev->recovery_cp = 0;
6082 mddev->persistent = ! info->not_persistent;
6083 mddev->external = 0;
6084
6085 mddev->layout = info->layout;
6086 mddev->chunk_sectors = info->chunk_size >> 9;
6087
6088 mddev->max_disks = MD_SB_DISKS;
6089
6090 if (mddev->persistent)
6091 mddev->flags = 0;
6092 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6093
6094 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6095 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6096 mddev->bitmap_info.offset = 0;
6097
6098 mddev->reshape_position = MaxSector;
6099
6100
6101
6102
6103 get_random_bytes(mddev->uuid, 16);
6104
6105 mddev->new_level = mddev->level;
6106 mddev->new_chunk_sectors = mddev->chunk_sectors;
6107 mddev->new_layout = mddev->layout;
6108 mddev->delta_disks = 0;
6109 mddev->reshape_backwards = 0;
6110
6111 return 0;
6112}
6113
6114void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6115{
6116 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6117
6118 if (mddev->external_size)
6119 return;
6120
6121 mddev->array_sectors = array_sectors;
6122}
6123EXPORT_SYMBOL(md_set_array_sectors);
6124
6125static int update_size(struct mddev *mddev, sector_t num_sectors)
6126{
6127 struct md_rdev *rdev;
6128 int rv;
6129 int fit = (num_sectors == 0);
6130
6131 if (mddev->pers->resize == NULL)
6132 return -EINVAL;
6133
6134
6135
6136
6137
6138
6139
6140
6141
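	/* num_sectors is the amount of each device to use.  It can only
	 * be changed while no resync or reconstruction is running and
	 * the array is writable; num_sectors == 0 means "use the largest
	 * size that fits on every device".
	 */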
6142 if (mddev->sync_thread)
6143 return -EBUSY;
6144 if (mddev->ro)
6145 return -EROFS;
6146
6147 rdev_for_each(rdev, mddev) {
6148 sector_t avail = rdev->sectors;
6149
6150 if (fit && (num_sectors == 0 || num_sectors > avail))
6151 num_sectors = avail;
6152 if (avail < num_sectors)
6153 return -ENOSPC;
6154 }
6155 rv = mddev->pers->resize(mddev, num_sectors);
6156 if (!rv)
6157 revalidate_disk(mddev->gendisk);
6158 return rv;
6159}
6160
6161static int update_raid_disks(struct mddev *mddev, int raid_disks)
6162{
6163 int rv;
6164 struct md_rdev *rdev;
6165
6166 if (mddev->pers->check_reshape == NULL)
6167 return -EINVAL;
6168 if (mddev->ro)
6169 return -EROFS;
6170 if (raid_disks <= 0 ||
6171 (mddev->max_disks && raid_disks >= mddev->max_disks))
6172 return -EINVAL;
6173 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6174 return -EBUSY;
6175
6176 rdev_for_each(rdev, mddev) {
6177 if (mddev->raid_disks < raid_disks &&
6178 rdev->data_offset < rdev->new_data_offset)
6179 return -EINVAL;
6180 if (mddev->raid_disks > raid_disks &&
6181 rdev->data_offset > rdev->new_data_offset)
6182 return -EINVAL;
6183 }
6184
6185 mddev->delta_disks = raid_disks - mddev->raid_disks;
6186 if (mddev->delta_disks < 0)
6187 mddev->reshape_backwards = 1;
6188 else if (mddev->delta_disks > 0)
6189 mddev->reshape_backwards = 0;
6190
6191 rv = mddev->pers->check_reshape(mddev);
6192 if (rv < 0) {
6193 mddev->delta_disks = 0;
6194 mddev->reshape_backwards = 0;
6195 }
6196 return rv;
6197}
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
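/*
 * update_array_info changes the configuration of an on-line array.
 * The version, ctime, level, persistence and chunk_size must match the
 * current values; of size, raid_disks, layout and bitmap presence, at
 * most one may be changed per call.
 */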
6208static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6209{
6210 int rv = 0;
6211 int cnt = 0;
6212 int state = 0;
6213
6214
6215 if (mddev->bitmap && mddev->bitmap_info.offset)
6216 state |= (1 << MD_SB_BITMAP_PRESENT);
6217
6218 if (mddev->major_version != info->major_version ||
6219 mddev->minor_version != info->minor_version ||
6220
6221 mddev->ctime != info->ctime ||
6222 mddev->level != info->level ||
6223
6224 !mddev->persistent != info->not_persistent||
6225 mddev->chunk_sectors != info->chunk_size >> 9 ||
6226
6227 ((state^info->state) & 0xfffffe00)
6228 )
6229 return -EINVAL;
6230
6231 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6232 cnt++;
6233 if (mddev->raid_disks != info->raid_disks)
6234 cnt++;
6235 if (mddev->layout != info->layout)
6236 cnt++;
6237 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6238 cnt++;
6239 if (cnt == 0)
6240 return 0;
6241 if (cnt > 1)
6242 return -EINVAL;
6243
6244 if (mddev->layout != info->layout) {
6245
6246
6247
6248
6249 if (mddev->pers->check_reshape == NULL)
6250 return -EINVAL;
6251 else {
6252 mddev->new_layout = info->layout;
6253 rv = mddev->pers->check_reshape(mddev);
6254 if (rv)
6255 mddev->new_layout = mddev->layout;
6256 return rv;
6257 }
6258 }
6259 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6260 rv = update_size(mddev, (sector_t)info->size * 2);
6261
6262 if (mddev->raid_disks != info->raid_disks)
6263 rv = update_raid_disks(mddev, info->raid_disks);
6264
6265 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6266 if (mddev->pers->quiesce == NULL)
6267 return -EINVAL;
6268 if (mddev->recovery || mddev->sync_thread)
6269 return -EBUSY;
6270 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6271
6272 if (mddev->bitmap)
6273 return -EEXIST;
6274 if (mddev->bitmap_info.default_offset == 0)
6275 return -EINVAL;
6276 mddev->bitmap_info.offset =
6277 mddev->bitmap_info.default_offset;
6278 mddev->bitmap_info.space =
6279 mddev->bitmap_info.default_space;
6280 mddev->pers->quiesce(mddev, 1);
6281 rv = bitmap_create(mddev);
6282 if (!rv)
6283 rv = bitmap_load(mddev);
6284 if (rv)
6285 bitmap_destroy(mddev);
6286 mddev->pers->quiesce(mddev, 0);
6287 } else {
6288
6289 if (!mddev->bitmap)
6290 return -ENOENT;
6291 if (mddev->bitmap->storage.file)
6292 return -EINVAL;
6293 mddev->pers->quiesce(mddev, 1);
6294 bitmap_destroy(mddev);
6295 mddev->pers->quiesce(mddev, 0);
6296 mddev->bitmap_info.offset = 0;
6297 }
6298 }
6299 md_update_sb(mddev, 1);
6300 return rv;
6301}
6302
6303static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6304{
6305 struct md_rdev *rdev;
6306 int err = 0;
6307
6308 if (mddev->pers == NULL)
6309 return -ENODEV;
6310
6311 rcu_read_lock();
6312 rdev = find_rdev_rcu(mddev, dev);
6313 if (!rdev)
6314 err = -ENODEV;
6315 else {
6316 md_error(mddev, rdev);
6317 if (!test_bit(Faulty, &rdev->flags))
6318 err = -EBUSY;
6319 }
6320 rcu_read_unlock();
6321 return err;
6322}
6323
6324
6325
6326
6327
6328
6329
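/*
 * There is no meaningful CHS geometry for an md array, so pretend to
 * have 2 heads and 4 sectors per track with a large cylinder count,
 * mirroring what the partition code does.
 */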
6330static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6331{
6332 struct mddev *mddev = bdev->bd_disk->private_data;
6333
6334 geo->heads = 2;
6335 geo->sectors = 4;
6336 geo->cylinders = mddev->array_sectors / 8;
6337 return 0;
6338}
6339
6340static inline bool md_ioctl_valid(unsigned int cmd)
6341{
6342 switch (cmd) {
6343 case ADD_NEW_DISK:
6344 case BLKROSET:
6345 case GET_ARRAY_INFO:
6346 case GET_BITMAP_FILE:
6347 case GET_DISK_INFO:
6348 case HOT_ADD_DISK:
6349 case HOT_REMOVE_DISK:
6350 case PRINT_RAID_DEBUG:
6351 case RAID_AUTORUN:
6352 case RAID_VERSION:
6353 case RESTART_ARRAY_RW:
6354 case RUN_ARRAY:
6355 case SET_ARRAY_INFO:
6356 case SET_BITMAP_FILE:
6357 case SET_DISK_FAULTY:
6358 case STOP_ARRAY:
6359 case STOP_ARRAY_RO:
6360 return true;
6361 default:
6362 return false;
6363 }
6364}
6365
6366static int md_ioctl(struct block_device *bdev, fmode_t mode,
6367 unsigned int cmd, unsigned long arg)
6368{
6369 int err = 0;
6370 void __user *argp = (void __user *)arg;
6371 struct mddev *mddev = NULL;
6372 int ro;
6373
6374 if (!md_ioctl_valid(cmd))
6375 return -ENOTTY;
6376
6377 switch (cmd) {
6378 case RAID_VERSION:
6379 case GET_ARRAY_INFO:
6380 case GET_DISK_INFO:
6381 break;
6382 default:
6383 if (!capable(CAP_SYS_ADMIN))
6384 return -EACCES;
6385 }
6386
6387
6388
6389
6390
6391 switch (cmd) {
6392 case RAID_VERSION:
6393 err = get_version(argp);
6394 goto done;
6395
6396 case PRINT_RAID_DEBUG:
6397 err = 0;
6398 md_print_devices();
6399 goto done;
6400
6401#ifndef MODULE
6402 case RAID_AUTORUN:
6403 err = 0;
6404 autostart_arrays(arg);
6405 goto done;
6406#endif
6407 default:;
6408 }
6409
6410
6411
6412
6413
6414 mddev = bdev->bd_disk->private_data;
6415
6416 if (!mddev) {
6417 BUG();
6418 goto abort;
6419 }
6420
6421
6422 switch (cmd) {
6423 case GET_ARRAY_INFO:
6424 if (!mddev->raid_disks && !mddev->external)
6425 err = -ENODEV;
6426 else
6427 err = get_array_info(mddev, argp);
6428 goto abort;
6429
6430 case GET_DISK_INFO:
6431 if (!mddev->raid_disks && !mddev->external)
6432 err = -ENODEV;
6433 else
6434 err = get_disk_info(mddev, argp);
6435 goto abort;
6436
6437 case SET_DISK_FAULTY:
6438 err = set_disk_faulty(mddev, new_decode_dev(arg));
6439 goto abort;
6440 }
6441
6442 if (cmd == ADD_NEW_DISK)
6443
6444 flush_workqueue(md_misc_wq);
6445
6446 if (cmd == HOT_REMOVE_DISK)
6447
6448 wait_event_interruptible_timeout(mddev->sb_wait,
6449 !test_bit(MD_RECOVERY_NEEDED,
6450 &mddev->flags),
6451 msecs_to_jiffies(5000));
6452 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6453
6454
6455
6456 mutex_lock(&mddev->open_mutex);
6457 if (atomic_read(&mddev->openers) > 1) {
6458 mutex_unlock(&mddev->open_mutex);
6459 err = -EBUSY;
6460 goto abort;
6461 }
6462 set_bit(MD_STILL_CLOSED, &mddev->flags);
6463 mutex_unlock(&mddev->open_mutex);
6464 sync_blockdev(bdev);
6465 }
6466 err = mddev_lock(mddev);
6467 if (err) {
6468 printk(KERN_INFO
6469 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6470 err, cmd);
6471 goto abort;
6472 }
6473
6474 if (cmd == SET_ARRAY_INFO) {
6475 mdu_array_info_t info;
6476 if (!arg)
6477 memset(&info, 0, sizeof(info));
6478 else if (copy_from_user(&info, argp, sizeof(info))) {
6479 err = -EFAULT;
6480 goto abort_unlock;
6481 }
6482 if (mddev->pers) {
6483 err = update_array_info(mddev, &info);
6484 if (err) {
6485 printk(KERN_WARNING "md: couldn't update"
6486 " array info. %d\n", err);
6487 goto abort_unlock;
6488 }
6489 goto done_unlock;
6490 }
6491 if (!list_empty(&mddev->disks)) {
6492 printk(KERN_WARNING
6493 "md: array %s already has disks!\n",
6494 mdname(mddev));
6495 err = -EBUSY;
6496 goto abort_unlock;
6497 }
6498 if (mddev->raid_disks) {
6499 printk(KERN_WARNING
6500 "md: array %s already initialised!\n",
6501 mdname(mddev));
6502 err = -EBUSY;
6503 goto abort_unlock;
6504 }
6505 err = set_array_info(mddev, &info);
6506 if (err) {
6507 printk(KERN_WARNING "md: couldn't set"
6508 " array info. %d\n", err);
6509 goto abort_unlock;
6510 }
6511 goto done_unlock;
6512 }
6513
6514
6515
6516
6517
6518
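	/*
	 * Commands querying/configuring an existing array.  If the array
	 * is not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
	 * RUN_ARRAY, SET_BITMAP_FILE and GET_BITMAP_FILE are allowed.
	 */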
6519 if ((!mddev->raid_disks && !mddev->external)
6520 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6521 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6522 && cmd != GET_BITMAP_FILE) {
6523 err = -ENODEV;
6524 goto abort_unlock;
6525 }
6526
6527
6528
6529
6530 switch (cmd) {
6531 case GET_BITMAP_FILE:
6532 err = get_bitmap_file(mddev, argp);
6533 goto done_unlock;
6534
6535 case RESTART_ARRAY_RW:
6536 err = restart_array(mddev);
6537 goto done_unlock;
6538
6539 case STOP_ARRAY:
6540 err = do_md_stop(mddev, 0, bdev);
6541 goto done_unlock;
6542
6543 case STOP_ARRAY_RO:
6544 err = md_set_readonly(mddev, bdev);
6545 goto done_unlock;
6546
6547 case HOT_REMOVE_DISK:
6548 err = hot_remove_disk(mddev, new_decode_dev(arg));
6549 goto done_unlock;
6550
6551 case ADD_NEW_DISK:
6552
6553
6554
6555
6556 if (mddev->pers) {
6557 mdu_disk_info_t info;
6558 if (copy_from_user(&info, argp, sizeof(info)))
6559 err = -EFAULT;
6560 else if (!(info.state & (1<<MD_DISK_SYNC)))
6561
6562 break;
6563 else
6564 err = add_new_disk(mddev, &info);
6565 goto done_unlock;
6566 }
6567 break;
6568
6569 case BLKROSET:
6570 if (get_user(ro, (int __user *)(arg))) {
6571 err = -EFAULT;
6572 goto done_unlock;
6573 }
6574 err = -EINVAL;
6575
6576
6577
6578
6579 if (ro)
6580 goto done_unlock;
6581
6582
6583 if (mddev->ro != 1)
6584 goto done_unlock;
6585
6586
6587
6588
6589 if (mddev->pers) {
6590 err = restart_array(mddev);
6591 if (err == 0) {
6592 mddev->ro = 2;
6593 set_disk_ro(mddev->gendisk, 0);
6594 }
6595 }
6596 goto done_unlock;
6597 }
6598
6599
6600
6601
6602
6603
6604
6605
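	/*
	 * The remaining ioctls modify array state.  A read-auto array
	 * (ro == 2) is switched to read-write first; a genuinely
	 * read-only array gets -EROFS.
	 */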
6606 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6607 if (mddev->ro == 2) {
6608 mddev->ro = 0;
6609 sysfs_notify_dirent_safe(mddev->sysfs_state);
6610 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6611
6612
6613
6614
6615 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6616 mddev_unlock(mddev);
6617 wait_event(mddev->sb_wait,
6618 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6619 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6620 mddev_lock_nointr(mddev);
6621 }
6622 } else {
6623 err = -EROFS;
6624 goto abort_unlock;
6625 }
6626 }
6627
6628 switch (cmd) {
6629 case ADD_NEW_DISK:
6630 {
6631 mdu_disk_info_t info;
6632 if (copy_from_user(&info, argp, sizeof(info)))
6633 err = -EFAULT;
6634 else
6635 err = add_new_disk(mddev, &info);
6636 goto done_unlock;
6637 }
6638
6639 case HOT_ADD_DISK:
6640 err = hot_add_disk(mddev, new_decode_dev(arg));
6641 goto done_unlock;
6642
6643 case RUN_ARRAY:
6644 err = do_md_run(mddev);
6645 goto done_unlock;
6646
6647 case SET_BITMAP_FILE:
6648 err = set_bitmap_file(mddev, (int)arg);
6649 goto done_unlock;
6650
6651 default:
6652 err = -EINVAL;
6653 goto abort_unlock;
6654 }
6655
6656done_unlock:
6657abort_unlock:
6658 if (mddev->hold_active == UNTIL_IOCTL &&
6659 err != -EINVAL)
6660 mddev->hold_active = 0;
6661 mddev_unlock(mddev);
6662
6663 return err;
6664done:
6665 if (err)
6666 MD_BUG();
6667abort:
6668 return err;
6669}
6670#ifdef CONFIG_COMPAT
6671static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6672 unsigned int cmd, unsigned long arg)
6673{
6674 switch (cmd) {
6675 case HOT_REMOVE_DISK:
6676 case HOT_ADD_DISK:
6677 case SET_DISK_FAULTY:
6678 case SET_BITMAP_FILE:
6679
6680 break;
6681 default:
6682 arg = (unsigned long)compat_ptr(arg);
6683 break;
6684 }
6685
6686 return md_ioctl(bdev, mode, cmd, arg);
6687}
6688#endif
6689
6690static int md_open(struct block_device *bdev, fmode_t mode)
6691{
6692
6693
6694
6695
6696 struct mddev *mddev = mddev_find(bdev->bd_dev);
6697 int err;
6698
6699 if (!mddev)
6700 return -ENODEV;
6701
6702 if (mddev->gendisk != bdev->bd_disk) {
6703
6704
6705
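		/* We are racing with mddev_put() tearing this device down;
		 * wait for the delayed delete to finish and ask the caller
		 * to retry the open.
		 */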
6706 mddev_put(mddev);
6707
6708 flush_workqueue(md_misc_wq);
6709
6710 return -ERESTARTSYS;
6711 }
6712 BUG_ON(mddev != bdev->bd_disk->private_data);
6713
6714 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6715 goto out;
6716
6717 err = 0;
6718 atomic_inc(&mddev->openers);
6719 clear_bit(MD_STILL_CLOSED, &mddev->flags);
6720 mutex_unlock(&mddev->open_mutex);
6721
6722 check_disk_change(bdev);
6723 out:
6724 return err;
6725}
6726
6727static void md_release(struct gendisk *disk, fmode_t mode)
6728{
6729 struct mddev *mddev = disk->private_data;
6730
6731 BUG_ON(!mddev);
6732 atomic_dec(&mddev->openers);
6733 mddev_put(mddev);
6734}
6735
6736static int md_media_changed(struct gendisk *disk)
6737{
6738 struct mddev *mddev = disk->private_data;
6739
6740 return mddev->changed;
6741}
6742
6743static int md_revalidate(struct gendisk *disk)
6744{
6745 struct mddev *mddev = disk->private_data;
6746
6747 mddev->changed = 0;
6748 return 0;
6749}
6750static const struct block_device_operations md_fops =
6751{
6752 .owner = THIS_MODULE,
6753 .open = md_open,
6754 .release = md_release,
6755 .ioctl = md_ioctl,
6756#ifdef CONFIG_COMPAT
6757 .compat_ioctl = md_compat_ioctl,
6758#endif
6759 .getgeo = md_getgeo,
6760 .media_changed = md_media_changed,
6761 .revalidate_disk= md_revalidate,
6762};
6763
6764static int md_thread(void * arg)
6765{
6766 struct md_thread *thread = arg;
6767
6768
6769
6770
6771
6772
6773
6774
6775
6776
6777
6778
6779
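	/*
	 * Per-array worker loop: sleep until THREAD_WAKEUP is set (or the
	 * timeout expires), then call the thread's run() handler.
	 * SIGKILL is allowed so the thread can be interrupted, but any
	 * signal received is simply flushed.
	 */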
6780 allow_signal(SIGKILL);
6781 while (!kthread_should_stop()) {
6782
6783
6784
6785
6786
6787
6788 if (signal_pending(current))
6789 flush_signals(current);
6790
6791 wait_event_interruptible_timeout
6792 (thread->wqueue,
6793 test_bit(THREAD_WAKEUP, &thread->flags)
6794 || kthread_should_stop(),
6795 thread->timeout);
6796
6797 clear_bit(THREAD_WAKEUP, &thread->flags);
6798 if (!kthread_should_stop())
6799 thread->run(thread);
6800 }
6801
6802 return 0;
6803}
6804
6805void md_wakeup_thread(struct md_thread *thread)
6806{
6807 if (thread) {
6808 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6809 set_bit(THREAD_WAKEUP, &thread->flags);
6810 wake_up(&thread->wqueue);
6811 }
6812}
6813
6814struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6815 struct mddev *mddev, const char *name)
6816{
6817 struct md_thread *thread;
6818
6819 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6820 if (!thread)
6821 return NULL;
6822
6823 init_waitqueue_head(&thread->wqueue);
6824
6825 thread->run = run;
6826 thread->mddev = mddev;
6827 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6828 thread->tsk = kthread_run(md_thread, thread,
6829 "%s_%s",
6830 mdname(thread->mddev),
6831 name);
6832 if (IS_ERR(thread->tsk)) {
6833 kfree(thread);
6834 return NULL;
6835 }
6836 return thread;
6837}
6838
6839void md_unregister_thread(struct md_thread **threadp)
6840{
6841 struct md_thread *thread = *threadp;
6842 if (!thread)
6843 return;
6844 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6845
6846
6847
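	/*
	 * Detach the thread from its owner under pers_lock before stopping
	 * it, so nobody can be left holding a pointer to a thread structure
	 * that is about to be freed.
	 */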
6848 spin_lock(&pers_lock);
6849 *threadp = NULL;
6850 spin_unlock(&pers_lock);
6851
6852 kthread_stop(thread->tsk);
6853 kfree(thread);
6854}
6855
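/*
 * Record an error on a member device: pass it to the personality's
 * error_handler, flag the recovery state machine (INTR + NEEDED, plus
 * RECOVER if the array is now degraded) and wake the md thread so the
 * failure is acted upon and user space gets notified via sysfs.
 */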
6856void md_error(struct mddev *mddev, struct md_rdev *rdev)
6857{
6858 if (!mddev) {
6859 MD_BUG();
6860 return;
6861 }
6862
6863 if (!rdev || test_bit(Faulty, &rdev->flags))
6864 return;
6865
6866 if (!mddev->pers || !mddev->pers->error_handler)
6867 return;
6868	mddev->pers->error_handler(mddev, rdev);
6869 if (mddev->degraded)
6870 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6871 sysfs_notify_dirent_safe(rdev->sysfs_state);
6872 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6873 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6874 md_wakeup_thread(mddev->thread);
6875 if (mddev->event_work.func)
6876 queue_work(md_misc_wq, &mddev->event_work);
6877 md_new_event_inintr(mddev);
6878}
6879
6880
6881
6882static void status_unused(struct seq_file *seq)
6883{
6884 int i = 0;
6885 struct md_rdev *rdev;
6886
6887 seq_printf(seq, "unused devices: ");
6888
6889 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6890 char b[BDEVNAME_SIZE];
6891 i++;
6892 seq_printf(seq, "%s ",
6893 bdevname(rdev->bdev,b));
6894 }
6895 if (!i)
6896 seq_printf(seq, "<none>");
6897
6898 seq_printf(seq, "\n");
6899}
6900
6901
6902static void status_resync(struct seq_file *seq, struct mddev * mddev)
6903{
6904 sector_t max_sectors, resync, res;
6905 unsigned long dt, db;
6906 sector_t rt;
6907 int scale;
6908 unsigned int per_milli;
6909
6910 if (mddev->curr_resync <= 3)
6911 resync = 0;
6912 else
6913 resync = mddev->curr_resync
6914 - atomic_read(&mddev->recovery_active);
6915
6916 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6917 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6918 max_sectors = mddev->resync_max_sectors;
6919 else
6920 max_sectors = mddev->dev_sectors;
6921
6922
6923
6924
6925 if (!max_sectors) {
6926 MD_BUG();
6927 return;
6928 }
6929
6930
6931
6932
6933
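	/*
	 * Pick a 'scale' so that (resync >> scale) * 1000 cannot overflow
	 * and (max_sectors >> scale) fits in the 32-bit divisor that
	 * sector_div() requires; hence the minimum value of 10.
	 */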
6934 scale = 10;
6935 if (sizeof(sector_t) > sizeof(unsigned long)) {
6936 while ( max_sectors/2 > (1ULL<<(scale+32)))
6937 scale++;
6938 }
6939 res = (resync>>scale)*1000;
6940 sector_div(res, (u32)((max_sectors>>scale)+1));
6941
6942 per_milli = res;
6943 {
6944 int i, x = per_milli/50, y = 20-x;
6945 seq_printf(seq, "[");
6946 for (i = 0; i < x; i++)
6947 seq_printf(seq, "=");
6948 seq_printf(seq, ">");
6949 for (i = 0; i < y; i++)
6950 seq_printf(seq, ".");
6951 seq_printf(seq, "] ");
6952 }
6953 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6954 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6955 "reshape" :
6956 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6957 "check" :
6958 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6959 "resync" : "recovery"))),
6960 per_milli/10, per_milli % 10,
6961 (unsigned long long) resync/2,
6962 (unsigned long long) max_sectors/2);
6963
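	/*
	 * Estimate the time remaining:
	 *   dt - seconds elapsed since the last rate mark
	 *   db - sectors completed since that mark, excluding I/O still in
	 *        flight (recovery_active)
	 *   rt - remaining sectors scaled by dt/db, i.e. seconds to go
	 * The /32 and >>5 keep the intermediate products from overflowing.
	 */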
6978 dt = ((jiffies - mddev->resync_mark) / HZ);
6979 if (!dt) dt++;
6980 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6981 - mddev->resync_mark_cnt;
6982
6983 rt = max_sectors - resync;
6984 sector_div(rt, db/32+1);
6985 rt *= dt;
6986 rt >>= 5;
6987
6988 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6989 ((unsigned long)rt % 60)/6);
6990
6991 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6992}
6993
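/*
 * seq_file iterator for /proc/mdstat.  Position 0 is a synthetic header
 * ((void *)1, the "Personalities" line), positions 1..n walk all_mddevs
 * with a reference held on the current array, and the final position
 * ((void *)2) prints the list of unused devices.
 */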
6994static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6995{
6996 struct list_head *tmp;
6997 loff_t l = *pos;
6998 struct mddev *mddev;
6999
7000 if (l >= 0x10000)
7001 return NULL;
7002 if (!l--)
7003
7004 return (void*)1;
7005
7006 spin_lock(&all_mddevs_lock);
7007 list_for_each(tmp,&all_mddevs)
7008 if (!l--) {
7009 mddev = list_entry(tmp, struct mddev, all_mddevs);
7010 mddev_get(mddev);
7011 spin_unlock(&all_mddevs_lock);
7012 return mddev;
7013 }
7014 spin_unlock(&all_mddevs_lock);
7015 if (!l--)
7016 return (void*)2;
7017 return NULL;
7018}
7019
7020static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
7021{
7022 struct list_head *tmp;
7023 struct mddev *next_mddev, *mddev = v;
7024
7025 ++*pos;
7026 if (v == (void*)2)
7027 return NULL;
7028
7029 spin_lock(&all_mddevs_lock);
7030 if (v == (void*)1)
7031 tmp = all_mddevs.next;
7032 else
7033 tmp = mddev->all_mddevs.next;
7034 if (tmp != &all_mddevs)
7035 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7036 else {
7037 next_mddev = (void*)2;
7038 *pos = 0x10000;
7039 }
7040 spin_unlock(&all_mddevs_lock);
7041
7042 if (v != (void*)1)
7043 mddev_put(mddev);
7044 return next_mddev;
7045
7046}
7047
7048static void md_seq_stop(struct seq_file *seq, void *v)
7049{
7050 struct mddev *mddev = v;
7051
7052 if (mddev && v != (void*)1 && v != (void*)2)
7053 mddev_put(mddev);
7054}
7055
7056static int md_seq_show(struct seq_file *seq, void *v)
7057{
7058 struct mddev *mddev = v;
7059 sector_t sectors;
7060 struct md_rdev *rdev;
7061
7062 if (v == (void*)1) {
7063 struct md_personality *pers;
7064 seq_printf(seq, "Personalities : ");
7065 spin_lock(&pers_lock);
7066 list_for_each_entry(pers, &pers_list, list)
7067 seq_printf(seq, "[%s] ", pers->name);
7068
7069 spin_unlock(&pers_lock);
7070 seq_printf(seq, "\n");
7071 seq->poll_event = atomic_read(&md_event_count);
7072 return 0;
7073 }
7074 if (v == (void*)2) {
7075 status_unused(seq);
7076 return 0;
7077 }
7078
7079 if (mddev_lock(mddev) < 0)
7080 return -EINTR;
7081
7082 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7083 seq_printf(seq, "%s : %sactive", mdname(mddev),
7084 mddev->pers ? "" : "in");
7085 if (mddev->pers) {
7086 if (mddev->ro==1)
7087 seq_printf(seq, " (read-only)");
7088 if (mddev->ro==2)
7089 seq_printf(seq, " (auto-read-only)");
7090 seq_printf(seq, " %s", mddev->pers->name);
7091 }
7092
7093 sectors = 0;
7094 rdev_for_each(rdev, mddev) {
7095 char b[BDEVNAME_SIZE];
7096 seq_printf(seq, " %s[%d]",
7097 bdevname(rdev->bdev,b), rdev->desc_nr);
7098 if (test_bit(WriteMostly, &rdev->flags))
7099 seq_printf(seq, "(W)");
7100 if (test_bit(Faulty, &rdev->flags)) {
7101 seq_printf(seq, "(F)");
7102 continue;
7103 }
7104 if (rdev->raid_disk < 0)
7105 seq_printf(seq, "(S)");
7106 if (test_bit(Replacement, &rdev->flags))
7107 seq_printf(seq, "(R)");
7108 sectors += rdev->sectors;
7109 }
7110
7111 if (!list_empty(&mddev->disks)) {
7112 if (mddev->pers)
7113 seq_printf(seq, "\n %llu blocks",
7114 (unsigned long long)
7115 mddev->array_sectors / 2);
7116 else
7117 seq_printf(seq, "\n %llu blocks",
7118 (unsigned long long)sectors / 2);
7119 }
7120 if (mddev->persistent) {
7121 if (mddev->major_version != 0 ||
7122 mddev->minor_version != 90) {
7123 seq_printf(seq," super %d.%d",
7124 mddev->major_version,
7125 mddev->minor_version);
7126 }
7127 } else if (mddev->external)
7128 seq_printf(seq, " super external:%s",
7129 mddev->metadata_type);
7130 else
7131 seq_printf(seq, " super non-persistent");
7132
7133 if (mddev->pers) {
7134 mddev->pers->status(seq, mddev);
7135 seq_printf(seq, "\n ");
7136 if (mddev->pers->sync_request) {
7137 if (mddev->curr_resync > 2) {
7138 status_resync(seq, mddev);
7139 seq_printf(seq, "\n ");
7140 } else if (mddev->curr_resync >= 1)
7141 seq_printf(seq, "\tresync=DELAYED\n ");
7142 else if (mddev->recovery_cp < MaxSector)
7143 seq_printf(seq, "\tresync=PENDING\n ");
7144 }
7145 } else
7146 seq_printf(seq, "\n ");
7147
7148 bitmap_status(seq, mddev->bitmap);
7149
7150 seq_printf(seq, "\n");
7151 }
7152 mddev_unlock(mddev);
7153
7154 return 0;
7155}
7156
7157static const struct seq_operations md_seq_ops = {
7158 .start = md_seq_start,
7159 .next = md_seq_next,
7160 .stop = md_seq_stop,
7161 .show = md_seq_show,
7162};
7163
7164static int md_seq_open(struct inode *inode, struct file *file)
7165{
7166 struct seq_file *seq;
7167 int error;
7168
7169 error = seq_open(file, &md_seq_ops);
7170 if (error)
7171 return error;
7172
7173 seq = file->private_data;
7174 seq->poll_event = atomic_read(&md_event_count);
7175 return error;
7176}
7177
7178static int md_unloading;
7179static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7180{
7181 struct seq_file *seq = filp->private_data;
7182 int mask;
7183
7184 if (md_unloading)
7185		return POLLIN|POLLRDNORM|POLLERR|POLLPRI;
7186 poll_wait(filp, &md_event_waiters, wait);
7187
7188
7189 mask = POLLIN | POLLRDNORM;
7190
7191 if (seq->poll_event != atomic_read(&md_event_count))
7192 mask |= POLLERR | POLLPRI;
7193 return mask;
7194}
7195
7196static const struct file_operations md_seq_fops = {
7197 .owner = THIS_MODULE,
7198 .open = md_seq_open,
7199 .read = seq_read,
7200 .llseek = seq_lseek,
7201 .release = seq_release_private,
7202 .poll = mdstat_poll,
7203};
7204
7205int register_md_personality(struct md_personality *p)
7206{
7207 spin_lock(&pers_lock);
7208 list_add_tail(&p->list, &pers_list);
7209 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7210 spin_unlock(&pers_lock);
7211 return 0;
7212}
7213
7214int unregister_md_personality(struct md_personality *p)
7215{
7216 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7217 spin_lock(&pers_lock);
7218 list_del_init(&p->list);
7219 spin_unlock(&pers_lock);
7220 return 0;
7221}
7222
7223static int is_mddev_idle(struct mddev *mddev, int init)
7224{
7225 struct md_rdev * rdev;
7226 int idle;
7227 int curr_events;
7228
7229 idle = 1;
7230 rcu_read_lock();
7231 rdev_for_each_rcu(rdev, mddev) {
7232 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7233 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7234 (int)part_stat_read(&disk->part0, sectors[1]) -
7235 atomic_read(&disk->sync_io);
7236
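		/*
		 * curr_events counts the sectors transferred on the
		 * underlying disk minus those md issued itself for resync
		 * (disk->sync_io).  If it has advanced by more than a small
		 * margin (64 sectors) since the last check - or if this is
		 * the initial call establishing the baseline - record the new
		 * value and report the array as not idle.
		 */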
7258 if (init || curr_events - rdev->last_events > 64) {
7259 rdev->last_events = curr_events;
7260 idle = 0;
7261 }
7262 }
7263 rcu_read_unlock();
7264 return idle;
7265}
7266
7267void md_done_sync(struct mddev *mddev, int blocks, int ok)
7268{
7269
7270 atomic_sub(blocks, &mddev->recovery_active);
7271 wake_up(&mddev->recovery_wait);
7272 if (!ok) {
7273 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7274 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7275 md_wakeup_thread(mddev->thread);
7276
7277 }
7278}
7279
7280
7281
7282
7283
7284
7285
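/*
 * md_write_start(mddev, bi)
 * Called before a write request is submitted.  If the array is currently
 * marked clean (in_sync) it is switched to dirty and a superblock update
 * is scheduled; the caller blocks until that update is no longer pending,
 * so the metadata says "active" before any new data reaches the devices.
 */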
7286void md_write_start(struct mddev *mddev, struct bio *bi)
7287{
7288 int did_change = 0;
7289 if (bio_data_dir(bi) != WRITE)
7290 return;
7291
7292 BUG_ON(mddev->ro == 1);
7293 if (mddev->ro == 2) {
7294
7295 mddev->ro = 0;
7296 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7297 md_wakeup_thread(mddev->thread);
7298 md_wakeup_thread(mddev->sync_thread);
7299 did_change = 1;
7300 }
7301 atomic_inc(&mddev->writes_pending);
7302 if (mddev->safemode == 1)
7303 mddev->safemode = 0;
7304 if (mddev->in_sync) {
7305 spin_lock_irq(&mddev->write_lock);
7306 if (mddev->in_sync) {
7307 mddev->in_sync = 0;
7308 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7309 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7310 md_wakeup_thread(mddev->thread);
7311 did_change = 1;
7312 }
7313 spin_unlock_irq(&mddev->write_lock);
7314 }
7315 if (did_change)
7316 sysfs_notify_dirent_safe(mddev->sysfs_state);
7317 wait_event(mddev->sb_wait,
7318 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7319}
7320
7321void md_write_end(struct mddev *mddev)
7322{
7323 if (atomic_dec_and_test(&mddev->writes_pending)) {
7324 if (mddev->safemode == 2)
7325 md_wakeup_thread(mddev->thread);
7326 else if (mddev->safemode_delay)
7327 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7328 }
7329}
7330
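/*
 * md_allow_write(mddev)
 * Mark the array dirty so that the writes the caller is about to issue are
 * permitted, and push the superblock update out.  Returns 0 when it is safe
 * to proceed immediately, or -EAGAIN if the metadata update is still
 * pending and the caller must wait or retry.
 */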
7340int md_allow_write(struct mddev *mddev)
7341{
7342 if (!mddev->pers)
7343 return 0;
7344 if (mddev->ro)
7345 return 0;
7346 if (!mddev->pers->sync_request)
7347 return 0;
7348
7349 spin_lock_irq(&mddev->write_lock);
7350 if (mddev->in_sync) {
7351 mddev->in_sync = 0;
7352 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7353 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7354 if (mddev->safemode_delay &&
7355 mddev->safemode == 0)
7356 mddev->safemode = 1;
7357 spin_unlock_irq(&mddev->write_lock);
7358 md_update_sb(mddev, 0);
7359 sysfs_notify_dirent_safe(mddev->sysfs_state);
7360 } else
7361 spin_unlock_irq(&mddev->write_lock);
7362
7363 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7364 return -EAGAIN;
7365 else
7366 return 0;
7367}
7368EXPORT_SYMBOL_GPL(md_allow_write);
7369
7370#define SYNC_MARKS 10
7371#define SYNC_MARK_STEP (3*HZ)
7372#define UPDATE_FREQUENCY (5*60*HZ)
7373void md_do_sync(struct md_thread *thread)
7374{
7375 struct mddev *mddev = thread->mddev;
7376 struct mddev *mddev2;
7377 unsigned int currspeed = 0,
7378 window;
7379 sector_t max_sectors,j, io_sectors;
7380 unsigned long mark[SYNC_MARKS];
7381 unsigned long update_time;
7382 sector_t mark_cnt[SYNC_MARKS];
7383 int last_mark,m;
7384 struct list_head *tmp;
7385 sector_t last_check;
7386 int skipped = 0;
7387 struct md_rdev *rdev;
7388 char *desc, *action = NULL;
7389 struct blk_plug plug;
7390
7391
7392 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7393 return;
7394 if (mddev->ro) {
7395 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7396 return;
7397 }
7398
7399 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7400 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7401 desc = "data-check";
7402 action = "check";
7403 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7404 desc = "requested-resync";
7405 action = "repair";
7406 } else
7407 desc = "resync";
7408 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7409 desc = "reshape";
7410 else
7411 desc = "recovery";
7412
7413 mddev->last_sync_action = action ?: desc;
7414
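	/*
	 * mddev->curr_resync is overloaded while we sort out clashes with
	 * other arrays that share physical devices:
	 *   2 means we are checking for a conflicting resync,
	 *   1 means we found one and are yielding to it,
	 *   larger values are the sector the resync has actually reached.
	 * The loop below only falls through once no conflicting resync on a
	 * shared device is still running.
	 */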
7431 do {
7432 mddev->curr_resync = 2;
7433
7434 try_again:
7435 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7436 goto skip;
7437 for_each_mddev(mddev2, tmp) {
7438 if (mddev2 == mddev)
7439 continue;
7440 if (!mddev->parallel_resync
7441 && mddev2->curr_resync
7442 && match_mddev_units(mddev, mddev2)) {
7443 DEFINE_WAIT(wq);
7444 if (mddev < mddev2 && mddev->curr_resync == 2) {
7445
7446 mddev->curr_resync = 1;
7447 wake_up(&resync_wait);
7448 }
7449 if (mddev > mddev2 && mddev->curr_resync == 1)
7450
7451
7452
7453 continue;
7454
7455
7456
7457
7458 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7459 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7460 mddev2->curr_resync >= mddev->curr_resync) {
7461 printk(KERN_INFO "md: delaying %s of %s"
7462 " until %s has finished (they"
7463 " share one or more physical units)\n",
7464 desc, mdname(mddev), mdname(mddev2));
7465 mddev_put(mddev2);
7466 if (signal_pending(current))
7467 flush_signals(current);
7468 schedule();
7469 finish_wait(&resync_wait, &wq);
7470 goto try_again;
7471 }
7472 finish_wait(&resync_wait, &wq);
7473 }
7474 }
7475 } while (mddev->curr_resync < 2);
7476
7477 j = 0;
7478 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7479
7480
7481
7482 max_sectors = mddev->resync_max_sectors;
7483 atomic64_set(&mddev->resync_mismatches, 0);
7484
7485 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7486 j = mddev->resync_min;
7487 else if (!mddev->bitmap)
7488 j = mddev->recovery_cp;
7489
7490 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7491 max_sectors = mddev->resync_max_sectors;
7492 else {
7493
7494 max_sectors = mddev->dev_sectors;
7495 j = MaxSector;
7496 rcu_read_lock();
7497 rdev_for_each_rcu(rdev, mddev)
7498 if (rdev->raid_disk >= 0 &&
7499 !test_bit(Faulty, &rdev->flags) &&
7500 !test_bit(In_sync, &rdev->flags) &&
7501 rdev->recovery_offset < j)
7502 j = rdev->recovery_offset;
7503 rcu_read_unlock();
7504
7505
7506
7507
7508
7509
7510
7511
7512
7513 if (mddev->bitmap) {
7514 mddev->pers->quiesce(mddev, 1);
7515 mddev->pers->quiesce(mddev, 0);
7516 }
7517 }
7518
7519 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7520 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7521 " %d KB/sec/disk.\n", speed_min(mddev));
7522 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7523 "(but not more than %d KB/sec) for %s.\n",
7524 speed_max(mddev), desc);
7525
7526 is_mddev_idle(mddev, 1);
7527
7528 io_sectors = 0;
7529 for (m = 0; m < SYNC_MARKS; m++) {
7530 mark[m] = jiffies;
7531 mark_cnt[m] = io_sectors;
7532 }
7533 last_mark = 0;
7534 mddev->resync_mark = mark[last_mark];
7535 mddev->resync_mark_cnt = mark_cnt[last_mark];
7536
7537
7538
7539
7540 window = 32*(PAGE_SIZE/512);
7541 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7542 window/2, (unsigned long long)max_sectors/2);
7543
7544 atomic_set(&mddev->recovery_active, 0);
7545 last_check = 0;
7546
7547 if (j>2) {
7548 printk(KERN_INFO
7549 "md: resuming %s of %s from checkpoint.\n",
7550 desc, mdname(mddev));
7551 mddev->curr_resync = j;
7552 } else
7553 mddev->curr_resync = 3;
7554 mddev->curr_resync_completed = j;
7555 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7556 md_new_event(mddev);
7557 update_time = jiffies;
7558
7559 blk_start_plug(&plug);
7560 while (j < max_sectors) {
7561 sector_t sectors;
7562
7563 skipped = 0;
7564
7565 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7566 ((mddev->curr_resync > mddev->curr_resync_completed &&
7567 (mddev->curr_resync - mddev->curr_resync_completed)
7568 > (max_sectors >> 4)) ||
7569 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7570 (j - mddev->curr_resync_completed)*2
7571 >= mddev->resync_max - mddev->curr_resync_completed
7572 )) {
7573
7574 wait_event(mddev->recovery_wait,
7575 atomic_read(&mddev->recovery_active) == 0);
7576 mddev->curr_resync_completed = j;
7577 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7578 j > mddev->recovery_cp)
7579 mddev->recovery_cp = j;
7580 update_time = jiffies;
7581 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7582 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7583 }
7584
7585 while (j >= mddev->resync_max &&
7586 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7587
7588
7589
7590
7591 flush_signals(current);
7592 wait_event_interruptible(mddev->recovery_wait,
7593 mddev->resync_max > j
7594 || test_bit(MD_RECOVERY_INTR,
7595 &mddev->recovery));
7596 }
7597
7598 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7599 break;
7600
7601 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7602 currspeed < speed_min(mddev));
7603 if (sectors == 0) {
7604 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7605 break;
7606 }
7607
7608 if (!skipped) {
7609 io_sectors += sectors;
7610 atomic_add(sectors, &mddev->recovery_active);
7611 }
7612
7613 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7614 break;
7615
7616 j += sectors;
7617 if (j > 2)
7618 mddev->curr_resync = j;
7619 mddev->curr_mark_cnt = io_sectors;
7620 if (last_check == 0)
7621
7622
7623
7624 md_new_event(mddev);
7625
7626 if (last_check + window > io_sectors || j == max_sectors)
7627 continue;
7628
7629 last_check = io_sectors;
7630 repeat:
7631 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7632
7633 int next = (last_mark+1) % SYNC_MARKS;
7634
7635 mddev->resync_mark = mark[next];
7636 mddev->resync_mark_cnt = mark_cnt[next];
7637 mark[next] = jiffies;
7638 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7639 last_mark = next;
7640 }
7641
7642 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7643 break;
7644
7653 cond_resched();
7654
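		/*
		 * Throttle: recompute the current rate and, as long as we are
		 * above the guaranteed minimum speed, back off (sleep 500ms
		 * and re-check) whenever we exceed the maximum speed or the
		 * member devices are busy with other I/O.
		 */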
7655 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7656 /((jiffies-mddev->resync_mark)/HZ +1) +1;
7657
7658 if (currspeed > speed_min(mddev)) {
7659 if ((currspeed > speed_max(mddev)) ||
7660 !is_mddev_idle(mddev, 0)) {
7661 msleep(500);
7662 goto repeat;
7663 }
7664 }
7665 }
7666	printk(KERN_INFO "md: %s: %s %s.\n", mdname(mddev), desc,
7667 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
7668 ? "interrupted" : "done");
7669
7670
7671
7672 blk_finish_plug(&plug);
7673 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7674
7675
7676 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7677
7678 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7679 mddev->curr_resync > 2) {
7680 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7681 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7682 if (mddev->curr_resync >= mddev->recovery_cp) {
7683 printk(KERN_INFO
7684 "md: checkpointing %s of %s.\n",
7685 desc, mdname(mddev));
7686 if (test_bit(MD_RECOVERY_ERROR,
7687 &mddev->recovery))
7688 mddev->recovery_cp =
7689 mddev->curr_resync_completed;
7690 else
7691 mddev->recovery_cp =
7692 mddev->curr_resync;
7693 }
7694 } else
7695 mddev->recovery_cp = MaxSector;
7696 } else {
7697 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7698 mddev->curr_resync = MaxSector;
7699 rcu_read_lock();
7700 rdev_for_each_rcu(rdev, mddev)
7701 if (rdev->raid_disk >= 0 &&
7702 mddev->delta_disks >= 0 &&
7703 !test_bit(Faulty, &rdev->flags) &&
7704 !test_bit(In_sync, &rdev->flags) &&
7705 rdev->recovery_offset < mddev->curr_resync)
7706 rdev->recovery_offset = mddev->curr_resync;
7707 rcu_read_unlock();
7708 }
7709 }
7710 skip:
7711 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7712
7713 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7714
7715 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7716 mddev->resync_min = 0;
7717 mddev->resync_max = MaxSector;
7718 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7719 mddev->resync_min = mddev->curr_resync_completed;
7720 mddev->curr_resync = 0;
7721 wake_up(&resync_wait);
7722 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7723 md_wakeup_thread(mddev->thread);
7724 return;
7725}
7726EXPORT_SYMBOL_GPL(md_do_sync);
7727
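/*
 * Remove any failed or out-of-sync member (or just 'this' one, if given)
 * that the personality agrees to release, then - unless a specific device
 * was requested - try to hot-add spares back into the array.  Returns the
 * number of devices that a recovery pass could act on.
 */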
7728static int remove_and_add_spares(struct mddev *mddev,
7729 struct md_rdev *this)
7730{
7731 struct md_rdev *rdev;
7732 int spares = 0;
7733 int removed = 0;
7734
7735 rdev_for_each(rdev, mddev)
7736 if ((this == NULL || rdev == this) &&
7737 rdev->raid_disk >= 0 &&
7738 !test_bit(Blocked, &rdev->flags) &&
7739 (test_bit(Faulty, &rdev->flags) ||
7740 ! test_bit(In_sync, &rdev->flags)) &&
7741 atomic_read(&rdev->nr_pending)==0) {
7742 if (mddev->pers->hot_remove_disk(
7743 mddev, rdev) == 0) {
7744 sysfs_unlink_rdev(mddev, rdev);
7745 rdev->raid_disk = -1;
7746 removed++;
7747 }
7748 }
7749 if (removed && mddev->kobj.sd)
7750 sysfs_notify(&mddev->kobj, NULL, "degraded");
7751
7752 if (this)
7753 goto no_add;
7754
7755 rdev_for_each(rdev, mddev) {
7756 if (rdev->raid_disk >= 0 &&
7757 !test_bit(In_sync, &rdev->flags) &&
7758 !test_bit(Faulty, &rdev->flags))
7759 spares++;
7760 if (rdev->raid_disk >= 0)
7761 continue;
7762 if (test_bit(Faulty, &rdev->flags))
7763 continue;
7764 if (mddev->ro &&
7765 ! (rdev->saved_raid_disk >= 0 &&
7766 !test_bit(Bitmap_sync, &rdev->flags)))
7767 continue;
7768
7769 if (rdev->saved_raid_disk < 0)
7770 rdev->recovery_offset = 0;
7771 if (mddev->pers->
7772 hot_add_disk(mddev, rdev) == 0) {
7773 if (sysfs_link_rdev(mddev, rdev))
7774 ;
7775 spares++;
7776 md_new_event(mddev);
7777 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7778 }
7779 }
7780no_add:
7781 if (removed)
7782 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7783 return spares;
7784}
7785
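/*
 * md_check_recovery() is run regularly by each array's md thread to do the
 * housekeeping that is common to all personalities:
 *
 *  - write out the superblock when it is flagged as changed;
 *  - if a resync/recovery thread is already running, leave it alone;
 *  - if one has just finished, reap it and activate any spares that are
 *    now in sync (md_reap_sync_thread);
 *  - remove failed devices and add spares via remove_and_add_spares();
 *  - register a new sync_thread when a reshape, recovery or resync is due.
 *
 * It never performs the resync itself; it only starts md_do_sync() in a
 * separate thread and maintains the MD_RECOVERY_* bits that tell that
 * thread what to do.
 */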
7808void md_check_recovery(struct mddev *mddev)
7809{
7810 if (mddev->suspended)
7811 return;
7812
7813 if (mddev->bitmap)
7814 bitmap_daemon_work(mddev);
7815
7816 if (signal_pending(current)) {
7817 if (mddev->pers->sync_request && !mddev->external) {
7818 printk(KERN_INFO "md: %s in immediate safe mode\n",
7819 mdname(mddev));
7820 mddev->safemode = 2;
7821 }
7822 flush_signals(current);
7823 }
7824
7825 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7826 return;
7827 if ( ! (
7828 (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
7829 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7830 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7831 (mddev->external == 0 && mddev->safemode == 1) ||
7832 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7833 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7834 ))
7835 return;
7836
7837 if (mddev_trylock(mddev)) {
7838 int spares = 0;
7839
7840 if (mddev->ro) {
7841
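			/*
			 * Read-only array: clean up failed devices and reap
			 * any finished sync thread, but do not start new
			 * resync or recovery activity.
			 */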
7848 remove_and_add_spares(mddev, NULL);
7849
7850
7851
7852 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7853 md_reap_sync_thread(mddev);
7854 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7855 goto unlock;
7856 }
7857
7858 if (!mddev->external) {
7859 int did_change = 0;
7860 spin_lock_irq(&mddev->write_lock);
7861 if (mddev->safemode &&
7862 !atomic_read(&mddev->writes_pending) &&
7863 !mddev->in_sync &&
7864 mddev->recovery_cp == MaxSector) {
7865 mddev->in_sync = 1;
7866 did_change = 1;
7867 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7868 }
7869 if (mddev->safemode == 1)
7870 mddev->safemode = 0;
7871 spin_unlock_irq(&mddev->write_lock);
7872 if (did_change)
7873 sysfs_notify_dirent_safe(mddev->sysfs_state);
7874 }
7875
7876 if (mddev->flags & MD_UPDATE_SB_FLAGS)
7877 md_update_sb(mddev, 0);
7878
7879 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7880 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7881
7882 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7883 goto unlock;
7884 }
7885 if (mddev->sync_thread) {
7886 md_reap_sync_thread(mddev);
7887 goto unlock;
7888 }
7889
7890
7891
7892 mddev->curr_resync_completed = 0;
7893 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7894
7895
7896
7897 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7898 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7899
7900 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7901 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7902 goto unlock;
7903
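		/*
		 * Decide what the new sync_thread should do, in priority
		 * order: resume an interrupted reshape first, then rebuild
		 * onto spares if any are available, then resync if the array
		 * is not known to be clean; otherwise there is nothing to do.
		 */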
7910 if (mddev->reshape_position != MaxSector) {
7911 if (mddev->pers->check_reshape == NULL ||
7912 mddev->pers->check_reshape(mddev) != 0)
7913
7914 goto unlock;
7915 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7916 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7917 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
7918 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7919 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7920 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7921 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7922 } else if (mddev->recovery_cp < MaxSector) {
7923 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7924 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7925 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7926
7927 goto unlock;
7928
7929 if (mddev->pers->sync_request) {
7930 if (spares) {
7931
7932
7933
7934
7935 bitmap_write_all(mddev->bitmap);
7936 }
7937 mddev->sync_thread = md_register_thread(md_do_sync,
7938 mddev,
7939 "resync");
7940 if (!mddev->sync_thread) {
7941 printk(KERN_ERR "%s: could not start resync"
7942 " thread...\n",
7943 mdname(mddev));
7944
7945 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7946 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7947 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7948 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7949 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7950 } else
7951 md_wakeup_thread(mddev->sync_thread);
7952 sysfs_notify_dirent_safe(mddev->sysfs_action);
7953 md_new_event(mddev);
7954 }
7955 unlock:
7956 wake_up(&mddev->sb_wait);
7957
7958 if (!mddev->sync_thread) {
7959 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7960 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7961 &mddev->recovery))
7962 if (mddev->sysfs_action)
7963 sysfs_notify_dirent_safe(mddev->sysfs_action);
7964 }
7965 mddev_unlock(mddev);
7966 }
7967}
7968
7969void md_reap_sync_thread(struct mddev *mddev)
7970{
7971 struct md_rdev *rdev;
7972
7973
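	/*
	 * The resync/recovery thread has finished: reap it, activate any
	 * spares that are now in sync, finish off a reshape if one was in
	 * progress and write the updated metadata out.
	 */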
7974 md_unregister_thread(&mddev->sync_thread);
7975 wake_up(&resync_wait);
7976 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7977 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7978
7979
7980 if (mddev->pers->spare_active(mddev)) {
7981 sysfs_notify(&mddev->kobj, NULL,
7982 "degraded");
7983 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7984 }
7985 }
7986 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7987 mddev->pers->finish_reshape)
7988 mddev->pers->finish_reshape(mddev);
7989
7990
7991
7992
7993 if (!mddev->degraded)
7994 rdev_for_each(rdev, mddev)
7995 rdev->saved_raid_disk = -1;
7996
7997 md_update_sb(mddev, 1);
7998 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7999 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
8000 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
8001 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
8002 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
8003
8004 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
8005 sysfs_notify_dirent_safe(mddev->sysfs_action);
8006 md_new_event(mddev);
8007 if (mddev->event_work.func)
8008 queue_work(md_misc_wq, &mddev->event_work);
8009}
8010
8011void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
8012{
8013 sysfs_notify_dirent_safe(rdev->sysfs_state);
8014 wait_event_timeout(rdev->blocked_wait,
8015 !test_bit(Blocked, &rdev->flags) &&
8016 !test_bit(BlockedBadBlocks, &rdev->flags),
8017 msecs_to_jiffies(5000));
8018 rdev_dec_pending(rdev, mddev);
8019}
8020EXPORT_SYMBOL(md_wait_for_blocked_rdev);
8021
8022void md_finish_reshape(struct mddev *mddev)
8023{
8024
8025 struct md_rdev *rdev;
8026
8027 rdev_for_each(rdev, mddev) {
8028 if (rdev->data_offset > rdev->new_data_offset)
8029 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
8030 else
8031 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
8032 rdev->data_offset = rdev->new_data_offset;
8033 }
8034}
8035EXPORT_SYMBOL(md_finish_reshape);
8036
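/*
 * Bad-block management.
 *
 * Rather than failing a whole device on the first media error, md can
 * record individual bad ranges per device and only fail I/O that touches
 * them.  The table lives in bb->page as a sorted array of 64-bit entries,
 * each packing a start sector, a length (at most BB_MAX_LEN sectors) and
 * an "acknowledged" flag - see the BB_OFFSET/BB_LEN/BB_ACK/BB_MAKE macros.
 * The table is protected by a seqlock, so lookups may have to retry.
 *
 * md_is_badblock() reports whether [s, s+sectors) intersects any recorded
 * bad range: 0 if none, 1 if every overlapping range is acknowledged,
 * -1 if an unacknowledged range overlaps; *first_bad and *bad_sectors
 * describe one of the overlapping ranges.
 */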
8063int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
8064 sector_t *first_bad, int *bad_sectors)
8065{
8066 int hi;
8067 int lo;
8068 u64 *p = bb->page;
8069 int rv;
8070 sector_t target = s + sectors;
8071 unsigned seq;
8072
8073 if (bb->shift > 0) {
8074
8075 s >>= bb->shift;
8076 target += (1<<bb->shift) - 1;
8077 target >>= bb->shift;
8078 sectors = target - s;
8079 }
8080
8081
8082retry:
8083 seq = read_seqbegin(&bb->lock);
8084 lo = 0;
8085 rv = 0;
8086 hi = bb->count;
8087
8088
8089
8090
8091
8092
8093
8094
8095
8096 while (hi - lo > 1) {
8097 int mid = (lo + hi) / 2;
8098 sector_t a = BB_OFFSET(p[mid]);
8099 if (a < target)
8100
8101
8102 lo = mid;
8103 else
8104
8105 hi = mid;
8106 }
8107
8108 if (hi > lo) {
8109
8110
8111
8112 while (lo >= 0 &&
8113 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8114 if (BB_OFFSET(p[lo]) < target) {
8115
8116
8117
8118 if (rv != -1 && BB_ACK(p[lo]))
8119 rv = 1;
8120 else
8121 rv = -1;
8122 *first_bad = BB_OFFSET(p[lo]);
8123 *bad_sectors = BB_LEN(p[lo]);
8124 }
8125 lo--;
8126 }
8127 }
8128
8129 if (read_seqretry(&bb->lock, seq))
8130 goto retry;
8131
8132 return rv;
8133}
8134EXPORT_SYMBOL_GPL(md_is_badblock);
8135
8136
8137
8138
8139
8140
8141
8142
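/*
 * Add the range [s, s+sectors) to the bad-block table, merging with or
 * extending neighbouring entries where possible and creating new entries
 * (in chunks of at most BB_MAX_LEN) otherwise.  With a non-zero bb->shift
 * the range is rounded outwards to block boundaries.  Returns 1 on
 * success, 0 if the table is full and the range could not be recorded.
 */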
8143static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8144 int acknowledged)
8145{
8146 u64 *p;
8147 int lo, hi;
8148 int rv = 1;
8149 unsigned long flags;
8150
8151 if (bb->shift < 0)
8152
8153 return 0;
8154
8155 if (bb->shift) {
8156
8157 sector_t next = s + sectors;
8158 s >>= bb->shift;
8159 next += (1<<bb->shift) - 1;
8160 next >>= bb->shift;
8161 sectors = next - s;
8162 }
8163
8164 write_seqlock_irqsave(&bb->lock, flags);
8165
8166 p = bb->page;
8167 lo = 0;
8168 hi = bb->count;
8169
8170 while (hi - lo > 1) {
8171 int mid = (lo + hi) / 2;
8172 sector_t a = BB_OFFSET(p[mid]);
8173 if (a <= s)
8174 lo = mid;
8175 else
8176 hi = mid;
8177 }
8178 if (hi > lo && BB_OFFSET(p[lo]) > s)
8179 hi = lo;
8180
8181 if (hi > lo) {
8182
8183
8184
8185 sector_t a = BB_OFFSET(p[lo]);
8186 sector_t e = a + BB_LEN(p[lo]);
8187 int ack = BB_ACK(p[lo]);
8188 if (e >= s) {
8189
8190 if (s == a && s + sectors >= e)
8191
8192 ack = acknowledged;
8193 else
8194 ack = ack && acknowledged;
8195
8196 if (e < s + sectors)
8197 e = s + sectors;
8198 if (e - a <= BB_MAX_LEN) {
8199 p[lo] = BB_MAKE(a, e-a, ack);
8200 s = e;
8201 } else {
8202
8203
8204
8205 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8206 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8207 s = a + BB_MAX_LEN;
8208 }
8209 sectors = e - s;
8210 }
8211 }
8212 if (sectors && hi < bb->count) {
8213
8214
8215 sector_t a = BB_OFFSET(p[hi]);
8216 sector_t e = a + BB_LEN(p[hi]);
8217 int ack = BB_ACK(p[hi]);
8218 if (a <= s + sectors) {
8219
8220 if (e <= s + sectors) {
8221
8222 e = s + sectors;
8223 ack = acknowledged;
8224 } else
8225 ack = ack && acknowledged;
8226
8227 a = s;
8228 if (e - a <= BB_MAX_LEN) {
8229 p[hi] = BB_MAKE(a, e-a, ack);
8230 s = e;
8231 } else {
8232 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8233 s = a + BB_MAX_LEN;
8234 }
8235 sectors = e - s;
8236 lo = hi;
8237 hi++;
8238 }
8239 }
8240 if (sectors == 0 && hi < bb->count) {
8241
8242
8243 sector_t a = BB_OFFSET(p[hi]);
8244 int lolen = BB_LEN(p[lo]);
8245 int hilen = BB_LEN(p[hi]);
8246 int newlen = lolen + hilen - (s - a);
8247 if (s >= a && newlen < BB_MAX_LEN) {
8248
8249 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8250 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8251 memmove(p + hi, p + hi + 1,
8252 (bb->count - hi - 1) * 8);
8253 bb->count--;
8254 }
8255 }
8256 while (sectors) {
8257
8258
8259 if (bb->count >= MD_MAX_BADBLOCKS) {
8260
8261 rv = 0;
8262 break;
8263 } else {
8264 int this_sectors = sectors;
8265 memmove(p + hi + 1, p + hi,
8266 (bb->count - hi) * 8);
8267 bb->count++;
8268
8269 if (this_sectors > BB_MAX_LEN)
8270 this_sectors = BB_MAX_LEN;
8271 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8272 sectors -= this_sectors;
8273 s += this_sectors;
8274 }
8275 }
8276
8277 bb->changed = 1;
8278 if (!acknowledged)
8279 bb->unacked_exist = 1;
8280 write_sequnlock_irqrestore(&bb->lock, flags);
8281
8282 return rv;
8283}
8284
8285int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8286 int is_new)
8287{
8288 int rv;
8289 if (is_new)
8290 s += rdev->new_data_offset;
8291 else
8292 s += rdev->data_offset;
8293 rv = md_set_badblocks(&rdev->badblocks,
8294 s, sectors, 0);
8295 if (rv) {
8296
8297 sysfs_notify_dirent_safe(rdev->sysfs_state);
8298 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8299 md_wakeup_thread(rdev->mddev->thread);
8300 }
8301 return rv;
8302}
8303EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8304
8305
8306
8307
8308
8309
8310
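/*
 * Remove the range [s, s+sectors) from the bad-block table.  Entries that
 * extend beyond the cleared range are trimmed; an entry spanning the whole
 * range must be split in two, which fails with -ENOSPC if the table is
 * already full.  With a non-zero bb->shift the range is rounded inwards,
 * so blocks that are only partially covered stay marked bad.
 */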
8311static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8312{
8313 u64 *p;
8314 int lo, hi;
8315 sector_t target = s + sectors;
8316 int rv = 0;
8317
8318 if (bb->shift > 0) {
8319
8320
8321
8322
8323
8324
8325 s += (1<<bb->shift) - 1;
8326 s >>= bb->shift;
8327 target >>= bb->shift;
8328 sectors = target - s;
8329 }
8330
8331 write_seqlock_irq(&bb->lock);
8332
8333 p = bb->page;
8334 lo = 0;
8335 hi = bb->count;
8336
8337 while (hi - lo > 1) {
8338 int mid = (lo + hi) / 2;
8339 sector_t a = BB_OFFSET(p[mid]);
8340 if (a < target)
8341 lo = mid;
8342 else
8343 hi = mid;
8344 }
8345 if (hi > lo) {
8346
8347
8348
8349
8350 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8351
8352 int ack = BB_ACK(p[lo]);
8353 sector_t a = BB_OFFSET(p[lo]);
8354 sector_t end = a + BB_LEN(p[lo]);
8355
8356 if (a < s) {
8357
8358 if (bb->count >= MD_MAX_BADBLOCKS) {
8359 rv = -ENOSPC;
8360 goto out;
8361 }
8362 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8363 bb->count++;
8364 p[lo] = BB_MAKE(a, s-a, ack);
8365 lo++;
8366 }
8367 p[lo] = BB_MAKE(target, end - target, ack);
8368
8369 hi = lo;
8370 lo--;
8371 }
8372 while (lo >= 0 &&
8373 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8374
8375 if (BB_OFFSET(p[lo]) < s) {
8376
8377 int ack = BB_ACK(p[lo]);
8378 sector_t start = BB_OFFSET(p[lo]);
8379 p[lo] = BB_MAKE(start, s - start, ack);
8380
8381 break;
8382 }
8383 lo--;
8384 }
8385
8386
8387
8388 if (hi - lo > 1) {
8389 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8390 bb->count -= (hi - lo - 1);
8391 }
8392 }
8393
8394 bb->changed = 1;
8395out:
8396 write_sequnlock_irq(&bb->lock);
8397 return rv;
8398}
8399
8400int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8401 int is_new)
8402{
8403 if (is_new)
8404 s += rdev->new_data_offset;
8405 else
8406 s += rdev->data_offset;
8407 return md_clear_badblocks(&rdev->badblocks,
8408 s, sectors);
8409}
8410EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8411
8412
8413
8414
8415
8416
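/*
 * Mark every recorded bad block as acknowledged, i.e. as safely noted in
 * the on-disk metadata.  Only done while bb->changed is clear, so ranges
 * added after the last metadata write remain unacknowledged.
 */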
8417void md_ack_all_badblocks(struct badblocks *bb)
8418{
8419 if (bb->page == NULL || bb->changed)
8420
8421 return;
8422 write_seqlock_irq(&bb->lock);
8423
8424 if (bb->changed == 0 && bb->unacked_exist) {
8425 u64 *p = bb->page;
8426 int i;
8427 for (i = 0; i < bb->count ; i++) {
8428 if (!BB_ACK(p[i])) {
8429 sector_t start = BB_OFFSET(p[i]);
8430 int len = BB_LEN(p[i]);
8431 p[i] = BB_MAKE(start, len, 1);
8432 }
8433 }
8434 bb->unacked_exist = 0;
8435 }
8436 write_sequnlock_irq(&bb->lock);
8437}
8438EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8439
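/*
 * sysfs access to the bad-block table.  badblocks_show() emits one
 * "sector length" line per entry (only unacknowledged entries when 'unack'
 * is set); badblocks_store() parses "sector length" and records the range,
 * acknowledged unless 'unack' is set.  With DO_DEBUG defined, a leading
 * '-' clears the given range instead of setting it.
 */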
8452static ssize_t
8453badblocks_show(struct badblocks *bb, char *page, int unack)
8454{
8455 size_t len;
8456 int i;
8457 u64 *p = bb->page;
8458 unsigned seq;
8459
8460 if (bb->shift < 0)
8461 return 0;
8462
8463retry:
8464 seq = read_seqbegin(&bb->lock);
8465
8466 len = 0;
8467 i = 0;
8468
8469 while (len < PAGE_SIZE && i < bb->count) {
8470 sector_t s = BB_OFFSET(p[i]);
8471 unsigned int length = BB_LEN(p[i]);
8472 int ack = BB_ACK(p[i]);
8473 i++;
8474
8475 if (unack && ack)
8476 continue;
8477
8478 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8479 (unsigned long long)s << bb->shift,
8480 length << bb->shift);
8481 }
8482 if (unack && len == 0)
8483 bb->unacked_exist = 0;
8484
8485 if (read_seqretry(&bb->lock, seq))
8486 goto retry;
8487
8488 return len;
8489}
8490
8491#define DO_DEBUG 1
8492
8493static ssize_t
8494badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8495{
8496 unsigned long long sector;
8497 int length;
8498 char newline;
8499#ifdef DO_DEBUG
8500
8501
8502
8503 int clear = 0;
8504 if (page[0] == '-') {
8505 clear = 1;
8506 page++;
8507 }
8508#endif
8509
8510	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
8511 case 3:
8512 if (newline != '\n')
8513 return -EINVAL;
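		/* fall through */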
8514 case 2:
8515 if (length <= 0)
8516 return -EINVAL;
8517 break;
8518 default:
8519 return -EINVAL;
8520 }
8521
8522#ifdef DO_DEBUG
8523 if (clear) {
8524 md_clear_badblocks(bb, sector, length);
8525 return len;
8526 }
8527#endif
8528 if (md_set_badblocks(bb, sector, length, !unack))
8529 return len;
8530 else
8531 return -ENOSPC;
8532}
8533
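/*
 * Reboot notifier: on shutdown or reboot, stop writes on every array we can
 * lock and put persistent-metadata arrays into immediate safe mode, then
 * pause briefly so the final superblock writes can reach the devices.
 */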
8534static int md_notify_reboot(struct notifier_block *this,
8535 unsigned long code, void *x)
8536{
8537 struct list_head *tmp;
8538 struct mddev *mddev;
8539 int need_delay = 0;
8540
8541 for_each_mddev(mddev, tmp) {
8542 if (mddev_trylock(mddev)) {
8543 if (mddev->pers)
8544 __md_stop_writes(mddev);
8545 if (mddev->persistent)
8546 mddev->safemode = 2;
8547 mddev_unlock(mddev);
8548 }
8549 need_delay = 1;
8550 }
8551
8552
8553
8554
8555
8556
8557 if (need_delay)
8558 mdelay(1000*1);
8559
8560 return NOTIFY_DONE;
8561}
8562
8563static struct notifier_block md_notifier = {
8564 .notifier_call = md_notify_reboot,
8565 .next = NULL,
8566 .priority = INT_MAX,
8567};
8568
8569static void md_geninit(void)
8570{
8571 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8572
8573 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8574}
8575
8576static int __init md_init(void)
8577{
8578 int ret = -ENOMEM;
8579
8580 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8581 if (!md_wq)
8582 goto err_wq;
8583
8584 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8585 if (!md_misc_wq)
8586 goto err_misc_wq;
8587
8588 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8589 goto err_md;
8590
8591 if ((ret = register_blkdev(0, "mdp")) < 0)
8592 goto err_mdp;
8593 mdp_major = ret;
8594
8595 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8596 md_probe, NULL, NULL);
8597 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8598 md_probe, NULL, NULL);
8599
8600 register_reboot_notifier(&md_notifier);
8601 raid_table_header = register_sysctl_table(raid_root_table);
8602
8603 md_geninit();
8604 return 0;
8605
8606err_mdp:
8607 unregister_blkdev(MD_MAJOR, "md");
8608err_md:
8609 destroy_workqueue(md_misc_wq);
8610err_misc_wq:
8611 destroy_workqueue(md_wq);
8612err_wq:
8613 return ret;
8614}
8615
8616#ifndef MODULE
8617
8618
8619
8620
8621
8622
8623static LIST_HEAD(all_detected_devices);
8624struct detected_devices_node {
8625 struct list_head list;
8626 dev_t dev;
8627};
8628
8629void md_autodetect_dev(dev_t dev)
8630{
8631 struct detected_devices_node *node_detected_dev;
8632
8633 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8634 if (node_detected_dev) {
8635 node_detected_dev->dev = dev;
8636 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8637 } else {
8638 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8639 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8640 }
8641}
8642
8643
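/*
 * Assemble arrays at boot (non-modular builds): import every device queued
 * by md_autodetect_dev() using the 0.90 superblock format, put it on
 * pending_raid_disks and let autorun_devices() build the arrays.
 */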
8644static void autostart_arrays(int part)
8645{
8646 struct md_rdev *rdev;
8647 struct detected_devices_node *node_detected_dev;
8648 dev_t dev;
8649 int i_scanned, i_passed;
8650
8651 i_scanned = 0;
8652 i_passed = 0;
8653
8654 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8655
8656 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8657 i_scanned++;
8658 node_detected_dev = list_entry(all_detected_devices.next,
8659 struct detected_devices_node, list);
8660 list_del(&node_detected_dev->list);
8661 dev = node_detected_dev->dev;
8662 kfree(node_detected_dev);
8663		rdev = md_import_device(dev, 0, 90);
8664 if (IS_ERR(rdev))
8665 continue;
8666
8667 if (test_bit(Faulty, &rdev->flags)) {
8668 MD_BUG();
8669 continue;
8670 }
8671 set_bit(AutoDetected, &rdev->flags);
8672 list_add(&rdev->same_set, &pending_raid_disks);
8673 i_passed++;
8674 }
8675
8676 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8677 i_scanned, i_passed);
8678
8679 autorun_devices(part);
8680}
8681
8682#endif
8683
8684static __exit void md_exit(void)
8685{
8686 struct mddev *mddev;
8687 struct list_head *tmp;
8688 int delay = 1;
8689
8690 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8691 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8692
8693 unregister_blkdev(MD_MAJOR,"md");
8694 unregister_blkdev(mdp_major, "mdp");
8695 unregister_reboot_notifier(&md_notifier);
8696 unregister_sysctl_table(raid_table_header);
8697
8698
8699
8700
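	/*
	 * Flag that we are unloading so mdstat_poll() stops blocking, then
	 * repeatedly wake anyone polling /proc/mdstat and wait (with an
	 * increasing delay) for them to leave before removing the proc entry.
	 */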
8701 md_unloading = 1;
8702 while (waitqueue_active(&md_event_waiters)) {
8703
8704 wake_up(&md_event_waiters);
8705 msleep(delay);
8706 delay += delay;
8707 }
8708 remove_proc_entry("mdstat", NULL);
8709
8710 for_each_mddev(mddev, tmp) {
8711 export_array(mddev);
8712 mddev->hold_active = 0;
8713 }
8714 destroy_workqueue(md_misc_wq);
8715 destroy_workqueue(md_wq);
8716}
8717
8718subsys_initcall(md_init);
8719module_exit(md_exit)
8720
8721static int get_ro(char *buffer, struct kernel_param *kp)
8722{
8723 return sprintf(buffer, "%d", start_readonly);
8724}
8725static int set_ro(const char *val, struct kernel_param *kp)
8726{
8727 char *e;
8728 int num = simple_strtoul(val, &e, 10);
8729 if (*val && (*e == '\0' || *e == '\n')) {
8730 start_readonly = num;
8731 return 0;
8732 }
8733 return -EINVAL;
8734}
8735
8736module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8737module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8738
8739module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8740
8741EXPORT_SYMBOL(register_md_personality);
8742EXPORT_SYMBOL(unregister_md_personality);
8743EXPORT_SYMBOL(md_error);
8744EXPORT_SYMBOL(md_done_sync);
8745EXPORT_SYMBOL(md_write_start);
8746EXPORT_SYMBOL(md_write_end);
8747EXPORT_SYMBOL(md_register_thread);
8748EXPORT_SYMBOL(md_unregister_thread);
8749EXPORT_SYMBOL(md_wakeup_thread);
8750EXPORT_SYMBOL(md_check_recovery);
8751EXPORT_SYMBOL(md_reap_sync_thread);
8752MODULE_LICENSE("GPL");
8753MODULE_DESCRIPTION("MD RAID framework");
8754MODULE_ALIAS("md");
8755MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8756