1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
#ifndef MODULE
/* Forward declaration; only present when MODULE is not defined. */
static void autostart_arrays(int part);
#endif
60
61
62
63
64
65
/*
 * pers_list is the list of md personalities walked by find_pers().
 * pers_lock is taken around the thread wake-up in mddev_unlock();
 * NOTE(review): it presumably also guards pers_list - confirm at the
 * registration sites, which are outside this part of the file.
 */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

/* Forward declaration: used by MD_BUG() to dump device state. */
static void md_print_devices(void);

/* Wait queue used outside this part of the file. */
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
/* md_wq runs the flush work items; md_misc_wq runs the delayed mddev delete. */
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;

/* Forward declaration; defined later in the file. */
static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
77
/*
 * Report an internal md inconsistency: log the source file and line, then
 * dump the device state through md_print_devices().  Arguments are ignored.
 *
 * The body is wrapped in do { } while (0) so that "MD_BUG();" expands to
 * exactly one statement and stays correct in an unbraced if/else.
 */
#define MD_BUG(x...) \
	do { \
		printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); \
		md_print_devices(); \
	} while (0)
79
80
81
82
83
84
/*
 * Default limit on corrected read errors.  Where the limit is applied is
 * outside this part of the file.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
 * System-wide resync speed limits, exported as the "speed_limit_min" and
 * "speed_limit_max" sysctl entries in raid_table.  speed_min()/speed_max()
 * fall back to these when an array has no value of its own.
 * NOTE(review): units are not visible here (presumably KB/sec) - confirm.
 */
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
101static inline int speed_min(struct mddev *mddev)
102{
103 return mddev->sync_speed_min ?
104 mddev->sync_speed_min : sysctl_speed_limit_min;
105}
106
107static inline int speed_max(struct mddev *mddev)
108{
109 return mddev->sync_speed_max ?
110 mddev->sync_speed_max : sysctl_speed_limit_max;
111}
112
/* Handle for the sysctl tables below; registration is outside this part of the file. */
static struct ctl_table_header *raid_table_header;

/*
 * Leaf entries: the two resync speed limits.  Both are plain ints handled
 * by proc_dointvec, readable by everyone and writable by the owner.
 */
static ctl_table raid_table[] = {
	{
		.procname	= "speed_limit_min",
		.data		= &sysctl_speed_limit_min,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "speed_limit_max",
		.data		= &sysctl_speed_limit_max,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO|S_IWUSR,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/* "raid" directory whose children are the entries of raid_table. */
static ctl_table raid_dir_table[] = {
	{
		.procname	= "raid",
		.maxlen		= 0,
		.mode		= S_IRUGO|S_IXUGO,
		.child		= raid_table,
	},
	{ }
};

/*
 * "dev" directory holding raid_dir_table, so the nesting describes
 * dev/raid/speed_limit_{min,max}.
 */
static ctl_table raid_root_table[] = {
	{
		.procname	= "dev",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= raid_dir_table,
	},
	{  }
};
152
/* Block device operations for md devices; initialised later in the file. */
static const struct block_device_operations md_fops;

/* NOTE(review): presumably makes arrays start read-only when non-zero - confirm at its users. */
static int start_readonly;
156
157
158
159
160
161struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
162 struct mddev *mddev)
163{
164 struct bio *b;
165
166 if (!mddev || !mddev->bio_set)
167 return bio_alloc(gfp_mask, nr_iovecs);
168
169 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
170 if (!b)
171 return NULL;
172 return b;
173}
174EXPORT_SYMBOL_GPL(bio_alloc_mddev);
175
176struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
177 struct mddev *mddev)
178{
179 if (!mddev || !mddev->bio_set)
180 return bio_clone(bio, gfp_mask);
181
182 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
183}
184EXPORT_SYMBOL_GPL(bio_clone_mddev);
185
186void md_trim_bio(struct bio *bio, int offset, int size)
187{
188
189
190
191
192 int i;
193 struct bio_vec *bvec;
194 int sofar = 0;
195
196 size <<= 9;
197 if (offset == 0 && size == bio->bi_size)
198 return;
199
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201
202 bio_advance(bio, offset << 9);
203
204 bio->bi_size = size;
205
206
207 if (bio->bi_idx) {
208 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
209 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
210 bio->bi_vcnt -= bio->bi_idx;
211 bio->bi_idx = 0;
212 }
213
214 bio_for_each_segment(bvec, bio, i) {
215 if (sofar + bvec->bv_len > size)
216 bvec->bv_len = size - sofar;
217 if (bvec->bv_len == 0) {
218 bio->bi_vcnt = i;
219 break;
220 }
221 sofar += bvec->bv_len;
222 }
223}
224EXPORT_SYMBOL_GPL(md_trim_bio);
225
226
227
228
229
230
231
232
233
234
235
236static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
237static atomic_t md_event_count;
238void md_new_event(struct mddev *mddev)
239{
240 atomic_inc(&md_event_count);
241 wake_up(&md_event_waiters);
242}
243EXPORT_SYMBOL_GPL(md_new_event);
244
245
246
247
248static void md_new_event_inintr(struct mddev *mddev)
249{
250 atomic_inc(&md_event_count);
251 wake_up(&md_event_waiters);
252}
253
254
255
256
257
/*
 * Every mddev is linked on all_mddevs through its ->all_mddevs member.
 * all_mddevs_lock is held while the list is walked or modified
 * (see for_each_mddev, mddev_put and mddev_find).
 */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);
260
261
262
263
264
265
266
267
268
/*
 * Iterate over every mddev on all_mddevs.
 *
 * @_mddev: struct mddev * cursor
 * @_tmp:   struct list_head * scratch cursor
 *
 * all_mddevs_lock is only held while stepping the list.  Before each pass
 * through the body the macro takes a reference (mddev_get) on the next
 * entry, drops the lock, and then releases the reference on the previous
 * entry (mddev_put), so the body runs unlocked with a reference held on
 * _mddev.  A body that leaves the loop early with 'break' therefore still
 * owns that reference and has to mddev_put() it itself.
 */
#define for_each_mddev(_mddev,_tmp)					\
									\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
283
284
285
286
287
288
289
290
291
292static void md_make_request(struct request_queue *q, struct bio *bio)
293{
294 const int rw = bio_data_dir(bio);
295 struct mddev *mddev = q->queuedata;
296 int cpu;
297 unsigned int sectors;
298
299 if (mddev == NULL || mddev->pers == NULL
300 || !mddev->ready) {
301 bio_io_error(bio);
302 return;
303 }
304 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
305 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
306 return;
307 }
308 smp_rmb();
309 rcu_read_lock();
310 if (mddev->suspended) {
311 DEFINE_WAIT(__wait);
312 for (;;) {
313 prepare_to_wait(&mddev->sb_wait, &__wait,
314 TASK_UNINTERRUPTIBLE);
315 if (!mddev->suspended)
316 break;
317 rcu_read_unlock();
318 schedule();
319 rcu_read_lock();
320 }
321 finish_wait(&mddev->sb_wait, &__wait);
322 }
323 atomic_inc(&mddev->active_io);
324 rcu_read_unlock();
325
326
327
328
329
330 sectors = bio_sectors(bio);
331 mddev->pers->make_request(mddev, bio);
332
333 cpu = part_stat_lock();
334 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
335 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
336 part_stat_unlock();
337
338 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
339 wake_up(&mddev->sb_wait);
340}
341
342
343
344
345
346
347
348void mddev_suspend(struct mddev *mddev)
349{
350 BUG_ON(mddev->suspended);
351 mddev->suspended = 1;
352 synchronize_rcu();
353 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
354 mddev->pers->quiesce(mddev, 1);
355
356 del_timer_sync(&mddev->safemode_timer);
357}
358EXPORT_SYMBOL_GPL(mddev_suspend);
359
360void mddev_resume(struct mddev *mddev)
361{
362 mddev->suspended = 0;
363 wake_up(&mddev->sb_wait);
364 mddev->pers->quiesce(mddev, 0);
365
366 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
367 md_wakeup_thread(mddev->thread);
368 md_wakeup_thread(mddev->sync_thread);
369}
370EXPORT_SYMBOL_GPL(mddev_resume);
371
372int mddev_congested(struct mddev *mddev, int bits)
373{
374 return mddev->suspended;
375}
376EXPORT_SYMBOL(mddev_congested);
377
378
379
380
381
382static void md_end_flush(struct bio *bio, int err)
383{
384 struct md_rdev *rdev = bio->bi_private;
385 struct mddev *mddev = rdev->mddev;
386
387 rdev_dec_pending(rdev, mddev);
388
389 if (atomic_dec_and_test(&mddev->flush_pending)) {
390
391 queue_work(md_wq, &mddev->flush_work);
392 }
393 bio_put(bio);
394}
395
396static void md_submit_flush_data(struct work_struct *ws);
397
398static void submit_flushes(struct work_struct *ws)
399{
400 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
401 struct md_rdev *rdev;
402
403 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
404 atomic_set(&mddev->flush_pending, 1);
405 rcu_read_lock();
406 rdev_for_each_rcu(rdev, mddev)
407 if (rdev->raid_disk >= 0 &&
408 !test_bit(Faulty, &rdev->flags)) {
409
410
411
412
413 struct bio *bi;
414 atomic_inc(&rdev->nr_pending);
415 atomic_inc(&rdev->nr_pending);
416 rcu_read_unlock();
417 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
418 bi->bi_end_io = md_end_flush;
419 bi->bi_private = rdev;
420 bi->bi_bdev = rdev->bdev;
421 atomic_inc(&mddev->flush_pending);
422 submit_bio(WRITE_FLUSH, bi);
423 rcu_read_lock();
424 rdev_dec_pending(rdev, mddev);
425 }
426 rcu_read_unlock();
427 if (atomic_dec_and_test(&mddev->flush_pending))
428 queue_work(md_wq, &mddev->flush_work);
429}
430
431static void md_submit_flush_data(struct work_struct *ws)
432{
433 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
434 struct bio *bio = mddev->flush_bio;
435
436 if (bio->bi_size == 0)
437
438 bio_endio(bio, 0);
439 else {
440 bio->bi_rw &= ~REQ_FLUSH;
441 mddev->pers->make_request(mddev, bio);
442 }
443
444 mddev->flush_bio = NULL;
445 wake_up(&mddev->sb_wait);
446}
447
448void md_flush_request(struct mddev *mddev, struct bio *bio)
449{
450 spin_lock_irq(&mddev->write_lock);
451 wait_event_lock_irq(mddev->sb_wait,
452 !mddev->flush_bio,
453 mddev->write_lock);
454 mddev->flush_bio = bio;
455 spin_unlock_irq(&mddev->write_lock);
456
457 INIT_WORK(&mddev->flush_work, submit_flushes);
458 queue_work(md_wq, &mddev->flush_work);
459}
460EXPORT_SYMBOL(md_flush_request);
461
462void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
463{
464 struct mddev *mddev = cb->data;
465 md_wakeup_thread(mddev->thread);
466 kfree(cb);
467}
468EXPORT_SYMBOL(md_unplug);
469
470static inline struct mddev *mddev_get(struct mddev *mddev)
471{
472 atomic_inc(&mddev->active);
473 return mddev;
474}
475
476static void mddev_delayed_delete(struct work_struct *ws);
477
478static void mddev_put(struct mddev *mddev)
479{
480 struct bio_set *bs = NULL;
481
482 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
483 return;
484 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
485 mddev->ctime == 0 && !mddev->hold_active) {
486
487
488 list_del_init(&mddev->all_mddevs);
489 bs = mddev->bio_set;
490 mddev->bio_set = NULL;
491 if (mddev->gendisk) {
492
493
494
495
496
497 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
498 queue_work(md_misc_wq, &mddev->del_work);
499 } else
500 kfree(mddev);
501 }
502 spin_unlock(&all_mddevs_lock);
503 if (bs)
504 bioset_free(bs);
505}
506
507void mddev_init(struct mddev *mddev)
508{
509 mutex_init(&mddev->open_mutex);
510 mutex_init(&mddev->reconfig_mutex);
511 mutex_init(&mddev->bitmap_info.mutex);
512 INIT_LIST_HEAD(&mddev->disks);
513 INIT_LIST_HEAD(&mddev->all_mddevs);
514 init_timer(&mddev->safemode_timer);
515 atomic_set(&mddev->active, 1);
516 atomic_set(&mddev->openers, 0);
517 atomic_set(&mddev->active_io, 0);
518 spin_lock_init(&mddev->write_lock);
519 atomic_set(&mddev->flush_pending, 0);
520 init_waitqueue_head(&mddev->sb_wait);
521 init_waitqueue_head(&mddev->recovery_wait);
522 mddev->reshape_position = MaxSector;
523 mddev->reshape_backwards = 0;
524 mddev->resync_min = 0;
525 mddev->resync_max = MaxSector;
526 mddev->level = LEVEL_NONE;
527}
528EXPORT_SYMBOL_GPL(mddev_init);
529
530static struct mddev * mddev_find(dev_t unit)
531{
532 struct mddev *mddev, *new = NULL;
533
534 if (unit && MAJOR(unit) != MD_MAJOR)
535 unit &= ~((1<<MdpMinorShift)-1);
536
537 retry:
538 spin_lock(&all_mddevs_lock);
539
540 if (unit) {
541 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
542 if (mddev->unit == unit) {
543 mddev_get(mddev);
544 spin_unlock(&all_mddevs_lock);
545 kfree(new);
546 return mddev;
547 }
548
549 if (new) {
550 list_add(&new->all_mddevs, &all_mddevs);
551 spin_unlock(&all_mddevs_lock);
552 new->hold_active = UNTIL_IOCTL;
553 return new;
554 }
555 } else if (new) {
556
557 static int next_minor = 512;
558 int start = next_minor;
559 int is_free = 0;
560 int dev = 0;
561 while (!is_free) {
562 dev = MKDEV(MD_MAJOR, next_minor);
563 next_minor++;
564 if (next_minor > MINORMASK)
565 next_minor = 0;
566 if (next_minor == start) {
567
568 spin_unlock(&all_mddevs_lock);
569 kfree(new);
570 return NULL;
571 }
572
573 is_free = 1;
574 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
575 if (mddev->unit == dev) {
576 is_free = 0;
577 break;
578 }
579 }
580 new->unit = dev;
581 new->md_minor = MINOR(dev);
582 new->hold_active = UNTIL_STOP;
583 list_add(&new->all_mddevs, &all_mddevs);
584 spin_unlock(&all_mddevs_lock);
585 return new;
586 }
587 spin_unlock(&all_mddevs_lock);
588
589 new = kzalloc(sizeof(*new), GFP_KERNEL);
590 if (!new)
591 return NULL;
592
593 new->unit = unit;
594 if (MAJOR(unit) == MD_MAJOR)
595 new->md_minor = MINOR(unit);
596 else
597 new->md_minor = MINOR(unit) >> MdpMinorShift;
598
599 mddev_init(new);
600
601 goto retry;
602}
603
604static inline int mddev_lock(struct mddev * mddev)
605{
606 return mutex_lock_interruptible(&mddev->reconfig_mutex);
607}
608
609static inline int mddev_is_locked(struct mddev *mddev)
610{
611 return mutex_is_locked(&mddev->reconfig_mutex);
612}
613
614static inline int mddev_trylock(struct mddev * mddev)
615{
616 return mutex_trylock(&mddev->reconfig_mutex);
617}
618
619static struct attribute_group md_redundancy_group;
620
621static void mddev_unlock(struct mddev * mddev)
622{
623 if (mddev->to_remove) {
624
625
626
627
628
629
630
631
632
633
634
635
636 struct attribute_group *to_remove = mddev->to_remove;
637 mddev->to_remove = NULL;
638 mddev->sysfs_active = 1;
639 mutex_unlock(&mddev->reconfig_mutex);
640
641 if (mddev->kobj.sd) {
642 if (to_remove != &md_redundancy_group)
643 sysfs_remove_group(&mddev->kobj, to_remove);
644 if (mddev->pers == NULL ||
645 mddev->pers->sync_request == NULL) {
646 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
647 if (mddev->sysfs_action)
648 sysfs_put(mddev->sysfs_action);
649 mddev->sysfs_action = NULL;
650 }
651 }
652 mddev->sysfs_active = 0;
653 } else
654 mutex_unlock(&mddev->reconfig_mutex);
655
656
657
658
659 spin_lock(&pers_lock);
660 md_wakeup_thread(mddev->thread);
661 spin_unlock(&pers_lock);
662}
663
664static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
665{
666 struct md_rdev *rdev;
667
668 rdev_for_each(rdev, mddev)
669 if (rdev->desc_nr == nr)
670 return rdev;
671
672 return NULL;
673}
674
675static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
676{
677 struct md_rdev *rdev;
678
679 rdev_for_each_rcu(rdev, mddev)
680 if (rdev->desc_nr == nr)
681 return rdev;
682
683 return NULL;
684}
685
686static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
687{
688 struct md_rdev *rdev;
689
690 rdev_for_each(rdev, mddev)
691 if (rdev->bdev->bd_dev == dev)
692 return rdev;
693
694 return NULL;
695}
696
697static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
698{
699 struct md_rdev *rdev;
700
701 rdev_for_each_rcu(rdev, mddev)
702 if (rdev->bdev->bd_dev == dev)
703 return rdev;
704
705 return NULL;
706}
707
708static struct md_personality *find_pers(int level, char *clevel)
709{
710 struct md_personality *pers;
711 list_for_each_entry(pers, &pers_list, list) {
712 if (level != LEVEL_NONE && pers->level == level)
713 return pers;
714 if (strcmp(pers->name, clevel)==0)
715 return pers;
716 }
717 return NULL;
718}
719
720
721static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
722{
723 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
724 return MD_NEW_SIZE_SECTORS(num_sectors);
725}
726
727static int alloc_disk_sb(struct md_rdev * rdev)
728{
729 if (rdev->sb_page)
730 MD_BUG();
731
732 rdev->sb_page = alloc_page(GFP_KERNEL);
733 if (!rdev->sb_page) {
734 printk(KERN_ALERT "md: out of memory.\n");
735 return -ENOMEM;
736 }
737
738 return 0;
739}
740
741void md_rdev_clear(struct md_rdev *rdev)
742{
743 if (rdev->sb_page) {
744 put_page(rdev->sb_page);
745 rdev->sb_loaded = 0;
746 rdev->sb_page = NULL;
747 rdev->sb_start = 0;
748 rdev->sectors = 0;
749 }
750 if (rdev->bb_page) {
751 put_page(rdev->bb_page);
752 rdev->bb_page = NULL;
753 }
754 kfree(rdev->badblocks.page);
755 rdev->badblocks.page = NULL;
756}
757EXPORT_SYMBOL_GPL(md_rdev_clear);
758
759static void super_written(struct bio *bio, int error)
760{
761 struct md_rdev *rdev = bio->bi_private;
762 struct mddev *mddev = rdev->mddev;
763
764 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
765 printk("md: super_written gets error=%d, uptodate=%d\n",
766 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
767 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
768 md_error(mddev, rdev);
769 }
770
771 if (atomic_dec_and_test(&mddev->pending_writes))
772 wake_up(&mddev->sb_wait);
773 bio_put(bio);
774}
775
776void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
777 sector_t sector, int size, struct page *page)
778{
779
780
781
782
783
784
785 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
786
787 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
788 bio->bi_sector = sector;
789 bio_add_page(bio, page, size, 0);
790 bio->bi_private = rdev;
791 bio->bi_end_io = super_written;
792
793 atomic_inc(&mddev->pending_writes);
794 submit_bio(WRITE_FLUSH_FUA, bio);
795}
796
797void md_super_wait(struct mddev *mddev)
798{
799
800 DEFINE_WAIT(wq);
801 for(;;) {
802 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
803 if (atomic_read(&mddev->pending_writes)==0)
804 break;
805 schedule();
806 }
807 finish_wait(&mddev->sb_wait, &wq);
808}
809
810static void bi_complete(struct bio *bio, int error)
811{
812 complete((struct completion*)bio->bi_private);
813}
814
815int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
816 struct page *page, int rw, bool metadata_op)
817{
818 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
819 struct completion event;
820 int ret;
821
822 rw |= REQ_SYNC;
823
824 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
825 rdev->meta_bdev : rdev->bdev;
826 if (metadata_op)
827 bio->bi_sector = sector + rdev->sb_start;
828 else if (rdev->mddev->reshape_position != MaxSector &&
829 (rdev->mddev->reshape_backwards ==
830 (sector >= rdev->mddev->reshape_position)))
831 bio->bi_sector = sector + rdev->new_data_offset;
832 else
833 bio->bi_sector = sector + rdev->data_offset;
834 bio_add_page(bio, page, size, 0);
835 init_completion(&event);
836 bio->bi_private = &event;
837 bio->bi_end_io = bi_complete;
838 submit_bio(rw, bio);
839 wait_for_completion(&event);
840
841 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
842 bio_put(bio);
843 return ret;
844}
845EXPORT_SYMBOL_GPL(sync_page_io);
846
847static int read_disk_sb(struct md_rdev * rdev, int size)
848{
849 char b[BDEVNAME_SIZE];
850 if (!rdev->sb_page) {
851 MD_BUG();
852 return -EINVAL;
853 }
854 if (rdev->sb_loaded)
855 return 0;
856
857
858 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
859 goto fail;
860 rdev->sb_loaded = 1;
861 return 0;
862
863fail:
864 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
865 bdevname(rdev->bdev,b));
866 return -EINVAL;
867}
868
869static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
870{
871 return sb1->set_uuid0 == sb2->set_uuid0 &&
872 sb1->set_uuid1 == sb2->set_uuid1 &&
873 sb1->set_uuid2 == sb2->set_uuid2 &&
874 sb1->set_uuid3 == sb2->set_uuid3;
875}
876
877static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
878{
879 int ret;
880 mdp_super_t *tmp1, *tmp2;
881
882 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
883 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
884
885 if (!tmp1 || !tmp2) {
886 ret = 0;
887 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
888 goto abort;
889 }
890
891 *tmp1 = *sb1;
892 *tmp2 = *sb2;
893
894
895
896
897 tmp1->nr_disks = 0;
898 tmp2->nr_disks = 0;
899
900 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
901abort:
902 kfree(tmp1);
903 kfree(tmp2);
904 return ret;
905}
906
907
908static u32 md_csum_fold(u32 csum)
909{
910 csum = (csum & 0xffff) + (csum >> 16);
911 return (csum & 0xffff) + (csum >> 16);
912}
913
914static unsigned int calc_sb_csum(mdp_super_t * sb)
915{
916 u64 newcsum = 0;
917 u32 *sb32 = (u32*)sb;
918 int i;
919 unsigned int disk_csum, csum;
920
921 disk_csum = sb->sb_csum;
922 sb->sb_csum = 0;
923
924 for (i = 0; i < MD_SB_BYTES/4 ; i++)
925 newcsum += sb32[i];
926 csum = (newcsum & 0xffffffff) + (newcsum>>32);
927
928
929#ifdef CONFIG_ALPHA
930
931
932
933
934
935
936
937
938 sb->sb_csum = md_csum_fold(disk_csum);
939#else
940 sb->sb_csum = disk_csum;
941#endif
942 return csum;
943}
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
/*
 * Operations for one superblock format.  The descriptions below are
 * taken from the super_90_* functions in this file, whose signatures
 * match these members; other formats presumably follow the same contract.
 *
 * load_super:       read and check the superblock of @rdev.  With a
 *                   @refdev, also compare against it.  Returns 1 when
 *                   there is no refdev or @rdev has the higher event
 *                   count, 0 otherwise, negative errno on failure.
 * validate_super:   set up @mddev from the superblock if the array has
 *                   no raid_disks yet, and set @rdev's role and flags.
 * sync_super:       rebuild @rdev's in-memory superblock from @mddev.
 * rdev_size_change: change the usable size of @rdev; returns the new
 *                   size in sectors, or 0 if it cannot be changed.
 * allow_new_offset: non-zero if @new_offset is an acceptable data offset.
 */
struct super_type  {
	char		    *name;
	struct module	    *owner;
	int		    (*load_super)(struct md_rdev *rdev,
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
	unsigned long long  (*rdev_size_change)(struct md_rdev *rdev,
						sector_t num_sectors);
	int		    (*allow_new_offset)(struct md_rdev *rdev,
						unsigned long long new_offset);
};
991
992
993
994
995
996
997
998
999
1000int md_check_no_bitmap(struct mddev *mddev)
1001{
1002 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1003 return 0;
1004 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1005 mdname(mddev), mddev->pers->name);
1006 return 1;
1007}
1008EXPORT_SYMBOL(md_check_no_bitmap);
1009
1010
1011
1012
1013static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1014{
1015 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1016 mdp_super_t *sb;
1017 int ret;
1018
1019
1020
1021
1022
1023
1024
1025 rdev->sb_start = calc_dev_sboffset(rdev);
1026
1027 ret = read_disk_sb(rdev, MD_SB_BYTES);
1028 if (ret) return ret;
1029
1030 ret = -EINVAL;
1031
1032 bdevname(rdev->bdev, b);
1033 sb = page_address(rdev->sb_page);
1034
1035 if (sb->md_magic != MD_SB_MAGIC) {
1036 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1037 b);
1038 goto abort;
1039 }
1040
1041 if (sb->major_version != 0 ||
1042 sb->minor_version < 90 ||
1043 sb->minor_version > 91) {
1044 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1045 sb->major_version, sb->minor_version,
1046 b);
1047 goto abort;
1048 }
1049
1050 if (sb->raid_disks <= 0)
1051 goto abort;
1052
1053 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1054 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1055 b);
1056 goto abort;
1057 }
1058
1059 rdev->preferred_minor = sb->md_minor;
1060 rdev->data_offset = 0;
1061 rdev->new_data_offset = 0;
1062 rdev->sb_size = MD_SB_BYTES;
1063 rdev->badblocks.shift = -1;
1064
1065 if (sb->level == LEVEL_MULTIPATH)
1066 rdev->desc_nr = -1;
1067 else
1068 rdev->desc_nr = sb->this_disk.number;
1069
1070 if (!refdev) {
1071 ret = 1;
1072 } else {
1073 __u64 ev1, ev2;
1074 mdp_super_t *refsb = page_address(refdev->sb_page);
1075 if (!uuid_equal(refsb, sb)) {
1076 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1077 b, bdevname(refdev->bdev,b2));
1078 goto abort;
1079 }
1080 if (!sb_equal(refsb, sb)) {
1081 printk(KERN_WARNING "md: %s has same UUID"
1082 " but different superblock to %s\n",
1083 b, bdevname(refdev->bdev, b2));
1084 goto abort;
1085 }
1086 ev1 = md_event(sb);
1087 ev2 = md_event(refsb);
1088 if (ev1 > ev2)
1089 ret = 1;
1090 else
1091 ret = 0;
1092 }
1093 rdev->sectors = rdev->sb_start;
1094
1095
1096
1097
1098 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1099 rdev->sectors = (2ULL << 32) - 2;
1100
1101 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1102
1103 ret = -EINVAL;
1104
1105 abort:
1106 return ret;
1107}
1108
1109
1110
1111
1112static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1113{
1114 mdp_disk_t *desc;
1115 mdp_super_t *sb = page_address(rdev->sb_page);
1116 __u64 ev1 = md_event(sb);
1117
1118 rdev->raid_disk = -1;
1119 clear_bit(Faulty, &rdev->flags);
1120 clear_bit(In_sync, &rdev->flags);
1121 clear_bit(WriteMostly, &rdev->flags);
1122
1123 if (mddev->raid_disks == 0) {
1124 mddev->major_version = 0;
1125 mddev->minor_version = sb->minor_version;
1126 mddev->patch_version = sb->patch_version;
1127 mddev->external = 0;
1128 mddev->chunk_sectors = sb->chunk_size >> 9;
1129 mddev->ctime = sb->ctime;
1130 mddev->utime = sb->utime;
1131 mddev->level = sb->level;
1132 mddev->clevel[0] = 0;
1133 mddev->layout = sb->layout;
1134 mddev->raid_disks = sb->raid_disks;
1135 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1136 mddev->events = ev1;
1137 mddev->bitmap_info.offset = 0;
1138 mddev->bitmap_info.space = 0;
1139
1140 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1141 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1142 mddev->reshape_backwards = 0;
1143
1144 if (mddev->minor_version >= 91) {
1145 mddev->reshape_position = sb->reshape_position;
1146 mddev->delta_disks = sb->delta_disks;
1147 mddev->new_level = sb->new_level;
1148 mddev->new_layout = sb->new_layout;
1149 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1150 if (mddev->delta_disks < 0)
1151 mddev->reshape_backwards = 1;
1152 } else {
1153 mddev->reshape_position = MaxSector;
1154 mddev->delta_disks = 0;
1155 mddev->new_level = mddev->level;
1156 mddev->new_layout = mddev->layout;
1157 mddev->new_chunk_sectors = mddev->chunk_sectors;
1158 }
1159
1160 if (sb->state & (1<<MD_SB_CLEAN))
1161 mddev->recovery_cp = MaxSector;
1162 else {
1163 if (sb->events_hi == sb->cp_events_hi &&
1164 sb->events_lo == sb->cp_events_lo) {
1165 mddev->recovery_cp = sb->recovery_cp;
1166 } else
1167 mddev->recovery_cp = 0;
1168 }
1169
1170 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1171 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1172 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1173 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1174
1175 mddev->max_disks = MD_SB_DISKS;
1176
1177 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1178 mddev->bitmap_info.file == NULL) {
1179 mddev->bitmap_info.offset =
1180 mddev->bitmap_info.default_offset;
1181 mddev->bitmap_info.space =
1182 mddev->bitmap_info.space;
1183 }
1184
1185 } else if (mddev->pers == NULL) {
1186
1187
1188 ++ev1;
1189 if (sb->disks[rdev->desc_nr].state & (
1190 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1191 if (ev1 < mddev->events)
1192 return -EINVAL;
1193 } else if (mddev->bitmap) {
1194
1195
1196
1197 if (ev1 < mddev->bitmap->events_cleared)
1198 return 0;
1199 } else {
1200 if (ev1 < mddev->events)
1201
1202 return 0;
1203 }
1204
1205 if (mddev->level != LEVEL_MULTIPATH) {
1206 desc = sb->disks + rdev->desc_nr;
1207
1208 if (desc->state & (1<<MD_DISK_FAULTY))
1209 set_bit(Faulty, &rdev->flags);
1210 else if (desc->state & (1<<MD_DISK_SYNC)
1211) {
1212 set_bit(In_sync, &rdev->flags);
1213 rdev->raid_disk = desc->raid_disk;
1214 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1215
1216
1217
1218 if (mddev->minor_version >= 91) {
1219 rdev->recovery_offset = 0;
1220 rdev->raid_disk = desc->raid_disk;
1221 }
1222 }
1223 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1224 set_bit(WriteMostly, &rdev->flags);
1225 } else
1226 set_bit(In_sync, &rdev->flags);
1227 return 0;
1228}
1229
1230
1231
1232
1233static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1234{
1235 mdp_super_t *sb;
1236 struct md_rdev *rdev2;
1237 int next_spare = mddev->raid_disks;
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250 int i;
1251 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1252
1253 rdev->sb_size = MD_SB_BYTES;
1254
1255 sb = page_address(rdev->sb_page);
1256
1257 memset(sb, 0, sizeof(*sb));
1258
1259 sb->md_magic = MD_SB_MAGIC;
1260 sb->major_version = mddev->major_version;
1261 sb->patch_version = mddev->patch_version;
1262 sb->gvalid_words = 0;
1263 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1264 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1265 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1266 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1267
1268 sb->ctime = mddev->ctime;
1269 sb->level = mddev->level;
1270 sb->size = mddev->dev_sectors / 2;
1271 sb->raid_disks = mddev->raid_disks;
1272 sb->md_minor = mddev->md_minor;
1273 sb->not_persistent = 0;
1274 sb->utime = mddev->utime;
1275 sb->state = 0;
1276 sb->events_hi = (mddev->events>>32);
1277 sb->events_lo = (u32)mddev->events;
1278
1279 if (mddev->reshape_position == MaxSector)
1280 sb->minor_version = 90;
1281 else {
1282 sb->minor_version = 91;
1283 sb->reshape_position = mddev->reshape_position;
1284 sb->new_level = mddev->new_level;
1285 sb->delta_disks = mddev->delta_disks;
1286 sb->new_layout = mddev->new_layout;
1287 sb->new_chunk = mddev->new_chunk_sectors << 9;
1288 }
1289 mddev->minor_version = sb->minor_version;
1290 if (mddev->in_sync)
1291 {
1292 sb->recovery_cp = mddev->recovery_cp;
1293 sb->cp_events_hi = (mddev->events>>32);
1294 sb->cp_events_lo = (u32)mddev->events;
1295 if (mddev->recovery_cp == MaxSector)
1296 sb->state = (1<< MD_SB_CLEAN);
1297 } else
1298 sb->recovery_cp = 0;
1299
1300 sb->layout = mddev->layout;
1301 sb->chunk_size = mddev->chunk_sectors << 9;
1302
1303 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1304 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1305
1306 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1307 rdev_for_each(rdev2, mddev) {
1308 mdp_disk_t *d;
1309 int desc_nr;
1310 int is_active = test_bit(In_sync, &rdev2->flags);
1311
1312 if (rdev2->raid_disk >= 0 &&
1313 sb->minor_version >= 91)
1314
1315
1316
1317
1318 is_active = 1;
1319 if (rdev2->raid_disk < 0 ||
1320 test_bit(Faulty, &rdev2->flags))
1321 is_active = 0;
1322 if (is_active)
1323 desc_nr = rdev2->raid_disk;
1324 else
1325 desc_nr = next_spare++;
1326 rdev2->desc_nr = desc_nr;
1327 d = &sb->disks[rdev2->desc_nr];
1328 nr_disks++;
1329 d->number = rdev2->desc_nr;
1330 d->major = MAJOR(rdev2->bdev->bd_dev);
1331 d->minor = MINOR(rdev2->bdev->bd_dev);
1332 if (is_active)
1333 d->raid_disk = rdev2->raid_disk;
1334 else
1335 d->raid_disk = rdev2->desc_nr;
1336 if (test_bit(Faulty, &rdev2->flags))
1337 d->state = (1<<MD_DISK_FAULTY);
1338 else if (is_active) {
1339 d->state = (1<<MD_DISK_ACTIVE);
1340 if (test_bit(In_sync, &rdev2->flags))
1341 d->state |= (1<<MD_DISK_SYNC);
1342 active++;
1343 working++;
1344 } else {
1345 d->state = 0;
1346 spare++;
1347 working++;
1348 }
1349 if (test_bit(WriteMostly, &rdev2->flags))
1350 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1351 }
1352
1353 for (i=0 ; i < mddev->raid_disks ; i++) {
1354 mdp_disk_t *d = &sb->disks[i];
1355 if (d->state == 0 && d->number == 0) {
1356 d->number = i;
1357 d->raid_disk = i;
1358 d->state = (1<<MD_DISK_REMOVED);
1359 d->state |= (1<<MD_DISK_FAULTY);
1360 failed++;
1361 }
1362 }
1363 sb->nr_disks = nr_disks;
1364 sb->active_disks = active;
1365 sb->working_disks = working;
1366 sb->failed_disks = failed;
1367 sb->spare_disks = spare;
1368
1369 sb->this_disk = sb->disks[rdev->desc_nr];
1370 sb->sb_csum = calc_sb_csum(sb);
1371}
1372
1373
1374
1375
1376static unsigned long long
1377super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1378{
1379 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1380 return 0;
1381 if (rdev->mddev->bitmap_info.offset)
1382 return 0;
1383 rdev->sb_start = calc_dev_sboffset(rdev);
1384 if (!num_sectors || num_sectors > rdev->sb_start)
1385 num_sectors = rdev->sb_start;
1386
1387
1388
1389 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1390 num_sectors = (2ULL << 32) - 2;
1391 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1392 rdev->sb_page);
1393 md_super_wait(rdev->mddev);
1394 return num_sectors;
1395}
1396
/*
 * 0.90 metadata: the only acceptable data offset is 0.
 * Returns 1 for a @new_offset of zero, 0 for anything else.
 */
static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	if (new_offset != 0)
		return 0;

	return 1;
}
1403
1404
1405
1406
1407
1408static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1409{
1410 __le32 disk_csum;
1411 u32 csum;
1412 unsigned long long newcsum;
1413 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1414 __le32 *isuper = (__le32*)sb;
1415
1416 disk_csum = sb->sb_csum;
1417 sb->sb_csum = 0;
1418 newcsum = 0;
1419 for (; size >= 4; size -= 4)
1420 newcsum += le32_to_cpu(*isuper++);
1421
1422 if (size == 2)
1423 newcsum += le16_to_cpu(*(__le16*) isuper);
1424
1425 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1426 sb->sb_csum = disk_csum;
1427 return cpu_to_le32(csum);
1428}
1429
/*
 * Forward declaration: super_1_load() below calls this while importing
 * the on-disk bad-block log, treating a return value of 0 as failure.
 */
static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
			    int acknowledged);
/*
 * Load and sanity-check the version-1 superblock of @rdev.
 *
 * @minor_version selects where the superblock is expected (see the
 * switch below).  When @refdev is non-NULL the loaded superblock is
 * also compared against @refdev's superblock.
 *
 * Returns a negative errno on failure.  On success returns 1 when
 * there is no @refdev or this device's event count is strictly greater
 * than @refdev's, and 0 otherwise.
 */
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Expected superblock position, in sectors:
	 *  0: 16 sectors before the end of the device, rounded down to
	 *     a multiple of 8 sectors
	 *  1: the very start of the device
	 *  2: sector 8
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/*
	 * Read the superblock into rdev->sb_page; 4096 is the size handed
	 * to read_disk_sb() (the real size is worked out further down).
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;


	sb = page_address(rdev->sb_page);

	/*
	 * Basic identity checks: magic, major version, a device table that
	 * fits in 4096 bytes, a recorded offset matching where we read it
	 * from, and no feature bits outside MD_FEATURE_ALL.
	 */
	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		printk("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		printk("md: data_size too small on %s\n",
		       bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	/*
	 * All padding must be zero: pad3[0] is checked directly and the
	 * overlapping memcmp() verifies every later byte equals the one
	 * before it.
	 */
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))

		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	/* new_offset is a signed delta, applied only during an active reshape. */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	/*
	 * Real superblock size: 256 bytes plus 2 bytes per device slot,
	 * rounded up to the device's logical block size.
	 */
	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	/*
	 * For minor versions 1 and 2 neither the current nor the new data
	 * offset may lie before the end of the superblock.
	 */
	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/*
		 * The superblock advertises a bad-block log and none has been
		 * loaded yet: read it into bb_page and import each entry.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		/* The log must fit in the single page allocated for it. */
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		/*
		 * NOTE(review): the trailing 'true' presumably makes the sector
		 * relative to the superblock location - confirm in sync_page_io().
		 */
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, READ, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		/* 8-byte entries: (sectors << 9) / 8 of them. */
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			/* Low 10 bits hold the length, the rest the start sector. */
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			/* An all-ones entry terminates the list. */
			if (bb + 1 == 0)
				break;
			if (md_set_badblocks(&rdev->badblocks,
					     sector, count, 1) == 0)
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		rdev->badblocks.shift = 0;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		/* Both devices must agree on the array's identity and geometry. */
		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			printk(KERN_WARNING "md: %s has strangely different"
				" superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	/*
	 * Space available for data: from data_offset to the end of the
	 * device for minor versions 1 and 2, or everything before the
	 * superblock for minor version 0.  It must cover the recorded
	 * data_size.
	 */
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}
1598
/*
 * Apply the version-1 superblock held in rdev->sb_page to @mddev and
 * @rdev.
 *
 * If @mddev has no raid_disks yet, the array-wide settings are taken
 * from this superblock.  Otherwise the device's event count is checked
 * against the array.  Finally the device's role and flags are derived
 * from the superblock.
 *
 * Returns 0 on success (including the cases where the device is
 * accepted with raid_disk left at -1), or -EINVAL when a device with
 * an active role is too far behind an array that is being assembled.
 */
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	/* Start from a clean slate; role and flags are rebuilt below. */
	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		/* No geometry recorded in mddev yet: take it from this superblock. */
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		/* Only the low 32 bits of the on-disk times are kept. */
		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/*
		 * Default bitmap placement, in sectors: 1024 bytes after the
		 * superblock, with the remaining 3072 bytes of a 4096-byte
		 * region as space.
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		/* Same limit super_1_load() enforces on sb->max_dev. */
		mddev->max_disks = (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/*
			 * Estimate the space available to the bitmap:
			 *  - minor version > 0: none (0)
			 *  - otherwise, positive offset: 8 - offset sectors
			 *  - otherwise: -offset sectors, i.e. the gap between
			 *    the bitmap and the superblock
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			/*
			 * A shrinking reshape always runs backwards; one that
			 * keeps the disk count does so only if flagged.
			 */
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			/* No reshape in progress: "new" settings mirror current ones. */
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

	} else if (mddev->pers == NULL) {
		/*
		 * No personality is set on the array (mddev->pers is NULL).
		 * A device holding a real role (role value below 0xfffe) may
		 * be at most one event behind; other devices are not checked.
		 */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/*
		 * Array with a bitmap: a device older than the point at which
		 * bitmap events were cleared is accepted with no role
		 * (raid_disk stays -1).
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
	} else {
		if (ev1 < mddev->events)
			/* Out-of-date device: accept it but leave raid_disk at -1. */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			/* No usable slot in the role table: treat as role-less. */
			role = 0xffff;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case 0xffff:
			/* no role: raid_disk stays -1 */
			break;
		case 0xfffe:
			/* marked faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		default:
			/*
			 * Active slot.  With a recorded recovery offset the device
			 * is still recovering, so In_sync is not set.
			 */
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET))
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
			else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else
		/* Multipath members are always marked In_sync. */
		set_bit(In_sync, &rdev->flags);

	return 0;
}
1726
/*
 * Refresh the in-memory version-1 superblock of @rdev (rdev->sb_page)
 * from the current state of @mddev and @rdev, then recompute its
 * checksum.  Nothing is written to disk here.
 */
static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;


	sb = page_address(rdev->sb_page);

	/* Reset everything that is rebuilt from scratch below. */
	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	/* The resync checkpoint is only recorded while the array is in_sync. */
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	/* The bitmap offset is recorded only when no bitmap file is set. */
	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	/* A device with a slot that is not yet In_sync records how far it got. */
	if (rdev->raid_disk >= 0 &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
	}
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		/* A reshape is in progress: record the target geometry. */
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		/*
		 * Direction is only flagged when delta_disks is 0; see
		 * super_1_validate(), which infers it from the sign otherwise.
		 */
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			/* Stored as a delta from data_offset. */
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (rdev->badblocks.count == 0)
		/* no bad blocks: nothing to record */ ;
	else if (sb->bblog_offset == 0)
		/* Bad blocks exist but the superblock has no log area: fail the device. */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

			/*
			 * Copy the table under the seqlock read side; if it was
			 * modified while we copied, start over.
			 */
retry:
			seq = read_seqbegin(&bb->lock);

			/* All-ones entries mark the end of the list (see super_1_load). */
			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				/* On-disk form: start sector << 10 | length. */
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			/*
			 * Record where and how much to write; md_update_sb()
			 * writes bb_page when bb->size is non-zero.
			 */
			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	/* Highest descriptor number in use, plus one. */
	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		/*
		 * The role table must grow: recompute the superblock size,
		 * rounded up to the logical block size as in super_1_load().
		 */
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	/* Default every slot to 0xfffe (faulty), then fill in known devices. */
	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(0xfffe);

	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(0xfffe);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			/* no slot assigned: 0xffff */
			sb->dev_roles[i] = cpu_to_le16(0xffff);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}
1864
1865static unsigned long long
1866super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1867{
1868 struct mdp_superblock_1 *sb;
1869 sector_t max_sectors;
1870 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1871 return 0;
1872 if (rdev->data_offset != rdev->new_data_offset)
1873 return 0;
1874 if (rdev->sb_start < rdev->data_offset) {
1875
1876 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1877 max_sectors -= rdev->data_offset;
1878 if (!num_sectors || num_sectors > max_sectors)
1879 num_sectors = max_sectors;
1880 } else if (rdev->mddev->bitmap_info.offset) {
1881
1882 return 0;
1883 } else {
1884
1885 sector_t sb_start;
1886 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1887 sb_start &= ~(sector_t)(4*2 - 1);
1888 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1889 if (!num_sectors || num_sectors > max_sectors)
1890 num_sectors = max_sectors;
1891 rdev->sb_start = sb_start;
1892 }
1893 sb = page_address(rdev->sb_page);
1894 sb->data_size = cpu_to_le64(num_sectors);
1895 sb->super_offset = rdev->sb_start;
1896 sb->sb_csum = calc_sb_1_csum(sb);
1897 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1898 rdev->sb_page);
1899 md_super_wait(rdev->mddev);
1900 return num_sectors;
1901
1902}
1903
1904static int
1905super_1_allow_new_offset(struct md_rdev *rdev,
1906 unsigned long long new_offset)
1907{
1908
1909 struct bitmap *bitmap;
1910 if (new_offset >= rdev->data_offset)
1911 return 1;
1912
1913
1914
1915 if (rdev->mddev->minor_version == 0)
1916 return 1;
1917
1918
1919
1920
1921
1922
1923
1924 if (rdev->sb_start + (32+4)*2 > new_offset)
1925 return 0;
1926 bitmap = rdev->mddev->bitmap;
1927 if (bitmap && !rdev->mddev->bitmap_info.file &&
1928 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1929 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1930 return 0;
1931 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1932 return 0;
1933
1934 return 1;
1935}
1936
/*
 * Table of supported superblock formats, indexed by the array's
 * major_version (see sync_super()): entry 0 is the 0.90 format,
 * entry 1 the version-1 format.
 */
static struct super_type super_types[] = {
	[0] = {
		.name	= "0.90.0",
		.owner	= THIS_MODULE,
		.load_super	    = super_90_load,
		.validate_super	    = super_90_validate,
		.sync_super	    = super_90_sync,
		.rdev_size_change   = super_90_rdev_size_change,
		.allow_new_offset   = super_90_allow_new_offset,
	},
	[1] = {
		.name	= "md-1",
		.owner	= THIS_MODULE,
		.load_super	    = super_1_load,
		.validate_super	    = super_1_validate,
		.sync_super	    = super_1_sync,
		.rdev_size_change   = super_1_rdev_size_change,
		.allow_new_offset   = super_1_allow_new_offset,
	},
};
1957
1958static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1959{
1960 if (mddev->sync_super) {
1961 mddev->sync_super(mddev, rdev);
1962 return;
1963 }
1964
1965 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1966
1967 super_types[mddev->major_version].sync_super(mddev, rdev);
1968}
1969
1970static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1971{
1972 struct md_rdev *rdev, *rdev2;
1973
1974 rcu_read_lock();
1975 rdev_for_each_rcu(rdev, mddev1)
1976 rdev_for_each_rcu(rdev2, mddev2)
1977 if (rdev->bdev->bd_contains ==
1978 rdev2->bdev->bd_contains) {
1979 rcu_read_unlock();
1980 return 1;
1981 }
1982 rcu_read_unlock();
1983 return 0;
1984}
1985
/*
 * File-scope list head.
 * NOTE(review): presumably collects component devices waiting to be
 * assembled into arrays - its users are outside this part of the file;
 * confirm.
 */
static LIST_HEAD(pending_raid_disks);
1987
1988
1989
1990
1991
1992
1993
1994
1995int md_integrity_register(struct mddev *mddev)
1996{
1997 struct md_rdev *rdev, *reference = NULL;
1998
1999 if (list_empty(&mddev->disks))
2000 return 0;
2001 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2002 return 0;
2003 rdev_for_each(rdev, mddev) {
2004
2005 if (test_bit(Faulty, &rdev->flags))
2006 continue;
2007 if (rdev->raid_disk < 0)
2008 continue;
2009 if (!reference) {
2010
2011 reference = rdev;
2012 continue;
2013 }
2014
2015 if (blk_integrity_compare(reference->bdev->bd_disk,
2016 rdev->bdev->bd_disk) < 0)
2017 return -EINVAL;
2018 }
2019 if (!reference || !bdev_get_integrity(reference->bdev))
2020 return 0;
2021
2022
2023
2024
2025 if (blk_integrity_register(mddev->gendisk,
2026 bdev_get_integrity(reference->bdev)) != 0) {
2027 printk(KERN_ERR "md: failed to register integrity for %s\n",
2028 mdname(mddev));
2029 return -EINVAL;
2030 }
2031 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2032 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2033 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2034 mdname(mddev));
2035 return -EINVAL;
2036 }
2037 return 0;
2038}
2039EXPORT_SYMBOL(md_integrity_register);
2040
2041
2042void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2043{
2044 struct blk_integrity *bi_rdev;
2045 struct blk_integrity *bi_mddev;
2046
2047 if (!mddev->gendisk)
2048 return;
2049
2050 bi_rdev = bdev_get_integrity(rdev->bdev);
2051 bi_mddev = blk_get_integrity(mddev->gendisk);
2052
2053 if (!bi_mddev)
2054 return;
2055 if (rdev->raid_disk < 0)
2056 return;
2057 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2058 rdev->bdev->bd_disk) >= 0)
2059 return;
2060 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2061 blk_integrity_unregister(mddev->gendisk);
2062}
2063EXPORT_SYMBOL(md_integrity_add_rdev);
2064
/*
 * Attach @rdev to @mddev: validate it, pick or verify its descriptor
 * number, register its kobject under the array and add it to the
 * array's device list.
 *
 * Returns 0 on success, or:
 *  -EINVAL  @rdev already belongs to an array
 *  -EEXIST  the same block device is already a member
 *  -ENOSPC  @rdev is too small for a running array with level > 0
 *  -EBUSY   the descriptor number is taken or exceeds max_disks
 *  or the error returned by kobject_add().
 */
static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
{
	char b[BDEVNAME_SIZE];
	struct kobject *ko;
	char *s;
	int err;

	if (rdev->mddev) {
		MD_BUG();
		return -EINVAL;
	}

	/* Refuse a block device that is already part of this array. */
	if (find_rdev(mddev, rdev->bdev->bd_dev))
		return -EEXIST;

	/*
	 * If this device is smaller than the array's per-device size (or
	 * that size is still unset), either adopt the device's size or,
	 * for a running array, refuse it.
	 */
	if (rdev->sectors && (mddev->dev_sectors == 0 ||
			rdev->sectors < mddev->dev_sectors)) {
		if (mddev->pers) {
			/*
			 * A running array cannot change its size here.  Levels
			 * <= 0 are let through without failing.
			 */
			if (mddev->level > 0)
				return -ENOSPC;
		} else
			mddev->dev_sectors = rdev->sectors;
	}

	/*
	 * Make sure desc_nr is unique: a negative value means "assign
	 * one" (starting at raid_disks for a running array, else 0);
	 * otherwise the given number must not be in use.
	 */
	if (rdev->desc_nr < 0) {
		int choice = 0;
		if (mddev->pers) choice = mddev->raid_disks;
		while (find_rdev_nr(mddev, choice))
			choice++;
		rdev->desc_nr = choice;
	} else {
		if (find_rdev_nr(mddev, rdev->desc_nr))
			return -EBUSY;
	}
	if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
		printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
		       mdname(mddev), mddev->max_disks);
		return -EBUSY;
	}
	/* Build the sysfs name: the device name with every '/' replaced by '!'. */
	bdevname(rdev->bdev,b);
	while ( (s=strchr(b, '/')) != NULL)
		*s = '!';

	rdev->mddev = mddev;
	printk(KERN_INFO "md: bind<%s>\n", b);

	if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
		goto fail;

	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure to create the "block" link is deliberately ignored */;
	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");

	list_add_rcu(&rdev->same_set, &mddev->disks);
	bd_link_disk_holder(rdev->bdev, mddev->gendisk);

	/*
	 * NOTE(review): bumping recovery_disabled presumably invalidates a
	 * previously recorded value so recovery can be retried with the new
	 * device - confirm against the users of this field.
	 */
	mddev->recovery_disabled++;

	return 0;

 fail:
	/* Note: rdev->mddev is left pointing at mddev on this path. */
	printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
	       b, mdname(mddev));
	return err;
}
2142
2143static void md_delayed_delete(struct work_struct *ws)
2144{
2145 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2146 kobject_del(&rdev->kobj);
2147 kobject_put(&rdev->kobj);
2148}
2149
/*
 * Detach @rdev from the array it belongs to: undo the holder link,
 * remove it from the array's device list, drop its sysfs link and
 * state handle, forget its bad-block count, and schedule deletion of
 * its kobject.
 */
static void unbind_rdev_from_array(struct md_rdev * rdev)
{
	char b[BDEVNAME_SIZE];
	if (!rdev->mddev) {
		MD_BUG();
		return;
	}
	bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
	list_del_rcu(&rdev->same_set);
	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
	rdev->mddev = NULL;
	sysfs_remove_link(&rdev->kobj, "block");
	sysfs_put(rdev->sysfs_state);
	rdev->sysfs_state = NULL;
	rdev->badblocks.count = 0;
	/*
	 * Wait for RCU readers that may still see the entry removed with
	 * list_del_rcu() above.  The kobject is then deleted from a work
	 * item (md_delayed_delete) instead of inline; the extra reference
	 * taken here is dropped by that handler.
	 * NOTE(review): deferring presumably avoids deadlocking against a
	 * sysfs write that triggered this call - confirm.
	 */
	synchronize_rcu();
	INIT_WORK(&rdev->del_work, md_delayed_delete);
	kobject_get(&rdev->kobj);
	queue_work(md_misc_wq, &rdev->del_work);
}
2174
2175
2176
2177
2178
2179
2180static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2181{
2182 int err = 0;
2183 struct block_device *bdev;
2184 char b[BDEVNAME_SIZE];
2185
2186 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2187 shared ? (struct md_rdev *)lock_rdev : rdev);
2188 if (IS_ERR(bdev)) {
2189 printk(KERN_ERR "md: could not open %s.\n",
2190 __bdevname(dev, b));
2191 return PTR_ERR(bdev);
2192 }
2193 rdev->bdev = bdev;
2194 return err;
2195}
2196
2197static void unlock_rdev(struct md_rdev *rdev)
2198{
2199 struct block_device *bdev = rdev->bdev;
2200 rdev->bdev = NULL;
2201 if (!bdev)
2202 MD_BUG();
2203 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2204}
2205
/*
 * Prototype needed by export_rdev() below, which calls this for
 * auto-detected devices when md is not built as a module.
 */
void md_autodetect_dev(dev_t dev);
2207
/*
 * Give up an rdev that is not bound to any array: clear its per-device
 * state, release its block device and drop a kobject reference.
 * Calling this on an rdev that still has ->mddev set is reported via
 * MD_BUG().
 */
static void export_rdev(struct md_rdev * rdev)
{
	char b[BDEVNAME_SIZE];
	printk(KERN_INFO "md: export_rdev(%s)\n",
		bdevname(rdev->bdev,b));
	if (rdev->mddev)
		MD_BUG();
	md_rdev_clear(rdev);
#ifndef MODULE
	/*
	 * Built-in only: hand an auto-detected device back to
	 * md_autodetect_dev().  This must happen before unlock_rdev(),
	 * which clears rdev->bdev.
	 */
	if (test_bit(AutoDetected, &rdev->flags))
		md_autodetect_dev(rdev->bdev->bd_dev);
#endif
	unlock_rdev(rdev);
	kobject_put(&rdev->kobj);
}
2223
/*
 * Remove @rdev from its array completely: unbind it from the array,
 * then export (release) the device.
 */
static void kick_rdev_from_array(struct md_rdev * rdev)
{
	unbind_rdev_from_array(rdev);
	export_rdev(rdev);
}
2229
2230static void export_array(struct mddev *mddev)
2231{
2232 struct md_rdev *rdev, *tmp;
2233
2234 rdev_for_each_safe(rdev, tmp, mddev) {
2235 if (!rdev->mddev) {
2236 MD_BUG();
2237 continue;
2238 }
2239 kick_rdev_from_array(rdev);
2240 }
2241 if (!list_empty(&mddev->disks))
2242 MD_BUG();
2243 mddev->raid_disks = 0;
2244 mddev->major_version = 0;
2245}
2246
2247static void print_desc(mdp_disk_t *desc)
2248{
2249 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2250 desc->major,desc->minor,desc->raid_disk,desc->state);
2251}
2252
2253static void print_sb_90(mdp_super_t *sb)
2254{
2255 int i;
2256
2257 printk(KERN_INFO
2258 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2259 sb->major_version, sb->minor_version, sb->patch_version,
2260 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2261 sb->ctime);
2262 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2263 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2264 sb->md_minor, sb->layout, sb->chunk_size);
2265 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2266 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2267 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2268 sb->failed_disks, sb->spare_disks,
2269 sb->sb_csum, (unsigned long)sb->events_lo);
2270
2271 printk(KERN_INFO);
2272 for (i = 0; i < MD_SB_DISKS; i++) {
2273 mdp_disk_t *desc;
2274
2275 desc = sb->disks + i;
2276 if (desc->number || desc->major || desc->minor ||
2277 desc->raid_disk || (desc->state && (desc->state != 4))) {
2278 printk(" D %2d: ", i);
2279 print_desc(desc);
2280 }
2281 }
2282 printk(KERN_INFO "md: THIS: ");
2283 print_desc(&sb->this_disk);
2284}
2285
2286static void print_sb_1(struct mdp_superblock_1 *sb)
2287{
2288 __u8 *uuid;
2289
2290 uuid = sb->set_uuid;
2291 printk(KERN_INFO
2292 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2293 "md: Name: \"%s\" CT:%llu\n",
2294 le32_to_cpu(sb->major_version),
2295 le32_to_cpu(sb->feature_map),
2296 uuid,
2297 sb->set_name,
2298 (unsigned long long)le64_to_cpu(sb->ctime)
2299 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2300
2301 uuid = sb->device_uuid;
2302 printk(KERN_INFO
2303 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2304 " RO:%llu\n"
2305 "md: Dev:%08x UUID: %pU\n"
2306 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2307 "md: (MaxDev:%u) \n",
2308 le32_to_cpu(sb->level),
2309 (unsigned long long)le64_to_cpu(sb->size),
2310 le32_to_cpu(sb->raid_disks),
2311 le32_to_cpu(sb->layout),
2312 le32_to_cpu(sb->chunksize),
2313 (unsigned long long)le64_to_cpu(sb->data_offset),
2314 (unsigned long long)le64_to_cpu(sb->data_size),
2315 (unsigned long long)le64_to_cpu(sb->super_offset),
2316 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2317 le32_to_cpu(sb->dev_number),
2318 uuid,
2319 sb->devflags,
2320 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2321 (unsigned long long)le64_to_cpu(sb->events),
2322 (unsigned long long)le64_to_cpu(sb->resync_offset),
2323 le32_to_cpu(sb->sb_csum),
2324 le32_to_cpu(sb->max_dev)
2325 );
2326}
2327
2328static void print_rdev(struct md_rdev *rdev, int major_version)
2329{
2330 char b[BDEVNAME_SIZE];
2331 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2332 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2333 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2334 rdev->desc_nr);
2335 if (rdev->sb_loaded) {
2336 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2337 switch (major_version) {
2338 case 0:
2339 print_sb_90(page_address(rdev->sb_page));
2340 break;
2341 case 1:
2342 print_sb_1(page_address(rdev->sb_page));
2343 break;
2344 }
2345 } else
2346 printk(KERN_INFO "md: no rdev superblock!\n");
2347}
2348
2349static void md_print_devices(void)
2350{
2351 struct list_head *tmp;
2352 struct md_rdev *rdev;
2353 struct mddev *mddev;
2354 char b[BDEVNAME_SIZE];
2355
2356 printk("\n");
2357 printk("md: **********************************\n");
2358 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2359 printk("md: **********************************\n");
2360 for_each_mddev(mddev, tmp) {
2361
2362 if (mddev->bitmap)
2363 bitmap_print_sb(mddev->bitmap);
2364 else
2365 printk("%s: ", mdname(mddev));
2366 rdev_for_each(rdev, mddev)
2367 printk("<%s>", bdevname(rdev->bdev,b));
2368 printk("\n");
2369
2370 rdev_for_each(rdev, mddev)
2371 print_rdev(rdev, mddev->major_version);
2372 }
2373 printk("md: **********************************\n");
2374 printk("\n");
2375}
2376
2377
2378static void sync_sbs(struct mddev * mddev, int nospares)
2379{
2380
2381
2382
2383
2384
2385
2386 struct md_rdev *rdev;
2387 rdev_for_each(rdev, mddev) {
2388 if (rdev->sb_events == mddev->events ||
2389 (nospares &&
2390 rdev->raid_disk < 0 &&
2391 rdev->sb_events+1 == mddev->events)) {
2392
2393 rdev->sb_loaded = 2;
2394 } else {
2395 sync_super(mddev, rdev);
2396 rdev->sb_loaded = 1;
2397 }
2398 }
2399}
2400
/*
 * Update the superblocks of @mddev's members and write them out.
 *
 * @force_change forces the event count forward and the update of all
 * members, including spares.  For a read-only array the request is
 * only recorded (MD_CHANGE_DEVS).  For a non-persistent array nothing
 * is written; pending-change flags are cleared and waiters woken.
 *
 * The write-out is repeated (label "repeat") if the array's in_sync
 * state changed or MD_CHANGE_DEVS was set again while writing.
 */
static void md_update_sb(struct mddev * mddev, int force_change)
{
	struct md_rdev *rdev;
	int sync_req;
	int nospares = 0;
	int any_badblocks_changed = 0;

	if (mddev->ro) {
		/* Read-only: remember that a forced update is wanted. */
		if (force_change)
			set_bit(MD_CHANGE_DEVS, &mddev->flags);
		return;
	}
repeat:
	/*
	 * Bring per-device recovery offsets up to the array-wide resync
	 * progress for members that hold a slot but are not yet In_sync
	 * (only when delta_disks is not negative).
	 */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk >= 0 &&
		    mddev->delta_disks >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    mddev->curr_resync_completed > rdev->recovery_offset)
				rdev->recovery_offset = mddev->curr_resync_completed;

	}
	if (!mddev->persistent) {
		/* No persistent metadata is kept: just clear flags and wake waiters. */
		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (!mddev->external) {
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
			rdev_for_each(rdev, mddev) {
				/*
				 * Changed bad blocks cannot be recorded here, so
				 * acknowledge them and fail the device.
				 */
				if (rdev->badblocks.changed) {
					rdev->badblocks.changed = 0;
					md_ack_all_badblocks(&rdev->badblocks);
					md_error(mddev, rdev);
				}
				clear_bit(Blocked, &rdev->flags);
				clear_bit(BlockedBadBlocks, &rdev->flags);
				wake_up(&rdev->blocked_wait);
			}
		}
		wake_up(&mddev->sb_wait);
		return;
	}

	spin_lock_irq(&mddev->write_lock);

	mddev->utime = get_seconds();

	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only a clean/dirty change is pending: members without a
		 * slot may be skipped (see sync_sbs()) unless overridden
		 * just below.
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/* A degraded array always updates every member. */
		nospares = 0;

	/* Remember in_sync so a change during the write-out can be detected. */
	sync_req = mddev->in_sync;

	/*
	 * For a pure clean/dirty transition on a fully clean array the
	 * event count may step back by one (once) instead of forward;
	 * otherwise it moves forward.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && mddev->can_decrease_events
	    && mddev->events != 1) {
		mddev->events--;
		mddev->can_decrease_events = 0;
	} else {
		/* move forward and note whether a later step back is allowed */
		mddev->events ++;
		mddev->can_decrease_events = nospares;
	}

	if (!mddev->events) {
		/*
		 * The event counter wrapped to zero: report it and step
		 * back so the counter is non-zero again.
		 */
		MD_BUG();
		mddev->events --;
	}

	rdev_for_each(rdev, mddev) {
		if (rdev->badblocks.changed)
			any_badblocks_changed++;
		/* Faulty state is about to be recorded in the metadata. */
		if (test_bit(Faulty, &rdev->flags))
			set_bit(FaultRecorded, &rdev->flags);
	}

	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
		 mdname(mddev), mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, mddev) {
		char b[BDEVNAME_SIZE];

		/* sync_sbs() marked members to skip with sb_loaded != 1. */
		if (rdev->sb_loaded != 1)
			continue;

		if (!test_bit(Faulty, &rdev->flags) &&
		    rdev->saved_raid_disk == -1) {
			md_super_write(mddev,rdev,
				       rdev->sb_start, rdev->sb_size,
				       rdev->sb_page);
			pr_debug("md: (write) %s's sb offset: %llu\n",
				 bdevname(rdev->bdev, b),
				 (unsigned long long)rdev->sb_start);
			rdev->sb_events = mddev->events;
			/*
			 * A non-zero badblocks.size means the superblock's
			 * sync_super step prepared a bad-block log page to
			 * write; write it and mark it done.
			 */
			if (rdev->badblocks.size) {
				md_super_write(mddev, rdev,
					       rdev->badblocks.sector,
					       rdev->badblocks.size << 9,
					       rdev->bb_page);
				rdev->badblocks.size = 0;
			}

		} else if (test_bit(Faulty, &rdev->flags))
			pr_debug("md: %s (skipping faulty)\n",
				 bdevname(rdev->bdev, b));
		else
			pr_debug("(skipping incremental s/r ");

		if (mddev->level == LEVEL_MULTIPATH)
			/* multipath: stop after the first member processed */
			break;
	}
	md_super_wait(mddev);
	/*
	 * NOTE(review): a failed superblock write presumably sets
	 * MD_CHANGE_DEVS, which the check below turns into a rewrite -
	 * confirm in the write-completion path.
	 */

	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* State changed while writing: do it all again. */
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");

	rdev_for_each(rdev, mddev) {
		/* The fault is now on disk: the device no longer needs to block. */
		if (test_and_clear_bit(FaultRecorded, &rdev->flags))
			clear_bit(Blocked, &rdev->flags);

		if (any_badblocks_changed)
			md_ack_all_badblocks(&rdev->badblocks);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
	}
}
2568
2569
2570
2571
/*
 * Compare user-supplied command text @cmd against keyword @str.
 *
 * Returns 1 when @cmd equals @str, optionally followed by a single
 * trailing newline, and 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	const char *c = cmd;
	const char *s = str;

	/* Advance over the common prefix. */
	for (; *c != '\0' && *s != '\0'; c++, s++) {
		if (*c != *s)
			break;
	}

	/* Tolerate exactly one newline after the matched part of @cmd. */
	if (*c == '\n')
		c++;

	/* Both strings must now be fully consumed. */
	return (*s == '\0' && *c == '\0') ? 1 : 0;
}
2588
/*
 * A sysfs attribute of a component device, with its accessors
 * (e.g. state_show()/state_store() below match these signatures).
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	/* format the attribute's value into the given buffer; returns a length */
	ssize_t (*show)(struct md_rdev *, char *);
	/* parse a value of the given length written by the user */
	ssize_t (*store)(struct md_rdev *, const char *, size_t);
};
2594
2595static ssize_t
2596state_show(struct md_rdev *rdev, char *page)
2597{
2598 char *sep = "";
2599 size_t len = 0;
2600
2601 if (test_bit(Faulty, &rdev->flags) ||
2602 rdev->badblocks.unacked_exist) {
2603 len+= sprintf(page+len, "%sfaulty",sep);
2604 sep = ",";
2605 }
2606 if (test_bit(In_sync, &rdev->flags)) {
2607 len += sprintf(page+len, "%sin_sync",sep);
2608 sep = ",";
2609 }
2610 if (test_bit(WriteMostly, &rdev->flags)) {
2611 len += sprintf(page+len, "%swrite_mostly",sep);
2612 sep = ",";
2613 }
2614 if (test_bit(Blocked, &rdev->flags) ||
2615 (rdev->badblocks.unacked_exist
2616 && !test_bit(Faulty, &rdev->flags))) {
2617 len += sprintf(page+len, "%sblocked", sep);
2618 sep = ",";
2619 }
2620 if (!test_bit(Faulty, &rdev->flags) &&
2621 !test_bit(In_sync, &rdev->flags)) {
2622 len += sprintf(page+len, "%sspare", sep);
2623 sep = ",";
2624 }
2625 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2626 len += sprintf(page+len, "%swrite_error", sep);
2627 sep = ",";
2628 }
2629 if (test_bit(WantReplacement, &rdev->flags)) {
2630 len += sprintf(page+len, "%swant_replacement", sep);
2631 sep = ",";
2632 }
2633 if (test_bit(Replacement, &rdev->flags)) {
2634 len += sprintf(page+len, "%sreplacement", sep);
2635 sep = ",";
2636 }
2637
2638 return len+sprintf(page+len, "\n");
2639}
2640
/*
 * sysfs 'state' write handler for a component device.
 *
 * @buf is compared with cmd_match() (so one trailing newline is accepted)
 * against these command words:
 *   faulty             - call md_error() on the device; only recognised
 *                        while a personality is active.  -EBUSY if the
 *                        device did not end up marked Faulty.
 *   remove             - kick the device out of the array; -EBUSY while
 *                        it still holds a raid slot (raid_disk >= 0).
 *   writemostly        - set WriteMostly.
 *   -writemostly       - clear WriteMostly.
 *   blocked            - set Blocked.
 *   -blocked           - clear Blocked and BlockedBadBlocks, wake anyone
 *                        waiting on blocked_wait and request recovery.
 *   insync             - set In_sync; only recognised when raid_disk == -1.
 *   write_error        - set WriteErrorSeen.
 *   -write_error       - clear WriteErrorSeen.
 *   want_replacement   - set WantReplacement (see conditions below) and
 *                        request recovery.
 *   -want_replacement  - clear WantReplacement.
 *   replacement        - set Replacement; -EBUSY while a personality is
 *                        active.
 *   -replacement       - clear Replacement; -EBUSY while a personality is
 *                        active.
 *
 * Returns @len on success and -EINVAL when no command matched.  On
 * success the device's sysfs 'state' entry is notified.
 */
static ssize_t
state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	int err = -EINVAL;
	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		/* md_error() may decline to fail the device; report that */
		if (test_bit(Faulty, &rdev->flags))
			err = 0;
		else
			err = -EBUSY;
	} else if (cmd_match(buf, "remove")) {
		if (rdev->raid_disk >= 0)
			err = -EBUSY;
		else {
			/* save mddev: rdev is detached by kick_rdev_from_array() */
			struct mddev *mddev = rdev->mddev;
			kick_rdev_from_array(rdev);
			if (mddev->pers)
				md_update_sb(mddev, 1);
			md_new_event(mddev);
			err = 0;
		}
	} else if (cmd_match(buf, "writemostly")) {
		set_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-writemostly")) {
		clear_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "blocked")) {
		set_bit(Blocked, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-blocked")) {
		if (!test_bit(Faulty, &rdev->flags) &&
		    rdev->badblocks.unacked_exist) {
			/*
			 * Unblocking while bad blocks are still
			 * unacknowledged: fail the device instead.
			 */
			md_error(rdev->mddev, rdev);
		}
		clear_bit(Blocked, &rdev->flags);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);

		err = 0;
	} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
		set_bit(In_sync, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "write_error")) {
		set_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-write_error")) {
		clear_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "want_replacement")) {
		/*
		 * The flag is only set for a device that holds a raid slot
		 * and is not itself a replacement; recovery is requested
		 * and success is returned either way.
		 */
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Replacement, &rdev->flags))
			set_bit(WantReplacement, &rdev->flags);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
		err = 0;
	} else if (cmd_match(buf, "-want_replacement")) {
		/* clearing the request is always allowed */
		err = 0;
		clear_bit(WantReplacement, &rdev->flags);
	} else if (cmd_match(buf, "replacement")) {
		/* the Replacement flag may only change while the array is not running */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			set_bit(Replacement, &rdev->flags);
			err = 0;
		}
	} else if (cmd_match(buf, "-replacement")) {
		/* same restriction as for setting the flag */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			clear_bit(Replacement, &rdev->flags);
			err = 0;
		}
	}
	if (!err)
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2749
2750static ssize_t
2751errors_show(struct md_rdev *rdev, char *page)
2752{
2753 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2754}
2755
2756static ssize_t
2757errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2758{
2759 char *e;
2760 unsigned long n = simple_strtoul(buf, &e, 10);
2761 if (*buf && (*e == 0 || *e == '\n')) {
2762 atomic_set(&rdev->corrected_errors, n);
2763 return len;
2764 }
2765 return -EINVAL;
2766}
2767static struct rdev_sysfs_entry rdev_errors =
2768__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2769
2770static ssize_t
2771slot_show(struct md_rdev *rdev, char *page)
2772{
2773 if (rdev->raid_disk < 0)
2774 return sprintf(page, "none\n");
2775 else
2776 return sprintf(page, "%d\n", rdev->raid_disk);
2777}
2778
/*
 * sysfs 'slot' write handler: assign the device to a raid slot, or
 * release it with "none".
 *
 * Three cases are handled:
 *  - array running, slot "none": hot-remove the device from its slot;
 *  - array running, numeric slot: hot-add the device into that slot;
 *  - array not running: simply record the slot and mark the device
 *    In_sync.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
slot_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	char *e;
	int err;
	int slot = simple_strtoul(buf, &e, 10);
	/* only the first four characters are compared against "none" */
	if (strncmp(buf, "none", 4)==0)
		slot = -1;
	else if (e==buf || (*e && *e!= '\n'))
		return -EINVAL;
	if (rdev->mddev->pers && slot == -1) {
		/*
		 * Running array, "none": remove the device from its slot.
		 * Fails with -EEXIST if it has no slot, -EINVAL if the
		 * personality has no hot_remove_disk method, and -EBUSY if
		 * the device still holds a slot after the removal attempt.
		 */
		if (rdev->raid_disk == -1)
			return -EEXIST;
		/* personality does all needed checks */
		if (rdev->mddev->pers->hot_remove_disk == NULL)
			return -EINVAL;
		clear_bit(Blocked, &rdev->flags);
		remove_and_add_spares(rdev->mddev, rdev);
		if (rdev->raid_disk >= 0)
			return -EBUSY;
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
	} else if (rdev->mddev->pers) {
		/*
		 * Running array, numeric slot: activate the device in that
		 * slot.  Refused if it already has a slot or a recovery is
		 * running.
		 */
		if (rdev->raid_disk != -1)
			return -EBUSY;

		if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
			return -EBUSY;

		if (rdev->mddev->pers->hot_add_disk == NULL)
			return -EINVAL;

		/* slot must fit either the current or the reshaped disk count */
		if (slot >= rdev->mddev->raid_disks &&
		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
			return -ENOSPC;

		rdev->raid_disk = slot;
		/* remember the slot only if the device claimed to be in sync */
		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = slot;
		else
			rdev->saved_raid_disk = -1;
		clear_bit(In_sync, &rdev->flags);
		err = rdev->mddev->pers->
			hot_add_disk(rdev->mddev, rdev);
		if (err) {
			/* undo the slot assignment on failure */
			rdev->raid_disk = -1;
			return err;
		} else
			sysfs_notify_dirent_safe(rdev->sysfs_state);
		if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure here is OK */;
		/* don't wakeup anyone, leave that to userspace. */
	} else {
		/* array not running: just record the requested slot */
		if (slot >= rdev->mddev->raid_disks &&
		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
			return -ENOSPC;
		rdev->raid_disk = slot;
		/* assume it is working */
		clear_bit(Faulty, &rdev->flags);
		clear_bit(WriteMostly, &rdev->flags);
		set_bit(In_sync, &rdev->flags);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	}
	return len;
}


static struct rdev_sysfs_entry rdev_slot =
__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2859
2860static ssize_t
2861offset_show(struct md_rdev *rdev, char *page)
2862{
2863 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2864}
2865
2866static ssize_t
2867offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2868{
2869 unsigned long long offset;
2870 if (strict_strtoull(buf, 10, &offset) < 0)
2871 return -EINVAL;
2872 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2873 return -EBUSY;
2874 if (rdev->sectors && rdev->mddev->external)
2875
2876
2877 return -EBUSY;
2878 rdev->data_offset = offset;
2879 rdev->new_data_offset = offset;
2880 return len;
2881}
2882
2883static struct rdev_sysfs_entry rdev_offset =
2884__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2885
2886static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2887{
2888 return sprintf(page, "%llu\n",
2889 (unsigned long long)rdev->new_data_offset);
2890}
2891
/*
 * sysfs 'new_offset' write handler: request a new data offset for the
 * device.
 *
 * The request is rejected with
 *   -EINVAL  if @buf does not parse, or if the direction of the change
 *            conflicts with the array's current reshape_backwards
 *            setting;
 *   -EBUSY   while a sync thread exists;
 *   -E2BIG   if moving the data start forward would not leave room for
 *            dev_sectors, or if the superblock handler's
 *            allow_new_offset() refuses the value.
 *
 * On success new_data_offset is updated, reshape_backwards is set to
 * match the direction of the change, and @len is returned.
 */
static ssize_t new_offset_store(struct md_rdev *rdev,
				const char *buf, size_t len)
{
	unsigned long long new_offset;
	struct mddev *mddev = rdev->mddev;

	if (strict_strtoull(buf, 10, &new_offset) < 0)
		return -EINVAL;

	if (mddev->sync_thread)
		return -EBUSY;
	if (new_offset == rdev->data_offset)
		/* resetting to the current offset is always allowed */
		;
	else if (new_offset > rdev->data_offset) {
		/* data is moving up: the device must still hold dev_sectors */
		if (new_offset - rdev->data_offset
		    + mddev->dev_sectors > rdev->sectors)
				return -E2BIG;
	}
	/*
	 * A decreasing offset has no size check here; any further limit
	 * is left to allow_new_offset() below.
	 */
	if (new_offset < rdev->data_offset &&
	    mddev->reshape_backwards)
		return -EINVAL;
	/*
	 * Likewise an increasing offset is only accepted when
	 * reshape_backwards is already set.
	 */
	if (new_offset > rdev->data_offset &&
	    !mddev->reshape_backwards)
		return -EINVAL;

	/* for a running array with persistent metadata, ask the superblock handler */
	if (mddev->pers && mddev->persistent &&
	    !super_types[mddev->major_version]
	    .allow_new_offset(rdev, new_offset))
		return -E2BIG;
	rdev->new_data_offset = new_offset;
	if (new_offset > rdev->data_offset)
		mddev->reshape_backwards = 1;
	else if (new_offset < rdev->data_offset)
		mddev->reshape_backwards = 0;

	return len;
}
static struct rdev_sysfs_entry rdev_new_offset =
__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2942
2943static ssize_t
2944rdev_size_show(struct md_rdev *rdev, char *page)
2945{
2946 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2947}
2948
2949static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2950{
2951
2952 if (s1+l1 <= s2)
2953 return 0;
2954 if (s2+l2 <= s1)
2955 return 0;
2956 return 1;
2957}
2958
2959static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2960{
2961 unsigned long long blocks;
2962 sector_t new;
2963
2964 if (strict_strtoull(buf, 10, &blocks) < 0)
2965 return -EINVAL;
2966
2967 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2968 return -EINVAL;
2969
2970 new = blocks * 2;
2971 if (new != blocks * 2)
2972 return -EINVAL;
2973
2974 *sectors = new;
2975 return 0;
2976}
2977
2978static ssize_t
2979rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2980{
2981 struct mddev *my_mddev = rdev->mddev;
2982 sector_t oldsectors = rdev->sectors;
2983 sector_t sectors;
2984
2985 if (strict_blocks_to_sectors(buf, §ors) < 0)
2986 return -EINVAL;
2987 if (rdev->data_offset != rdev->new_data_offset)
2988 return -EINVAL;
2989 if (my_mddev->pers && rdev->raid_disk >= 0) {
2990 if (my_mddev->persistent) {
2991 sectors = super_types[my_mddev->major_version].
2992 rdev_size_change(rdev, sectors);
2993 if (!sectors)
2994 return -EBUSY;
2995 } else if (!sectors)
2996 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2997 rdev->data_offset;
2998 if (!my_mddev->pers->resize)
2999
3000 return -EINVAL;
3001 }
3002 if (sectors < my_mddev->dev_sectors)
3003 return -EINVAL;
3004
3005 rdev->sectors = sectors;
3006 if (sectors > oldsectors && my_mddev->external) {
3007
3008
3009
3010
3011
3012 struct mddev *mddev;
3013 int overlap = 0;
3014 struct list_head *tmp;
3015
3016 mddev_unlock(my_mddev);
3017 for_each_mddev(mddev, tmp) {
3018 struct md_rdev *rdev2;
3019
3020 mddev_lock(mddev);
3021 rdev_for_each(rdev2, mddev)
3022 if (rdev->bdev == rdev2->bdev &&
3023 rdev != rdev2 &&
3024 overlaps(rdev->data_offset, rdev->sectors,
3025 rdev2->data_offset,
3026 rdev2->sectors)) {
3027 overlap = 1;
3028 break;
3029 }
3030 mddev_unlock(mddev);
3031 if (overlap) {
3032 mddev_put(mddev);
3033 break;
3034 }
3035 }
3036 mddev_lock(my_mddev);
3037 if (overlap) {
3038
3039
3040
3041
3042
3043
3044 rdev->sectors = oldsectors;
3045 return -EBUSY;
3046 }
3047 }
3048 return len;
3049}
3050
3051static struct rdev_sysfs_entry rdev_size =
3052__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3053
3054
3055static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3056{
3057 unsigned long long recovery_start = rdev->recovery_offset;
3058
3059 if (test_bit(In_sync, &rdev->flags) ||
3060 recovery_start == MaxSector)
3061 return sprintf(page, "none\n");
3062
3063 return sprintf(page, "%llu\n", recovery_start);
3064}
3065
3066static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3067{
3068 unsigned long long recovery_start;
3069
3070 if (cmd_match(buf, "none"))
3071 recovery_start = MaxSector;
3072 else if (strict_strtoull(buf, 10, &recovery_start))
3073 return -EINVAL;
3074
3075 if (rdev->mddev->pers &&
3076 rdev->raid_disk >= 0)
3077 return -EBUSY;
3078
3079 rdev->recovery_offset = recovery_start;
3080 if (recovery_start == MaxSector)
3081 set_bit(In_sync, &rdev->flags);
3082 else
3083 clear_bit(In_sync, &rdev->flags);
3084 return len;
3085}
3086
3087static struct rdev_sysfs_entry rdev_recovery_start =
3088__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3089
3090
3091static ssize_t
3092badblocks_show(struct badblocks *bb, char *page, int unack);
3093static ssize_t
3094badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3095
3096static ssize_t bb_show(struct md_rdev *rdev, char *page)
3097{
3098 return badblocks_show(&rdev->badblocks, page, 0);
3099}
3100static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3101{
3102 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3103
3104 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3105 wake_up(&rdev->blocked_wait);
3106 return rv;
3107}
3108static struct rdev_sysfs_entry rdev_bad_blocks =
3109__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3110
3111
3112static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3113{
3114 return badblocks_show(&rdev->badblocks, page, 1);
3115}
3116static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3117{
3118 return badblocks_store(&rdev->badblocks, page, len, 1);
3119}
3120static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3121__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3122
/*
 * NULL-terminated list of the sysfs attributes of a component device;
 * installed as .default_attrs of rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_new_offset.attr,
	&rdev_size.attr,
	&rdev_recovery_start.attr,
	&rdev_bad_blocks.attr,
	&rdev_unack_bad_blocks.attr,
	NULL,
};
3135static ssize_t
3136rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3137{
3138 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3139 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3140 struct mddev *mddev = rdev->mddev;
3141 ssize_t rv;
3142
3143 if (!entry->show)
3144 return -EIO;
3145
3146 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3147 if (!rv) {
3148 if (rdev->mddev == NULL)
3149 rv = -EBUSY;
3150 else
3151 rv = entry->show(rdev, page);
3152 mddev_unlock(mddev);
3153 }
3154 return rv;
3155}
3156
3157static ssize_t
3158rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3159 const char *page, size_t length)
3160{
3161 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3162 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3163 ssize_t rv;
3164 struct mddev *mddev = rdev->mddev;
3165
3166 if (!entry->store)
3167 return -EIO;
3168 if (!capable(CAP_SYS_ADMIN))
3169 return -EACCES;
3170 rv = mddev ? mddev_lock(mddev): -EBUSY;
3171 if (!rv) {
3172 if (rdev->mddev == NULL)
3173 rv = -EBUSY;
3174 else
3175 rv = entry->store(rdev, page, length);
3176 mddev_unlock(mddev);
3177 }
3178 return rv;
3179}
3180
3181static void rdev_free(struct kobject *ko)
3182{
3183 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3184 kfree(rdev);
3185}
/* sysfs read/write dispatchers shared by every rdev attribute */
static const struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/*
 * kobject type of a component device: ties together the release
 * callback, the sysfs dispatchers and the default attribute list.
 * Used by md_import_device() when initialising rdev->kobj.
 */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
3195
/*
 * Initialise the fields of a freshly allocated md_rdev to their
 * "not in any array" defaults and allocate the page that holds the
 * bad-block list.
 *
 * Returns 0 on success or -ENOMEM if the bad-block page could not be
 * allocated.  The other fields are initialised even when -ENOMEM is
 * returned.
 */
int md_rdev_init(struct md_rdev *rdev)
{
	/* -1 marks "no descriptor / no slot assigned" */
	rdev->desc_nr = -1;
	rdev->saved_raid_disk = -1;
	rdev->raid_disk = -1;
	rdev->flags = 0;
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_events = 0;
	rdev->last_read_error.tv_sec  = 0;
	rdev->last_read_error.tv_nsec = 0;
	rdev->sb_loaded = 0;
	rdev->bb_page = NULL;
	atomic_set(&rdev->nr_pending, 0);
	atomic_set(&rdev->read_errors, 0);
	atomic_set(&rdev->corrected_errors, 0);

	INIT_LIST_HEAD(&rdev->same_set);
	init_waitqueue_head(&rdev->blocked_wait);

	/*
	 * Bad-block list: starts empty, backed by one page.
	 * NOTE(review): shift == -1 presumably means the list is disabled
	 * until a superblock enables it; confirm against the badblocks code.
	 */
	rdev->badblocks.count = 0;
	rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
	rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
	seqlock_init(&rdev->badblocks.lock);
	if (rdev->badblocks.page == NULL)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(md_rdev_init);
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
/*
 * Allocate and set up an md_rdev for the block device @newdev.
 *
 * @super_format selects what is done with the on-disk superblock:
 *   >= 0  load a superblock of that format (and @super_minor version)
 *         through super_types[];
 *   <  0  do not load a superblock.  The value -2 is additionally passed
 *         on to lock_rdev() as a flag.
 *         NOTE(review): presumably -2 marks a shared device; confirm
 *         against lock_rdev().
 *
 * Returns the new rdev, or an ERR_PTR() carrying the errno on failure.
 * The rdev is not attached to any mddev by this function.
 */
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
	char b[BDEVNAME_SIZE];
	int err;
	struct md_rdev *rdev;
	sector_t size;

	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
	if (!rdev) {
		printk(KERN_ERR "md: could not alloc mem for new device!\n");
		return ERR_PTR(-ENOMEM);
	}

	err = md_rdev_init(rdev);
	if (err)
		goto abort_free;
	err = alloc_disk_sb(rdev);
	if (err)
		goto abort_free;

	err = lock_rdev(rdev, newdev, super_format == -2);
	if (err)
		goto abort_free;

	kobject_init(&rdev->kobj, &rdev_ktype);

	/* a device of zero size cannot be used; the import is aborted */
	size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
	if (!size) {
		printk(KERN_WARNING 
			"md: %s has zero or unknown size, marking faulty!\n",
			bdevname(rdev->bdev,b));
		err = -EINVAL;
		goto abort_free;
	}

	if (super_format >= 0) {
		err = super_types[super_format].
			load_super(rdev, NULL, super_minor);
		/* -EINVAL gets its own message: no valid superblock found */
		if (err == -EINVAL) {
			printk(KERN_WARNING
				"md: %s does not have a valid v%d.%d "
			       "superblock, not importing!\n",
				bdevname(rdev->bdev,b),
			       super_format, super_minor);
			goto abort_free;
		}
		if (err < 0) {
			printk(KERN_WARNING 
				"md: could not read %s's sb, not importing!\n",
				bdevname(rdev->bdev,b));
			goto abort_free;
		}
	}

	return rdev;

abort_free:
	/* bdev is only set once lock_rdev() has succeeded (rdev is kzalloc'ed) */
	if (rdev->bdev)
		unlock_rdev(rdev);
	md_rdev_clear(rdev);
	kfree(rdev);
	return ERR_PTR(err);
}
3303
3304
3305
3306
3307
3308
/*
 * Load and cross-check the superblocks of all devices in @mddev.
 *
 * First pass: load each device's superblock, tracking the "freshest"
 * device as reported by load_super() (return value 1); devices whose
 * superblock cannot be used are kicked from the array.
 *
 * Then the array is validated against the freshest superblock, and in a
 * second pass every remaining device is validated against the array;
 * devices beyond max_disks or rejected by validate_super() are kicked.
 */
static void analyze_sbs(struct mddev * mddev)
{
	int i;
	struct md_rdev *rdev, *freshest, *tmp;
	char b[BDEVNAME_SIZE];

	freshest = NULL;
	rdev_for_each_safe(rdev, tmp, mddev)
		switch (super_types[mddev->major_version].
			load_super(rdev, freshest, mddev->minor_version)) {
		case 1:
			/* this device replaces the current reference device */
			freshest = rdev;
			break;
		case 0:
			break;
		default:
			printk( KERN_ERR \
				"md: fatal superblock inconsistency in %s"
				" -- removing from array\n", 
				bdevname(rdev->bdev,b));
			kick_rdev_from_array(rdev);
		}


	/* set up the array from the freshest superblock */
	super_types[mddev->major_version].
		validate_super(mddev, freshest);

	i = 0;
	rdev_for_each_safe(rdev, tmp, mddev) {
		/* enforce the device limit, when the array defines one */
		if (mddev->max_disks &&
		    (rdev->desc_nr >= mddev->max_disks ||
		     i > mddev->max_disks)) {
			printk(KERN_WARNING
			       "md: %s: %s: only %d devices permitted\n",
			       mdname(mddev), bdevname(rdev->bdev, b),
			       mddev->max_disks);
			kick_rdev_from_array(rdev);
			continue;
		}
		/* freshest was already validated above */
		if (rdev != freshest)
			if (super_types[mddev->major_version].
			    validate_super(mddev, rdev)) {
				printk(KERN_WARNING "md: kicking non-fresh %s"
					" from array!\n",
					bdevname(rdev->bdev,b));
				kick_rdev_from_array(rdev);
				continue;
			}
		if (mddev->level == LEVEL_MULTIPATH) {
			/* multipath: number the devices sequentially, all in sync */
			rdev->desc_nr = i++;
			rdev->raid_disk = rdev->desc_nr;
			set_bit(In_sync, &rdev->flags);
		} else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
			/* slot lies beyond the array (including a pending shrink): drop it */
			rdev->raid_disk = -1;
			clear_bit(In_sync, &rdev->flags);
		}
	}
}
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
/*
 * Parse a non-negative decimal number with an optional fractional part
 * and return it multiplied by 10^@scale.
 *
 * @cp:    input string; a single trailing '\n' is accepted
 * @res:   receives the scaled value on success
 * @scale: number of fractional digits to keep; further digits are
 *         parsed but ignored (truncation, not rounding)
 *
 * Returns 0 on success, or -EINVAL if anything other than digits, one
 * '.', and an optional final newline is present.  No overflow checking
 * is done.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long acc = 0;
	long frac_digits = -1;	/* stays -1 until a '.' has been seen */

	for (;; cp++) {
		if (*cp == '.') {
			/* a second '.' ends the number */
			if (frac_digits >= 0)
				break;
			frac_digits = 0;
			continue;
		}
		if (!isdigit(*cp))
			break;
		/* digits beyond the requested precision are skipped */
		if (frac_digits < scale) {
			unsigned int digit = *cp - '0';

			acc = acc * 10 + digit;
			if (frac_digits >= 0)
				frac_digits++;
		}
	}

	if (*cp == '\n')
		cp++;
	if (*cp != '\0')
		return -EINVAL;

	/* pad with zeros up to the requested number of fractional digits */
	if (frac_digits < 0)
		frac_digits = 0;
	for (; frac_digits < scale; frac_digits++)
		acc *= 10;

	*res = acc;
	return 0;
}
3407
3408
3409static void md_safemode_timeout(unsigned long data);
3410
3411static ssize_t
3412safe_delay_show(struct mddev *mddev, char *page)
3413{
3414 int msec = (mddev->safemode_delay*1000)/HZ;
3415 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3416}
3417static ssize_t
3418safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3419{
3420 unsigned long msec;
3421
3422 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3423 return -EINVAL;
3424 if (msec == 0)
3425 mddev->safemode_delay = 0;
3426 else {
3427 unsigned long old_delay = mddev->safemode_delay;
3428 mddev->safemode_delay = (msec*HZ)/1000;
3429 if (mddev->safemode_delay == 0)
3430 mddev->safemode_delay = 1;
3431 if (mddev->safemode_delay < old_delay)
3432 md_safemode_timeout((unsigned long)mddev);
3433 }
3434 return len;
3435}
3436static struct md_sysfs_entry md_safe_delay =
3437__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3438
3439static ssize_t
3440level_show(struct mddev *mddev, char *page)
3441{
3442 struct md_personality *p = mddev->pers;
3443 if (p)
3444 return sprintf(page, "%s\n", p->name);
3445 else if (mddev->clevel[0])
3446 return sprintf(page, "%s\n", mddev->clevel);
3447 else if (mddev->level != LEVEL_NONE)
3448 return sprintf(page, "%d\n", mddev->level);
3449 else
3450 return 0;
3451}
3452
/*
 * sysfs 'level' write handler.
 *
 * Array not running: just record the level string in mddev->clevel
 * (a trailing newline is stripped) and reset mddev->level to LEVEL_NONE.
 *
 * Array running: attempt an online change of personality.  The new
 * personality is looked up (loading its module if needed), asked to
 * take over the array via ->takeover(), and if it accepts, the old
 * personality is stopped and the new one started with the array
 * suspended.
 *
 * Returns the original @len on success, or:
 *   -ENOSPC  level string too long for clevel (array not running);
 *   -EBUSY   a sync thread exists, a reshape is in progress, or
 *            sysfs_active is set;
 *   -EINVAL  current personality has no ->quiesce, level string empty
 *            or too long, personality not available, or it has no
 *            ->takeover;
 *   the error from ->takeover() if it rejects the array.
 */
static ssize_t
level_store(struct mddev *mddev, const char *buf, size_t len)
{
	char clevel[16];
	ssize_t rv = len;
	struct md_personality *pers;
	long level;
	void *priv;
	struct md_rdev *rdev;

	if (mddev->pers == NULL) {
		/* not running: only remember the requested level string */
		if (len == 0)
			return 0;
		if (len >= sizeof(mddev->clevel))
			return -ENOSPC;
		strncpy(mddev->clevel, buf, len);
		if (mddev->clevel[len-1] == '\n')
			len--;
		mddev->clevel[len] = 0;
		mddev->level = LEVEL_NONE;
		return rv;
	}

	/*
	 * Request to change the personality of a running array.  This is
	 * refused while a sync thread exists, while a reshape is in
	 * progress, or while sysfs_active is set.
	 */
	if (mddev->sync_thread ||
	    mddev->reshape_position != MaxSector ||
	    mddev->sysfs_active)
		return -EBUSY;

	if (!mddev->pers->quiesce) {
		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
		       mdname(mddev), mddev->pers->name);
		return -EINVAL;
	}

	/* Now find the new personality */
	if (len == 0 || len >= sizeof(clevel))
		return -EINVAL;
	strncpy(clevel, buf, len);
	if (clevel[len-1] == '\n')
		len--;
	clevel[len] = 0;
	/* a non-numeric level string is looked up by name only */
	if (strict_strtol(clevel, 10, &level))
		level = LEVEL_NONE;

	/* try both module naming schemes */
	if (request_module("md-%s", clevel) != 0)
		request_module("md-level-%s", clevel);
	spin_lock(&pers_lock);
	pers = find_pers(level, clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
		return -EINVAL;
	}
	spin_unlock(&pers_lock);

	if (pers == mddev->pers) {
		/* Nothing to do! */
		module_put(pers->owner);
		return rv;
	}
	if (!pers->takeover) {
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
		       mdname(mddev), clevel);
		return -EINVAL;
	}

	/* takeover() may change new_raid_disk; start from the current slots */
	rdev_for_each(rdev, mddev)
		rdev->new_raid_disk = rdev->raid_disk;

	/*
	 * ->takeover() returns the private data for the new personality,
	 * or an ERR_PTR if it cannot accept the array.
	 */
	priv = pers->takeover(mddev);
	if (IS_ERR(priv)) {
		/* roll back whatever geometry changes takeover() proposed */
		mddev->new_level = mddev->level;
		mddev->new_layout = mddev->layout;
		mddev->new_chunk_sectors = mddev->chunk_sectors;
		mddev->raid_disks -= mddev->delta_disks;
		mddev->delta_disks = 0;
		mddev->reshape_backwards = 0;
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s would not accept array\n",
		       mdname(mddev), clevel);
		return PTR_ERR(priv);
	}

	/* Looks like we have a winner */
	mddev_suspend(mddev);
	mddev->pers->stop(mddev);
	
	if (mddev->pers->sync_request == NULL &&
	    pers->sync_request != NULL) {
		/* need to add the md_redundancy_group */
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			printk(KERN_WARNING
			       "md: cannot register extra attributes for %s\n",
			       mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
	}		
	if (mddev->pers->sync_request != NULL &&
	    pers->sync_request == NULL) {
		/* need to remove the md_redundancy_group */
		if (mddev->to_remove == NULL)
			mddev->to_remove = &md_redundancy_group;
	}

	if (mddev->pers->sync_request == NULL &&
	    mddev->external) {
		/*
		 * Old personality had no sync_request and metadata is
		 * managed externally: reset the clean/safemode state so the
		 * array starts out marked as not in sync with safemode
		 * disabled.
		 */
		mddev->in_sync = 0;
		mddev->safemode_delay = 0;
		mddev->safemode = 0;
	}

	/* first pass: drop the sysfs links of devices whose slot changes */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk >= mddev->raid_disks)
			rdev->new_raid_disk = -1;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		sysfs_unlink_rdev(mddev, rdev);
	}
	/* second pass: apply the new slots and recreate the links */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		rdev->raid_disk = rdev->new_raid_disk;
		if (rdev->raid_disk < 0)
			clear_bit(In_sync, &rdev->flags);
		else {
			if (sysfs_link_rdev(mddev, rdev))
				printk(KERN_WARNING "md: cannot register rd%d"
				       " for %s after level change\n",
				       rdev->raid_disk, mdname(mddev));
		}
	}

	/* switch over to the new personality and commit the new geometry */
	module_put(mddev->pers->owner);
	mddev->pers = pers;
	mddev->private = priv;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->delta_disks = 0;
	mddev->reshape_backwards = 0;
	mddev->degraded = 0;
	if (mddev->pers->sync_request == NULL) {
		/*
		 * The new personality has no sync_request: mark the array
		 * in sync and stop the safemode timer.
		 */
		mddev->in_sync = 1;
		del_timer_sync(&mddev->safemode_timer);
	}
	/* NOTE(review): the return value of ->run() is not checked here */
	pers->run(mddev);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	mddev_resume(mddev);
	sysfs_notify(&mddev->kobj, NULL, "level");
	md_new_event(mddev);
	return rv;
}

static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3632
3633
3634static ssize_t
3635layout_show(struct mddev *mddev, char *page)
3636{
3637
3638 if (mddev->reshape_position != MaxSector &&
3639 mddev->layout != mddev->new_layout)
3640 return sprintf(page, "%d (%d)\n",
3641 mddev->new_layout, mddev->layout);
3642 return sprintf(page, "%d\n", mddev->layout);
3643}
3644
3645static ssize_t
3646layout_store(struct mddev *mddev, const char *buf, size_t len)
3647{
3648 char *e;
3649 unsigned long n = simple_strtoul(buf, &e, 10);
3650
3651 if (!*buf || (*e && *e != '\n'))
3652 return -EINVAL;
3653
3654 if (mddev->pers) {
3655 int err;
3656 if (mddev->pers->check_reshape == NULL)
3657 return -EBUSY;
3658 mddev->new_layout = n;
3659 err = mddev->pers->check_reshape(mddev);
3660 if (err) {
3661 mddev->new_layout = mddev->layout;
3662 return err;
3663 }
3664 } else {
3665 mddev->new_layout = n;
3666 if (mddev->reshape_position == MaxSector)
3667 mddev->layout = n;
3668 }
3669 return len;
3670}
3671static struct md_sysfs_entry md_layout =
3672__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3673
3674
3675static ssize_t
3676raid_disks_show(struct mddev *mddev, char *page)
3677{
3678 if (mddev->raid_disks == 0)
3679 return 0;
3680 if (mddev->reshape_position != MaxSector &&
3681 mddev->delta_disks != 0)
3682 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3683 mddev->raid_disks - mddev->delta_disks);
3684 return sprintf(page, "%d\n", mddev->raid_disks);
3685}
3686
3687static int update_raid_disks(struct mddev *mddev, int raid_disks);
3688
3689static ssize_t
3690raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3691{
3692 char *e;
3693 int rv = 0;
3694 unsigned long n = simple_strtoul(buf, &e, 10);
3695
3696 if (!*buf || (*e && *e != '\n'))
3697 return -EINVAL;
3698
3699 if (mddev->pers)
3700 rv = update_raid_disks(mddev, n);
3701 else if (mddev->reshape_position != MaxSector) {
3702 struct md_rdev *rdev;
3703 int olddisks = mddev->raid_disks - mddev->delta_disks;
3704
3705 rdev_for_each(rdev, mddev) {
3706 if (olddisks < n &&
3707 rdev->data_offset < rdev->new_data_offset)
3708 return -EINVAL;
3709 if (olddisks > n &&
3710 rdev->data_offset > rdev->new_data_offset)
3711 return -EINVAL;
3712 }
3713 mddev->delta_disks = n - olddisks;
3714 mddev->raid_disks = n;
3715 mddev->reshape_backwards = (mddev->delta_disks < 0);
3716 } else
3717 mddev->raid_disks = n;
3718 return rv ? rv : len;
3719}
3720static struct md_sysfs_entry md_raid_disks =
3721__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3722
3723static ssize_t
3724chunk_size_show(struct mddev *mddev, char *page)
3725{
3726 if (mddev->reshape_position != MaxSector &&
3727 mddev->chunk_sectors != mddev->new_chunk_sectors)
3728 return sprintf(page, "%d (%d)\n",
3729 mddev->new_chunk_sectors << 9,
3730 mddev->chunk_sectors << 9);
3731 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3732}
3733
3734static ssize_t
3735chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3736{
3737 char *e;
3738 unsigned long n = simple_strtoul(buf, &e, 10);
3739
3740 if (!*buf || (*e && *e != '\n'))
3741 return -EINVAL;
3742
3743 if (mddev->pers) {
3744 int err;
3745 if (mddev->pers->check_reshape == NULL)
3746 return -EBUSY;
3747 mddev->new_chunk_sectors = n >> 9;
3748 err = mddev->pers->check_reshape(mddev);
3749 if (err) {
3750 mddev->new_chunk_sectors = mddev->chunk_sectors;
3751 return err;
3752 }
3753 } else {
3754 mddev->new_chunk_sectors = n >> 9;
3755 if (mddev->reshape_position == MaxSector)
3756 mddev->chunk_sectors = n >> 9;
3757 }
3758 return len;
3759}
3760static struct md_sysfs_entry md_chunk_size =
3761__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3762
3763static ssize_t
3764resync_start_show(struct mddev *mddev, char *page)
3765{
3766 if (mddev->recovery_cp == MaxSector)
3767 return sprintf(page, "none\n");
3768 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3769}
3770
3771static ssize_t
3772resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3773{
3774 char *e;
3775 unsigned long long n = simple_strtoull(buf, &e, 10);
3776
3777 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3778 return -EBUSY;
3779 if (cmd_match(buf, "none"))
3780 n = MaxSector;
3781 else if (!*buf || (*e && *e != '\n'))
3782 return -EINVAL;
3783
3784 mddev->recovery_cp = n;
3785 if (mddev->pers)
3786 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3787 return len;
3788}
3789static struct md_sysfs_entry md_resync_start =
3790__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
/*
 * States reported and accepted through the 'array_state' sysfs file.
 * The enumerators index array_states[] below, so the two lists must be
 * kept in the same order.  bad_word is the index of the terminating
 * NULL, which is the value match_word() yields when no name matches.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
/* user-visible names of the states above, NULL-terminated */
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
3833
/*
 * Look up @word in the NULL-terminated string array @list using
 * cmd_match().  Returns the index of the first matching entry, or the
 * index of the terminating NULL when nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
3842
3843static ssize_t
3844array_state_show(struct mddev *mddev, char *page)
3845{
3846 enum array_state st = inactive;
3847
3848 if (mddev->pers)
3849 switch(mddev->ro) {
3850 case 1:
3851 st = readonly;
3852 break;
3853 case 2:
3854 st = read_auto;
3855 break;
3856 case 0:
3857 if (mddev->in_sync)
3858 st = clean;
3859 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3860 st = write_pending;
3861 else if (mddev->safemode)
3862 st = active_idle;
3863 else
3864 st = active;
3865 }
3866 else {
3867 if (list_empty(&mddev->disks) &&
3868 mddev->raid_disks == 0 &&
3869 mddev->dev_sectors == 0)
3870 st = clear;
3871 else
3872 st = inactive;
3873 }
3874 return sprintf(page, "%s\n", array_states[st]);
3875}
3876
/* Forward declarations of the helpers that array_state_store() calls. */
static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
static int do_md_run(struct mddev * mddev);
static int restart_array(struct mddev *mddev);
3881
/*
 * sysfs store handler for "array_state".
 *
 * The written word is mapped to an enum array_state with match_word() and
 * the matching transition is attempted.  err starts out as -EINVAL, so
 * every state that assigns nothing (bad_word, suspended, write_pending,
 * active_idle) is rejected with -EINVAL.
 *
 * Returns @len on success or a negative errno.
 */
static ssize_t
array_state_store(struct mddev *mddev, const char *buf, size_t len)
{
 int err = -EINVAL;
 enum array_state st = match_word(buf, array_states);
 switch(st) {
 case bad_word:
 break;
 case clear:
 /* Stop the array via do_md_stop() with mode 0. */
 err = do_md_stop(mddev, 0, NULL);
 break;
 case inactive:
 /* Stop with mode 2 if a personality is attached, else nothing to do. */
 if (mddev->pers)
 err = do_md_stop(mddev, 2, NULL);
 else
 err = 0;
 break;
 case suspended:
 break;
 case readonly:
 if (mddev->pers)
 err = md_set_readonly(mddev, NULL);
 else {
 /* No personality yet: mark read-only first, then start the array. */
 mddev->ro = 1;
 set_disk_ro(mddev->gendisk, 1);
 err = do_md_run(mddev);
 }
 break;
 case read_auto:
 if (mddev->pers) {
 /* With ro == 2 already, neither branch runs and err stays -EINVAL. */
 if (mddev->ro == 0)
 err = md_set_readonly(mddev, NULL);
 else if (mddev->ro == 1)
 err = restart_array(mddev);
 if (err == 0) {
 mddev->ro = 2;
 set_disk_ro(mddev->gendisk, 0);
 }
 } else {
 mddev->ro = 2;
 err = do_md_run(mddev);
 }
 break;
 case clean:
 if (mddev->pers) {
 /* The return value of restart_array() is not checked here. */
 restart_array(mddev);
 spin_lock_irq(&mddev->write_lock);
 /* Only mark the array in_sync when no writes are pending. */
 if (atomic_read(&mddev->writes_pending) == 0) {
 if (mddev->in_sync == 0) {
 mddev->in_sync = 1;
 if (mddev->safemode == 1)
 mddev->safemode = 0;
 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 }
 err = 0;
 } else
 err = -EBUSY;
 spin_unlock_irq(&mddev->write_lock);
 } else
 err = -EINVAL;
 break;
 case active:
 if (mddev->pers) {
 /* The return value of restart_array() is not checked here. */
 restart_array(mddev);
 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
 wake_up(&mddev->sb_wait);
 err = 0;
 } else {
 mddev->ro = 0;
 set_disk_ro(mddev->gendisk, 0);
 err = do_md_run(mddev);
 }
 break;
 case write_pending:
 case active_idle:
 /* These states cannot be requested; err stays -EINVAL. */
 break;
 }
 if (err)
 return err;
 else {
 /* On success drop an UNTIL_IOCTL hold and notify array_state pollers. */
 if (mddev->hold_active == UNTIL_IOCTL)
 mddev->hold_active = 0;
 sysfs_notify_dirent_safe(mddev->sysfs_state);
 return len;
 }
}
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3973
3974static ssize_t
3975max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3976 return sprintf(page, "%d\n",
3977 atomic_read(&mddev->max_corr_read_errors));
3978}
3979
3980static ssize_t
3981max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3982{
3983 char *e;
3984 unsigned long n = simple_strtoul(buf, &e, 10);
3985
3986 if (*buf && (*e == 0 || *e == '\n')) {
3987 atomic_set(&mddev->max_corr_read_errors, n);
3988 return len;
3989 }
3990 return -EINVAL;
3991}
3992
3993static struct md_sysfs_entry max_corr_read_errors =
3994__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3995 max_corrected_read_errors_store);
3996
/*
 * Show handler that always fails with -EINVAL.  It is the show callback of
 * the attributes declared with S_IWUSR only (md_new_device, md_bitmap).
 */
static ssize_t
null_show(struct mddev *mddev, char *page)
{
 return -EINVAL;
}
4002
/*
 * sysfs store handler for "new_dev".
 *
 * @buf must hold "major:minor" in decimal, optionally followed by a
 * newline.  The device is imported with md_import_device() and bound to
 * the array with bind_rdev_to_array().
 *
 * Returns @len on success, -EINVAL for malformed input, -EOVERFLOW when
 * the numbers do not survive a round trip through MKDEV(), or the error
 * from importing, loading the superblock, or binding.
 */
static ssize_t
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
 char *e;
 int major = simple_strtoul(buf, &e, 10);
 int minor;
 dev_t dev;
 struct md_rdev *rdev;
 int err;

 /* Require a non-empty major, a ':' separator and a non-empty minor. */
 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
 return -EINVAL;
 minor = simple_strtoul(e+1, &e, 10);
 if (*e && *e != '\n')
 return -EINVAL;
 dev = MKDEV(major, minor);
 /* Reject values that MKDEV() cannot represent exactly. */
 if (major != MAJOR(dev) ||
 minor != MINOR(dev))
 return -EOVERFLOW;


 if (mddev->persistent) {
 rdev = md_import_device(dev, mddev->major_version,
 mddev->minor_version);
 /*
  * When the array already has members, load the new device's
  * superblock with the first member passed as the reference device.
  */
 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
 struct md_rdev *rdev0
 = list_entry(mddev->disks.next,
 struct md_rdev, same_set);
 err = super_types[mddev->major_version]
 .load_super(rdev, rdev0, mddev->minor_version);
 if (err < 0)
 goto out;
 }
 } else if (mddev->external)
 rdev = md_import_device(dev, -2, -1);
 else
 rdev = md_import_device(dev, -1, -1);

 if (IS_ERR(rdev))
 return PTR_ERR(rdev);
 err = bind_rdev_to_array(rdev, mddev);
 out:
 /* Any failure after a successful import releases the device again. */
 if (err)
 export_rdev(rdev);
 return err ? err : len;
}

static struct md_sysfs_entry md_new_device =
__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4059
4060static ssize_t
4061bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4062{
4063 char *end;
4064 unsigned long chunk, end_chunk;
4065
4066 if (!mddev->bitmap)
4067 goto out;
4068
4069 while (*buf) {
4070 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4071 if (buf == end) break;
4072 if (*end == '-') {
4073 buf = end + 1;
4074 end_chunk = simple_strtoul(buf, &end, 0);
4075 if (buf == end) break;
4076 }
4077 if (*end && !isspace(*end)) break;
4078 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4079 buf = skip_spaces(end);
4080 }
4081 bitmap_unplug(mddev->bitmap);
4082out:
4083 return len;
4084}
4085
4086static struct md_sysfs_entry md_bitmap =
4087__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4088
4089static ssize_t
4090size_show(struct mddev *mddev, char *page)
4091{
4092 return sprintf(page, "%llu\n",
4093 (unsigned long long)mddev->dev_sectors / 2);
4094}
4095
4096static int update_size(struct mddev *mddev, sector_t num_sectors);
4097
4098static ssize_t
4099size_store(struct mddev *mddev, const char *buf, size_t len)
4100{
4101
4102
4103
4104
4105 sector_t sectors;
4106 int err = strict_blocks_to_sectors(buf, §ors);
4107
4108 if (err < 0)
4109 return err;
4110 if (mddev->pers) {
4111 err = update_size(mddev, sectors);
4112 md_update_sb(mddev, 1);
4113 } else {
4114 if (mddev->dev_sectors == 0 ||
4115 mddev->dev_sectors > sectors)
4116 mddev->dev_sectors = sectors;
4117 else
4118 err = -ENOSPC;
4119 }
4120 return err ? err : len;
4121}
4122
4123static struct md_sysfs_entry md_size =
4124__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4125
4126
4127
4128
4129
4130
4131
4132
4133static ssize_t
4134metadata_show(struct mddev *mddev, char *page)
4135{
4136 if (mddev->persistent)
4137 return sprintf(page, "%d.%d\n",
4138 mddev->major_version, mddev->minor_version);
4139 else if (mddev->external)
4140 return sprintf(page, "external:%s\n", mddev->metadata_type);
4141 else
4142 return sprintf(page, "none\n");
4143}
4144
4145static ssize_t
4146metadata_store(struct mddev *mddev, const char *buf, size_t len)
4147{
4148 int major, minor;
4149 char *e;
4150
4151
4152
4153
4154 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4155 ;
4156 else if (!list_empty(&mddev->disks))
4157 return -EBUSY;
4158
4159 if (cmd_match(buf, "none")) {
4160 mddev->persistent = 0;
4161 mddev->external = 0;
4162 mddev->major_version = 0;
4163 mddev->minor_version = 90;
4164 return len;
4165 }
4166 if (strncmp(buf, "external:", 9) == 0) {
4167 size_t namelen = len-9;
4168 if (namelen >= sizeof(mddev->metadata_type))
4169 namelen = sizeof(mddev->metadata_type)-1;
4170 strncpy(mddev->metadata_type, buf+9, namelen);
4171 mddev->metadata_type[namelen] = 0;
4172 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4173 mddev->metadata_type[--namelen] = 0;
4174 mddev->persistent = 0;
4175 mddev->external = 1;
4176 mddev->major_version = 0;
4177 mddev->minor_version = 90;
4178 return len;
4179 }
4180 major = simple_strtoul(buf, &e, 10);
4181 if (e==buf || *e != '.')
4182 return -EINVAL;
4183 buf = e+1;
4184 minor = simple_strtoul(buf, &e, 10);
4185 if (e==buf || (*e && *e != '\n') )
4186 return -EINVAL;
4187 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4188 return -ENOENT;
4189 mddev->major_version = major;
4190 mddev->minor_version = minor;
4191 mddev->persistent = 1;
4192 mddev->external = 0;
4193 return len;
4194}
4195
4196static struct md_sysfs_entry md_metadata =
4197__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4198
4199static ssize_t
4200action_show(struct mddev *mddev, char *page)
4201{
4202 char *type = "idle";
4203 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4204 type = "frozen";
4205 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4206 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4207 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4208 type = "reshape";
4209 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4210 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4211 type = "resync";
4212 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4213 type = "check";
4214 else
4215 type = "repair";
4216 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4217 type = "recover";
4218 }
4219 return sprintf(page, "%s\n", type);
4220}
4221
/*
 * sysfs store handler for "sync_action".
 *
 * Recognised words: "frozen", "idle", "resync", "recover", "reshape",
 * "check" and "repair".
 *
 * Returns @len on success; -EINVAL when there is no personality, the
 * personality has no sync_request or start_reshape method, or the word is
 * unknown; -EBUSY when a new action is requested while MD_RECOVERY_RUNNING
 * or MD_RECOVERY_NEEDED is set; or the error from start_reshape().
 */
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
 if (!mddev->pers || !mddev->pers->sync_request)
 return -EINVAL;

 /* "frozen" sets MD_RECOVERY_FROZEN; every other word clears it. */
 if (cmd_match(page, "frozen"))
 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 else
 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 /* Interrupt and reap an existing sync thread. */
 if (mddev->sync_thread) {
 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 md_reap_sync_thread(mddev);
 }
 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
 return -EBUSY;
 else if (cmd_match(page, "resync"))
 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 else if (cmd_match(page, "recover")) {
 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 } else if (cmd_match(page, "reshape")) {
 int err;
 if (mddev->pers->start_reshape == NULL)
 return -EINVAL;
 err = mddev->pers->start_reshape(mddev);
 if (err)
 return err;
 sysfs_notify(&mddev->kobj, NULL, "degraded");
 } else {
 /* "check" and "repair" share the flags below; only "check" sets CHECK. */
 if (cmd_match(page, "check"))
 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 else if (!cmd_match(page, "repair"))
 return -EINVAL;
 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 }
 if (mddev->ro == 2) {
 /* An array with ro == 2 is switched to ro == 0 here. */
 mddev->ro = 0;
 md_wakeup_thread(mddev->sync_thread);
 }
 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 md_wakeup_thread(mddev->thread);
 sysfs_notify_dirent_safe(mddev->sysfs_action);
 return len;
}
4274
4275static ssize_t
4276mismatch_cnt_show(struct mddev *mddev, char *page)
4277{
4278 return sprintf(page, "%llu\n",
4279 (unsigned long long)
4280 atomic64_read(&mddev->resync_mismatches));
4281}
4282
4283static struct md_sysfs_entry md_scan_mode =
4284__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4285
4286
4287static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4288
4289static ssize_t
4290sync_min_show(struct mddev *mddev, char *page)
4291{
4292 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4293 mddev->sync_speed_min ? "local": "system");
4294}
4295
4296static ssize_t
4297sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4298{
4299 int min;
4300 char *e;
4301 if (strncmp(buf, "system", 6)==0) {
4302 mddev->sync_speed_min = 0;
4303 return len;
4304 }
4305 min = simple_strtoul(buf, &e, 10);
4306 if (buf == e || (*e && *e != '\n') || min <= 0)
4307 return -EINVAL;
4308 mddev->sync_speed_min = min;
4309 return len;
4310}
4311
4312static struct md_sysfs_entry md_sync_min =
4313__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4314
4315static ssize_t
4316sync_max_show(struct mddev *mddev, char *page)
4317{
4318 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4319 mddev->sync_speed_max ? "local": "system");
4320}
4321
4322static ssize_t
4323sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4324{
4325 int max;
4326 char *e;
4327 if (strncmp(buf, "system", 6)==0) {
4328 mddev->sync_speed_max = 0;
4329 return len;
4330 }
4331 max = simple_strtoul(buf, &e, 10);
4332 if (buf == e || (*e && *e != '\n') || max <= 0)
4333 return -EINVAL;
4334 mddev->sync_speed_max = max;
4335 return len;
4336}
4337
4338static struct md_sysfs_entry md_sync_max =
4339__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4340
4341static ssize_t
4342degraded_show(struct mddev *mddev, char *page)
4343{
4344 return sprintf(page, "%d\n", mddev->degraded);
4345}
4346static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4347
4348static ssize_t
4349sync_force_parallel_show(struct mddev *mddev, char *page)
4350{
4351 return sprintf(page, "%d\n", mddev->parallel_resync);
4352}
4353
4354static ssize_t
4355sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4356{
4357 long n;
4358
4359 if (strict_strtol(buf, 10, &n))
4360 return -EINVAL;
4361
4362 if (n != 0 && n != 1)
4363 return -EINVAL;
4364
4365 mddev->parallel_resync = n;
4366
4367 if (mddev->sync_thread)
4368 wake_up(&resync_wait);
4369
4370 return len;
4371}
4372
4373
4374static struct md_sysfs_entry md_sync_force_parallel =
4375__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4376 sync_force_parallel_show, sync_force_parallel_store);
4377
4378static ssize_t
4379sync_speed_show(struct mddev *mddev, char *page)
4380{
4381 unsigned long resync, dt, db;
4382 if (mddev->curr_resync == 0)
4383 return sprintf(page, "none\n");
4384 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4385 dt = (jiffies - mddev->resync_mark) / HZ;
4386 if (!dt) dt++;
4387 db = resync - mddev->resync_mark_cnt;
4388 return sprintf(page, "%lu\n", db/dt/2);
4389}
4390
4391static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4392
4393static ssize_t
4394sync_completed_show(struct mddev *mddev, char *page)
4395{
4396 unsigned long long max_sectors, resync;
4397
4398 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4399 return sprintf(page, "none\n");
4400
4401 if (mddev->curr_resync == 1 ||
4402 mddev->curr_resync == 2)
4403 return sprintf(page, "delayed\n");
4404
4405 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4406 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4407 max_sectors = mddev->resync_max_sectors;
4408 else
4409 max_sectors = mddev->dev_sectors;
4410
4411 resync = mddev->curr_resync_completed;
4412 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4413}
4414
4415static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4416
4417static ssize_t
4418min_sync_show(struct mddev *mddev, char *page)
4419{
4420 return sprintf(page, "%llu\n",
4421 (unsigned long long)mddev->resync_min);
4422}
4423static ssize_t
4424min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4425{
4426 unsigned long long min;
4427 if (strict_strtoull(buf, 10, &min))
4428 return -EINVAL;
4429 if (min > mddev->resync_max)
4430 return -EINVAL;
4431 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4432 return -EBUSY;
4433
4434
4435 if (mddev->chunk_sectors) {
4436 sector_t temp = min;
4437 if (sector_div(temp, mddev->chunk_sectors))
4438 return -EINVAL;
4439 }
4440 mddev->resync_min = min;
4441
4442 return len;
4443}
4444
4445static struct md_sysfs_entry md_min_sync =
4446__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4447
4448static ssize_t
4449max_sync_show(struct mddev *mddev, char *page)
4450{
4451 if (mddev->resync_max == MaxSector)
4452 return sprintf(page, "max\n");
4453 else
4454 return sprintf(page, "%llu\n",
4455 (unsigned long long)mddev->resync_max);
4456}
4457static ssize_t
4458max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4459{
4460 if (strncmp(buf, "max", 3) == 0)
4461 mddev->resync_max = MaxSector;
4462 else {
4463 unsigned long long max;
4464 if (strict_strtoull(buf, 10, &max))
4465 return -EINVAL;
4466 if (max < mddev->resync_min)
4467 return -EINVAL;
4468 if (max < mddev->resync_max &&
4469 mddev->ro == 0 &&
4470 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4471 return -EBUSY;
4472
4473
4474 if (mddev->chunk_sectors) {
4475 sector_t temp = max;
4476 if (sector_div(temp, mddev->chunk_sectors))
4477 return -EINVAL;
4478 }
4479 mddev->resync_max = max;
4480 }
4481 wake_up(&mddev->recovery_wait);
4482 return len;
4483}
4484
4485static struct md_sysfs_entry md_max_sync =
4486__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4487
4488static ssize_t
4489suspend_lo_show(struct mddev *mddev, char *page)
4490{
4491 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4492}
4493
4494static ssize_t
4495suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4496{
4497 char *e;
4498 unsigned long long new = simple_strtoull(buf, &e, 10);
4499 unsigned long long old = mddev->suspend_lo;
4500
4501 if (mddev->pers == NULL ||
4502 mddev->pers->quiesce == NULL)
4503 return -EINVAL;
4504 if (buf == e || (*e && *e != '\n'))
4505 return -EINVAL;
4506
4507 mddev->suspend_lo = new;
4508 if (new >= old)
4509
4510 mddev->pers->quiesce(mddev, 2);
4511 else {
4512
4513 mddev->pers->quiesce(mddev, 1);
4514 mddev->pers->quiesce(mddev, 0);
4515 }
4516 return len;
4517}
4518static struct md_sysfs_entry md_suspend_lo =
4519__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4520
4521
4522static ssize_t
4523suspend_hi_show(struct mddev *mddev, char *page)
4524{
4525 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4526}
4527
4528static ssize_t
4529suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4530{
4531 char *e;
4532 unsigned long long new = simple_strtoull(buf, &e, 10);
4533 unsigned long long old = mddev->suspend_hi;
4534
4535 if (mddev->pers == NULL ||
4536 mddev->pers->quiesce == NULL)
4537 return -EINVAL;
4538 if (buf == e || (*e && *e != '\n'))
4539 return -EINVAL;
4540
4541 mddev->suspend_hi = new;
4542 if (new <= old)
4543
4544 mddev->pers->quiesce(mddev, 2);
4545 else {
4546
4547 mddev->pers->quiesce(mddev, 1);
4548 mddev->pers->quiesce(mddev, 0);
4549 }
4550 return len;
4551}
4552static struct md_sysfs_entry md_suspend_hi =
4553__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4554
4555static ssize_t
4556reshape_position_show(struct mddev *mddev, char *page)
4557{
4558 if (mddev->reshape_position != MaxSector)
4559 return sprintf(page, "%llu\n",
4560 (unsigned long long)mddev->reshape_position);
4561 strcpy(page, "none\n");
4562 return 5;
4563}
4564
4565static ssize_t
4566reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4567{
4568 struct md_rdev *rdev;
4569 char *e;
4570 unsigned long long new = simple_strtoull(buf, &e, 10);
4571 if (mddev->pers)
4572 return -EBUSY;
4573 if (buf == e || (*e && *e != '\n'))
4574 return -EINVAL;
4575 mddev->reshape_position = new;
4576 mddev->delta_disks = 0;
4577 mddev->reshape_backwards = 0;
4578 mddev->new_level = mddev->level;
4579 mddev->new_layout = mddev->layout;
4580 mddev->new_chunk_sectors = mddev->chunk_sectors;
4581 rdev_for_each(rdev, mddev)
4582 rdev->new_data_offset = rdev->data_offset;
4583 return len;
4584}
4585
4586static struct md_sysfs_entry md_reshape_position =
4587__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4588 reshape_position_store);
4589
4590static ssize_t
4591reshape_direction_show(struct mddev *mddev, char *page)
4592{
4593 return sprintf(page, "%s\n",
4594 mddev->reshape_backwards ? "backwards" : "forwards");
4595}
4596
4597static ssize_t
4598reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4599{
4600 int backwards = 0;
4601 if (cmd_match(buf, "forwards"))
4602 backwards = 0;
4603 else if (cmd_match(buf, "backwards"))
4604 backwards = 1;
4605 else
4606 return -EINVAL;
4607 if (mddev->reshape_backwards == backwards)
4608 return len;
4609
4610
4611 if (mddev->delta_disks)
4612 return -EBUSY;
4613
4614 if (mddev->persistent &&
4615 mddev->major_version == 0)
4616 return -EINVAL;
4617
4618 mddev->reshape_backwards = backwards;
4619 return len;
4620}
4621
4622static struct md_sysfs_entry md_reshape_direction =
4623__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4624 reshape_direction_store);
4625
4626static ssize_t
4627array_size_show(struct mddev *mddev, char *page)
4628{
4629 if (mddev->external_size)
4630 return sprintf(page, "%llu\n",
4631 (unsigned long long)mddev->array_sectors/2);
4632 else
4633 return sprintf(page, "default\n");
4634}
4635
4636static ssize_t
4637array_size_store(struct mddev *mddev, const char *buf, size_t len)
4638{
4639 sector_t sectors;
4640
4641 if (strncmp(buf, "default", 7) == 0) {
4642 if (mddev->pers)
4643 sectors = mddev->pers->size(mddev, 0, 0);
4644 else
4645 sectors = mddev->array_sectors;
4646
4647 mddev->external_size = 0;
4648 } else {
4649 if (strict_blocks_to_sectors(buf, §ors) < 0)
4650 return -EINVAL;
4651 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4652 return -E2BIG;
4653
4654 mddev->external_size = 1;
4655 }
4656
4657 mddev->array_sectors = sectors;
4658 if (mddev->pers) {
4659 set_capacity(mddev->gendisk, mddev->array_sectors);
4660 revalidate_disk(mddev->gendisk);
4661 }
4662 return len;
4663}
4664
4665static struct md_sysfs_entry md_array_size =
4666__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4667 array_size_store);
4668
/*
 * Attributes listed as md_ktype.default_attrs, i.e. the default attribute
 * set of the "md" kobject.  NULL-terminated.
 */
static struct attribute *md_default_attrs[] = {
 &md_level.attr,
 &md_layout.attr,
 &md_raid_disks.attr,
 &md_chunk_size.attr,
 &md_size.attr,
 &md_resync_start.attr,
 &md_metadata.attr,
 &md_new_device.attr,
 &md_safe_delay.attr,
 &md_array_state.attr,
 &md_reshape_position.attr,
 &md_reshape_direction.attr,
 &md_array_size.attr,
 &max_corr_read_errors.attr,
 NULL,
};
4686
/*
 * Extra attributes grouped into md_redundancy_group.  md_run() registers
 * the group only when the personality provides a sync_request method.
 * NULL-terminated.
 */
static struct attribute *md_redundancy_attrs[] = {
 &md_scan_mode.attr,
 &md_mismatches.attr,
 &md_sync_min.attr,
 &md_sync_max.attr,
 &md_sync_speed.attr,
 &md_sync_force_parallel.attr,
 &md_sync_completed.attr,
 &md_min_sync.attr,
 &md_max_sync.attr,
 &md_suspend_lo.attr,
 &md_suspend_hi.attr,
 &md_bitmap.attr,
 &md_degraded.attr,
 NULL,
};
/* Unnamed group (.name == NULL) wrapping md_redundancy_attrs. */
static struct attribute_group md_redundancy_group = {
 .name = NULL,
 .attrs = md_redundancy_attrs,
};
4707
4708
4709static ssize_t
4710md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4711{
4712 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4713 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4714 ssize_t rv;
4715
4716 if (!entry->show)
4717 return -EIO;
4718 spin_lock(&all_mddevs_lock);
4719 if (list_empty(&mddev->all_mddevs)) {
4720 spin_unlock(&all_mddevs_lock);
4721 return -EBUSY;
4722 }
4723 mddev_get(mddev);
4724 spin_unlock(&all_mddevs_lock);
4725
4726 rv = mddev_lock(mddev);
4727 if (!rv) {
4728 rv = entry->show(mddev, page);
4729 mddev_unlock(mddev);
4730 }
4731 mddev_put(mddev);
4732 return rv;
4733}
4734
4735static ssize_t
4736md_attr_store(struct kobject *kobj, struct attribute *attr,
4737 const char *page, size_t length)
4738{
4739 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4740 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4741 ssize_t rv;
4742
4743 if (!entry->store)
4744 return -EIO;
4745 if (!capable(CAP_SYS_ADMIN))
4746 return -EACCES;
4747 spin_lock(&all_mddevs_lock);
4748 if (list_empty(&mddev->all_mddevs)) {
4749 spin_unlock(&all_mddevs_lock);
4750 return -EBUSY;
4751 }
4752 mddev_get(mddev);
4753 spin_unlock(&all_mddevs_lock);
4754 if (entry->store == new_dev_store)
4755 flush_workqueue(md_misc_wq);
4756 rv = mddev_lock(mddev);
4757 if (!rv) {
4758 rv = entry->store(mddev, page, length);
4759 mddev_unlock(mddev);
4760 }
4761 mddev_put(mddev);
4762 return rv;
4763}
4764
4765static void md_free(struct kobject *ko)
4766{
4767 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4768
4769 if (mddev->sysfs_state)
4770 sysfs_put(mddev->sysfs_state);
4771
4772 if (mddev->gendisk) {
4773 del_gendisk(mddev->gendisk);
4774 put_disk(mddev->gendisk);
4775 }
4776 if (mddev->queue)
4777 blk_cleanup_queue(mddev->queue);
4778
4779 kfree(mddev);
4780}
4781
/* sysfs operations of the "md" kobject: dispatch to md_attr_show/store. */
static const struct sysfs_ops md_sysfs_ops = {
 .show = md_attr_show,
 .store = md_attr_store,
};
/*
 * kobject type passed to kobject_init_and_add() in md_alloc(): md_free()
 * is the release callback and md_default_attrs the default attribute set.
 */
static struct kobj_type md_ktype = {
 .release = md_free,
 .sysfs_ops = &md_sysfs_ops,
 .default_attrs = md_default_attrs,
};
4791
/*
 * NOTE(review): presumably the major number used for partitionable md
 * devices; it is non-static and not assigned in this section -- confirm
 * against its users.
 */
int mdp_major = 0;
4793
/*
 * Work function for mddev->del_work: removes the bitmap attribute group,
 * then deletes the "md" kobject and drops a reference on it.
 */
static void mddev_delayed_delete(struct work_struct *ws)
{
 struct mddev *mddev = container_of(ws, struct mddev, del_work);

 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
 kobject_del(&mddev->kobj);
 kobject_put(&mddev->kobj);
}
4802
/*
 * Set up the block-layer objects for the md device @dev.
 *
 * Looks the mddev up with mddev_find(), allocates its request queue and
 * gendisk, adds the disk and registers the "md" kobject beneath it.
 * @name, when non-NULL, becomes the disk name; otherwise the name is
 * "md<unit>" or "md_d<unit>" depending on the major number.
 *
 * Returns 0 on success, -ENODEV when mddev_find() fails, -EEXIST when the
 * mddev already has a gendisk or @name is already used by another mddev's
 * gendisk, and -ENOMEM when the queue or the disk cannot be allocated.
 * A failure of kobject_init_and_add() is logged and then treated as
 * success.  mddev_put() is called on every path once mddev_find()
 * succeeded.
 */
static int md_alloc(dev_t dev, char *name)
{
 /* Serialises the allocation section of all md_alloc() callers. */
 static DEFINE_MUTEX(disks_mutex);
 struct mddev *mddev = mddev_find(dev);
 struct gendisk *disk;
 int partitioned;
 int shift;
 int unit;
 int error;

 if (!mddev)
 return -ENODEV;

 /* Any major other than MD_MAJOR is treated as partitionable. */
 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
 shift = partitioned ? MdpMinorShift : 0;
 unit = MINOR(mddev->unit) >> shift;

 /*
  * NOTE(review): presumably lets pending work on md_misc_wq (such as
  * delayed deletions) finish before a new disk is created -- confirm.
  */
 flush_workqueue(md_misc_wq);

 mutex_lock(&disks_mutex);
 error = -EEXIST;
 if (mddev->gendisk)
 goto abort;

 if (name) {
 /* Refuse a name already carried by another mddev's gendisk. */
 struct mddev *mddev2;
 spin_lock(&all_mddevs_lock);

 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
 if (mddev2->gendisk &&
 strcmp(mddev2->gendisk->disk_name, name) == 0) {
 spin_unlock(&all_mddevs_lock);
 goto abort;
 }
 spin_unlock(&all_mddevs_lock);
 }

 error = -ENOMEM;
 mddev->queue = blk_alloc_queue(GFP_KERNEL);
 if (!mddev->queue)
 goto abort;
 mddev->queue->queuedata = mddev;

 blk_queue_make_request(mddev->queue, md_make_request);
 blk_set_stacking_limits(&mddev->queue->limits);

 disk = alloc_disk(1 << shift);
 if (!disk) {
 /* Undo the queue allocation before bailing out with -ENOMEM. */
 blk_cleanup_queue(mddev->queue);
 mddev->queue = NULL;
 goto abort;
 }
 disk->major = MAJOR(mddev->unit);
 disk->first_minor = unit << shift;
 if (name)
 strcpy(disk->disk_name, name);
 else if (partitioned)
 sprintf(disk->disk_name, "md_d%d", unit);
 else
 sprintf(disk->disk_name, "md%d", unit);
 disk->fops = &md_fops;
 disk->private_data = mddev;
 disk->queue = mddev->queue;
 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);

 /* NOTE(review): purpose of GENHD_FL_EXT_DEVT is not shown here -- confirm. */
 disk->flags |= GENHD_FL_EXT_DEVT;
 mddev->gendisk = disk;

 /* open_mutex is held across add_disk() and the kobject registration. */
 mutex_lock(&mddev->open_mutex);
 add_disk(disk);

 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
 &disk_to_dev(disk)->kobj, "%s", "md");
 if (error) {
 /* Registration failure is only logged; the function still succeeds. */
 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
 disk->disk_name);
 error = 0;
 }
 if (mddev->kobj.sd &&
 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
 printk(KERN_DEBUG "pointless warning\n");
 mutex_unlock(&mddev->open_mutex);
 abort:
 mutex_unlock(&disks_mutex);
 /* Announce the kobject and cache the array_state dirent if it exists. */
 if (!error && mddev->kobj.sd) {
 kobject_uevent(&mddev->kobj, KOBJ_ADD);
 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
 }
 mddev_put(mddev);
 return error;
}
4907
/*
 * Probe callback: calls md_alloc() for @dev with no explicit name, ignores
 * its result, and always returns NULL.  @part and @data are unused.
 */
static struct kobject *md_probe(dev_t dev, int *part, void *data)
{
 md_alloc(dev, NULL);
 return NULL;
}
4913
4914static int add_named_array(const char *val, struct kernel_param *kp)
4915{
4916
4917
4918
4919
4920 int len = strlen(val);
4921 char buf[DISK_NAME_LEN];
4922
4923 while (len && val[len-1] == '\n')
4924 len--;
4925 if (len >= DISK_NAME_LEN)
4926 return -E2BIG;
4927 strlcpy(buf, val, len+1);
4928 if (strncmp(buf, "md_", 3) != 0)
4929 return -EINVAL;
4930 return md_alloc(0, buf);
4931}
4932
4933static void md_safemode_timeout(unsigned long data)
4934{
4935 struct mddev *mddev = (struct mddev *) data;
4936
4937 if (!atomic_read(&mddev->writes_pending)) {
4938 mddev->safemode = 1;
4939 if (mddev->external)
4940 sysfs_notify_dirent_safe(mddev->sysfs_state);
4941 }
4942 md_wakeup_thread(mddev->thread);
4943}
4944
/* Copied into mddev->ok_start_degraded by md_run(). */
static int start_dirty_degraded;
4946
4947int md_run(struct mddev *mddev)
4948{
4949 int err;
4950 struct md_rdev *rdev;
4951 struct md_personality *pers;
4952
4953 if (list_empty(&mddev->disks))
4954
4955 return -EINVAL;
4956
4957 if (mddev->pers)
4958 return -EBUSY;
4959
4960 if (mddev->sysfs_active)
4961 return -EBUSY;
4962
4963
4964
4965
4966 if (!mddev->raid_disks) {
4967 if (!mddev->persistent)
4968 return -EINVAL;
4969 analyze_sbs(mddev);
4970 }
4971
4972 if (mddev->level != LEVEL_NONE)
4973 request_module("md-level-%d", mddev->level);
4974 else if (mddev->clevel[0])
4975 request_module("md-%s", mddev->clevel);
4976
4977
4978
4979
4980
4981
4982 rdev_for_each(rdev, mddev) {
4983 if (test_bit(Faulty, &rdev->flags))
4984 continue;
4985 sync_blockdev(rdev->bdev);
4986 invalidate_bdev(rdev->bdev);
4987
4988
4989
4990
4991
4992 if (rdev->meta_bdev) {
4993 ;
4994 } else if (rdev->data_offset < rdev->sb_start) {
4995 if (mddev->dev_sectors &&
4996 rdev->data_offset + mddev->dev_sectors
4997 > rdev->sb_start) {
4998 printk("md: %s: data overlaps metadata\n",
4999 mdname(mddev));
5000 return -EINVAL;
5001 }
5002 } else {
5003 if (rdev->sb_start + rdev->sb_size/512
5004 > rdev->data_offset) {
5005 printk("md: %s: metadata overlaps data\n",
5006 mdname(mddev));
5007 return -EINVAL;
5008 }
5009 }
5010 sysfs_notify_dirent_safe(rdev->sysfs_state);
5011 }
5012
5013 if (mddev->bio_set == NULL)
5014 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5015
5016 spin_lock(&pers_lock);
5017 pers = find_pers(mddev->level, mddev->clevel);
5018 if (!pers || !try_module_get(pers->owner)) {
5019 spin_unlock(&pers_lock);
5020 if (mddev->level != LEVEL_NONE)
5021 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5022 mddev->level);
5023 else
5024 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5025 mddev->clevel);
5026 return -EINVAL;
5027 }
5028 mddev->pers = pers;
5029 spin_unlock(&pers_lock);
5030 if (mddev->level != pers->level) {
5031 mddev->level = pers->level;
5032 mddev->new_level = pers->level;
5033 }
5034 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5035
5036 if (mddev->reshape_position != MaxSector &&
5037 pers->start_reshape == NULL) {
5038
5039 mddev->pers = NULL;
5040 module_put(pers->owner);
5041 return -EINVAL;
5042 }
5043
5044 if (pers->sync_request) {
5045
5046
5047
5048 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5049 struct md_rdev *rdev2;
5050 int warned = 0;
5051
5052 rdev_for_each(rdev, mddev)
5053 rdev_for_each(rdev2, mddev) {
5054 if (rdev < rdev2 &&
5055 rdev->bdev->bd_contains ==
5056 rdev2->bdev->bd_contains) {
5057 printk(KERN_WARNING
5058 "%s: WARNING: %s appears to be"
5059 " on the same physical disk as"
5060 " %s.\n",
5061 mdname(mddev),
5062 bdevname(rdev->bdev,b),
5063 bdevname(rdev2->bdev,b2));
5064 warned = 1;
5065 }
5066 }
5067
5068 if (warned)
5069 printk(KERN_WARNING
5070 "True protection against single-disk"
5071 " failure might be compromised.\n");
5072 }
5073
5074 mddev->recovery = 0;
5075
5076 mddev->resync_max_sectors = mddev->dev_sectors;
5077
5078 mddev->ok_start_degraded = start_dirty_degraded;
5079
5080 if (start_readonly && mddev->ro == 0)
5081 mddev->ro = 2;
5082
5083 err = mddev->pers->run(mddev);
5084 if (err)
5085 printk(KERN_ERR "md: pers->run() failed ...\n");
5086 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5087 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5088 " but 'external_size' not in effect?\n", __func__);
5089 printk(KERN_ERR
5090 "md: invalid array_size %llu > default size %llu\n",
5091 (unsigned long long)mddev->array_sectors / 2,
5092 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5093 err = -EINVAL;
5094 mddev->pers->stop(mddev);
5095 }
5096 if (err == 0 && mddev->pers->sync_request &&
5097 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5098 err = bitmap_create(mddev);
5099 if (err) {
5100 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5101 mdname(mddev), err);
5102 mddev->pers->stop(mddev);
5103 }
5104 }
5105 if (err) {
5106 module_put(mddev->pers->owner);
5107 mddev->pers = NULL;
5108 bitmap_destroy(mddev);
5109 return err;
5110 }
5111 if (mddev->pers->sync_request) {
5112 if (mddev->kobj.sd &&
5113 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5114 printk(KERN_WARNING
5115 "md: cannot register extra attributes for %s\n",
5116 mdname(mddev));
5117 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5118 } else if (mddev->ro == 2)
5119 mddev->ro = 0;
5120
5121 atomic_set(&mddev->writes_pending,0);
5122 atomic_set(&mddev->max_corr_read_errors,
5123 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5124 mddev->safemode = 0;
5125 mddev->safemode_timer.function = md_safemode_timeout;
5126 mddev->safemode_timer.data = (unsigned long) mddev;
5127 mddev->safemode_delay = (200 * HZ)/1000 +1;
5128 mddev->in_sync = 1;
5129 smp_wmb();
5130 mddev->ready = 1;
5131 rdev_for_each(rdev, mddev)
5132 if (rdev->raid_disk >= 0)
5133 if (sysfs_link_rdev(mddev, rdev))
5134 ;
5135
5136 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5137
5138 if (mddev->flags)
5139 md_update_sb(mddev, 0);
5140
5141 md_new_event(mddev);
5142 sysfs_notify_dirent_safe(mddev->sysfs_state);
5143 sysfs_notify_dirent_safe(mddev->sysfs_action);
5144 sysfs_notify(&mddev->kobj, NULL, "degraded");
5145 return 0;
5146}
5147EXPORT_SYMBOL_GPL(md_run);
5148
5149static int do_md_run(struct mddev *mddev)
5150{
5151 int err;
5152
5153 err = md_run(mddev);
5154 if (err)
5155 goto out;
5156 err = bitmap_load(mddev);
5157 if (err) {
5158 bitmap_destroy(mddev);
5159 goto out;
5160 }
5161
5162 md_wakeup_thread(mddev->thread);
5163 md_wakeup_thread(mddev->sync_thread);
5164
5165 set_capacity(mddev->gendisk, mddev->array_sectors);
5166 revalidate_disk(mddev->gendisk);
5167 mddev->changed = 1;
5168 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5169out:
5170 return err;
5171}
5172
5173static int restart_array(struct mddev *mddev)
5174{
5175 struct gendisk *disk = mddev->gendisk;
5176
5177
5178 if (list_empty(&mddev->disks))
5179 return -ENXIO;
5180 if (!mddev->pers)
5181 return -EINVAL;
5182 if (!mddev->ro)
5183 return -EBUSY;
5184 mddev->safemode = 0;
5185 mddev->ro = 0;
5186 set_disk_ro(disk, 0);
5187 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5188 mdname(mddev));
5189
5190 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5191 md_wakeup_thread(mddev->thread);
5192 md_wakeup_thread(mddev->sync_thread);
5193 sysfs_notify_dirent_safe(mddev->sysfs_state);
5194 return 0;
5195}
5196
5197
5198
5199static int deny_bitmap_write_access(struct file * file)
5200{
5201 struct inode *inode = file->f_mapping->host;
5202
5203 spin_lock(&inode->i_lock);
5204 if (atomic_read(&inode->i_writecount) > 1) {
5205 spin_unlock(&inode->i_lock);
5206 return -ETXTBSY;
5207 }
5208 atomic_set(&inode->i_writecount, -1);
5209 spin_unlock(&inode->i_lock);
5210
5211 return 0;
5212}
5213
5214void restore_bitmap_write_access(struct file *file)
5215{
5216 struct inode *inode = file->f_mapping->host;
5217
5218 spin_lock(&inode->i_lock);
5219 atomic_set(&inode->i_writecount, 1);
5220 spin_unlock(&inode->i_lock);
5221}
5222
5223static void md_clean(struct mddev *mddev)
5224{
5225 mddev->array_sectors = 0;
5226 mddev->external_size = 0;
5227 mddev->dev_sectors = 0;
5228 mddev->raid_disks = 0;
5229 mddev->recovery_cp = 0;
5230 mddev->resync_min = 0;
5231 mddev->resync_max = MaxSector;
5232 mddev->reshape_position = MaxSector;
5233 mddev->external = 0;
5234 mddev->persistent = 0;
5235 mddev->level = LEVEL_NONE;
5236 mddev->clevel[0] = 0;
5237 mddev->flags = 0;
5238 mddev->ro = 0;
5239 mddev->metadata_type[0] = 0;
5240 mddev->chunk_sectors = 0;
5241 mddev->ctime = mddev->utime = 0;
5242 mddev->layout = 0;
5243 mddev->max_disks = 0;
5244 mddev->events = 0;
5245 mddev->can_decrease_events = 0;
5246 mddev->delta_disks = 0;
5247 mddev->reshape_backwards = 0;
5248 mddev->new_level = LEVEL_NONE;
5249 mddev->new_layout = 0;
5250 mddev->new_chunk_sectors = 0;
5251 mddev->curr_resync = 0;
5252 atomic64_set(&mddev->resync_mismatches, 0);
5253 mddev->suspend_lo = mddev->suspend_hi = 0;
5254 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5255 mddev->recovery = 0;
5256 mddev->in_sync = 0;
5257 mddev->changed = 0;
5258 mddev->degraded = 0;
5259 mddev->safemode = 0;
5260 mddev->merge_check_needed = 0;
5261 mddev->bitmap_info.offset = 0;
5262 mddev->bitmap_info.default_offset = 0;
5263 mddev->bitmap_info.default_space = 0;
5264 mddev->bitmap_info.chunksize = 0;
5265 mddev->bitmap_info.daemon_sleep = 0;
5266 mddev->bitmap_info.max_write_behind = 0;
5267}
5268
5269static void __md_stop_writes(struct mddev *mddev)
5270{
5271 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5272 if (mddev->sync_thread) {
5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5274 md_reap_sync_thread(mddev);
5275 }
5276
5277 del_timer_sync(&mddev->safemode_timer);
5278
5279 bitmap_flush(mddev);
5280 md_super_wait(mddev);
5281
5282 if (mddev->ro == 0 &&
5283 (!mddev->in_sync || mddev->flags)) {
5284
5285 mddev->in_sync = 1;
5286 md_update_sb(mddev, 1);
5287 }
5288}
5289
/*
 * Exported wrapper that runs __md_stop_writes() between mddev_lock() and
 * mddev_unlock().
 *
 * NOTE(review): the return value of mddev_lock() is ignored here, while
 * other callers in this file (e.g. autorun_devices(), md_ioctl()) treat a
 * non-zero return as "lock not taken".  Confirm the lock cannot fail on
 * this path, otherwise the unlock below would be unbalanced.
 */
void md_stop_writes(struct mddev *mddev)
{
	mddev_lock(mddev);
	__md_stop_writes(mddev);
	mddev_unlock(mddev);
}
EXPORT_SYMBOL_GPL(md_stop_writes);
5297
/*
 * Detach the personality from @mddev.
 *
 * The caller must ensure mddev->pers is non-NULL; it is dereferenced
 * unconditionally.  On return mddev->pers is NULL, the personality
 * module reference has been dropped and MD_RECOVERY_FROZEN is cleared.
 */
static void __md_stop(struct mddev *mddev)
{
	/* Mark the array not ready before the personality is stopped. */
	mddev->ready = 0;
	mddev->pers->stop(mddev);
	/*
	 * Personalities with a sync_request method had md_redundancy_group
	 * registered by md_run(); queue it in ->to_remove unless something
	 * else is already queued there.
	 */
	if (mddev->pers->sync_request && mddev->to_remove == NULL)
		mddev->to_remove = &md_redundancy_group;
	/* Drop the module reference; pers must not be used after this. */
	module_put(mddev->pers->owner);
	mddev->pers = NULL;
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
5308
5309void md_stop(struct mddev *mddev)
5310{
5311
5312
5313
5314 __md_stop(mddev);
5315 bitmap_destroy(mddev);
5316 if (mddev->bio_set)
5317 bioset_free(mddev->bio_set);
5318}
5319
5320EXPORT_SYMBOL_GPL(md_stop);
5321
5322static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5323{
5324 int err = 0;
5325 mutex_lock(&mddev->open_mutex);
5326 if (atomic_read(&mddev->openers) > !!bdev) {
5327 printk("md: %s still in use.\n",mdname(mddev));
5328 err = -EBUSY;
5329 goto out;
5330 }
5331 if (bdev)
5332 sync_blockdev(bdev);
5333 if (mddev->pers) {
5334 __md_stop_writes(mddev);
5335
5336 err = -ENXIO;
5337 if (mddev->ro==1)
5338 goto out;
5339 mddev->ro = 1;
5340 set_disk_ro(mddev->gendisk, 1);
5341 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5342 sysfs_notify_dirent_safe(mddev->sysfs_state);
5343 err = 0;
5344 }
5345out:
5346 mutex_unlock(&mddev->open_mutex);
5347 return err;
5348}
5349
5350
5351
5352
5353
/*
 * Stop an array and, when @mode is 0, also dismantle its configuration.
 *
 * @mode: 0 destroys the bitmap, releases the bitmap file, exports all
 *        member devices and resets the mddev via md_clean(); any other
 *        value only stops the personality and leaves that state alone.
 * @bdev: the caller's own open of the array, or NULL.  When non-NULL one
 *        opener is tolerated and the device is synced first.
 *
 * Returns -EBUSY if the array has more openers than allowed or
 * sysfs_active is set, otherwise 0.
 */
static int do_md_stop(struct mddev * mddev, int mode,
		      struct block_device *bdev)
{
	struct gendisk *disk = mddev->gendisk;
	struct md_rdev *rdev;

	mutex_lock(&mddev->open_mutex);
	/* "!!bdev" is 1 when the caller holds an open of its own, else 0. */
	if (atomic_read(&mddev->openers) > !!bdev ||
	    mddev->sysfs_active) {
		printk("md: %s still in use.\n",mdname(mddev));
		mutex_unlock(&mddev->open_mutex);
		return -EBUSY;
	}
	if (bdev)
		/* Write out anything still dirty on the caller's handle. */
		sync_blockdev(bdev);

	if (mddev->pers) {
		/* Lift the disk-level read-only flag before shutting down. */
		if (mddev->ro)
			set_disk_ro(disk, 0);

		__md_stop_writes(mddev);
		__md_stop(mddev);
		/* Clear the queue hooks now that the personality is gone. */
		mddev->queue->merge_bvec_fn = NULL;
		mddev->queue->backing_dev_info.congested_fn = NULL;

		/* Tell sysfs pollers that the array state changed. */
		sysfs_notify_dirent_safe(mddev->sysfs_state);

		/* Remove the sysfs links of devices that held a raid slot. */
		rdev_for_each(rdev, mddev)
			if (rdev->raid_disk >= 0)
				sysfs_unlink_rdev(mddev, rdev);

		set_capacity(disk, 0);
		/* open_mutex is dropped before revalidate_disk() is called. */
		mutex_unlock(&mddev->open_mutex);
		mddev->changed = 1;
		revalidate_disk(disk);

		if (mddev->ro)
			mddev->ro = 0;
	} else
		mutex_unlock(&mddev->open_mutex);

	/* Full teardown: free every resource and forget the configuration. */
	if (mode == 0) {
		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));

		bitmap_destroy(mddev);
		if (mddev->bitmap_info.file) {
			restore_bitmap_write_access(mddev->bitmap_info.file);
			fput(mddev->bitmap_info.file);
			mddev->bitmap_info.file = NULL;
		}
		mddev->bitmap_info.offset = 0;

		export_array(mddev);

		md_clean(mddev);
		kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
		if (mddev->hold_active == UNTIL_STOP)
			mddev->hold_active = 0;
	}
	blk_integrity_unregister(disk);
	md_new_event(mddev);
	sysfs_notify_dirent_safe(mddev->sysfs_state);
	return 0;
}
5426
5427#ifndef MODULE
5428static void autorun_array(struct mddev *mddev)
5429{
5430 struct md_rdev *rdev;
5431 int err;
5432
5433 if (list_empty(&mddev->disks))
5434 return;
5435
5436 printk(KERN_INFO "md: running: ");
5437
5438 rdev_for_each(rdev, mddev) {
5439 char b[BDEVNAME_SIZE];
5440 printk("<%s>", bdevname(rdev->bdev,b));
5441 }
5442 printk("\n");
5443
5444 err = do_md_run(mddev);
5445 if (err) {
5446 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5447 do_md_stop(mddev, 0, NULL);
5448 }
5449}
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
/*
 * Try to start arrays from the devices queued on pending_raid_disks.
 *
 * Each pass takes the first pending device (rdev0), moves every pending
 * device that super_90_load() accepts against rdev0 onto a private
 * "candidates" list, binds those to the md device selected by rdev0's
 * preferred_minor and runs it.  Candidates that were not bound are
 * exported again.
 *
 * @part: non-zero selects the mdp_major device numbering, with the minor
 *        shifted by MdpMinorShift; zero selects MD_MAJOR.
 */
static void autorun_devices(int part)
{
	struct md_rdev *rdev0, *rdev, *tmp;
	struct mddev *mddev;
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "md: autorun ...\n");
	while (!list_empty(&pending_raid_disks)) {
		int unit;
		dev_t dev;
		LIST_HEAD(candidates);
		rdev0 = list_entry(pending_raid_disks.next,
					 struct md_rdev, same_set);

		printk(KERN_INFO "md: considering %s ...\n",
			bdevname(rdev0->bdev,b));
		INIT_LIST_HEAD(&candidates);
		/* Collect every pending device accepted against rdev0. */
		rdev_for_each_list(rdev, tmp, &pending_raid_disks)
			if (super_90_load(rdev, rdev0, 0) >= 0) {
				printk(KERN_INFO "md: adding %s ...\n",
					bdevname(rdev->bdev,b));
				list_move(&rdev->same_set, &candidates);
			}
		/*
		 * Build the device number from rdev0's preferred minor and
		 * read the unit back out of it; a mismatch means the value
		 * did not survive the round trip through MKDEV()/MINOR().
		 */
		if (part) {
			dev = MKDEV(mdp_major,
				    rdev0->preferred_minor << MdpMinorShift);
			unit = MINOR(dev) >> MdpMinorShift;
		} else {
			dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
			unit = MINOR(dev);
		}
		if (rdev0->preferred_minor != unit) {
			printk(KERN_INFO "md: unit number in %s is bad: %d\n",
			       bdevname(rdev0->bdev, b), rdev0->preferred_minor);
			break;
		}

		/*
		 * md_probe() is called before the lookup; going by the
		 * failure message below, this pair is what allocates the
		 * md device.
		 */
		md_probe(dev, NULL, NULL);
		mddev = mddev_find(dev);
		if (!mddev || !mddev->gendisk) {
			if (mddev)
				mddev_put(mddev);
			printk(KERN_ERR
				"md: cannot allocate memory for md drive.\n");
			break;
		}
		if (mddev_lock(mddev))
			printk(KERN_WARNING "md: %s locked, cannot run\n",
			       mdname(mddev));
		else if (mddev->raid_disks || mddev->major_version
			 || !list_empty(&mddev->disks)) {
			/* The target md device is already configured. */
			printk(KERN_WARNING
				"md: %s already running, cannot run %s\n",
				mdname(mddev), bdevname(rdev0->bdev,b));
			mddev_unlock(mddev);
		} else {
			printk(KERN_INFO "md: created %s\n", mdname(mddev));
			mddev->persistent = 1;
			/* Bind each candidate; export those that fail to bind. */
			rdev_for_each_list(rdev, tmp, &candidates) {
				list_del_init(&rdev->same_set);
				if (bind_rdev_to_array(rdev, mddev))
					export_rdev(rdev);
			}
			autorun_array(mddev);
			mddev_unlock(mddev);
		}
		/*
		 * Anything still on the candidates list was not bound to an
		 * array (lock failure or array already in use): export it.
		 */
		rdev_for_each_list(rdev, tmp, &candidates) {
			list_del_init(&rdev->same_set);
			export_rdev(rdev);
		}
		mddev_put(mddev);
	}
	printk(KERN_INFO "md: ... autorun DONE.\n");
}
5545#endif
5546
5547static int get_version(void __user * arg)
5548{
5549 mdu_version_t ver;
5550
5551 ver.major = MD_MAJOR_VERSION;
5552 ver.minor = MD_MINOR_VERSION;
5553 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5554
5555 if (copy_to_user(arg, &ver, sizeof(ver)))
5556 return -EFAULT;
5557
5558 return 0;
5559}
5560
5561static int get_array_info(struct mddev * mddev, void __user * arg)
5562{
5563 mdu_array_info_t info;
5564 int nr,working,insync,failed,spare;
5565 struct md_rdev *rdev;
5566
5567 nr = working = insync = failed = spare = 0;
5568 rcu_read_lock();
5569 rdev_for_each_rcu(rdev, mddev) {
5570 nr++;
5571 if (test_bit(Faulty, &rdev->flags))
5572 failed++;
5573 else {
5574 working++;
5575 if (test_bit(In_sync, &rdev->flags))
5576 insync++;
5577 else
5578 spare++;
5579 }
5580 }
5581 rcu_read_unlock();
5582
5583 info.major_version = mddev->major_version;
5584 info.minor_version = mddev->minor_version;
5585 info.patch_version = MD_PATCHLEVEL_VERSION;
5586 info.ctime = mddev->ctime;
5587 info.level = mddev->level;
5588 info.size = mddev->dev_sectors / 2;
5589 if (info.size != mddev->dev_sectors / 2)
5590 info.size = -1;
5591 info.nr_disks = nr;
5592 info.raid_disks = mddev->raid_disks;
5593 info.md_minor = mddev->md_minor;
5594 info.not_persistent= !mddev->persistent;
5595
5596 info.utime = mddev->utime;
5597 info.state = 0;
5598 if (mddev->in_sync)
5599 info.state = (1<<MD_SB_CLEAN);
5600 if (mddev->bitmap && mddev->bitmap_info.offset)
5601 info.state = (1<<MD_SB_BITMAP_PRESENT);
5602 info.active_disks = insync;
5603 info.working_disks = working;
5604 info.failed_disks = failed;
5605 info.spare_disks = spare;
5606
5607 info.layout = mddev->layout;
5608 info.chunk_size = mddev->chunk_sectors << 9;
5609
5610 if (copy_to_user(arg, &info, sizeof(info)))
5611 return -EFAULT;
5612
5613 return 0;
5614}
5615
5616static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5617{
5618 mdu_bitmap_file_t *file = NULL;
5619 char *ptr, *buf = NULL;
5620 int err = -ENOMEM;
5621
5622 if (md_allow_write(mddev))
5623 file = kmalloc(sizeof(*file), GFP_NOIO);
5624 else
5625 file = kmalloc(sizeof(*file), GFP_KERNEL);
5626
5627 if (!file)
5628 goto out;
5629
5630
5631 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5632 file->pathname[0] = '\0';
5633 goto copy_out;
5634 }
5635
5636 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5637 if (!buf)
5638 goto out;
5639
5640 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5641 buf, sizeof(file->pathname));
5642 if (IS_ERR(ptr))
5643 goto out;
5644
5645 strcpy(file->pathname, ptr);
5646
5647copy_out:
5648 err = 0;
5649 if (copy_to_user(arg, file, sizeof(*file)))
5650 err = -EFAULT;
5651out:
5652 kfree(buf);
5653 kfree(file);
5654 return err;
5655}
5656
5657static int get_disk_info(struct mddev * mddev, void __user * arg)
5658{
5659 mdu_disk_info_t info;
5660 struct md_rdev *rdev;
5661
5662 if (copy_from_user(&info, arg, sizeof(info)))
5663 return -EFAULT;
5664
5665 rcu_read_lock();
5666 rdev = find_rdev_nr_rcu(mddev, info.number);
5667 if (rdev) {
5668 info.major = MAJOR(rdev->bdev->bd_dev);
5669 info.minor = MINOR(rdev->bdev->bd_dev);
5670 info.raid_disk = rdev->raid_disk;
5671 info.state = 0;
5672 if (test_bit(Faulty, &rdev->flags))
5673 info.state |= (1<<MD_DISK_FAULTY);
5674 else if (test_bit(In_sync, &rdev->flags)) {
5675 info.state |= (1<<MD_DISK_ACTIVE);
5676 info.state |= (1<<MD_DISK_SYNC);
5677 }
5678 if (test_bit(WriteMostly, &rdev->flags))
5679 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5680 } else {
5681 info.major = info.minor = 0;
5682 info.raid_disk = -1;
5683 info.state = (1<<MD_DISK_REMOVED);
5684 }
5685 rcu_read_unlock();
5686
5687 if (copy_to_user(arg, &info, sizeof(info)))
5688 return -EFAULT;
5689
5690 return 0;
5691}
5692
/*
 * ADD_NEW_DISK ioctl helper: attach the device described by @info to
 * @mddev.  Three situations are handled, in this order:
 *
 *  1. mddev->raid_disks == 0: the device is imported using the array's
 *     metadata version, checked against the first existing member with
 *     ->load_super() and bound to the array.
 *  2. A personality is running (mddev->pers): the device is hot-added.
 *  3. Otherwise: only major_version 0 is accepted and the rdev is set up
 *     from the fields of @info.
 *
 * Returns 0 on success or a negative errno.
 */
static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
{
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	struct md_rdev *rdev;
	dev_t dev = MKDEV(info->major,info->minor);

	/* Reject numbers that do not survive the round trip through dev_t. */
	if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
		return -EOVERFLOW;

	if (!mddev->raid_disks) {
		int err;
		/* Case 1: import with the array's own metadata version. */
		rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		/* Check the new device against the first existing member. */
		if (!list_empty(&mddev->disks)) {
			struct md_rdev *rdev0
				= list_entry(mddev->disks.next,
					     struct md_rdev, same_set);
			err = super_types[mddev->major_version]
				.load_super(rdev, rdev0, mddev->minor_version);
			if (err < 0) {
				printk(KERN_WARNING
					"md: %s has different UUID to %s\n",
					bdevname(rdev->bdev,b),
					bdevname(rdev0->bdev,b2));
				export_rdev(rdev);
				return -EINVAL;
			}
		}
		err = bind_rdev_to_array(rdev, mddev);
		if (err)
			export_rdev(rdev);
		return err;
	}

	/*
	 * Case 2: the array is running, so this is a hot-add.  It requires
	 * the personality to provide ->hot_add_disk.
	 */
	if (mddev->pers) {
		int err;
		if (!mddev->pers->hot_add_disk) {
			printk(KERN_WARNING
				"%s: personality does not support diskops!\n",
			       mdname(mddev));
			return -EINVAL;
		}
		if (mddev->persistent)
			rdev = md_import_device(dev, mddev->major_version,
						mddev->minor_version);
		else
			rdev = md_import_device(dev, -1, -1);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: md_import_device returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		/*
		 * Without persistent metadata the slot and In_sync state come
		 * from the caller; otherwise ->validate_super() is run on the
		 * device.
		 */
		if (!mddev->persistent) {
			if (info->state & (1<<MD_DISK_SYNC)  &&
			    info->raid_disk < mddev->raid_disks) {
				rdev->raid_disk = info->raid_disk;
				set_bit(In_sync, &rdev->flags);
			} else
				rdev->raid_disk = -1;
		} else
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
		     rdev->raid_disk != info->raid_disk) {
			/*
			 * The caller claims the device is in sync in a given
			 * slot, but that is not the slot determined above.
			 */
			export_rdev(rdev);
			return -EINVAL;
		}

		/* Remember the slot of an in-sync device in saved_raid_disk. */
		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = rdev->raid_disk;
		else
			rdev->saved_raid_disk = -1;

		/* The device is bound with no slot and not In_sync. */
		clear_bit(In_sync, &rdev->flags);
		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
		else
			clear_bit(WriteMostly, &rdev->flags);

		rdev->raid_disk = -1;
		err = bind_rdev_to_array(rdev, mddev);
		if (!err && !mddev->pers->hot_remove_disk) {
			/*
			 * The personality has ->hot_add_disk but no
			 * ->hot_remove_disk: hand the device to it right
			 * away instead of leaving it bound without a slot.
			 */
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
			err = mddev->pers->hot_add_disk(mddev, rdev);
			if (err)
				unbind_rdev_from_array(rdev);
		}
		if (err)
			export_rdev(rdev);
		else
			sysfs_notify_dirent_safe(rdev->sysfs_state);

		/* These flags are set and the md thread woken even on error. */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (mddev->degraded)
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		if (!err)
			md_new_event(mddev);
		md_wakeup_thread(mddev->thread);
		return err;
	}

	/*
	 * Case 3: raid_disks is set but no personality is running.  Only
	 * major_version 0 is supported on this path.
	 */
	if (mddev->major_version != 0) {
		printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
		       mdname(mddev));
		return -EINVAL;
	}

	/* Entries flagged MD_DISK_FAULTY are accepted but not imported. */
	if (!(info->state & (1<<MD_DISK_FAULTY))) {
		int err;
		rdev = md_import_device(dev, -1, 0);
		if (IS_ERR(rdev)) {
			printk(KERN_WARNING
				"md: error, md_import_device() returned %ld\n",
				PTR_ERR(rdev));
			return PTR_ERR(rdev);
		}
		rdev->desc_nr = info->number;
		if (info->raid_disk < mddev->raid_disks)
			rdev->raid_disk = info->raid_disk;
		else
			rdev->raid_disk = -1;

		if (rdev->raid_disk < mddev->raid_disks)
			if (info->state & (1<<MD_DISK_SYNC))
				set_bit(In_sync, &rdev->flags);

		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);

		/*
		 * Non-persistent: sb_start is the device size in 512-byte
		 * sectors.  Persistent: it comes from calc_dev_sboffset().
		 * Either way the usable size is set equal to sb_start.
		 */
		if (!mddev->persistent) {
			printk(KERN_INFO "md: nonpersistent superblock ...\n");
			rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
		} else
			rdev->sb_start = calc_dev_sboffset(rdev);
		rdev->sectors = rdev->sb_start;

		err = bind_rdev_to_array(rdev, mddev);
		if (err) {
			export_rdev(rdev);
			return err;
		}
	}

	return 0;
}
5863
5864static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5865{
5866 char b[BDEVNAME_SIZE];
5867 struct md_rdev *rdev;
5868
5869 rdev = find_rdev(mddev, dev);
5870 if (!rdev)
5871 return -ENXIO;
5872
5873 clear_bit(Blocked, &rdev->flags);
5874 remove_and_add_spares(mddev, rdev);
5875
5876 if (rdev->raid_disk >= 0)
5877 goto busy;
5878
5879 kick_rdev_from_array(rdev);
5880 md_update_sb(mddev, 1);
5881 md_new_event(mddev);
5882
5883 return 0;
5884busy:
5885 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5886 bdevname(rdev->bdev,b), mdname(mddev));
5887 return -EBUSY;
5888}
5889
5890static int hot_add_disk(struct mddev * mddev, dev_t dev)
5891{
5892 char b[BDEVNAME_SIZE];
5893 int err;
5894 struct md_rdev *rdev;
5895
5896 if (!mddev->pers)
5897 return -ENODEV;
5898
5899 if (mddev->major_version != 0) {
5900 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5901 " version-0 superblocks.\n",
5902 mdname(mddev));
5903 return -EINVAL;
5904 }
5905 if (!mddev->pers->hot_add_disk) {
5906 printk(KERN_WARNING
5907 "%s: personality does not support diskops!\n",
5908 mdname(mddev));
5909 return -EINVAL;
5910 }
5911
5912 rdev = md_import_device(dev, -1, 0);
5913 if (IS_ERR(rdev)) {
5914 printk(KERN_WARNING
5915 "md: error, md_import_device() returned %ld\n",
5916 PTR_ERR(rdev));
5917 return -EINVAL;
5918 }
5919
5920 if (mddev->persistent)
5921 rdev->sb_start = calc_dev_sboffset(rdev);
5922 else
5923 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5924
5925 rdev->sectors = rdev->sb_start;
5926
5927 if (test_bit(Faulty, &rdev->flags)) {
5928 printk(KERN_WARNING
5929 "md: can not hot-add faulty %s disk to %s!\n",
5930 bdevname(rdev->bdev,b), mdname(mddev));
5931 err = -EINVAL;
5932 goto abort_export;
5933 }
5934 clear_bit(In_sync, &rdev->flags);
5935 rdev->desc_nr = -1;
5936 rdev->saved_raid_disk = -1;
5937 err = bind_rdev_to_array(rdev, mddev);
5938 if (err)
5939 goto abort_export;
5940
5941
5942
5943
5944
5945
5946 rdev->raid_disk = -1;
5947
5948 md_update_sb(mddev, 1);
5949
5950
5951
5952
5953
5954 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5955 md_wakeup_thread(mddev->thread);
5956 md_new_event(mddev);
5957 return 0;
5958
5959abort_export:
5960 export_rdev(rdev);
5961 return err;
5962}
5963
5964static int set_bitmap_file(struct mddev *mddev, int fd)
5965{
5966 int err;
5967
5968 if (mddev->pers) {
5969 if (!mddev->pers->quiesce)
5970 return -EBUSY;
5971 if (mddev->recovery || mddev->sync_thread)
5972 return -EBUSY;
5973
5974 }
5975
5976
5977 if (fd >= 0) {
5978 if (mddev->bitmap)
5979 return -EEXIST;
5980 mddev->bitmap_info.file = fget(fd);
5981
5982 if (mddev->bitmap_info.file == NULL) {
5983 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5984 mdname(mddev));
5985 return -EBADF;
5986 }
5987
5988 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5989 if (err) {
5990 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5991 mdname(mddev));
5992 fput(mddev->bitmap_info.file);
5993 mddev->bitmap_info.file = NULL;
5994 return err;
5995 }
5996 mddev->bitmap_info.offset = 0;
5997 } else if (mddev->bitmap == NULL)
5998 return -ENOENT;
5999 err = 0;
6000 if (mddev->pers) {
6001 mddev->pers->quiesce(mddev, 1);
6002 if (fd >= 0) {
6003 err = bitmap_create(mddev);
6004 if (!err)
6005 err = bitmap_load(mddev);
6006 }
6007 if (fd < 0 || err) {
6008 bitmap_destroy(mddev);
6009 fd = -1;
6010 }
6011 mddev->pers->quiesce(mddev, 0);
6012 }
6013 if (fd < 0) {
6014 if (mddev->bitmap_info.file) {
6015 restore_bitmap_write_access(mddev->bitmap_info.file);
6016 fput(mddev->bitmap_info.file);
6017 }
6018 mddev->bitmap_info.file = NULL;
6019 }
6020
6021 return err;
6022}
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6038{
6039
6040 if (info->raid_disks == 0) {
6041
6042 if (info->major_version < 0 ||
6043 info->major_version >= ARRAY_SIZE(super_types) ||
6044 super_types[info->major_version].name == NULL) {
6045
6046 printk(KERN_INFO
6047 "md: superblock version %d not known\n",
6048 info->major_version);
6049 return -EINVAL;
6050 }
6051 mddev->major_version = info->major_version;
6052 mddev->minor_version = info->minor_version;
6053 mddev->patch_version = info->patch_version;
6054 mddev->persistent = !info->not_persistent;
6055
6056
6057
6058 mddev->ctime = get_seconds();
6059 return 0;
6060 }
6061 mddev->major_version = MD_MAJOR_VERSION;
6062 mddev->minor_version = MD_MINOR_VERSION;
6063 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6064 mddev->ctime = get_seconds();
6065
6066 mddev->level = info->level;
6067 mddev->clevel[0] = 0;
6068 mddev->dev_sectors = 2 * (sector_t)info->size;
6069 mddev->raid_disks = info->raid_disks;
6070
6071
6072
6073 if (info->state & (1<<MD_SB_CLEAN))
6074 mddev->recovery_cp = MaxSector;
6075 else
6076 mddev->recovery_cp = 0;
6077 mddev->persistent = ! info->not_persistent;
6078 mddev->external = 0;
6079
6080 mddev->layout = info->layout;
6081 mddev->chunk_sectors = info->chunk_size >> 9;
6082
6083 mddev->max_disks = MD_SB_DISKS;
6084
6085 if (mddev->persistent)
6086 mddev->flags = 0;
6087 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6088
6089 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6090 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6091 mddev->bitmap_info.offset = 0;
6092
6093 mddev->reshape_position = MaxSector;
6094
6095
6096
6097
6098 get_random_bytes(mddev->uuid, 16);
6099
6100 mddev->new_level = mddev->level;
6101 mddev->new_chunk_sectors = mddev->chunk_sectors;
6102 mddev->new_layout = mddev->layout;
6103 mddev->delta_disks = 0;
6104 mddev->reshape_backwards = 0;
6105
6106 return 0;
6107}
6108
6109void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6110{
6111 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6112
6113 if (mddev->external_size)
6114 return;
6115
6116 mddev->array_sectors = array_sectors;
6117}
6118EXPORT_SYMBOL(md_set_array_sectors);
6119
6120static int update_size(struct mddev *mddev, sector_t num_sectors)
6121{
6122 struct md_rdev *rdev;
6123 int rv;
6124 int fit = (num_sectors == 0);
6125
6126 if (mddev->pers->resize == NULL)
6127 return -EINVAL;
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137 if (mddev->sync_thread)
6138 return -EBUSY;
6139
6140 rdev_for_each(rdev, mddev) {
6141 sector_t avail = rdev->sectors;
6142
6143 if (fit && (num_sectors == 0 || num_sectors > avail))
6144 num_sectors = avail;
6145 if (avail < num_sectors)
6146 return -ENOSPC;
6147 }
6148 rv = mddev->pers->resize(mddev, num_sectors);
6149 if (!rv)
6150 revalidate_disk(mddev->gendisk);
6151 return rv;
6152}
6153
6154static int update_raid_disks(struct mddev *mddev, int raid_disks)
6155{
6156 int rv;
6157 struct md_rdev *rdev;
6158
6159 if (mddev->pers->check_reshape == NULL)
6160 return -EINVAL;
6161 if (raid_disks <= 0 ||
6162 (mddev->max_disks && raid_disks >= mddev->max_disks))
6163 return -EINVAL;
6164 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6165 return -EBUSY;
6166
6167 rdev_for_each(rdev, mddev) {
6168 if (mddev->raid_disks < raid_disks &&
6169 rdev->data_offset < rdev->new_data_offset)
6170 return -EINVAL;
6171 if (mddev->raid_disks > raid_disks &&
6172 rdev->data_offset > rdev->new_data_offset)
6173 return -EINVAL;
6174 }
6175
6176 mddev->delta_disks = raid_disks - mddev->raid_disks;
6177 if (mddev->delta_disks < 0)
6178 mddev->reshape_backwards = 1;
6179 else if (mddev->delta_disks > 0)
6180 mddev->reshape_backwards = 0;
6181
6182 rv = mddev->pers->check_reshape(mddev);
6183 if (rv < 0) {
6184 mddev->delta_disks = 0;
6185 mddev->reshape_backwards = 0;
6186 }
6187 return rv;
6188}
6189
6190
6191
6192
6193
6194
6195
6196
6197
6198
/*
 * SET_ARRAY_INFO helper for a running array: apply the single change that
 * @info requests relative to the current state of @mddev.
 *
 * Exactly one of size, raid_disks, layout, or the presence of an internal
 * bitmap may differ; any other difference, or more than one at once, is
 * rejected with -EINVAL.  No difference at all returns 0 with nothing
 * done.
 *
 * The caller must ensure mddev->pers is non-NULL; it is dereferenced
 * without a check.
 */
static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
{
	int rv = 0;
	int cnt = 0;
	int state = 0;

	/* Build the current state word; only the bitmap bit is needed. */
	if (mddev->bitmap && mddev->bitmap_info.offset)
		state |= (1 << MD_SB_BITMAP_PRESENT);

	/*
	 * These fields cannot be changed here and must match.  For "state"
	 * only bits 9 and above are compared (mask 0xfffffe00).
	 */
	if (mddev->major_version != info->major_version ||
	    mddev->minor_version != info->minor_version ||
	    mddev->ctime         != info->ctime         ||
	    mddev->level         != info->level         ||
	    !mddev->persistent	 != info->not_persistent||
	    mddev->chunk_sectors != info->chunk_size >> 9 ||
	    ((state^info->state) & 0xfffffe00)
		)
		return -EINVAL;
	/* Count the requested changes; exactly one is allowed per call. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		cnt++;
	if (mddev->raid_disks != info->raid_disks)
		cnt++;
	if (mddev->layout != info->layout)
		cnt++;
	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
		cnt++;
	if (cnt == 0)
		return 0;
	if (cnt > 1)
		return -EINVAL;

	if (mddev->layout != info->layout) {
		/*
		 * A layout change is offered to the personality through
		 * ->check_reshape; new_layout is restored if that returns
		 * non-zero.  This path returns without updating the
		 * superblock.
		 */
		if (mddev->pers->check_reshape == NULL)
			return -EINVAL;
		else {
			mddev->new_layout = info->layout;
			rv = mddev->pers->check_reshape(mddev);
			if (rv)
				mddev->new_layout = mddev->layout;
			return rv;
		}
	}
	/* info->size is in KiB; update_size() takes 512-byte sectors. */
	if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
		rv = update_size(mddev, (sector_t)info->size * 2);

	if (mddev->raid_disks    != info->raid_disks)
		rv = update_raid_disks(mddev, info->raid_disks);

	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
		/* Bitmap changes need ->quiesce and an idle array. */
		if (mddev->pers->quiesce == NULL)
			return -EINVAL;
		if (mddev->recovery || mddev->sync_thread)
			return -EBUSY;
		if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
			/* Add an internal bitmap at the default location. */
			if (mddev->bitmap)
				return -EEXIST;
			if (mddev->bitmap_info.default_offset == 0)
				return -EINVAL;
			mddev->bitmap_info.offset =
				mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
			mddev->pers->quiesce(mddev, 1);
			rv = bitmap_create(mddev);
			if (!rv)
				rv = bitmap_load(mddev);
			if (rv)
				bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
		} else {
			/* Remove the bitmap; file-backed ones are refused here. */
			if (!mddev->bitmap)
				return -ENOENT;
			if (mddev->bitmap->storage.file)
				return -EINVAL;
			mddev->pers->quiesce(mddev, 1);
			bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
			mddev->bitmap_info.offset = 0;
		}
	}
	/* Reached for size, raid_disks and bitmap changes, even if rv != 0. */
	md_update_sb(mddev, 1);
	return rv;
}
6293
6294static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6295{
6296 struct md_rdev *rdev;
6297 int err = 0;
6298
6299 if (mddev->pers == NULL)
6300 return -ENODEV;
6301
6302 rcu_read_lock();
6303 rdev = find_rdev_rcu(mddev, dev);
6304 if (!rdev)
6305 err = -ENODEV;
6306 else {
6307 md_error(mddev, rdev);
6308 if (!test_bit(Faulty, &rdev->flags))
6309 err = -EBUSY;
6310 }
6311 rcu_read_unlock();
6312 return err;
6313}
6314
6315
6316
6317
6318
6319
6320
6321static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6322{
6323 struct mddev *mddev = bdev->bd_disk->private_data;
6324
6325 geo->heads = 2;
6326 geo->sectors = 4;
6327 geo->cylinders = mddev->array_sectors / 8;
6328 return 0;
6329}
6330
6331static int md_ioctl(struct block_device *bdev, fmode_t mode,
6332 unsigned int cmd, unsigned long arg)
6333{
6334 int err = 0;
6335 void __user *argp = (void __user *)arg;
6336 struct mddev *mddev = NULL;
6337 int ro;
6338
6339 switch (cmd) {
6340 case RAID_VERSION:
6341 case GET_ARRAY_INFO:
6342 case GET_DISK_INFO:
6343 break;
6344 default:
6345 if (!capable(CAP_SYS_ADMIN))
6346 return -EACCES;
6347 }
6348
6349
6350
6351
6352
6353 switch (cmd) {
6354 case RAID_VERSION:
6355 err = get_version(argp);
6356 goto done;
6357
6358 case PRINT_RAID_DEBUG:
6359 err = 0;
6360 md_print_devices();
6361 goto done;
6362
6363#ifndef MODULE
6364 case RAID_AUTORUN:
6365 err = 0;
6366 autostart_arrays(arg);
6367 goto done;
6368#endif
6369 default:;
6370 }
6371
6372
6373
6374
6375
6376 mddev = bdev->bd_disk->private_data;
6377
6378 if (!mddev) {
6379 BUG();
6380 goto abort;
6381 }
6382
6383
6384 switch (cmd) {
6385 case GET_ARRAY_INFO:
6386 if (!mddev->raid_disks && !mddev->external)
6387 err = -ENODEV;
6388 else
6389 err = get_array_info(mddev, argp);
6390 goto abort;
6391
6392 case GET_DISK_INFO:
6393 if (!mddev->raid_disks && !mddev->external)
6394 err = -ENODEV;
6395 else
6396 err = get_disk_info(mddev, argp);
6397 goto abort;
6398
6399 case SET_DISK_FAULTY:
6400 err = set_disk_faulty(mddev, new_decode_dev(arg));
6401 goto abort;
6402 }
6403
6404 if (cmd == ADD_NEW_DISK)
6405
6406 flush_workqueue(md_misc_wq);
6407
6408 err = mddev_lock(mddev);
6409 if (err) {
6410 printk(KERN_INFO
6411 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6412 err, cmd);
6413 goto abort;
6414 }
6415
6416 if (cmd == SET_ARRAY_INFO) {
6417 mdu_array_info_t info;
6418 if (!arg)
6419 memset(&info, 0, sizeof(info));
6420 else if (copy_from_user(&info, argp, sizeof(info))) {
6421 err = -EFAULT;
6422 goto abort_unlock;
6423 }
6424 if (mddev->pers) {
6425 err = update_array_info(mddev, &info);
6426 if (err) {
6427 printk(KERN_WARNING "md: couldn't update"
6428 " array info. %d\n", err);
6429 goto abort_unlock;
6430 }
6431 goto done_unlock;
6432 }
6433 if (!list_empty(&mddev->disks)) {
6434 printk(KERN_WARNING
6435 "md: array %s already has disks!\n",
6436 mdname(mddev));
6437 err = -EBUSY;
6438 goto abort_unlock;
6439 }
6440 if (mddev->raid_disks) {
6441 printk(KERN_WARNING
6442 "md: array %s already initialised!\n",
6443 mdname(mddev));
6444 err = -EBUSY;
6445 goto abort_unlock;
6446 }
6447 err = set_array_info(mddev, &info);
6448 if (err) {
6449 printk(KERN_WARNING "md: couldn't set"
6450 " array info. %d\n", err);
6451 goto abort_unlock;
6452 }
6453 goto done_unlock;
6454 }
6455
6456
6457
6458
6459
6460
6461 if ((!mddev->raid_disks && !mddev->external)
6462 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6463 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6464 && cmd != GET_BITMAP_FILE) {
6465 err = -ENODEV;
6466 goto abort_unlock;
6467 }
6468
6469
6470
6471
6472 switch (cmd) {
6473 case GET_BITMAP_FILE:
6474 err = get_bitmap_file(mddev, argp);
6475 goto done_unlock;
6476
6477 case RESTART_ARRAY_RW:
6478 err = restart_array(mddev);
6479 goto done_unlock;
6480
6481 case STOP_ARRAY:
6482 err = do_md_stop(mddev, 0, bdev);
6483 goto done_unlock;
6484
6485 case STOP_ARRAY_RO:
6486 err = md_set_readonly(mddev, bdev);
6487 goto done_unlock;
6488
6489 case HOT_REMOVE_DISK:
6490 err = hot_remove_disk(mddev, new_decode_dev(arg));
6491 goto done_unlock;
6492
6493 case ADD_NEW_DISK:
6494
6495
6496
6497
6498 if (mddev->pers) {
6499 mdu_disk_info_t info;
6500 if (copy_from_user(&info, argp, sizeof(info)))
6501 err = -EFAULT;
6502 else if (!(info.state & (1<<MD_DISK_SYNC)))
6503
6504 break;
6505 else
6506 err = add_new_disk(mddev, &info);
6507 goto done_unlock;
6508 }
6509 break;
6510
6511 case BLKROSET:
6512 if (get_user(ro, (int __user *)(arg))) {
6513 err = -EFAULT;
6514 goto done_unlock;
6515 }
6516 err = -EINVAL;
6517
6518
6519
6520
6521 if (ro)
6522 goto done_unlock;
6523
6524
6525 if (mddev->ro != 1)
6526 goto done_unlock;
6527
6528
6529
6530
6531 if (mddev->pers) {
6532 err = restart_array(mddev);
6533 if (err == 0) {
6534 mddev->ro = 2;
6535 set_disk_ro(mddev->gendisk, 0);
6536 }
6537 }
6538 goto done_unlock;
6539 }
6540
6541
6542
6543
6544
6545
6546
6547
6548 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6549 if (mddev->ro == 2) {
6550 mddev->ro = 0;
6551 sysfs_notify_dirent_safe(mddev->sysfs_state);
6552 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6553
6554
6555
6556
6557 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6558 mddev_unlock(mddev);
6559 wait_event(mddev->sb_wait,
6560 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6561 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6562 mddev_lock(mddev);
6563 }
6564 } else {
6565 err = -EROFS;
6566 goto abort_unlock;
6567 }
6568 }
6569
6570 switch (cmd) {
6571 case ADD_NEW_DISK:
6572 {
6573 mdu_disk_info_t info;
6574 if (copy_from_user(&info, argp, sizeof(info)))
6575 err = -EFAULT;
6576 else
6577 err = add_new_disk(mddev, &info);
6578 goto done_unlock;
6579 }
6580
6581 case HOT_ADD_DISK:
6582 err = hot_add_disk(mddev, new_decode_dev(arg));
6583 goto done_unlock;
6584
6585 case RUN_ARRAY:
6586 err = do_md_run(mddev);
6587 goto done_unlock;
6588
6589 case SET_BITMAP_FILE:
6590 err = set_bitmap_file(mddev, (int)arg);
6591 goto done_unlock;
6592
6593 default:
6594 err = -EINVAL;
6595 goto abort_unlock;
6596 }
6597
6598done_unlock:
6599abort_unlock:
6600 if (mddev->hold_active == UNTIL_IOCTL &&
6601 err != -EINVAL)
6602 mddev->hold_active = 0;
6603 mddev_unlock(mddev);
6604
6605 return err;
6606done:
6607 if (err)
6608 MD_BUG();
6609abort:
6610 return err;
6611}
6612#ifdef CONFIG_COMPAT
6613static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6614 unsigned int cmd, unsigned long arg)
6615{
6616 switch (cmd) {
6617 case HOT_REMOVE_DISK:
6618 case HOT_ADD_DISK:
6619 case SET_DISK_FAULTY:
6620 case SET_BITMAP_FILE:
6621
6622 break;
6623 default:
6624 arg = (unsigned long)compat_ptr(arg);
6625 break;
6626 }
6627
6628 return md_ioctl(bdev, mode, cmd, arg);
6629}
6630#endif
6631
6632static int md_open(struct block_device *bdev, fmode_t mode)
6633{
6634
6635
6636
6637
6638 struct mddev *mddev = mddev_find(bdev->bd_dev);
6639 int err;
6640
6641 if (!mddev)
6642 return -ENODEV;
6643
6644 if (mddev->gendisk != bdev->bd_disk) {
6645
6646
6647
6648 mddev_put(mddev);
6649
6650 flush_workqueue(md_misc_wq);
6651
6652 return -ERESTARTSYS;
6653 }
6654 BUG_ON(mddev != bdev->bd_disk->private_data);
6655
6656 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6657 goto out;
6658
6659 err = 0;
6660 atomic_inc(&mddev->openers);
6661 mutex_unlock(&mddev->open_mutex);
6662
6663 check_disk_change(bdev);
6664 out:
6665 return err;
6666}
6667
6668static void md_release(struct gendisk *disk, fmode_t mode)
6669{
6670 struct mddev *mddev = disk->private_data;
6671
6672 BUG_ON(!mddev);
6673 atomic_dec(&mddev->openers);
6674 mddev_put(mddev);
6675}
6676
6677static int md_media_changed(struct gendisk *disk)
6678{
6679 struct mddev *mddev = disk->private_data;
6680
6681 return mddev->changed;
6682}
6683
6684static int md_revalidate(struct gendisk *disk)
6685{
6686 struct mddev *mddev = disk->private_data;
6687
6688 mddev->changed = 0;
6689 return 0;
6690}
6691static const struct block_device_operations md_fops =
6692{
6693 .owner = THIS_MODULE,
6694 .open = md_open,
6695 .release = md_release,
6696 .ioctl = md_ioctl,
6697#ifdef CONFIG_COMPAT
6698 .compat_ioctl = md_compat_ioctl,
6699#endif
6700 .getgeo = md_getgeo,
6701 .media_changed = md_media_changed,
6702 .revalidate_disk= md_revalidate,
6703};
6704
6705static int md_thread(void * arg)
6706{
6707 struct md_thread *thread = arg;
6708
6709
6710
6711
6712
6713
6714
6715
6716
6717
6718
6719
6720
6721 allow_signal(SIGKILL);
6722 while (!kthread_should_stop()) {
6723
6724
6725
6726
6727
6728
6729 if (signal_pending(current))
6730 flush_signals(current);
6731
6732 wait_event_interruptible_timeout
6733 (thread->wqueue,
6734 test_bit(THREAD_WAKEUP, &thread->flags)
6735 || kthread_should_stop(),
6736 thread->timeout);
6737
6738 clear_bit(THREAD_WAKEUP, &thread->flags);
6739 if (!kthread_should_stop())
6740 thread->run(thread);
6741 }
6742
6743 return 0;
6744}
6745
6746void md_wakeup_thread(struct md_thread *thread)
6747{
6748 if (thread) {
6749 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6750 set_bit(THREAD_WAKEUP, &thread->flags);
6751 wake_up(&thread->wqueue);
6752 }
6753}
6754
6755struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6756 struct mddev *mddev, const char *name)
6757{
6758 struct md_thread *thread;
6759
6760 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6761 if (!thread)
6762 return NULL;
6763
6764 init_waitqueue_head(&thread->wqueue);
6765
6766 thread->run = run;
6767 thread->mddev = mddev;
6768 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6769 thread->tsk = kthread_run(md_thread, thread,
6770 "%s_%s",
6771 mdname(thread->mddev),
6772 name);
6773 if (IS_ERR(thread->tsk)) {
6774 kfree(thread);
6775 return NULL;
6776 }
6777 return thread;
6778}
6779
6780void md_unregister_thread(struct md_thread **threadp)
6781{
6782 struct md_thread *thread = *threadp;
6783 if (!thread)
6784 return;
6785 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6786
6787
6788
6789 spin_lock(&pers_lock);
6790 *threadp = NULL;
6791 spin_unlock(&pers_lock);
6792
6793 kthread_stop(thread->tsk);
6794 kfree(thread);
6795}
6796
6797void md_error(struct mddev *mddev, struct md_rdev *rdev)
6798{
6799 if (!mddev) {
6800 MD_BUG();
6801 return;
6802 }
6803
6804 if (!rdev || test_bit(Faulty, &rdev->flags))
6805 return;
6806
6807 if (!mddev->pers || !mddev->pers->error_handler)
6808 return;
6809 mddev->pers->error_handler(mddev,rdev);
6810 if (mddev->degraded)
6811 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6812 sysfs_notify_dirent_safe(rdev->sysfs_state);
6813 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6814 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6815 md_wakeup_thread(mddev->thread);
6816 if (mddev->event_work.func)
6817 queue_work(md_misc_wq, &mddev->event_work);
6818 md_new_event_inintr(mddev);
6819}
6820
6821
6822
6823static void status_unused(struct seq_file *seq)
6824{
6825 int i = 0;
6826 struct md_rdev *rdev;
6827
6828 seq_printf(seq, "unused devices: ");
6829
6830 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6831 char b[BDEVNAME_SIZE];
6832 i++;
6833 seq_printf(seq, "%s ",
6834 bdevname(rdev->bdev,b));
6835 }
6836 if (!i)
6837 seq_printf(seq, "<none>");
6838
6839 seq_printf(seq, "\n");
6840}
6841
6842
6843static void status_resync(struct seq_file *seq, struct mddev * mddev)
6844{
6845 sector_t max_sectors, resync, res;
6846 unsigned long dt, db;
6847 sector_t rt;
6848 int scale;
6849 unsigned int per_milli;
6850
6851 if (mddev->curr_resync <= 3)
6852 resync = 0;
6853 else
6854 resync = mddev->curr_resync
6855 - atomic_read(&mddev->recovery_active);
6856
6857 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6858 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6859 max_sectors = mddev->resync_max_sectors;
6860 else
6861 max_sectors = mddev->dev_sectors;
6862
6863
6864
6865
6866 if (!max_sectors) {
6867 MD_BUG();
6868 return;
6869 }
6870
6871
6872
6873
6874
6875 scale = 10;
6876 if (sizeof(sector_t) > sizeof(unsigned long)) {
6877 while ( max_sectors/2 > (1ULL<<(scale+32)))
6878 scale++;
6879 }
6880 res = (resync>>scale)*1000;
6881 sector_div(res, (u32)((max_sectors>>scale)+1));
6882
6883 per_milli = res;
6884 {
6885 int i, x = per_milli/50, y = 20-x;
6886 seq_printf(seq, "[");
6887 for (i = 0; i < x; i++)
6888 seq_printf(seq, "=");
6889 seq_printf(seq, ">");
6890 for (i = 0; i < y; i++)
6891 seq_printf(seq, ".");
6892 seq_printf(seq, "] ");
6893 }
6894 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6895 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6896 "reshape" :
6897 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6898 "check" :
6899 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6900 "resync" : "recovery"))),
6901 per_milli/10, per_milli % 10,
6902 (unsigned long long) resync/2,
6903 (unsigned long long) max_sectors/2);
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916
6917
6918
6919 dt = ((jiffies - mddev->resync_mark) / HZ);
6920 if (!dt) dt++;
6921 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6922 - mddev->resync_mark_cnt;
6923
6924 rt = max_sectors - resync;
6925 sector_div(rt, db/32+1);
6926 rt *= dt;
6927 rt >>= 5;
6928
6929 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6930 ((unsigned long)rt % 60)/6);
6931
6932 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6933}
6934
6935static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6936{
6937 struct list_head *tmp;
6938 loff_t l = *pos;
6939 struct mddev *mddev;
6940
6941 if (l >= 0x10000)
6942 return NULL;
6943 if (!l--)
6944
6945 return (void*)1;
6946
6947 spin_lock(&all_mddevs_lock);
6948 list_for_each(tmp,&all_mddevs)
6949 if (!l--) {
6950 mddev = list_entry(tmp, struct mddev, all_mddevs);
6951 mddev_get(mddev);
6952 spin_unlock(&all_mddevs_lock);
6953 return mddev;
6954 }
6955 spin_unlock(&all_mddevs_lock);
6956 if (!l--)
6957 return (void*)2;
6958 return NULL;
6959}
6960
6961static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6962{
6963 struct list_head *tmp;
6964 struct mddev *next_mddev, *mddev = v;
6965
6966 ++*pos;
6967 if (v == (void*)2)
6968 return NULL;
6969
6970 spin_lock(&all_mddevs_lock);
6971 if (v == (void*)1)
6972 tmp = all_mddevs.next;
6973 else
6974 tmp = mddev->all_mddevs.next;
6975 if (tmp != &all_mddevs)
6976 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
6977 else {
6978 next_mddev = (void*)2;
6979 *pos = 0x10000;
6980 }
6981 spin_unlock(&all_mddevs_lock);
6982
6983 if (v != (void*)1)
6984 mddev_put(mddev);
6985 return next_mddev;
6986
6987}
6988
6989static void md_seq_stop(struct seq_file *seq, void *v)
6990{
6991 struct mddev *mddev = v;
6992
6993 if (mddev && v != (void*)1 && v != (void*)2)
6994 mddev_put(mddev);
6995}
6996
6997static int md_seq_show(struct seq_file *seq, void *v)
6998{
6999 struct mddev *mddev = v;
7000 sector_t sectors;
7001 struct md_rdev *rdev;
7002
7003 if (v == (void*)1) {
7004 struct md_personality *pers;
7005 seq_printf(seq, "Personalities : ");
7006 spin_lock(&pers_lock);
7007 list_for_each_entry(pers, &pers_list, list)
7008 seq_printf(seq, "[%s] ", pers->name);
7009
7010 spin_unlock(&pers_lock);
7011 seq_printf(seq, "\n");
7012 seq->poll_event = atomic_read(&md_event_count);
7013 return 0;
7014 }
7015 if (v == (void*)2) {
7016 status_unused(seq);
7017 return 0;
7018 }
7019
7020 if (mddev_lock(mddev) < 0)
7021 return -EINTR;
7022
7023 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7024 seq_printf(seq, "%s : %sactive", mdname(mddev),
7025 mddev->pers ? "" : "in");
7026 if (mddev->pers) {
7027 if (mddev->ro==1)
7028 seq_printf(seq, " (read-only)");
7029 if (mddev->ro==2)
7030 seq_printf(seq, " (auto-read-only)");
7031 seq_printf(seq, " %s", mddev->pers->name);
7032 }
7033
7034 sectors = 0;
7035 rdev_for_each(rdev, mddev) {
7036 char b[BDEVNAME_SIZE];
7037 seq_printf(seq, " %s[%d]",
7038 bdevname(rdev->bdev,b), rdev->desc_nr);
7039 if (test_bit(WriteMostly, &rdev->flags))
7040 seq_printf(seq, "(W)");
7041 if (test_bit(Faulty, &rdev->flags)) {
7042 seq_printf(seq, "(F)");
7043 continue;
7044 }
7045 if (rdev->raid_disk < 0)
7046 seq_printf(seq, "(S)");
7047 if (test_bit(Replacement, &rdev->flags))
7048 seq_printf(seq, "(R)");
7049 sectors += rdev->sectors;
7050 }
7051
7052 if (!list_empty(&mddev->disks)) {
7053 if (mddev->pers)
7054 seq_printf(seq, "\n %llu blocks",
7055 (unsigned long long)
7056 mddev->array_sectors / 2);
7057 else
7058 seq_printf(seq, "\n %llu blocks",
7059 (unsigned long long)sectors / 2);
7060 }
7061 if (mddev->persistent) {
7062 if (mddev->major_version != 0 ||
7063 mddev->minor_version != 90) {
7064 seq_printf(seq," super %d.%d",
7065 mddev->major_version,
7066 mddev->minor_version);
7067 }
7068 } else if (mddev->external)
7069 seq_printf(seq, " super external:%s",
7070 mddev->metadata_type);
7071 else
7072 seq_printf(seq, " super non-persistent");
7073
7074 if (mddev->pers) {
7075 mddev->pers->status(seq, mddev);
7076 seq_printf(seq, "\n ");
7077 if (mddev->pers->sync_request) {
7078 if (mddev->curr_resync > 2) {
7079 status_resync(seq, mddev);
7080 seq_printf(seq, "\n ");
7081 } else if (mddev->curr_resync >= 1)
7082 seq_printf(seq, "\tresync=DELAYED\n ");
7083 else if (mddev->recovery_cp < MaxSector)
7084 seq_printf(seq, "\tresync=PENDING\n ");
7085 }
7086 } else
7087 seq_printf(seq, "\n ");
7088
7089 bitmap_status(seq, mddev->bitmap);
7090
7091 seq_printf(seq, "\n");
7092 }
7093 mddev_unlock(mddev);
7094
7095 return 0;
7096}
7097
7098static const struct seq_operations md_seq_ops = {
7099 .start = md_seq_start,
7100 .next = md_seq_next,
7101 .stop = md_seq_stop,
7102 .show = md_seq_show,
7103};
7104
7105static int md_seq_open(struct inode *inode, struct file *file)
7106{
7107 struct seq_file *seq;
7108 int error;
7109
7110 error = seq_open(file, &md_seq_ops);
7111 if (error)
7112 return error;
7113
7114 seq = file->private_data;
7115 seq->poll_event = atomic_read(&md_event_count);
7116 return error;
7117}
7118
7119static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7120{
7121 struct seq_file *seq = filp->private_data;
7122 int mask;
7123
7124 poll_wait(filp, &md_event_waiters, wait);
7125
7126
7127 mask = POLLIN | POLLRDNORM;
7128
7129 if (seq->poll_event != atomic_read(&md_event_count))
7130 mask |= POLLERR | POLLPRI;
7131 return mask;
7132}
7133
7134static const struct file_operations md_seq_fops = {
7135 .owner = THIS_MODULE,
7136 .open = md_seq_open,
7137 .read = seq_read,
7138 .llseek = seq_lseek,
7139 .release = seq_release_private,
7140 .poll = mdstat_poll,
7141};
7142
7143int register_md_personality(struct md_personality *p)
7144{
7145 spin_lock(&pers_lock);
7146 list_add_tail(&p->list, &pers_list);
7147 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7148 spin_unlock(&pers_lock);
7149 return 0;
7150}
7151
7152int unregister_md_personality(struct md_personality *p)
7153{
7154 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7155 spin_lock(&pers_lock);
7156 list_del_init(&p->list);
7157 spin_unlock(&pers_lock);
7158 return 0;
7159}
7160
7161static int is_mddev_idle(struct mddev *mddev, int init)
7162{
7163 struct md_rdev * rdev;
7164 int idle;
7165 int curr_events;
7166
7167 idle = 1;
7168 rcu_read_lock();
7169 rdev_for_each_rcu(rdev, mddev) {
7170 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7171 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7172 (int)part_stat_read(&disk->part0, sectors[1]) -
7173 atomic_read(&disk->sync_io);
7174
7175
7176
7177
7178
7179
7180
7181
7182
7183
7184
7185
7186
7187
7188
7189
7190
7191
7192
7193
7194
7195
7196 if (init || curr_events - rdev->last_events > 64) {
7197 rdev->last_events = curr_events;
7198 idle = 0;
7199 }
7200 }
7201 rcu_read_unlock();
7202 return idle;
7203}
7204
7205void md_done_sync(struct mddev *mddev, int blocks, int ok)
7206{
7207
7208 atomic_sub(blocks, &mddev->recovery_active);
7209 wake_up(&mddev->recovery_wait);
7210 if (!ok) {
7211 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7212 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7213 md_wakeup_thread(mddev->thread);
7214
7215 }
7216}
7217
7218
7219
7220
7221
7222
7223
7224void md_write_start(struct mddev *mddev, struct bio *bi)
7225{
7226 int did_change = 0;
7227 if (bio_data_dir(bi) != WRITE)
7228 return;
7229
7230 BUG_ON(mddev->ro == 1);
7231 if (mddev->ro == 2) {
7232
7233 mddev->ro = 0;
7234 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7235 md_wakeup_thread(mddev->thread);
7236 md_wakeup_thread(mddev->sync_thread);
7237 did_change = 1;
7238 }
7239 atomic_inc(&mddev->writes_pending);
7240 if (mddev->safemode == 1)
7241 mddev->safemode = 0;
7242 if (mddev->in_sync) {
7243 spin_lock_irq(&mddev->write_lock);
7244 if (mddev->in_sync) {
7245 mddev->in_sync = 0;
7246 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7247 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7248 md_wakeup_thread(mddev->thread);
7249 did_change = 1;
7250 }
7251 spin_unlock_irq(&mddev->write_lock);
7252 }
7253 if (did_change)
7254 sysfs_notify_dirent_safe(mddev->sysfs_state);
7255 wait_event(mddev->sb_wait,
7256 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7257}
7258
7259void md_write_end(struct mddev *mddev)
7260{
7261 if (atomic_dec_and_test(&mddev->writes_pending)) {
7262 if (mddev->safemode == 2)
7263 md_wakeup_thread(mddev->thread);
7264 else if (mddev->safemode_delay)
7265 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7266 }
7267}
7268
7269
7270
7271
7272
7273
7274
7275
7276
7277
7278int md_allow_write(struct mddev *mddev)
7279{
7280 if (!mddev->pers)
7281 return 0;
7282 if (mddev->ro)
7283 return 0;
7284 if (!mddev->pers->sync_request)
7285 return 0;
7286
7287 spin_lock_irq(&mddev->write_lock);
7288 if (mddev->in_sync) {
7289 mddev->in_sync = 0;
7290 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7291 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7292 if (mddev->safemode_delay &&
7293 mddev->safemode == 0)
7294 mddev->safemode = 1;
7295 spin_unlock_irq(&mddev->write_lock);
7296 md_update_sb(mddev, 0);
7297 sysfs_notify_dirent_safe(mddev->sysfs_state);
7298 } else
7299 spin_unlock_irq(&mddev->write_lock);
7300
7301 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7302 return -EAGAIN;
7303 else
7304 return 0;
7305}
7306EXPORT_SYMBOL_GPL(md_allow_write);
7307
/* Number of (time, sector count) samples kept for the speed calculation. */
#define SYNC_MARKS 10
/* Minimum spacing between two samples, in jiffies (3 seconds). */
#define SYNC_MARK_STEP (3*HZ)
/* Maximum time between curr_resync_completed updates, in jiffies (5 min). */
#define UPDATE_FREQUENCY (5*60*HZ)
/*
 * Body of the sync thread: perform a resync, data-check, reshape or recovery
 * of the array by repeatedly calling the personality's sync_request().
 *
 * Outline:
 *   1. return at once if the sync is already DONE or the array is read-only;
 *   2. wait for other arrays that share physical units (unless
 *      parallel_resync is set);
 *   3. pick the start sector (j) and the end (max_sectors);
 *   4. main loop: issue sync requests, checkpoint progress, throttle speed;
 *   5. wait for outstanding I/O, record the checkpoint, set
 *      MD_RECOVERY_DONE and wake the array thread.
 */
void md_do_sync(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct mddev *mddev2;
	unsigned int currspeed = 0,
		 window;
	sector_t max_sectors,j, io_sectors;
	unsigned long mark[SYNC_MARKS];
	unsigned long update_time;
	sector_t mark_cnt[SYNC_MARKS];
	int last_mark,m;
	struct list_head *tmp;
	sector_t last_check;
	int skipped = 0;
	struct md_rdev *rdev;
	char *desc;
	struct blk_plug plug;

	/* Nothing to do if the sync already finished or the array is read-only. */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
		return;
	if (mddev->ro)
		return;

	/* Human-readable name of the operation, used in log messages only. */
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
			desc = "data-check";
		else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			desc = "requested-resync";
		else
			desc = "resync";
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		desc = "reshape";
	else
		desc = "recovery";

	/*
	 * Arbitration with other arrays that share physical units.
	 * curr_resync == 2 means "about to start"; it is lowered to 1 to
	 * yield to another array.  The do/while repeats as long as we have
	 * yielded (curr_resync < 2).
	 */
	do {
		mddev->curr_resync = 2;

	try_again:
		if (kthread_should_stop())
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			goto skip;
		for_each_mddev(mddev2, tmp) {
			if (mddev2 == mddev)
				continue;
			if (!mddev->parallel_resync
			&&  mddev2->curr_resync
			&&  match_mddev_units(mddev, mddev2)) {
				DEFINE_WAIT(wq);
				/*
				 * Both arrays are at the "about to start"
				 * stage: break the tie by address, the lower
				 * one yields.
				 */
				if (mddev < mddev2 && mddev->curr_resync == 2) {
					/* arbitrarily yield */
					mddev->curr_resync = 1;
					wake_up(&resync_wait);
				}
				if (mddev > mddev2 && mddev->curr_resync == 1)
					/*
					 * We have yielded, but not to this
					 * array: do not wait for it.
					 */
					continue;
				/*
				 * Sleep until the other array makes progress,
				 * unless we are being stopped.
				 */
				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
				if (!kthread_should_stop() &&
				    mddev2->curr_resync >= mddev->curr_resync) {
					printk(KERN_INFO "md: delaying %s of %s"
					       " until %s has finished (they"
					       " share one or more physical units)\n",
					       desc, mdname(mddev), mdname(mddev2));
					/*
					 * NOTE(review): presumably balances a
					 * reference that for_each_mddev holds
					 * on mddev2 while iterating - confirm
					 * against the macro.
					 */
					mddev_put(mddev2);
					if (signal_pending(current))
						flush_signals(current);
					schedule();
					finish_wait(&resync_wait, &wq);
					goto try_again;
				}
				finish_wait(&resync_wait, &wq);
			}
		}
	} while (mddev->curr_resync < 2);

	/* Work out where to start (j) and where to stop (max_sectors). */
	j = 0;
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync / check: start at resync_min or the saved checkpoint */
		max_sectors = mddev->resync_max_sectors;
		atomic64_set(&mddev->resync_mismatches, 0);
		/* a bitmap-backed array always starts from sector 0 */
		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			j = mddev->resync_min;
		else if (!mddev->bitmap)
			j = mddev->recovery_cp;

	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		max_sectors = mddev->resync_max_sectors;
	else {
		/* recovery: start at the lowest recovery_offset of any member
		 * that is active but neither Faulty nor In_sync */
		max_sectors = mddev->dev_sectors;
		j = MaxSector;
		rcu_read_lock();
		rdev_for_each_rcu(rdev, mddev)
			if (rdev->raid_disk >= 0 &&
			    !test_bit(Faulty, &rdev->flags) &&
			    !test_bit(In_sync, &rdev->flags) &&
			    rdev->recovery_offset < j)
				j = rdev->recovery_offset;
		rcu_read_unlock();
	}

	printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
	printk(KERN_INFO "md: minimum _guaranteed_  speed:"
		" %d KB/sec/disk.\n", speed_min(mddev));
	printk(KERN_INFO "md: using maximum available idle IO bandwidth "
	       "(but not more than %d KB/sec) for %s.\n",
	       speed_max(mddev), desc);

	/* Initialise the per-device event counters used for idle detection. */
	is_mddev_idle(mddev, 1);

	/* All speed samples start out as "now, nothing done yet". */
	io_sectors = 0;
	for (m = 0; m < SYNC_MARKS; m++) {
		mark[m] = jiffies;
		mark_cnt[m] = io_sectors;
	}
	last_mark = 0;
	mddev->resync_mark = mark[last_mark];
	mddev->resync_mark_cnt = mark_cnt[last_mark];

	/*
	 * The speed checks below run once per 'window' sectors of I/O:
	 * 32 pages expressed in 512-byte sectors.
	 */
	window = 32*(PAGE_SIZE/512);
	printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
		window/2, (unsigned long long)max_sectors/2);

	atomic_set(&mddev->recovery_active, 0);
	last_check = 0;

	/* curr_resync values up to 2 are reserved for the arbitration above. */
	if (j>2) {
		printk(KERN_INFO
		       "md: resuming %s of %s from checkpoint.\n",
		       desc, mdname(mddev));
		mddev->curr_resync = j;
	} else
		mddev->curr_resync = 3; /* no longer delayed */
	mddev->curr_resync_completed = j;
	sysfs_notify(&mddev->kobj, NULL, "sync_completed");
	md_new_event(mddev);
	update_time = jiffies;

	blk_start_plug(&plug);
	while (j < max_sectors) {
		sector_t sectors;

		skipped = 0;

		/*
		 * Periodically (not during reshape) wait for all outstanding
		 * sync I/O and publish the progress.  Triggered when more
		 * than 1/16 of the array has been done since the last update,
		 * when UPDATE_FREQUENCY has elapsed, or when at least half
		 * the distance to resync_max has been covered.
		 */
		if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    ((mddev->curr_resync > mddev->curr_resync_completed &&
		      (mddev->curr_resync - mddev->curr_resync_completed)
		      > (max_sectors >> 4)) ||
		     time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
		     (j - mddev->curr_resync_completed)*2
		     >= mddev->resync_max - mddev->curr_resync_completed
			    )) {
			/* time to update curr_resync_completed */
			wait_event(mddev->recovery_wait,
				   atomic_read(&mddev->recovery_active) == 0);
			mddev->curr_resync_completed = j;
			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
			    j > mddev->recovery_cp)
				mddev->recovery_cp = j;
			update_time = jiffies;
			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
		}

		/*
		 * Do not go past resync_max: sleep until it is raised or we
		 * are told to stop.  Signals are flushed so that the
		 * interruptible wait can actually sleep.
		 */
		while (j >= mddev->resync_max && !kthread_should_stop()) {
			flush_signals(current); /* just in case */
			wait_event_interruptible(mddev->recovery_wait,
						 mddev->resync_max > j
						 || kthread_should_stop());
		}

		if (kthread_should_stop())
			goto interrupted;

		/*
		 * Ask the personality to sync from sector j.  The last
		 * argument is non-zero while we are below the minimum speed.
		 * A return of 0 sectors aborts the sync.
		 */
		sectors = mddev->pers->sync_request(mddev, j, &skipped,
						  currspeed < speed_min(mddev));
		if (sectors == 0) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto out;
		}

		/* Skipped ranges generated no I/O and are not accounted. */
		if (!skipped) { /* actual IO requested */
			io_sectors += sectors;
			atomic_add(sectors, &mddev->recovery_active);
		}

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			break;

		j += sectors;
		if (j > 2)
			mddev->curr_resync = j;
		mddev->curr_mark_cnt = io_sectors;
		if (last_check == 0)
			/* last_check is still 0: no window of I/O has been
			 * completed yet, so raise an event
			 */
			md_new_event(mddev);

		/* Only run the speed checks once per window of real I/O. */
		if (last_check + window > io_sectors || j == max_sectors)
			continue;

		last_check = io_sectors;
	repeat:
		if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
			/* step marks: publish the oldest sample as the
			 * reference point and overwrite it with a new one */
			int next = (last_mark+1) % SYNC_MARKS;

			mddev->resync_mark = mark[next];
			mddev->resync_mark_cnt = mark_cnt[next];
			mark[next] = jiffies;
			mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
			last_mark = next;
		}


		if (kthread_should_stop())
			goto interrupted;

		/* Give other tasks a chance to run before measuring speed. */
		cond_resched();

		/* KB/sec since the reference mark; never 0 because of the +1. */
		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
			/((jiffies-mddev->resync_mark)/HZ +1) +1;

		/*
		 * Above the guaranteed minimum we back off for 500ms whenever
		 * we exceed the maximum or the member devices are not idle.
		 */
		if (currspeed > speed_min(mddev)) {
			if ((currspeed > speed_max(mddev)) ||
					!is_mddev_idle(mddev, 0)) {
				msleep(500);
				goto repeat;
			}
		}
	}
	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
	/*
	 * Reached on normal completion, on abort (sync_request returned 0)
	 * and, via 'interrupted', when the thread is being stopped.
	 */
 out:
	blk_finish_plug(&plug);
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/* Final call with j == max_sectors tells the personality we are done. */
	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);

	/* Record how far we got, except for a pure check. */
	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
	    mddev->curr_resync > 2) {
		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
				if (mddev->curr_resync >= mddev->recovery_cp) {
					printk(KERN_INFO
					       "md: checkpointing %s of %s.\n",
					       desc, mdname(mddev));
					/*
					 * After an I/O error only the range
					 * known to be complete is trusted.
					 */
					if (test_bit(MD_RECOVERY_ERROR,
						&mddev->recovery))
						mddev->recovery_cp =
							mddev->curr_resync_completed;
					else
						mddev->recovery_cp =
							mddev->curr_resync;
				}
			} else
				mddev->recovery_cp = MaxSector;
		} else {
			/* recovery / reshape: advance each recovering member */
			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
				mddev->curr_resync = MaxSector;
			rcu_read_lock();
			rdev_for_each_rcu(rdev, mddev)
				if (rdev->raid_disk >= 0 &&
				    mddev->delta_disks >= 0 &&
				    !test_bit(Faulty, &rdev->flags) &&
				    !test_bit(In_sync, &rdev->flags) &&
				    rdev->recovery_offset < mddev->curr_resync)
					rdev->recovery_offset = mddev->curr_resync;
			rcu_read_unlock();
		}
	}
	/* Reached directly when the sync was interrupted before it started. */
 skip:
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* We completed so min/max setting can be forgotten if used. */
		if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
			mddev->resync_min = 0;
		mddev->resync_max = MaxSector;
	} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		mddev->resync_min = mddev->curr_resync_completed;
	/* curr_resync == 0 lets arrays waiting on resync_wait proceed. */
	mddev->curr_resync = 0;
	wake_up(&resync_wait);
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * The thread was asked to stop: mark the sync as interrupted and
	 * run the normal clean-up.
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
7657EXPORT_SYMBOL_GPL(md_do_sync);
7658
7659static int remove_and_add_spares(struct mddev *mddev,
7660 struct md_rdev *this)
7661{
7662 struct md_rdev *rdev;
7663 int spares = 0;
7664 int removed = 0;
7665
7666 rdev_for_each(rdev, mddev)
7667 if ((this == NULL || rdev == this) &&
7668 rdev->raid_disk >= 0 &&
7669 !test_bit(Blocked, &rdev->flags) &&
7670 (test_bit(Faulty, &rdev->flags) ||
7671 ! test_bit(In_sync, &rdev->flags)) &&
7672 atomic_read(&rdev->nr_pending)==0) {
7673 if (mddev->pers->hot_remove_disk(
7674 mddev, rdev) == 0) {
7675 sysfs_unlink_rdev(mddev, rdev);
7676 rdev->raid_disk = -1;
7677 removed++;
7678 }
7679 }
7680 if (removed && mddev->kobj.sd)
7681 sysfs_notify(&mddev->kobj, NULL, "degraded");
7682
7683 if (this)
7684 goto no_add;
7685
7686 rdev_for_each(rdev, mddev) {
7687 if (rdev->raid_disk >= 0 &&
7688 !test_bit(In_sync, &rdev->flags) &&
7689 !test_bit(Faulty, &rdev->flags))
7690 spares++;
7691 if (rdev->raid_disk >= 0)
7692 continue;
7693 if (test_bit(Faulty, &rdev->flags))
7694 continue;
7695 if (mddev->ro &&
7696 rdev->saved_raid_disk < 0)
7697 continue;
7698
7699 rdev->recovery_offset = 0;
7700 if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
7701 spin_lock_irq(&mddev->write_lock);
7702 if (mddev->in_sync)
7703
7704
7705
7706
7707
7708 rdev->recovery_offset = mddev->recovery_cp;
7709 spin_unlock_irq(&mddev->write_lock);
7710 }
7711 if (mddev->ro && rdev->recovery_offset != MaxSector)
7712
7713 continue;
7714 if (mddev->pers->
7715 hot_add_disk(mddev, rdev) == 0) {
7716 if (sysfs_link_rdev(mddev, rdev))
7717 ;
7718 spares++;
7719 md_new_event(mddev);
7720 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7721 }
7722 }
7723no_add:
7724 if (removed)
7725 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7726 return spares;
7727}
7728
7729
7730
7731
7732
7733
7734
7735
7736
7737
7738
7739
7740
7741
7742
7743
7744
7745
7746
7747
7748
7749
7750
/*
 * md_check_recovery - per-array housekeeping.
 *
 * Runs the bitmap daemon work, reacts to a pending signal, and then, if
 * mddev_trylock() succeeds, does whichever of the following applies:
 *  - on a read-only array: remove/add spares and activate them;
 *  - update in_sync/safemode under write_lock;
 *  - write out the superblock when any bit in mddev->flags is set;
 *  - reap an existing sync thread (md_reap_sync_thread);
 *  - choose the next sync action (reshape, recovery onto spares, resync)
 *    and start a "resync" thread running md_do_sync.
 *
 * Nothing is returned.  The function silently does nothing when the array
 * is suspended, when none of the trigger conditions holds, or when the
 * trylock fails.
 */
void md_check_recovery(struct mddev *mddev)
{
	/* Nothing is touched while the array is suspended. */
	if (mddev->suspended)
		return;

	if (mddev->bitmap)
		bitmap_daemon_work(mddev);

	/*
	 * A signal pending on the calling thread puts arrays that have a
	 * sync_request method and are not externally managed into
	 * safemode 2; the signal itself is then discarded.
	 */
	if (signal_pending(current)) {
		if (mddev->pers->sync_request && !mddev->external) {
			printk(KERN_INFO "md: %s in immediate safe mode\n",
			       mdname(mddev));
			mddev->safemode = 2;
		}
		flush_signals(current);
	}

	/* A read-only array only proceeds when recovery was explicitly asked for. */
	if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
		return;
	/*
	 * Return unless at least one of these holds:
	 *  - a flag other than MD_CHANGE_PENDING is set,
	 *  - MD_RECOVERY_NEEDED or MD_RECOVERY_DONE is set,
	 *  - safemode is 1 on a non-external array,
	 *  - safemode is 2 with no writes pending, the array not yet marked
	 *    in_sync, and recovery_cp == MaxSector.
	 */
	if ( ! (
		(mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		(mddev->external == 0 && mddev->safemode == 1) ||
		(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
		))
		return;

	/* Only a trylock: if the lock is busy this pass is simply skipped. */
	if (mddev_trylock(mddev)) {
		int spares = 0;

		if (mddev->ro) {
			/*
			 * Read-only array: clear the request, let
			 * remove_and_add_spares() drop/add devices, and
			 * activate whatever was added straight away.  No
			 * sync thread is started on this path.
			 */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			remove_and_add_spares(mddev, NULL);
			mddev->pers->spare_active(mddev);
			goto unlock;
		}

		if (!mddev->external) {
			int did_change = 0;
			spin_lock_irq(&mddev->write_lock);
			/*
			 * With safemode set, no writes in flight and
			 * recovery_cp == MaxSector, mark the array in_sync
			 * and request a superblock update.
			 */
			if (mddev->safemode &&
			    !atomic_read(&mddev->writes_pending) &&
			    !mddev->in_sync &&
			    mddev->recovery_cp == MaxSector) {
				mddev->in_sync = 1;
				did_change = 1;
				set_bit(MD_CHANGE_CLEAN, &mddev->flags);
			}
			/* safemode 1 is one-shot; safemode 2 is left in place. */
			if (mddev->safemode == 1)
				mddev->safemode = 0;
			spin_unlock_irq(&mddev->write_lock);
			/* Notify outside the spinlock. */
			if (did_change)
				sysfs_notify_dirent_safe(mddev->sysfs_state);
		}

		if (mddev->flags)
			md_update_sb(mddev, 0);

		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* A sync is still running and not yet done: leave it alone. */
			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			goto unlock;
		}
		if (mddev->sync_thread) {
			md_reap_sync_thread(mddev);
			goto unlock;
		}
		/*
		 * RUNNING is set here, before NEEDED is tested and cleared
		 * below; every early exit goes through 'unlock', which clears
		 * RUNNING again when no sync thread exists.
		 */
		mddev->curr_resync_completed = 0;
		set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		/*
		 * Clear the per-run status bits so they start out clean for
		 * whatever is started next.
		 */
		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);

		/* Nothing to start unless NEEDED was set and recovery is not frozen. */
		if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		    test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
			goto unlock;
		/*
		 * Pick the action, in priority order:
		 *  1. continue a reshape (reshape_position != MaxSector),
		 *  2. recover onto spares if remove_and_add_spares() found any,
		 *  3. resync if recovery_cp < MaxSector,
		 *  4. otherwise proceed only if MD_RECOVERY_SYNC is already set.
		 */
		if (mddev->reshape_position != MaxSector) {
			if (mddev->pers->check_reshape == NULL ||
			    mddev->pers->check_reshape(mddev) != 0)
				/* Reshape cannot proceed. */
				goto unlock;
			set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if ((spares = remove_and_add_spares(mddev, NULL))) {
			clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
			set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (mddev->recovery_cp < MaxSector) {
			set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
			/* Nothing to be done. */
			goto unlock;

		if (mddev->pers->sync_request) {
			if (spares) {
				/*
				 * Spares were added for recovery, so the
				 * whole bitmap is written out before the
				 * sync thread starts.
				 */
				bitmap_write_all(mddev->bitmap);
			}
			mddev->sync_thread = md_register_thread(md_do_sync,
								mddev,
								"resync");
			if (!mddev->sync_thread) {
				printk(KERN_ERR "%s: could not start resync"
					" thread...\n",
					mdname(mddev));
				/* Thread creation failed: undo the action bits. */
				clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
				clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
				clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
				clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
				clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
			} else
				md_wakeup_thread(mddev->sync_thread);
			sysfs_notify_dirent_safe(mddev->sysfs_action);
			md_new_event(mddev);
		}
 unlock:
		/*
		 * With no sync thread in existence RUNNING must not stay set;
		 * if RECOVER had been set, tell sysfs the action changed.
		 */
		if (!mddev->sync_thread) {
			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
					       &mddev->recovery))
				if (mddev->sysfs_action)
					sysfs_notify_dirent_safe(mddev->sysfs_action);
		}
		mddev_unlock(mddev);
	}
}
7905
7906void md_reap_sync_thread(struct mddev *mddev)
7907{
7908 struct md_rdev *rdev;
7909
7910
7911 md_unregister_thread(&mddev->sync_thread);
7912 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7913 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7914
7915
7916 if (mddev->pers->spare_active(mddev)) {
7917 sysfs_notify(&mddev->kobj, NULL,
7918 "degraded");
7919 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7920 }
7921 }
7922 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7923 mddev->pers->finish_reshape)
7924 mddev->pers->finish_reshape(mddev);
7925
7926
7927
7928
7929
7930
7931
7932 rdev_for_each(rdev, mddev)
7933 if (!mddev->degraded ||
7934 test_bit(In_sync, &rdev->flags))
7935 rdev->saved_raid_disk = -1;
7936
7937 md_update_sb(mddev, 1);
7938 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7939 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7940 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7941 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7942 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7943
7944 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7945 sysfs_notify_dirent_safe(mddev->sysfs_action);
7946 md_new_event(mddev);
7947 if (mddev->event_work.func)
7948 queue_work(md_misc_wq, &mddev->event_work);
7949}
7950
7951void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7952{
7953 sysfs_notify_dirent_safe(rdev->sysfs_state);
7954 wait_event_timeout(rdev->blocked_wait,
7955 !test_bit(Blocked, &rdev->flags) &&
7956 !test_bit(BlockedBadBlocks, &rdev->flags),
7957 msecs_to_jiffies(5000));
7958 rdev_dec_pending(rdev, mddev);
7959}
7960EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7961
7962void md_finish_reshape(struct mddev *mddev)
7963{
7964
7965 struct md_rdev *rdev;
7966
7967 rdev_for_each(rdev, mddev) {
7968 if (rdev->data_offset > rdev->new_data_offset)
7969 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7970 else
7971 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7972 rdev->data_offset = rdev->new_data_offset;
7973 }
7974}
7975EXPORT_SYMBOL(md_finish_reshape);
7976
7977
7978
7979
7980
7981
7982
7983
7984
7985
7986
7987
7988
7989
7990
7991
7992
7993
7994
7995
7996
7997
7998
7999
8000
8001
8002
8003int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
8004 sector_t *first_bad, int *bad_sectors)
8005{
8006 int hi;
8007 int lo;
8008 u64 *p = bb->page;
8009 int rv;
8010 sector_t target = s + sectors;
8011 unsigned seq;
8012
8013 if (bb->shift > 0) {
8014
8015 s >>= bb->shift;
8016 target += (1<<bb->shift) - 1;
8017 target >>= bb->shift;
8018 sectors = target - s;
8019 }
8020
8021
8022retry:
8023 seq = read_seqbegin(&bb->lock);
8024 lo = 0;
8025 rv = 0;
8026 hi = bb->count;
8027
8028
8029
8030
8031
8032
8033
8034
8035
8036 while (hi - lo > 1) {
8037 int mid = (lo + hi) / 2;
8038 sector_t a = BB_OFFSET(p[mid]);
8039 if (a < target)
8040
8041
8042 lo = mid;
8043 else
8044
8045 hi = mid;
8046 }
8047
8048 if (hi > lo) {
8049
8050
8051
8052 while (lo >= 0 &&
8053 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8054 if (BB_OFFSET(p[lo]) < target) {
8055
8056
8057
8058 if (rv != -1 && BB_ACK(p[lo]))
8059 rv = 1;
8060 else
8061 rv = -1;
8062 *first_bad = BB_OFFSET(p[lo]);
8063 *bad_sectors = BB_LEN(p[lo]);
8064 }
8065 lo--;
8066 }
8067 }
8068
8069 if (read_seqretry(&bb->lock, seq))
8070 goto retry;
8071
8072 return rv;
8073}
8074EXPORT_SYMBOL_GPL(md_is_badblock);
8075
8076
8077
8078
8079
8080
8081
8082
8083static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8084 int acknowledged)
8085{
8086 u64 *p;
8087 int lo, hi;
8088 int rv = 1;
8089
8090 if (bb->shift < 0)
8091
8092 return 0;
8093
8094 if (bb->shift) {
8095
8096 sector_t next = s + sectors;
8097 s >>= bb->shift;
8098 next += (1<<bb->shift) - 1;
8099 next >>= bb->shift;
8100 sectors = next - s;
8101 }
8102
8103 write_seqlock_irq(&bb->lock);
8104
8105 p = bb->page;
8106 lo = 0;
8107 hi = bb->count;
8108
8109 while (hi - lo > 1) {
8110 int mid = (lo + hi) / 2;
8111 sector_t a = BB_OFFSET(p[mid]);
8112 if (a <= s)
8113 lo = mid;
8114 else
8115 hi = mid;
8116 }
8117 if (hi > lo && BB_OFFSET(p[lo]) > s)
8118 hi = lo;
8119
8120 if (hi > lo) {
8121
8122
8123
8124 sector_t a = BB_OFFSET(p[lo]);
8125 sector_t e = a + BB_LEN(p[lo]);
8126 int ack = BB_ACK(p[lo]);
8127 if (e >= s) {
8128
8129 if (s == a && s + sectors >= e)
8130
8131 ack = acknowledged;
8132 else
8133 ack = ack && acknowledged;
8134
8135 if (e < s + sectors)
8136 e = s + sectors;
8137 if (e - a <= BB_MAX_LEN) {
8138 p[lo] = BB_MAKE(a, e-a, ack);
8139 s = e;
8140 } else {
8141
8142
8143
8144 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8145 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8146 s = a + BB_MAX_LEN;
8147 }
8148 sectors = e - s;
8149 }
8150 }
8151 if (sectors && hi < bb->count) {
8152
8153
8154 sector_t a = BB_OFFSET(p[hi]);
8155 sector_t e = a + BB_LEN(p[hi]);
8156 int ack = BB_ACK(p[hi]);
8157 if (a <= s + sectors) {
8158
8159 if (e <= s + sectors) {
8160
8161 e = s + sectors;
8162 ack = acknowledged;
8163 } else
8164 ack = ack && acknowledged;
8165
8166 a = s;
8167 if (e - a <= BB_MAX_LEN) {
8168 p[hi] = BB_MAKE(a, e-a, ack);
8169 s = e;
8170 } else {
8171 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8172 s = a + BB_MAX_LEN;
8173 }
8174 sectors = e - s;
8175 lo = hi;
8176 hi++;
8177 }
8178 }
8179 if (sectors == 0 && hi < bb->count) {
8180
8181
8182 sector_t a = BB_OFFSET(p[hi]);
8183 int lolen = BB_LEN(p[lo]);
8184 int hilen = BB_LEN(p[hi]);
8185 int newlen = lolen + hilen - (s - a);
8186 if (s >= a && newlen < BB_MAX_LEN) {
8187
8188 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8189 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8190 memmove(p + hi, p + hi + 1,
8191 (bb->count - hi - 1) * 8);
8192 bb->count--;
8193 }
8194 }
8195 while (sectors) {
8196
8197
8198 if (bb->count >= MD_MAX_BADBLOCKS) {
8199
8200 rv = 0;
8201 break;
8202 } else {
8203 int this_sectors = sectors;
8204 memmove(p + hi + 1, p + hi,
8205 (bb->count - hi) * 8);
8206 bb->count++;
8207
8208 if (this_sectors > BB_MAX_LEN)
8209 this_sectors = BB_MAX_LEN;
8210 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8211 sectors -= this_sectors;
8212 s += this_sectors;
8213 }
8214 }
8215
8216 bb->changed = 1;
8217 if (!acknowledged)
8218 bb->unacked_exist = 1;
8219 write_sequnlock_irq(&bb->lock);
8220
8221 return rv;
8222}
8223
8224int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8225 int is_new)
8226{
8227 int rv;
8228 if (is_new)
8229 s += rdev->new_data_offset;
8230 else
8231 s += rdev->data_offset;
8232 rv = md_set_badblocks(&rdev->badblocks,
8233 s, sectors, 0);
8234 if (rv) {
8235
8236 sysfs_notify_dirent_safe(rdev->sysfs_state);
8237 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8238 md_wakeup_thread(rdev->mddev->thread);
8239 }
8240 return rv;
8241}
8242EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8243
8244
8245
8246
8247
8248
8249
8250static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8251{
8252 u64 *p;
8253 int lo, hi;
8254 sector_t target = s + sectors;
8255 int rv = 0;
8256
8257 if (bb->shift > 0) {
8258
8259
8260
8261
8262
8263
8264 s += (1<<bb->shift) - 1;
8265 s >>= bb->shift;
8266 target >>= bb->shift;
8267 sectors = target - s;
8268 }
8269
8270 write_seqlock_irq(&bb->lock);
8271
8272 p = bb->page;
8273 lo = 0;
8274 hi = bb->count;
8275
8276 while (hi - lo > 1) {
8277 int mid = (lo + hi) / 2;
8278 sector_t a = BB_OFFSET(p[mid]);
8279 if (a < target)
8280 lo = mid;
8281 else
8282 hi = mid;
8283 }
8284 if (hi > lo) {
8285
8286
8287
8288
8289 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8290
8291 int ack = BB_ACK(p[lo]);
8292 sector_t a = BB_OFFSET(p[lo]);
8293 sector_t end = a + BB_LEN(p[lo]);
8294
8295 if (a < s) {
8296
8297 if (bb->count >= MD_MAX_BADBLOCKS) {
8298 rv = 0;
8299 goto out;
8300 }
8301 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8302 bb->count++;
8303 p[lo] = BB_MAKE(a, s-a, ack);
8304 lo++;
8305 }
8306 p[lo] = BB_MAKE(target, end - target, ack);
8307
8308 hi = lo;
8309 lo--;
8310 }
8311 while (lo >= 0 &&
8312 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8313
8314 if (BB_OFFSET(p[lo]) < s) {
8315
8316 int ack = BB_ACK(p[lo]);
8317 sector_t start = BB_OFFSET(p[lo]);
8318 p[lo] = BB_MAKE(start, s - start, ack);
8319
8320 break;
8321 }
8322 lo--;
8323 }
8324
8325
8326
8327 if (hi - lo > 1) {
8328 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8329 bb->count -= (hi - lo - 1);
8330 }
8331 }
8332
8333 bb->changed = 1;
8334out:
8335 write_sequnlock_irq(&bb->lock);
8336 return rv;
8337}
8338
8339int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8340 int is_new)
8341{
8342 if (is_new)
8343 s += rdev->new_data_offset;
8344 else
8345 s += rdev->data_offset;
8346 return md_clear_badblocks(&rdev->badblocks,
8347 s, sectors);
8348}
8349EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8350
8351
8352
8353
8354
8355
8356void md_ack_all_badblocks(struct badblocks *bb)
8357{
8358 if (bb->page == NULL || bb->changed)
8359
8360 return;
8361 write_seqlock_irq(&bb->lock);
8362
8363 if (bb->changed == 0 && bb->unacked_exist) {
8364 u64 *p = bb->page;
8365 int i;
8366 for (i = 0; i < bb->count ; i++) {
8367 if (!BB_ACK(p[i])) {
8368 sector_t start = BB_OFFSET(p[i]);
8369 int len = BB_LEN(p[i]);
8370 p[i] = BB_MAKE(start, len, 1);
8371 }
8372 }
8373 bb->unacked_exist = 0;
8374 }
8375 write_sequnlock_irq(&bb->lock);
8376}
8377EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8378
8379
8380
8381
8382
8383
8384
8385
8386
8387
8388
8389
8390
8391static ssize_t
8392badblocks_show(struct badblocks *bb, char *page, int unack)
8393{
8394 size_t len;
8395 int i;
8396 u64 *p = bb->page;
8397 unsigned seq;
8398
8399 if (bb->shift < 0)
8400 return 0;
8401
8402retry:
8403 seq = read_seqbegin(&bb->lock);
8404
8405 len = 0;
8406 i = 0;
8407
8408 while (len < PAGE_SIZE && i < bb->count) {
8409 sector_t s = BB_OFFSET(p[i]);
8410 unsigned int length = BB_LEN(p[i]);
8411 int ack = BB_ACK(p[i]);
8412 i++;
8413
8414 if (unack && ack)
8415 continue;
8416
8417 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8418 (unsigned long long)s << bb->shift,
8419 length << bb->shift);
8420 }
8421 if (unack && len == 0)
8422 bb->unacked_exist = 0;
8423
8424 if (read_seqretry(&bb->lock, seq))
8425 goto retry;
8426
8427 return len;
8428}
8429
8430#define DO_DEBUG 1
8431
8432static ssize_t
8433badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8434{
8435 unsigned long long sector;
8436 int length;
8437 char newline;
8438#ifdef DO_DEBUG
8439
8440
8441
8442 int clear = 0;
8443 if (page[0] == '-') {
8444 clear = 1;
8445 page++;
8446 }
8447#endif
8448
8449 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
8450 case 3:
8451 if (newline != '\n')
8452 return -EINVAL;
8453 case 2:
8454 if (length <= 0)
8455 return -EINVAL;
8456 break;
8457 default:
8458 return -EINVAL;
8459 }
8460
8461#ifdef DO_DEBUG
8462 if (clear) {
8463 md_clear_badblocks(bb, sector, length);
8464 return len;
8465 }
8466#endif
8467 if (md_set_badblocks(bb, sector, length, !unack))
8468 return len;
8469 else
8470 return -ENOSPC;
8471}
8472
8473static int md_notify_reboot(struct notifier_block *this,
8474 unsigned long code, void *x)
8475{
8476 struct list_head *tmp;
8477 struct mddev *mddev;
8478 int need_delay = 0;
8479
8480 for_each_mddev(mddev, tmp) {
8481 if (mddev_trylock(mddev)) {
8482 if (mddev->pers)
8483 __md_stop_writes(mddev);
8484 mddev->safemode = 2;
8485 mddev_unlock(mddev);
8486 }
8487 need_delay = 1;
8488 }
8489
8490
8491
8492
8493
8494
8495 if (need_delay)
8496 mdelay(1000*1);
8497
8498 return NOTIFY_DONE;
8499}
8500
8501static struct notifier_block md_notifier = {
8502 .notifier_call = md_notify_reboot,
8503 .next = NULL,
8504 .priority = INT_MAX,
8505};
8506
8507static void md_geninit(void)
8508{
8509 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8510
8511 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8512}
8513
8514static int __init md_init(void)
8515{
8516 int ret = -ENOMEM;
8517
8518 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8519 if (!md_wq)
8520 goto err_wq;
8521
8522 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8523 if (!md_misc_wq)
8524 goto err_misc_wq;
8525
8526 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8527 goto err_md;
8528
8529 if ((ret = register_blkdev(0, "mdp")) < 0)
8530 goto err_mdp;
8531 mdp_major = ret;
8532
8533 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8534 md_probe, NULL, NULL);
8535 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8536 md_probe, NULL, NULL);
8537
8538 register_reboot_notifier(&md_notifier);
8539 raid_table_header = register_sysctl_table(raid_root_table);
8540
8541 md_geninit();
8542 return 0;
8543
8544err_mdp:
8545 unregister_blkdev(MD_MAJOR, "md");
8546err_md:
8547 destroy_workqueue(md_misc_wq);
8548err_misc_wq:
8549 destroy_workqueue(md_wq);
8550err_wq:
8551 return ret;
8552}
8553
8554#ifndef MODULE
8555
8556
8557
8558
8559
8560
8561static LIST_HEAD(all_detected_devices);
8562struct detected_devices_node {
8563 struct list_head list;
8564 dev_t dev;
8565};
8566
8567void md_autodetect_dev(dev_t dev)
8568{
8569 struct detected_devices_node *node_detected_dev;
8570
8571 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8572 if (node_detected_dev) {
8573 node_detected_dev->dev = dev;
8574 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8575 } else {
8576 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8577 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8578 }
8579}
8580
8581
8582static void autostart_arrays(int part)
8583{
8584 struct md_rdev *rdev;
8585 struct detected_devices_node *node_detected_dev;
8586 dev_t dev;
8587 int i_scanned, i_passed;
8588
8589 i_scanned = 0;
8590 i_passed = 0;
8591
8592 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8593
8594 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8595 i_scanned++;
8596 node_detected_dev = list_entry(all_detected_devices.next,
8597 struct detected_devices_node, list);
8598 list_del(&node_detected_dev->list);
8599 dev = node_detected_dev->dev;
8600 kfree(node_detected_dev);
8601 rdev = md_import_device(dev,0, 90);
8602 if (IS_ERR(rdev))
8603 continue;
8604
8605 if (test_bit(Faulty, &rdev->flags)) {
8606 MD_BUG();
8607 continue;
8608 }
8609 set_bit(AutoDetected, &rdev->flags);
8610 list_add(&rdev->same_set, &pending_raid_disks);
8611 i_passed++;
8612 }
8613
8614 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8615 i_scanned, i_passed);
8616
8617 autorun_devices(part);
8618}
8619
8620#endif
8621
8622static __exit void md_exit(void)
8623{
8624 struct mddev *mddev;
8625 struct list_head *tmp;
8626
8627 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8628 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8629
8630 unregister_blkdev(MD_MAJOR,"md");
8631 unregister_blkdev(mdp_major, "mdp");
8632 unregister_reboot_notifier(&md_notifier);
8633 unregister_sysctl_table(raid_table_header);
8634 remove_proc_entry("mdstat", NULL);
8635 for_each_mddev(mddev, tmp) {
8636 export_array(mddev);
8637 mddev->hold_active = 0;
8638 }
8639 destroy_workqueue(md_misc_wq);
8640 destroy_workqueue(md_wq);
8641}
8642
8643subsys_initcall(md_init);
8644module_exit(md_exit)
8645
8646static int get_ro(char *buffer, struct kernel_param *kp)
8647{
8648 return sprintf(buffer, "%d", start_readonly);
8649}
8650static int set_ro(const char *val, struct kernel_param *kp)
8651{
8652 char *e;
8653 int num = simple_strtoul(val, &e, 10);
8654 if (*val && (*e == '\0' || *e == '\n')) {
8655 start_readonly = num;
8656 return 0;
8657 }
8658 return -EINVAL;
8659}
8660
/* "start_ro": owner read/write, handled by set_ro()/get_ro() above. */
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
/* "start_dirty_degraded": plain int, world-readable, owner-writable. */
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);

/* "new_array": write-only (no getter); writes go to add_named_array(). */
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);

/* Symbols exported for use by other modules. */
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end);
EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
EXPORT_SYMBOL(md_reap_sync_thread);
/* Module metadata and aliases. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8681