/*
 * md.c : Multiple Devices driver for Linux
 */
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
76
77
78
79
80
81
82#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
83
84
85
86
87
88
89
90
91
92
93
94
95
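/*
 * Resync/recovery rate limits.  A non-zero per-array sync_speed_min or
 * sync_speed_max overrides the corresponding global value, which is
 * tunable through /proc/sys/dev/raid/speed_limit_{min,max}.
 */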
96static int sysctl_speed_limit_min = 1000;
97static int sysctl_speed_limit_max = 200000;
98static inline int speed_min(struct mddev *mddev)
99{
100 return mddev->sync_speed_min ?
101 mddev->sync_speed_min : sysctl_speed_limit_min;
102}
103
104static inline int speed_max(struct mddev *mddev)
105{
106 return mddev->sync_speed_max ?
107 mddev->sync_speed_max : sysctl_speed_limit_max;
108}
109
110static struct ctl_table_header *raid_table_header;
111
112static ctl_table raid_table[] = {
113 {
114 .procname = "speed_limit_min",
115 .data = &sysctl_speed_limit_min,
116 .maxlen = sizeof(int),
117 .mode = S_IRUGO|S_IWUSR,
118 .proc_handler = proc_dointvec,
119 },
120 {
121 .procname = "speed_limit_max",
122 .data = &sysctl_speed_limit_max,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 { }
128};
129
130static ctl_table raid_dir_table[] = {
131 {
132 .procname = "raid",
133 .maxlen = 0,
134 .mode = S_IRUGO|S_IXUGO,
135 .child = raid_table,
136 },
137 { }
138};
139
140static ctl_table raid_root_table[] = {
141 {
142 .procname = "dev",
143 .maxlen = 0,
144 .mode = 0555,
145 .child = raid_dir_table,
146 },
147 { }
148};
149
150static const struct block_device_operations md_fops;
151
152static int start_readonly;
153
154
155
156
157
158struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
159 struct mddev *mddev)
160{
161 struct bio *b;
162
163 if (!mddev || !mddev->bio_set)
164 return bio_alloc(gfp_mask, nr_iovecs);
165
166 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
167 if (!b)
168 return NULL;
169 return b;
170}
171EXPORT_SYMBOL_GPL(bio_alloc_mddev);
172
173struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
174 struct mddev *mddev)
175{
176 if (!mddev || !mddev->bio_set)
177 return bio_clone(bio, gfp_mask);
178
179 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
180}
181EXPORT_SYMBOL_GPL(bio_clone_mddev);
182
183void md_trim_bio(struct bio *bio, int offset, int size)
184{
185
186
187
188
189 int i;
190 struct bio_vec *bvec;
191 int sofar = 0;
192
193 size <<= 9;
194 if (offset == 0 && size == bio->bi_size)
195 return;
196
197 bio->bi_sector += offset;
198 bio->bi_size = size;
199 offset <<= 9;
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201
202 while (bio->bi_idx < bio->bi_vcnt &&
203 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
204
205 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
206 bio->bi_idx++;
207 }
208 if (bio->bi_idx < bio->bi_vcnt) {
209 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
210 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
211 }
212
213 if (bio->bi_idx) {
214 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
215 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
216 bio->bi_vcnt -= bio->bi_idx;
217 bio->bi_idx = 0;
218 }
219
220 bio_for_each_segment(bvec, bio, i) {
221 if (sofar + bvec->bv_len > size)
222 bvec->bv_len = size - sofar;
223 if (bvec->bv_len == 0) {
224 bio->bi_vcnt = i;
225 break;
226 }
227 sofar += bvec->bv_len;
228 }
229}
230EXPORT_SYMBOL_GPL(md_trim_bio);
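/*
 * Bump the global event count and wake anyone waiting for a change in
 * any array's state.
 */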
242static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
243static atomic_t md_event_count;
244void md_new_event(struct mddev *mddev)
245{
246 atomic_inc(&md_event_count);
247 wake_up(&md_event_waiters);
248}
249EXPORT_SYMBOL_GPL(md_new_event);
250
251
252
253
254static void md_new_event_inintr(struct mddev *mddev)
255{
256 atomic_inc(&md_event_count);
257 wake_up(&md_event_waiters);
258}
259
260
261
262
263
264static LIST_HEAD(all_mddevs);
265static DEFINE_SPINLOCK(all_mddevs_lock);
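/*
 * Iterate over every mddev in the system.  A reference is taken on the
 * current array and the reference on the previous one is dropped each
 * time around, so arrays cannot disappear under the iterator, and
 * all_mddevs_lock is only held while stepping the list.
 */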
275#define for_each_mddev(_mddev,_tmp) \
276 \
277 for (({ spin_lock(&all_mddevs_lock); \
278 _tmp = all_mddevs.next; \
279 _mddev = NULL;}); \
280 ({ if (_tmp != &all_mddevs) \
281 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
282 spin_unlock(&all_mddevs_lock); \
283 if (_mddev) mddev_put(_mddev); \
284 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
285 _tmp != &all_mddevs;}); \
286 ({ spin_lock(&all_mddevs_lock); \
287 _tmp = _tmp->next;}) \
288 )
289
290
291
292
293
294
295
296
297
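/*
 * Entry point for all I/O submitted to an md device.  If the array is
 * suspended the submitter sleeps on sb_wait until it is resumed;
 * otherwise active_io is raised around the call into the personality's
 * make_request() and the request is accounted in the gendisk statistics.
 */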
298static void md_make_request(struct request_queue *q, struct bio *bio)
299{
300 const int rw = bio_data_dir(bio);
301 struct mddev *mddev = q->queuedata;
302 int cpu;
303 unsigned int sectors;
304
305 if (mddev == NULL || mddev->pers == NULL
306 || !mddev->ready) {
307 bio_io_error(bio);
308 return;
309 }
310 smp_rmb();
311 rcu_read_lock();
312 if (mddev->suspended) {
313 DEFINE_WAIT(__wait);
314 for (;;) {
315 prepare_to_wait(&mddev->sb_wait, &__wait,
316 TASK_UNINTERRUPTIBLE);
317 if (!mddev->suspended)
318 break;
319 rcu_read_unlock();
320 schedule();
321 rcu_read_lock();
322 }
323 finish_wait(&mddev->sb_wait, &__wait);
324 }
325 atomic_inc(&mddev->active_io);
326 rcu_read_unlock();
327
328
329
330
331
332 sectors = bio_sectors(bio);
333 mddev->pers->make_request(mddev, bio);
334
335 cpu = part_stat_lock();
336 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
337 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
338 part_stat_unlock();
339
340 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
341 wake_up(&mddev->sb_wait);
342}
343
344
345
346
347
348
349
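/*
 * mddev_suspend()/mddev_resume() bracket code that must not race with
 * normal I/O: suspend marks the array suspended, waits for active_io to
 * drain and quiesces the personality; resume undoes that and kicks the
 * array's threads so recovery can continue.
 */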
350void mddev_suspend(struct mddev *mddev)
351{
352 BUG_ON(mddev->suspended);
353 mddev->suspended = 1;
354 synchronize_rcu();
355 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
356 mddev->pers->quiesce(mddev, 1);
357
358 del_timer_sync(&mddev->safemode_timer);
359}
360EXPORT_SYMBOL_GPL(mddev_suspend);
361
362void mddev_resume(struct mddev *mddev)
363{
364 mddev->suspended = 0;
365 wake_up(&mddev->sb_wait);
366 mddev->pers->quiesce(mddev, 0);
367
368 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
369 md_wakeup_thread(mddev->thread);
370 md_wakeup_thread(mddev->sync_thread);
371}
372EXPORT_SYMBOL_GPL(mddev_resume);
373
374int mddev_congested(struct mddev *mddev, int bits)
375{
376 return mddev->suspended;
377}
378EXPORT_SYMBOL(mddev_congested);
379
380
381
382
383
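/*
 * Generic flush handling: md_flush_request() admits one flush per array
 * at a time, submit_flushes() sends an empty FLUSH bio to every
 * non-faulty member device, and once they all complete
 * md_submit_flush_data() passes the original request down with
 * REQ_FLUSH cleared.
 */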
384static void md_end_flush(struct bio *bio, int err)
385{
386 struct md_rdev *rdev = bio->bi_private;
387 struct mddev *mddev = rdev->mddev;
388
389 rdev_dec_pending(rdev, mddev);
390
391 if (atomic_dec_and_test(&mddev->flush_pending)) {
392
393 queue_work(md_wq, &mddev->flush_work);
394 }
395 bio_put(bio);
396}
397
398static void md_submit_flush_data(struct work_struct *ws);
399
400static void submit_flushes(struct work_struct *ws)
401{
402 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
403 struct md_rdev *rdev;
404
405 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
406 atomic_set(&mddev->flush_pending, 1);
407 rcu_read_lock();
408 rdev_for_each_rcu(rdev, mddev)
409 if (rdev->raid_disk >= 0 &&
410 !test_bit(Faulty, &rdev->flags)) {
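			/*
			 * Take two references on the rdev: one is dropped
			 * when the flush bio completes, the other after the
			 * rcu read lock is re-taken below.
			 */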
415 struct bio *bi;
416 atomic_inc(&rdev->nr_pending);
417 atomic_inc(&rdev->nr_pending);
418 rcu_read_unlock();
419 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
420 bi->bi_end_io = md_end_flush;
421 bi->bi_private = rdev;
422 bi->bi_bdev = rdev->bdev;
423 atomic_inc(&mddev->flush_pending);
424 submit_bio(WRITE_FLUSH, bi);
425 rcu_read_lock();
426 rdev_dec_pending(rdev, mddev);
427 }
428 rcu_read_unlock();
429 if (atomic_dec_and_test(&mddev->flush_pending))
430 queue_work(md_wq, &mddev->flush_work);
431}
432
433static void md_submit_flush_data(struct work_struct *ws)
434{
435 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
436 struct bio *bio = mddev->flush_bio;
437
	if (bio->bi_size == 0)
		/* an empty flush - nothing more to do */
		bio_endio(bio, 0);
441 else {
442 bio->bi_rw &= ~REQ_FLUSH;
443 mddev->pers->make_request(mddev, bio);
444 }
445
446 mddev->flush_bio = NULL;
447 wake_up(&mddev->sb_wait);
448}
449
450void md_flush_request(struct mddev *mddev, struct bio *bio)
451{
452 spin_lock_irq(&mddev->write_lock);
453 wait_event_lock_irq(mddev->sb_wait,
454 !mddev->flush_bio,
			    mddev->write_lock, /*nothing*/);
456 mddev->flush_bio = bio;
457 spin_unlock_irq(&mddev->write_lock);
458
459 INIT_WORK(&mddev->flush_work, submit_flushes);
460 queue_work(md_wq, &mddev->flush_work);
461}
462EXPORT_SYMBOL(md_flush_request);
463
464void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
465{
466 struct mddev *mddev = cb->data;
467 md_wakeup_thread(mddev->thread);
468 kfree(cb);
469}
470EXPORT_SYMBOL(md_unplug);
471
472static inline struct mddev *mddev_get(struct mddev *mddev)
473{
474 atomic_inc(&mddev->active);
475 return mddev;
476}
477
478static void mddev_delayed_delete(struct work_struct *ws);
479
480static void mddev_put(struct mddev *mddev)
481{
482 struct bio_set *bs = NULL;
483
484 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
485 return;
486 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
487 mddev->ctime == 0 && !mddev->hold_active) {
488
489
490 list_del_init(&mddev->all_mddevs);
491 bs = mddev->bio_set;
492 mddev->bio_set = NULL;
493 if (mddev->gendisk) {
494
495
496
497
498
499 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
500 queue_work(md_misc_wq, &mddev->del_work);
501 } else
502 kfree(mddev);
503 }
504 spin_unlock(&all_mddevs_lock);
505 if (bs)
506 bioset_free(bs);
507}
508
509void mddev_init(struct mddev *mddev)
510{
511 mutex_init(&mddev->open_mutex);
512 mutex_init(&mddev->reconfig_mutex);
513 mutex_init(&mddev->bitmap_info.mutex);
514 INIT_LIST_HEAD(&mddev->disks);
515 INIT_LIST_HEAD(&mddev->all_mddevs);
516 init_timer(&mddev->safemode_timer);
517 atomic_set(&mddev->active, 1);
518 atomic_set(&mddev->openers, 0);
519 atomic_set(&mddev->active_io, 0);
520 spin_lock_init(&mddev->write_lock);
521 atomic_set(&mddev->flush_pending, 0);
522 init_waitqueue_head(&mddev->sb_wait);
523 init_waitqueue_head(&mddev->recovery_wait);
524 mddev->reshape_position = MaxSector;
525 mddev->reshape_backwards = 0;
526 mddev->resync_min = 0;
527 mddev->resync_max = MaxSector;
528 mddev->level = LEVEL_NONE;
529}
530EXPORT_SYMBOL_GPL(mddev_init);
531
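/*
 * Look up the mddev for a unit number, allocating one if it does not
 * exist yet.  The allocation is done outside all_mddevs_lock and the
 * lookup retried, so a racing caller may install the array first; a
 * unit of 0 asks for a brand new array with a dynamically chosen minor.
 */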
532static struct mddev * mddev_find(dev_t unit)
533{
534 struct mddev *mddev, *new = NULL;
535
536 if (unit && MAJOR(unit) != MD_MAJOR)
537 unit &= ~((1<<MdpMinorShift)-1);
538
539 retry:
540 spin_lock(&all_mddevs_lock);
541
542 if (unit) {
543 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
544 if (mddev->unit == unit) {
545 mddev_get(mddev);
546 spin_unlock(&all_mddevs_lock);
547 kfree(new);
548 return mddev;
549 }
550
551 if (new) {
552 list_add(&new->all_mddevs, &all_mddevs);
553 spin_unlock(&all_mddevs_lock);
554 new->hold_active = UNTIL_IOCTL;
555 return new;
556 }
557 } else if (new) {
558
559 static int next_minor = 512;
560 int start = next_minor;
561 int is_free = 0;
562 int dev = 0;
563 while (!is_free) {
564 dev = MKDEV(MD_MAJOR, next_minor);
565 next_minor++;
566 if (next_minor > MINORMASK)
567 next_minor = 0;
568 if (next_minor == start) {
569
570 spin_unlock(&all_mddevs_lock);
571 kfree(new);
572 return NULL;
573 }
574
575 is_free = 1;
576 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
577 if (mddev->unit == dev) {
578 is_free = 0;
579 break;
580 }
581 }
582 new->unit = dev;
583 new->md_minor = MINOR(dev);
584 new->hold_active = UNTIL_STOP;
585 list_add(&new->all_mddevs, &all_mddevs);
586 spin_unlock(&all_mddevs_lock);
587 return new;
588 }
589 spin_unlock(&all_mddevs_lock);
590
591 new = kzalloc(sizeof(*new), GFP_KERNEL);
592 if (!new)
593 return NULL;
594
595 new->unit = unit;
596 if (MAJOR(unit) == MD_MAJOR)
597 new->md_minor = MINOR(unit);
598 else
599 new->md_minor = MINOR(unit) >> MdpMinorShift;
600
601 mddev_init(new);
602
603 goto retry;
604}
605
606static inline int mddev_lock(struct mddev * mddev)
607{
608 return mutex_lock_interruptible(&mddev->reconfig_mutex);
609}
610
611static inline int mddev_is_locked(struct mddev *mddev)
612{
613 return mutex_is_locked(&mddev->reconfig_mutex);
614}
615
616static inline int mddev_trylock(struct mddev * mddev)
617{
618 return mutex_trylock(&mddev->reconfig_mutex);
619}
620
621static struct attribute_group md_redundancy_group;
622
623static void mddev_unlock(struct mddev * mddev)
624{
625 if (mddev->to_remove) {
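		/*
		 * The sysfs group removal below happens after dropping
		 * reconfig_mutex, so sysfs handlers that themselves take
		 * the mutex are not deadlocked; sysfs_active marks that
		 * the removal is still in progress while the lock is not
		 * held.
		 */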
638 struct attribute_group *to_remove = mddev->to_remove;
639 mddev->to_remove = NULL;
640 mddev->sysfs_active = 1;
641 mutex_unlock(&mddev->reconfig_mutex);
642
643 if (mddev->kobj.sd) {
644 if (to_remove != &md_redundancy_group)
645 sysfs_remove_group(&mddev->kobj, to_remove);
646 if (mddev->pers == NULL ||
647 mddev->pers->sync_request == NULL) {
648 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
649 if (mddev->sysfs_action)
650 sysfs_put(mddev->sysfs_action);
651 mddev->sysfs_action = NULL;
652 }
653 }
654 mddev->sysfs_active = 0;
655 } else
656 mutex_unlock(&mddev->reconfig_mutex);
657
658
659
660
661 spin_lock(&pers_lock);
662 md_wakeup_thread(mddev->thread);
663 spin_unlock(&pers_lock);
664}
665
666static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
667{
668 struct md_rdev *rdev;
669
670 rdev_for_each(rdev, mddev)
671 if (rdev->desc_nr == nr)
672 return rdev;
673
674 return NULL;
675}
676
677static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
678{
679 struct md_rdev *rdev;
680
681 rdev_for_each_rcu(rdev, mddev)
682 if (rdev->desc_nr == nr)
683 return rdev;
684
685 return NULL;
686}
687
688static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
689{
690 struct md_rdev *rdev;
691
692 rdev_for_each(rdev, mddev)
693 if (rdev->bdev->bd_dev == dev)
694 return rdev;
695
696 return NULL;
697}
698
699static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
700{
701 struct md_rdev *rdev;
702
703 rdev_for_each_rcu(rdev, mddev)
704 if (rdev->bdev->bd_dev == dev)
705 return rdev;
706
707 return NULL;
708}
709
710static struct md_personality *find_pers(int level, char *clevel)
711{
712 struct md_personality *pers;
713 list_for_each_entry(pers, &pers_list, list) {
714 if (level != LEVEL_NONE && pers->level == level)
715 return pers;
716 if (strcmp(pers->name, clevel)==0)
717 return pers;
718 }
719 return NULL;
720}
721
722
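/*
 * For the legacy 0.90 layout the superblock lives near the end of the
 * device; calc_dev_sboffset() returns the sector at which it starts.
 */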
723static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
724{
725 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
726 return MD_NEW_SIZE_SECTORS(num_sectors);
727}
728
729static int alloc_disk_sb(struct md_rdev * rdev)
730{
731 if (rdev->sb_page)
732 MD_BUG();
733
734 rdev->sb_page = alloc_page(GFP_KERNEL);
735 if (!rdev->sb_page) {
736 printk(KERN_ALERT "md: out of memory.\n");
737 return -ENOMEM;
738 }
739
740 return 0;
741}
742
743void md_rdev_clear(struct md_rdev *rdev)
744{
745 if (rdev->sb_page) {
746 put_page(rdev->sb_page);
747 rdev->sb_loaded = 0;
748 rdev->sb_page = NULL;
749 rdev->sb_start = 0;
750 rdev->sectors = 0;
751 }
752 if (rdev->bb_page) {
753 put_page(rdev->bb_page);
754 rdev->bb_page = NULL;
755 }
756 kfree(rdev->badblocks.page);
757 rdev->badblocks.page = NULL;
758}
759EXPORT_SYMBOL_GPL(md_rdev_clear);
760
761static void super_written(struct bio *bio, int error)
762{
763 struct md_rdev *rdev = bio->bi_private;
764 struct mddev *mddev = rdev->mddev;
765
766 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
767 printk("md: super_written gets error=%d, uptodate=%d\n",
768 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
769 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
770 md_error(mddev, rdev);
771 }
772
773 if (atomic_dec_and_test(&mddev->pending_writes))
774 wake_up(&mddev->sb_wait);
775 bio_put(bio);
776}
777
778void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
779 sector_t sector, int size, struct page *page)
780{
781
782
783
784
785
786
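	/*
	 * Superblock writes are issued with FLUSH/FUA so they reach stable
	 * storage before being reported complete.  pending_writes counts
	 * the writes in flight; md_super_wait() below waits for it to
	 * reach zero.
	 */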
787 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
788
789 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
790 bio->bi_sector = sector;
791 bio_add_page(bio, page, size, 0);
792 bio->bi_private = rdev;
793 bio->bi_end_io = super_written;
794
795 atomic_inc(&mddev->pending_writes);
796 submit_bio(WRITE_FLUSH_FUA, bio);
797}
798
799void md_super_wait(struct mddev *mddev)
800{
801
802 DEFINE_WAIT(wq);
803 for(;;) {
804 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
805 if (atomic_read(&mddev->pending_writes)==0)
806 break;
807 schedule();
808 }
809 finish_wait(&mddev->sb_wait, &wq);
810}
811
812static void bi_complete(struct bio *bio, int error)
813{
814 complete((struct completion*)bio->bi_private);
815}
816
817int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
818 struct page *page, int rw, bool metadata_op)
819{
820 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
821 struct completion event;
822 int ret;
823
824 rw |= REQ_SYNC;
825
826 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
827 rdev->meta_bdev : rdev->bdev;
828 if (metadata_op)
829 bio->bi_sector = sector + rdev->sb_start;
830 else if (rdev->mddev->reshape_position != MaxSector &&
831 (rdev->mddev->reshape_backwards ==
832 (sector >= rdev->mddev->reshape_position)))
833 bio->bi_sector = sector + rdev->new_data_offset;
834 else
835 bio->bi_sector = sector + rdev->data_offset;
836 bio_add_page(bio, page, size, 0);
837 init_completion(&event);
838 bio->bi_private = &event;
839 bio->bi_end_io = bi_complete;
840 submit_bio(rw, bio);
841 wait_for_completion(&event);
842
843 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
844 bio_put(bio);
845 return ret;
846}
847EXPORT_SYMBOL_GPL(sync_page_io);
848
849static int read_disk_sb(struct md_rdev * rdev, int size)
850{
851 char b[BDEVNAME_SIZE];
852 if (!rdev->sb_page) {
853 MD_BUG();
854 return -EINVAL;
855 }
856 if (rdev->sb_loaded)
857 return 0;
858
859
860 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
861 goto fail;
862 rdev->sb_loaded = 1;
863 return 0;
864
865fail:
866 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
867 bdevname(rdev->bdev,b));
868 return -EINVAL;
869}
870
871static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
872{
873 return sb1->set_uuid0 == sb2->set_uuid0 &&
874 sb1->set_uuid1 == sb2->set_uuid1 &&
875 sb1->set_uuid2 == sb2->set_uuid2 &&
876 sb1->set_uuid3 == sb2->set_uuid3;
877}
878
879static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
880{
881 int ret;
882 mdp_super_t *tmp1, *tmp2;
883
884 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
885 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
886
887 if (!tmp1 || !tmp2) {
888 ret = 0;
889 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
890 goto abort;
891 }
892
893 *tmp1 = *sb1;
894 *tmp2 = *sb2;
895
896
897
898
899 tmp1->nr_disks = 0;
900 tmp2->nr_disks = 0;
901
902 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
903abort:
904 kfree(tmp1);
905 kfree(tmp2);
906 return ret;
907}
908
909
910static u32 md_csum_fold(u32 csum)
911{
912 csum = (csum & 0xffff) + (csum >> 16);
913 return (csum & 0xffff) + (csum >> 16);
914}
915
916static unsigned int calc_sb_csum(mdp_super_t * sb)
917{
918 u64 newcsum = 0;
919 u32 *sb32 = (u32*)sb;
920 int i;
921 unsigned int disk_csum, csum;
922
923 disk_csum = sb->sb_csum;
924 sb->sb_csum = 0;
925
926 for (i = 0; i < MD_SB_BYTES/4 ; i++)
927 newcsum += sb32[i];
928 csum = (newcsum & 0xffffffff) + (newcsum>>32);
929
930
931#ifdef CONFIG_ALPHA
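	/*
	 * On alpha the value written back is folded to 16 bits, which is
	 * believed to match what older kernels on that architecture stored
	 * on disk; other architectures restore the original value.
	 */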
940 sb->sb_csum = md_csum_fold(disk_csum);
941#else
942 sb->sb_csum = disk_csum;
943#endif
944 return csum;
945}
946
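/*
 * Per-format superblock handlers.  load_super() reads and sanity-checks
 * a superblock (returning 1 if the device is newer than the reference
 * device, 0 if not, or a negative errno on failure), validate_super()
 * applies it to the mddev, sync_super() regenerates it from current
 * state, and rdev_size_change()/allow_new_offset() police geometry
 * changes.
 */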
978struct super_type {
979 char *name;
980 struct module *owner;
981 int (*load_super)(struct md_rdev *rdev,
982 struct md_rdev *refdev,
983 int minor_version);
984 int (*validate_super)(struct mddev *mddev,
985 struct md_rdev *rdev);
986 void (*sync_super)(struct mddev *mddev,
987 struct md_rdev *rdev);
988 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
989 sector_t num_sectors);
990 int (*allow_new_offset)(struct md_rdev *rdev,
991 unsigned long long new_offset);
992};
993
994
995
996
997
998
999
1000
1001
1002int md_check_no_bitmap(struct mddev *mddev)
1003{
1004 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1005 return 0;
1006 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1007 mdname(mddev), mddev->pers->name);
1008 return 1;
1009}
1010EXPORT_SYMBOL(md_check_no_bitmap);
1011
1012
1013
1014
1015static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1016{
1017 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1018 mdp_super_t *sb;
1019 int ret;
1020
1021
1022
1023
1024
1025
1026
1027 rdev->sb_start = calc_dev_sboffset(rdev);
1028
1029 ret = read_disk_sb(rdev, MD_SB_BYTES);
1030 if (ret) return ret;
1031
1032 ret = -EINVAL;
1033
1034 bdevname(rdev->bdev, b);
1035 sb = page_address(rdev->sb_page);
1036
1037 if (sb->md_magic != MD_SB_MAGIC) {
1038 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1039 b);
1040 goto abort;
1041 }
1042
1043 if (sb->major_version != 0 ||
1044 sb->minor_version < 90 ||
1045 sb->minor_version > 91) {
1046 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1047 sb->major_version, sb->minor_version,
1048 b);
1049 goto abort;
1050 }
1051
1052 if (sb->raid_disks <= 0)
1053 goto abort;
1054
1055 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1056 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1057 b);
1058 goto abort;
1059 }
1060
1061 rdev->preferred_minor = sb->md_minor;
1062 rdev->data_offset = 0;
1063 rdev->new_data_offset = 0;
1064 rdev->sb_size = MD_SB_BYTES;
1065 rdev->badblocks.shift = -1;
1066
1067 if (sb->level == LEVEL_MULTIPATH)
1068 rdev->desc_nr = -1;
1069 else
1070 rdev->desc_nr = sb->this_disk.number;
1071
1072 if (!refdev) {
1073 ret = 1;
1074 } else {
1075 __u64 ev1, ev2;
1076 mdp_super_t *refsb = page_address(refdev->sb_page);
1077 if (!uuid_equal(refsb, sb)) {
1078 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1079 b, bdevname(refdev->bdev,b2));
1080 goto abort;
1081 }
1082 if (!sb_equal(refsb, sb)) {
1083 printk(KERN_WARNING "md: %s has same UUID"
1084 " but different superblock to %s\n",
1085 b, bdevname(refdev->bdev, b2));
1086 goto abort;
1087 }
1088 ev1 = md_event(sb);
1089 ev2 = md_event(refsb);
1090 if (ev1 > ev2)
1091 ret = 1;
1092 else
1093 ret = 0;
1094 }
1095 rdev->sectors = rdev->sb_start;
1096
1097
1098
1099
1100 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1101 rdev->sectors = (2ULL << 32) - 2;
1102
1103 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1104
1105 ret = -EINVAL;
1106
1107 abort:
1108 return ret;
1109}
1110
1111
1112
1113
1114static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1115{
1116 mdp_disk_t *desc;
1117 mdp_super_t *sb = page_address(rdev->sb_page);
1118 __u64 ev1 = md_event(sb);
1119
1120 rdev->raid_disk = -1;
1121 clear_bit(Faulty, &rdev->flags);
1122 clear_bit(In_sync, &rdev->flags);
1123 clear_bit(WriteMostly, &rdev->flags);
1124
1125 if (mddev->raid_disks == 0) {
1126 mddev->major_version = 0;
1127 mddev->minor_version = sb->minor_version;
1128 mddev->patch_version = sb->patch_version;
1129 mddev->external = 0;
1130 mddev->chunk_sectors = sb->chunk_size >> 9;
1131 mddev->ctime = sb->ctime;
1132 mddev->utime = sb->utime;
1133 mddev->level = sb->level;
1134 mddev->clevel[0] = 0;
1135 mddev->layout = sb->layout;
1136 mddev->raid_disks = sb->raid_disks;
1137 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1138 mddev->events = ev1;
1139 mddev->bitmap_info.offset = 0;
1140 mddev->bitmap_info.space = 0;
1141
1142 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1143 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1144 mddev->reshape_backwards = 0;
1145
1146 if (mddev->minor_version >= 91) {
1147 mddev->reshape_position = sb->reshape_position;
1148 mddev->delta_disks = sb->delta_disks;
1149 mddev->new_level = sb->new_level;
1150 mddev->new_layout = sb->new_layout;
1151 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1152 if (mddev->delta_disks < 0)
1153 mddev->reshape_backwards = 1;
1154 } else {
1155 mddev->reshape_position = MaxSector;
1156 mddev->delta_disks = 0;
1157 mddev->new_level = mddev->level;
1158 mddev->new_layout = mddev->layout;
1159 mddev->new_chunk_sectors = mddev->chunk_sectors;
1160 }
1161
1162 if (sb->state & (1<<MD_SB_CLEAN))
1163 mddev->recovery_cp = MaxSector;
1164 else {
1165 if (sb->events_hi == sb->cp_events_hi &&
1166 sb->events_lo == sb->cp_events_lo) {
1167 mddev->recovery_cp = sb->recovery_cp;
1168 } else
1169 mddev->recovery_cp = 0;
1170 }
1171
1172 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1173 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1174 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1175 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1176
1177 mddev->max_disks = MD_SB_DISKS;
1178
1179 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1180 mddev->bitmap_info.file == NULL) {
1181 mddev->bitmap_info.offset =
1182 mddev->bitmap_info.default_offset;
			mddev->bitmap_info.space =
				mddev->bitmap_info.default_space;
1185 }
1186
1187 } else if (mddev->pers == NULL) {
1188
1189
1190 ++ev1;
1191 if (sb->disks[rdev->desc_nr].state & (
1192 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1193 if (ev1 < mddev->events)
1194 return -EINVAL;
1195 } else if (mddev->bitmap) {
1196
1197
1198
1199 if (ev1 < mddev->bitmap->events_cleared)
1200 return 0;
1201 } else {
1202 if (ev1 < mddev->events)
1203
1204 return 0;
1205 }
1206
1207 if (mddev->level != LEVEL_MULTIPATH) {
1208 desc = sb->disks + rdev->desc_nr;
1209
1210 if (desc->state & (1<<MD_DISK_FAULTY))
1211 set_bit(Faulty, &rdev->flags);
		else if (desc->state & (1<<MD_DISK_SYNC)) {
			set_bit(In_sync, &rdev->flags);
1215 rdev->raid_disk = desc->raid_disk;
1216 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1217
1218
1219
1220 if (mddev->minor_version >= 91) {
1221 rdev->recovery_offset = 0;
1222 rdev->raid_disk = desc->raid_disk;
1223 }
1224 }
1225 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1226 set_bit(WriteMostly, &rdev->flags);
1227 } else
1228 set_bit(In_sync, &rdev->flags);
1229 return 0;
1230}
1231
1232
1233
1234
1235static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1236{
1237 mdp_super_t *sb;
1238 struct md_rdev *rdev2;
1239 int next_spare = mddev->raid_disks;
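	/*
	 * Rebuild the 0.90 superblock from the current mddev state: clear
	 * it, fill in the array-wide fields, describe every member device
	 * (spares are given slots after raid_disks), and mark any missing
	 * slots as removed/faulty.
	 */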
1252 int i;
1253 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1254
1255 rdev->sb_size = MD_SB_BYTES;
1256
1257 sb = page_address(rdev->sb_page);
1258
1259 memset(sb, 0, sizeof(*sb));
1260
1261 sb->md_magic = MD_SB_MAGIC;
1262 sb->major_version = mddev->major_version;
1263 sb->patch_version = mddev->patch_version;
1264 sb->gvalid_words = 0;
1265 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1266 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1267 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1268 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1269
1270 sb->ctime = mddev->ctime;
1271 sb->level = mddev->level;
1272 sb->size = mddev->dev_sectors / 2;
1273 sb->raid_disks = mddev->raid_disks;
1274 sb->md_minor = mddev->md_minor;
1275 sb->not_persistent = 0;
1276 sb->utime = mddev->utime;
1277 sb->state = 0;
1278 sb->events_hi = (mddev->events>>32);
1279 sb->events_lo = (u32)mddev->events;
1280
1281 if (mddev->reshape_position == MaxSector)
1282 sb->minor_version = 90;
1283 else {
1284 sb->minor_version = 91;
1285 sb->reshape_position = mddev->reshape_position;
1286 sb->new_level = mddev->new_level;
1287 sb->delta_disks = mddev->delta_disks;
1288 sb->new_layout = mddev->new_layout;
1289 sb->new_chunk = mddev->new_chunk_sectors << 9;
1290 }
1291 mddev->minor_version = sb->minor_version;
1292 if (mddev->in_sync)
1293 {
1294 sb->recovery_cp = mddev->recovery_cp;
1295 sb->cp_events_hi = (mddev->events>>32);
1296 sb->cp_events_lo = (u32)mddev->events;
1297 if (mddev->recovery_cp == MaxSector)
1298 sb->state = (1<< MD_SB_CLEAN);
1299 } else
1300 sb->recovery_cp = 0;
1301
1302 sb->layout = mddev->layout;
1303 sb->chunk_size = mddev->chunk_sectors << 9;
1304
1305 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1306 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1307
1308 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1309 rdev_for_each(rdev2, mddev) {
1310 mdp_disk_t *d;
1311 int desc_nr;
1312 int is_active = test_bit(In_sync, &rdev2->flags);
1313
1314 if (rdev2->raid_disk >= 0 &&
1315 sb->minor_version >= 91)
1320 is_active = 1;
1321 if (rdev2->raid_disk < 0 ||
1322 test_bit(Faulty, &rdev2->flags))
1323 is_active = 0;
1324 if (is_active)
1325 desc_nr = rdev2->raid_disk;
1326 else
1327 desc_nr = next_spare++;
1328 rdev2->desc_nr = desc_nr;
1329 d = &sb->disks[rdev2->desc_nr];
1330 nr_disks++;
1331 d->number = rdev2->desc_nr;
1332 d->major = MAJOR(rdev2->bdev->bd_dev);
1333 d->minor = MINOR(rdev2->bdev->bd_dev);
1334 if (is_active)
1335 d->raid_disk = rdev2->raid_disk;
1336 else
1337 d->raid_disk = rdev2->desc_nr;
1338 if (test_bit(Faulty, &rdev2->flags))
1339 d->state = (1<<MD_DISK_FAULTY);
1340 else if (is_active) {
1341 d->state = (1<<MD_DISK_ACTIVE);
1342 if (test_bit(In_sync, &rdev2->flags))
1343 d->state |= (1<<MD_DISK_SYNC);
1344 active++;
1345 working++;
1346 } else {
1347 d->state = 0;
1348 spare++;
1349 working++;
1350 }
1351 if (test_bit(WriteMostly, &rdev2->flags))
1352 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1353 }
1354
1355 for (i=0 ; i < mddev->raid_disks ; i++) {
1356 mdp_disk_t *d = &sb->disks[i];
1357 if (d->state == 0 && d->number == 0) {
1358 d->number = i;
1359 d->raid_disk = i;
1360 d->state = (1<<MD_DISK_REMOVED);
1361 d->state |= (1<<MD_DISK_FAULTY);
1362 failed++;
1363 }
1364 }
1365 sb->nr_disks = nr_disks;
1366 sb->active_disks = active;
1367 sb->working_disks = working;
1368 sb->failed_disks = failed;
1369 sb->spare_disks = spare;
1370
1371 sb->this_disk = sb->disks[rdev->desc_nr];
1372 sb->sb_csum = calc_sb_csum(sb);
1373}
1374
1375
1376
1377
1378static unsigned long long
1379super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1380{
1381 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1382 return 0;
1383 if (rdev->mddev->bitmap_info.offset)
1384 return 0;
1385 rdev->sb_start = calc_dev_sboffset(rdev);
1386 if (!num_sectors || num_sectors > rdev->sb_start)
1387 num_sectors = rdev->sb_start;
1388
1389
1390
1391 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1392 num_sectors = (2ULL << 32) - 2;
1393 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1394 rdev->sb_page);
1395 md_super_wait(rdev->mddev);
1396 return num_sectors;
1397}
1398
1399static int
1400super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1401{
1402
1403 return new_offset == 0;
1404}
1405
1406
1407
1408
1409
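/*
 * Version-1 superblock handling starts here.  The checksum is a simple
 * 32-bit sum (with carry folded in) over the superblock and device-role
 * table, taken with the sb_csum field treated as zero.
 */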
1410static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1411{
1412 __le32 disk_csum;
1413 u32 csum;
1414 unsigned long long newcsum;
1415 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1416 __le32 *isuper = (__le32*)sb;
1417 int i;
1418
1419 disk_csum = sb->sb_csum;
1420 sb->sb_csum = 0;
1421 newcsum = 0;
1422 for (i=0; size>=4; size -= 4 )
1423 newcsum += le32_to_cpu(*isuper++);
1424
1425 if (size == 2)
1426 newcsum += le16_to_cpu(*(__le16*) isuper);
1427
1428 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1429 sb->sb_csum = disk_csum;
1430 return cpu_to_le32(csum);
1431}
1432
1433static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1434 int acknowledged);
1435static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1436{
1437 struct mdp_superblock_1 *sb;
1438 int ret;
1439 sector_t sb_start;
1440 sector_t sectors;
1441 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1442 int bmask;
1443
1444
1445
1446
1447
1448
1449
1450
1451
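	/*
	 * Locate the superblock according to the minor version:
	 *   0: at least 8K, but less than 12K, from the end of the device,
	 *      aligned down to a 4K boundary;
	 *   1: at the start of the device;
	 *   2: 4K from the start of the device.
	 */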
1452 switch(minor_version) {
1453 case 0:
1454 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1455 sb_start -= 8*2;
1456 sb_start &= ~(sector_t)(4*2-1);
1457 break;
1458 case 1:
1459 sb_start = 0;
1460 break;
1461 case 2:
1462 sb_start = 8;
1463 break;
1464 default:
1465 return -EINVAL;
1466 }
1467 rdev->sb_start = sb_start;
1468
1469
1470
1471
1472 ret = read_disk_sb(rdev, 4096);
1473 if (ret) return ret;
1474
1475
1476 sb = page_address(rdev->sb_page);
1477
1478 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1479 sb->major_version != cpu_to_le32(1) ||
1480 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1481 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1482 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1483 return -EINVAL;
1484
1485 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1486 printk("md: invalid superblock checksum on %s\n",
1487 bdevname(rdev->bdev,b));
1488 return -EINVAL;
1489 }
1490 if (le64_to_cpu(sb->data_size) < 10) {
1491 printk("md: data_size too small on %s\n",
1492 bdevname(rdev->bdev,b));
1493 return -EINVAL;
1494 }
1495 if (sb->pad0 ||
1496 sb->pad3[0] ||
1497 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1498
1499 return -EINVAL;
1500
1501 rdev->preferred_minor = 0xffff;
1502 rdev->data_offset = le64_to_cpu(sb->data_offset);
1503 rdev->new_data_offset = rdev->data_offset;
1504 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1505 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1506 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1507 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1508
1509 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1510 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1511 if (rdev->sb_size & bmask)
1512 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1513
1514 if (minor_version
1515 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1516 return -EINVAL;
1517 if (minor_version
1518 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1519 return -EINVAL;
1520
1521 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1522 rdev->desc_nr = -1;
1523 else
1524 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1525
1526 if (!rdev->bb_page) {
1527 rdev->bb_page = alloc_page(GFP_KERNEL);
1528 if (!rdev->bb_page)
1529 return -ENOMEM;
1530 }
1531 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1532 rdev->badblocks.count == 0) {
1533
1534
1535
1536 s32 offset;
1537 sector_t bb_sector;
1538 u64 *bbp;
1539 int i;
1540 int sectors = le16_to_cpu(sb->bblog_size);
1541 if (sectors > (PAGE_SIZE / 512))
1542 return -EINVAL;
1543 offset = le32_to_cpu(sb->bblog_offset);
1544 if (offset == 0)
1545 return -EINVAL;
1546 bb_sector = (long long)offset;
1547 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1548 rdev->bb_page, READ, true))
1549 return -EIO;
1550 bbp = (u64 *)page_address(rdev->bb_page);
1551 rdev->badblocks.shift = sb->bblog_shift;
1552 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1553 u64 bb = le64_to_cpu(*bbp);
1554 int count = bb & (0x3ff);
1555 u64 sector = bb >> 10;
1556 sector <<= sb->bblog_shift;
1557 count <<= sb->bblog_shift;
1558 if (bb + 1 == 0)
1559 break;
1560 if (md_set_badblocks(&rdev->badblocks,
1561 sector, count, 1) == 0)
1562 return -EINVAL;
1563 }
1564 } else if (sb->bblog_offset == 0)
1565 rdev->badblocks.shift = -1;
1566
1567 if (!refdev) {
1568 ret = 1;
1569 } else {
1570 __u64 ev1, ev2;
1571 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1572
1573 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1574 sb->level != refsb->level ||
1575 sb->layout != refsb->layout ||
1576 sb->chunksize != refsb->chunksize) {
1577 printk(KERN_WARNING "md: %s has strangely different"
1578 " superblock to %s\n",
1579 bdevname(rdev->bdev,b),
1580 bdevname(refdev->bdev,b2));
1581 return -EINVAL;
1582 }
1583 ev1 = le64_to_cpu(sb->events);
1584 ev2 = le64_to_cpu(refsb->events);
1585
1586 if (ev1 > ev2)
1587 ret = 1;
1588 else
1589 ret = 0;
1590 }
1591 if (minor_version) {
1592 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1593 sectors -= rdev->data_offset;
1594 } else
1595 sectors = rdev->sb_start;
1596 if (sectors < le64_to_cpu(sb->data_size))
1597 return -EINVAL;
1598 rdev->sectors = le64_to_cpu(sb->data_size);
1599 return ret;
1600}
1601
1602static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1603{
1604 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1605 __u64 ev1 = le64_to_cpu(sb->events);
1606
1607 rdev->raid_disk = -1;
1608 clear_bit(Faulty, &rdev->flags);
1609 clear_bit(In_sync, &rdev->flags);
1610 clear_bit(WriteMostly, &rdev->flags);
1611
1612 if (mddev->raid_disks == 0) {
1613 mddev->major_version = 1;
1614 mddev->patch_version = 0;
1615 mddev->external = 0;
1616 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1617 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1618 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1619 mddev->level = le32_to_cpu(sb->level);
1620 mddev->clevel[0] = 0;
1621 mddev->layout = le32_to_cpu(sb->layout);
1622 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1623 mddev->dev_sectors = le64_to_cpu(sb->size);
1624 mddev->events = ev1;
1625 mddev->bitmap_info.offset = 0;
1626 mddev->bitmap_info.space = 0;
1627
1628
1629
1630 mddev->bitmap_info.default_offset = 1024 >> 9;
1631 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1632 mddev->reshape_backwards = 0;
1633
1634 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1635 memcpy(mddev->uuid, sb->set_uuid, 16);
1636
1637 mddev->max_disks = (4096-256)/2;
1638
1639 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1640 mddev->bitmap_info.file == NULL) {
1641 mddev->bitmap_info.offset =
1642 (__s32)le32_to_cpu(sb->bitmap_offset);
1643
1644
1645
1646
1647
1648 if (mddev->minor_version > 0)
1649 mddev->bitmap_info.space = 0;
1650 else if (mddev->bitmap_info.offset > 0)
1651 mddev->bitmap_info.space =
1652 8 - mddev->bitmap_info.offset;
1653 else
1654 mddev->bitmap_info.space =
1655 -mddev->bitmap_info.offset;
1656 }
1657
1658 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1659 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1660 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1661 mddev->new_level = le32_to_cpu(sb->new_level);
1662 mddev->new_layout = le32_to_cpu(sb->new_layout);
1663 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1664 if (mddev->delta_disks < 0 ||
1665 (mddev->delta_disks == 0 &&
1666 (le32_to_cpu(sb->feature_map)
1667 & MD_FEATURE_RESHAPE_BACKWARDS)))
1668 mddev->reshape_backwards = 1;
1669 } else {
1670 mddev->reshape_position = MaxSector;
1671 mddev->delta_disks = 0;
1672 mddev->new_level = mddev->level;
1673 mddev->new_layout = mddev->layout;
1674 mddev->new_chunk_sectors = mddev->chunk_sectors;
1675 }
1676
1677 } else if (mddev->pers == NULL) {
1678
1679
1680 ++ev1;
1681 if (rdev->desc_nr >= 0 &&
1682 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1683 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1684 if (ev1 < mddev->events)
1685 return -EINVAL;
1686 } else if (mddev->bitmap) {
1687
1688
1689
1690 if (ev1 < mddev->bitmap->events_cleared)
1691 return 0;
1692 } else {
1693 if (ev1 < mddev->events)
1694
1695 return 0;
1696 }
1697 if (mddev->level != LEVEL_MULTIPATH) {
1698 int role;
1699 if (rdev->desc_nr < 0 ||
1700 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1701 role = 0xffff;
1702 rdev->desc_nr = -1;
1703 } else
1704 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1705 switch(role) {
1706 case 0xffff:
1707 break;
1708 case 0xfffe:
1709 set_bit(Faulty, &rdev->flags);
1710 break;
1711 default:
1712 if ((le32_to_cpu(sb->feature_map) &
1713 MD_FEATURE_RECOVERY_OFFSET))
1714 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1715 else
1716 set_bit(In_sync, &rdev->flags);
1717 rdev->raid_disk = role;
1718 break;
1719 }
1720 if (sb->devflags & WriteMostly1)
1721 set_bit(WriteMostly, &rdev->flags);
1722 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1723 set_bit(Replacement, &rdev->flags);
1724 } else
1725 set_bit(In_sync, &rdev->flags);
1726
1727 return 0;
1728}
1729
1730static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1731{
1732 struct mdp_superblock_1 *sb;
1733 struct md_rdev *rdev2;
1734 int max_dev, i;
1735
1736
1737 sb = page_address(rdev->sb_page);
1738
1739 sb->feature_map = 0;
1740 sb->pad0 = 0;
1741 sb->recovery_offset = cpu_to_le64(0);
1742 memset(sb->pad3, 0, sizeof(sb->pad3));
1743
1744 sb->utime = cpu_to_le64((__u64)mddev->utime);
1745 sb->events = cpu_to_le64(mddev->events);
1746 if (mddev->in_sync)
1747 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1748 else
1749 sb->resync_offset = cpu_to_le64(0);
1750
1751 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1752
1753 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1754 sb->size = cpu_to_le64(mddev->dev_sectors);
1755 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1756 sb->level = cpu_to_le32(mddev->level);
1757 sb->layout = cpu_to_le32(mddev->layout);
1758
1759 if (test_bit(WriteMostly, &rdev->flags))
1760 sb->devflags |= WriteMostly1;
1761 else
1762 sb->devflags &= ~WriteMostly1;
1763 sb->data_offset = cpu_to_le64(rdev->data_offset);
1764 sb->data_size = cpu_to_le64(rdev->sectors);
1765
1766 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1767 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1768 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1769 }
1770
1771 if (rdev->raid_disk >= 0 &&
1772 !test_bit(In_sync, &rdev->flags)) {
1773 sb->feature_map |=
1774 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1775 sb->recovery_offset =
1776 cpu_to_le64(rdev->recovery_offset);
1777 }
1778 if (test_bit(Replacement, &rdev->flags))
1779 sb->feature_map |=
1780 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1781
1782 if (mddev->reshape_position != MaxSector) {
1783 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1784 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1785 sb->new_layout = cpu_to_le32(mddev->new_layout);
1786 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1787 sb->new_level = cpu_to_le32(mddev->new_level);
1788 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1789 if (mddev->delta_disks == 0 &&
1790 mddev->reshape_backwards)
1791 sb->feature_map
1792 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1793 if (rdev->new_data_offset != rdev->data_offset) {
1794 sb->feature_map
1795 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1796 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1797 - rdev->data_offset));
1798 }
1799 }
1800
1801 if (rdev->badblocks.count == 0)
1802 ;
1803 else if (sb->bblog_offset == 0)
1804
1805 md_error(mddev, rdev);
1806 else {
1807 struct badblocks *bb = &rdev->badblocks;
1808 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1809 u64 *p = bb->page;
1810 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1811 if (bb->changed) {
1812 unsigned seq;
1813
1814retry:
1815 seq = read_seqbegin(&bb->lock);
1816
1817 memset(bbp, 0xff, PAGE_SIZE);
1818
1819 for (i = 0 ; i < bb->count ; i++) {
1820 u64 internal_bb = p[i];
1821 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1822 | BB_LEN(internal_bb));
1823 bbp[i] = cpu_to_le64(store_bb);
1824 }
1825 bb->changed = 0;
1826 if (read_seqretry(&bb->lock, seq))
1827 goto retry;
1828
1829 bb->sector = (rdev->sb_start +
1830 (int)le32_to_cpu(sb->bblog_offset));
1831 bb->size = le16_to_cpu(sb->bblog_size);
1832 }
1833 }
1834
1835 max_dev = 0;
1836 rdev_for_each(rdev2, mddev)
1837 if (rdev2->desc_nr+1 > max_dev)
1838 max_dev = rdev2->desc_nr+1;
1839
1840 if (max_dev > le32_to_cpu(sb->max_dev)) {
1841 int bmask;
1842 sb->max_dev = cpu_to_le32(max_dev);
1843 rdev->sb_size = max_dev * 2 + 256;
1844 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1845 if (rdev->sb_size & bmask)
1846 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1847 } else
1848 max_dev = le32_to_cpu(sb->max_dev);
1849
1850 for (i=0; i<max_dev;i++)
1851 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1852
1853 rdev_for_each(rdev2, mddev) {
1854 i = rdev2->desc_nr;
1855 if (test_bit(Faulty, &rdev2->flags))
1856 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1857 else if (test_bit(In_sync, &rdev2->flags))
1858 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1859 else if (rdev2->raid_disk >= 0)
1860 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1861 else
1862 sb->dev_roles[i] = cpu_to_le16(0xffff);
1863 }
1864
1865 sb->sb_csum = calc_sb_1_csum(sb);
1866}
1867
1868static unsigned long long
1869super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1870{
1871 struct mdp_superblock_1 *sb;
1872 sector_t max_sectors;
1873 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1874 return 0;
1875 if (rdev->data_offset != rdev->new_data_offset)
1876 return 0;
1877 if (rdev->sb_start < rdev->data_offset) {
1878
1879 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1880 max_sectors -= rdev->data_offset;
1881 if (!num_sectors || num_sectors > max_sectors)
1882 num_sectors = max_sectors;
1883 } else if (rdev->mddev->bitmap_info.offset) {
1884
1885 return 0;
1886 } else {
1887
1888 sector_t sb_start;
1889 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1890 sb_start &= ~(sector_t)(4*2 - 1);
1891 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1892 if (!num_sectors || num_sectors > max_sectors)
1893 num_sectors = max_sectors;
1894 rdev->sb_start = sb_start;
1895 }
1896 sb = page_address(rdev->sb_page);
1897 sb->data_size = cpu_to_le64(num_sectors);
1898 sb->super_offset = rdev->sb_start;
1899 sb->sb_csum = calc_sb_1_csum(sb);
1900 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1901 rdev->sb_page);
1902 md_super_wait(rdev->mddev);
1903 return num_sectors;
1904
1905}
1906
1907static int
1908super_1_allow_new_offset(struct md_rdev *rdev,
1909 unsigned long long new_offset)
1910{
1911
1912 struct bitmap *bitmap;
1913 if (new_offset >= rdev->data_offset)
1914 return 1;
1915
1916
1917
1918 if (rdev->mddev->minor_version == 0)
1919 return 1;
1920
1921
1922
1923
1924
1925
1926
1927 if (rdev->sb_start + (32+4)*2 > new_offset)
1928 return 0;
1929 bitmap = rdev->mddev->bitmap;
1930 if (bitmap && !rdev->mddev->bitmap_info.file &&
1931 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1932 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1933 return 0;
1934 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1935 return 0;
1936
1937 return 1;
1938}
1939
1940static struct super_type super_types[] = {
1941 [0] = {
1942 .name = "0.90.0",
1943 .owner = THIS_MODULE,
1944 .load_super = super_90_load,
1945 .validate_super = super_90_validate,
1946 .sync_super = super_90_sync,
1947 .rdev_size_change = super_90_rdev_size_change,
1948 .allow_new_offset = super_90_allow_new_offset,
1949 },
1950 [1] = {
1951 .name = "md-1",
1952 .owner = THIS_MODULE,
1953 .load_super = super_1_load,
1954 .validate_super = super_1_validate,
1955 .sync_super = super_1_sync,
1956 .rdev_size_change = super_1_rdev_size_change,
1957 .allow_new_offset = super_1_allow_new_offset,
1958 },
1959};
1960
1961static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1962{
1963 if (mddev->sync_super) {
1964 mddev->sync_super(mddev, rdev);
1965 return;
1966 }
1967
1968 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1969
1970 super_types[mddev->major_version].sync_super(mddev, rdev);
1971}
1972
1973static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1974{
1975 struct md_rdev *rdev, *rdev2;
1976
1977 rcu_read_lock();
1978 rdev_for_each_rcu(rdev, mddev1)
1979 rdev_for_each_rcu(rdev2, mddev2)
1980 if (rdev->bdev->bd_contains ==
1981 rdev2->bdev->bd_contains) {
1982 rcu_read_unlock();
1983 return 1;
1984 }
1985 rcu_read_unlock();
1986 return 0;
1987}
1988
1989static LIST_HEAD(pending_raid_disks);
1990
1991
1992
1993
1994
1995
1996
1997
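/*
 * Try to register a data integrity profile for the array: every member
 * device must advertise a profile compatible with the first one found,
 * otherwise registration is refused and the array carries no integrity
 * support.
 */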
1998int md_integrity_register(struct mddev *mddev)
1999{
2000 struct md_rdev *rdev, *reference = NULL;
2001
2002 if (list_empty(&mddev->disks))
2003 return 0;
2004 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2005 return 0;
2006 rdev_for_each(rdev, mddev) {
2007
2008 if (test_bit(Faulty, &rdev->flags))
2009 continue;
2010 if (rdev->raid_disk < 0)
2011 continue;
2012 if (!reference) {
2013
2014 reference = rdev;
2015 continue;
2016 }
2017
2018 if (blk_integrity_compare(reference->bdev->bd_disk,
2019 rdev->bdev->bd_disk) < 0)
2020 return -EINVAL;
2021 }
2022 if (!reference || !bdev_get_integrity(reference->bdev))
2023 return 0;
2024
2025
2026
2027
2028 if (blk_integrity_register(mddev->gendisk,
2029 bdev_get_integrity(reference->bdev)) != 0) {
2030 printk(KERN_ERR "md: failed to register integrity for %s\n",
2031 mdname(mddev));
2032 return -EINVAL;
2033 }
2034 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2035 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2036 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2037 mdname(mddev));
2038 return -EINVAL;
2039 }
2040 return 0;
2041}
2042EXPORT_SYMBOL(md_integrity_register);
2043
2044
2045void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2046{
2047 struct blk_integrity *bi_rdev;
2048 struct blk_integrity *bi_mddev;
2049
2050 if (!mddev->gendisk)
2051 return;
2052
2053 bi_rdev = bdev_get_integrity(rdev->bdev);
2054 bi_mddev = blk_get_integrity(mddev->gendisk);
2055
2056 if (!bi_mddev)
2057 return;
2058 if (rdev->raid_disk < 0)
2059 return;
2060 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2061 rdev->bdev->bd_disk) >= 0)
2062 return;
2063 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2064 blk_integrity_unregister(mddev->gendisk);
2065}
2066EXPORT_SYMBOL(md_integrity_add_rdev);
2067
2068static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2069{
2070 char b[BDEVNAME_SIZE];
2071 struct kobject *ko;
2072 char *s;
2073 int err;
2074
2075 if (rdev->mddev) {
2076 MD_BUG();
2077 return -EINVAL;
2078 }
2079
2080
2081 if (find_rdev(mddev, rdev->bdev->bd_dev))
2082 return -EEXIST;
2083
2084
2085 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2086 rdev->sectors < mddev->dev_sectors)) {
2087 if (mddev->pers) {
2088
2089
2090
2091
2092 if (mddev->level > 0)
2093 return -ENOSPC;
2094 } else
2095 mddev->dev_sectors = rdev->sectors;
2096 }
2097
2098
2099
2100
2101
2102 if (rdev->desc_nr < 0) {
2103 int choice = 0;
2104 if (mddev->pers) choice = mddev->raid_disks;
2105 while (find_rdev_nr(mddev, choice))
2106 choice++;
2107 rdev->desc_nr = choice;
2108 } else {
2109 if (find_rdev_nr(mddev, rdev->desc_nr))
2110 return -EBUSY;
2111 }
2112 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2113 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2114 mdname(mddev), mddev->max_disks);
2115 return -EBUSY;
2116 }
2117 bdevname(rdev->bdev,b);
2118 while ( (s=strchr(b, '/')) != NULL)
2119 *s = '!';
2120
2121 rdev->mddev = mddev;
2122 printk(KERN_INFO "md: bind<%s>\n", b);
2123
2124 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2125 goto fail;
2126
2127 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
	if (sysfs_create_link(&rdev->kobj, ko, "block"))
		/* failure here is OK */;
2130 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2131
2132 list_add_rcu(&rdev->same_set, &mddev->disks);
2133 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2134
2135
2136 mddev->recovery_disabled++;
2137
2138 return 0;
2139
2140 fail:
2141 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2142 b, mdname(mddev));
2143 return err;
2144}
2145
2146static void md_delayed_delete(struct work_struct *ws)
2147{
2148 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2149 kobject_del(&rdev->kobj);
2150 kobject_put(&rdev->kobj);
2151}
2152
2153static void unbind_rdev_from_array(struct md_rdev * rdev)
2154{
2155 char b[BDEVNAME_SIZE];
2156 if (!rdev->mddev) {
2157 MD_BUG();
2158 return;
2159 }
2160 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2161 list_del_rcu(&rdev->same_set);
2162 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2163 rdev->mddev = NULL;
2164 sysfs_remove_link(&rdev->kobj, "block");
2165 sysfs_put(rdev->sysfs_state);
2166 rdev->sysfs_state = NULL;
2167 rdev->badblocks.count = 0;
2168
2169
2170
2171
2172 synchronize_rcu();
2173 INIT_WORK(&rdev->del_work, md_delayed_delete);
2174 kobject_get(&rdev->kobj);
2175 queue_work(md_misc_wq, &rdev->del_work);
2176}
2177
2178
2179
2180
2181
2182
2183static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2184{
2185 int err = 0;
2186 struct block_device *bdev;
2187 char b[BDEVNAME_SIZE];
2188
2189 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2190 shared ? (struct md_rdev *)lock_rdev : rdev);
2191 if (IS_ERR(bdev)) {
2192 printk(KERN_ERR "md: could not open %s.\n",
2193 __bdevname(dev, b));
2194 return PTR_ERR(bdev);
2195 }
2196 rdev->bdev = bdev;
2197 return err;
2198}
2199
2200static void unlock_rdev(struct md_rdev *rdev)
2201{
2202 struct block_device *bdev = rdev->bdev;
2203 rdev->bdev = NULL;
2204 if (!bdev)
2205 MD_BUG();
2206 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2207}
2208
2209void md_autodetect_dev(dev_t dev);
2210
2211static void export_rdev(struct md_rdev * rdev)
2212{
2213 char b[BDEVNAME_SIZE];
2214 printk(KERN_INFO "md: export_rdev(%s)\n",
2215 bdevname(rdev->bdev,b));
2216 if (rdev->mddev)
2217 MD_BUG();
2218 md_rdev_clear(rdev);
2219#ifndef MODULE
2220 if (test_bit(AutoDetected, &rdev->flags))
2221 md_autodetect_dev(rdev->bdev->bd_dev);
2222#endif
2223 unlock_rdev(rdev);
2224 kobject_put(&rdev->kobj);
2225}
2226
2227static void kick_rdev_from_array(struct md_rdev * rdev)
2228{
2229 unbind_rdev_from_array(rdev);
2230 export_rdev(rdev);
2231}
2232
2233static void export_array(struct mddev *mddev)
2234{
2235 struct md_rdev *rdev, *tmp;
2236
2237 rdev_for_each_safe(rdev, tmp, mddev) {
2238 if (!rdev->mddev) {
2239 MD_BUG();
2240 continue;
2241 }
2242 kick_rdev_from_array(rdev);
2243 }
2244 if (!list_empty(&mddev->disks))
2245 MD_BUG();
2246 mddev->raid_disks = 0;
2247 mddev->major_version = 0;
2248}
2249
2250static void print_desc(mdp_disk_t *desc)
2251{
2252 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2253 desc->major,desc->minor,desc->raid_disk,desc->state);
2254}
2255
2256static void print_sb_90(mdp_super_t *sb)
2257{
2258 int i;
2259
2260 printk(KERN_INFO
2261 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2262 sb->major_version, sb->minor_version, sb->patch_version,
2263 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2264 sb->ctime);
2265 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2266 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2267 sb->md_minor, sb->layout, sb->chunk_size);
2268 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2269 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2270 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2271 sb->failed_disks, sb->spare_disks,
2272 sb->sb_csum, (unsigned long)sb->events_lo);
2273
2274 printk(KERN_INFO);
2275 for (i = 0; i < MD_SB_DISKS; i++) {
2276 mdp_disk_t *desc;
2277
2278 desc = sb->disks + i;
2279 if (desc->number || desc->major || desc->minor ||
2280 desc->raid_disk || (desc->state && (desc->state != 4))) {
2281 printk(" D %2d: ", i);
2282 print_desc(desc);
2283 }
2284 }
2285 printk(KERN_INFO "md: THIS: ");
2286 print_desc(&sb->this_disk);
2287}
2288
2289static void print_sb_1(struct mdp_superblock_1 *sb)
2290{
2291 __u8 *uuid;
2292
2293 uuid = sb->set_uuid;
2294 printk(KERN_INFO
2295 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2296 "md: Name: \"%s\" CT:%llu\n",
2297 le32_to_cpu(sb->major_version),
2298 le32_to_cpu(sb->feature_map),
2299 uuid,
2300 sb->set_name,
2301 (unsigned long long)le64_to_cpu(sb->ctime)
2302 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2303
2304 uuid = sb->device_uuid;
2305 printk(KERN_INFO
2306 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2307 " RO:%llu\n"
2308 "md: Dev:%08x UUID: %pU\n"
2309 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2310 "md: (MaxDev:%u) \n",
2311 le32_to_cpu(sb->level),
2312 (unsigned long long)le64_to_cpu(sb->size),
2313 le32_to_cpu(sb->raid_disks),
2314 le32_to_cpu(sb->layout),
2315 le32_to_cpu(sb->chunksize),
2316 (unsigned long long)le64_to_cpu(sb->data_offset),
2317 (unsigned long long)le64_to_cpu(sb->data_size),
2318 (unsigned long long)le64_to_cpu(sb->super_offset),
2319 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2320 le32_to_cpu(sb->dev_number),
2321 uuid,
2322 sb->devflags,
2323 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2324 (unsigned long long)le64_to_cpu(sb->events),
2325 (unsigned long long)le64_to_cpu(sb->resync_offset),
2326 le32_to_cpu(sb->sb_csum),
2327 le32_to_cpu(sb->max_dev)
2328 );
2329}
2330
2331static void print_rdev(struct md_rdev *rdev, int major_version)
2332{
2333 char b[BDEVNAME_SIZE];
2334 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2335 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2336 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2337 rdev->desc_nr);
2338 if (rdev->sb_loaded) {
2339 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2340 switch (major_version) {
2341 case 0:
2342 print_sb_90(page_address(rdev->sb_page));
2343 break;
2344 case 1:
2345 print_sb_1(page_address(rdev->sb_page));
2346 break;
2347 }
2348 } else
2349 printk(KERN_INFO "md: no rdev superblock!\n");
2350}
2351
2352static void md_print_devices(void)
2353{
2354 struct list_head *tmp;
2355 struct md_rdev *rdev;
2356 struct mddev *mddev;
2357 char b[BDEVNAME_SIZE];
2358
2359 printk("\n");
2360 printk("md: **********************************\n");
2361 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2362 printk("md: **********************************\n");
2363 for_each_mddev(mddev, tmp) {
2364
2365 if (mddev->bitmap)
2366 bitmap_print_sb(mddev->bitmap);
2367 else
2368 printk("%s: ", mdname(mddev));
2369 rdev_for_each(rdev, mddev)
2370 printk("<%s>", bdevname(rdev->bdev,b));
2371 printk("\n");
2372
2373 rdev_for_each(rdev, mddev)
2374 print_rdev(rdev, mddev->major_version);
2375 }
2376 printk("md: **********************************\n");
2377 printk("\n");
2378}
2379
2380
2381static void sync_sbs(struct mddev * mddev, int nospares)
2382{
	/* Update each superblock (the in-memory image), but skip spares
	 * that already have the right event count, or are one event
	 * behind (which means they are not being marked dirty with the
	 * rest of the array).
	 */
2389 struct md_rdev *rdev;
2390 rdev_for_each(rdev, mddev) {
2391 if (rdev->sb_events == mddev->events ||
2392 (nospares &&
2393 rdev->raid_disk < 0 &&
2394 rdev->sb_events+1 == mddev->events)) {
2395
2396 rdev->sb_loaded = 2;
2397 } else {
2398 sync_super(mddev, rdev);
2399 rdev->sb_loaded = 1;
2400 }
2401 }
2402}
2403
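/*
 * md_update_sb() brings the on-disk superblocks up to date with the
 * in-memory state: it bumps (or, when allowed, rolls back) the event
 * count, calls sync_sbs() and writes out every loaded superblock,
 * repeating if the array state changed while the writes were in flight.
 */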
2404static void md_update_sb(struct mddev * mddev, int force_change)
2405{
2406 struct md_rdev *rdev;
2407 int sync_req;
2408 int nospares = 0;
2409 int any_badblocks_changed = 0;
2410
2411repeat:
	/* First make sure the individual recovery_offsets are correct */
2413 rdev_for_each(rdev, mddev) {
2414 if (rdev->raid_disk >= 0 &&
2415 mddev->delta_disks >= 0 &&
2416 !test_bit(In_sync, &rdev->flags) &&
2417 mddev->curr_resync_completed > rdev->recovery_offset)
2418 rdev->recovery_offset = mddev->curr_resync_completed;
2419
2420 }
2421 if (!mddev->persistent) {
2422 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2423 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2424 if (!mddev->external) {
2425 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2426 rdev_for_each(rdev, mddev) {
2427 if (rdev->badblocks.changed) {
2428 rdev->badblocks.changed = 0;
2429 md_ack_all_badblocks(&rdev->badblocks);
2430 md_error(mddev, rdev);
2431 }
2432 clear_bit(Blocked, &rdev->flags);
2433 clear_bit(BlockedBadBlocks, &rdev->flags);
2434 wake_up(&rdev->blocked_wait);
2435 }
2436 }
2437 wake_up(&mddev->sb_wait);
2438 return;
2439 }
2440
2441 spin_lock_irq(&mddev->write_lock);
2442
2443 mddev->utime = get_seconds();
2444
2445 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2446 force_change = 1;
2447 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/* Just a clean <-> dirty transition: spares whose superblock
		 * is already one event behind can possibly be left alone
		 * (see sync_sbs()), unless something below forces a full
		 * update.
		 */
2452 nospares = 1;
2453 if (force_change)
2454 nospares = 0;
2455 if (mddev->degraded)
		/* If the array is degraded, skipping spares is both dangerous
		 * and fairly pointless.  Dangerous because a device that was
		 * removed from the array might still have an event count that
		 * looks up to date, so it could be re-added without a resync.
		 * Pointless because if there are spares to skip, a recovery
		 * will happen soon and the array will stop being degraded
		 * anyway.
		 */
2465 nospares = 0;
2466
2467 sync_req = mddev->in_sync;
2468
	/* If this is just a dirty <-> clean transition and the array is
	 * clean, we can sometimes roll the event count back instead. */
2471 if (nospares
2472 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2473 && mddev->can_decrease_events
2474 && mddev->events != 1) {
2475 mddev->events--;
2476 mddev->can_decrease_events = 0;
2477 } else {
		/* otherwise we have to go forwards */
2479 mddev->events ++;
2480 mddev->can_decrease_events = nospares;
2481 }
2482
2483 if (!mddev->events) {
		/* The 64-bit event counter should never wrap.
		 * If it has, something is badly wrong.
		 */
2489 MD_BUG();
2490 mddev->events --;
2491 }
2492
2493 rdev_for_each(rdev, mddev) {
2494 if (rdev->badblocks.changed)
2495 any_badblocks_changed++;
2496 if (test_bit(Faulty, &rdev->flags))
2497 set_bit(FaultRecorded, &rdev->flags);
2498 }
2499
2500 sync_sbs(mddev, nospares);
2501 spin_unlock_irq(&mddev->write_lock);
2502
2503 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2504 mdname(mddev), mddev->in_sync);
2505
2506 bitmap_update_sb(mddev->bitmap);
2507 rdev_for_each(rdev, mddev) {
2508 char b[BDEVNAME_SIZE];
2509
2510 if (rdev->sb_loaded != 1)
2511 continue;
2512
2513 if (!test_bit(Faulty, &rdev->flags) &&
2514 rdev->saved_raid_disk == -1) {
2515 md_super_write(mddev,rdev,
2516 rdev->sb_start, rdev->sb_size,
2517 rdev->sb_page);
2518 pr_debug("md: (write) %s's sb offset: %llu\n",
2519 bdevname(rdev->bdev, b),
2520 (unsigned long long)rdev->sb_start);
2521 rdev->sb_events = mddev->events;
2522 if (rdev->badblocks.size) {
2523 md_super_write(mddev, rdev,
2524 rdev->badblocks.sector,
2525 rdev->badblocks.size << 9,
2526 rdev->bb_page);
2527 rdev->badblocks.size = 0;
2528 }
2529
2530 } else if (test_bit(Faulty, &rdev->flags))
2531 pr_debug("md: %s (skipping faulty)\n",
2532 bdevname(rdev->bdev, b));
2533 else
2534 pr_debug("(skipping incremental s/r ");
2535
2536 if (mddev->level == LEVEL_MULTIPATH)
2537
2538 break;
2539 }
2540 md_super_wait(mddev);
2541
2542
2543 spin_lock_irq(&mddev->write_lock);
2544 if (mddev->in_sync != sync_req ||
2545 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* state changed while we were writing: do it all again */
2547 spin_unlock_irq(&mddev->write_lock);
2548 goto repeat;
2549 }
2550 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2551 spin_unlock_irq(&mddev->write_lock);
2552 wake_up(&mddev->sb_wait);
2553 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2554 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2555
2556 rdev_for_each(rdev, mddev) {
2557 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2558 clear_bit(Blocked, &rdev->flags);
2559
2560 if (any_badblocks_changed)
2561 md_ack_all_badblocks(&rdev->badblocks);
2562 clear_bit(BlockedBadBlocks, &rdev->flags);
2563 wake_up(&rdev->blocked_wait);
2564 }
2565}
2566
/* Strings written to sysfs files may or may not be newline terminated.
 * We want to accept either; cmd_match() below handles that.
 */
2570static int cmd_match(const char *cmd, const char *str)
2571{
	/* See if cmd, written into a sysfs file, matches str.
	 * They must either be the same, or cmd can have a trailing
	 * newline.
	 */
2576 while (*cmd && *str && *cmd == *str) {
2577 cmd++;
2578 str++;
2579 }
2580 if (*cmd == '\n')
2581 cmd++;
2582 if (*str || *cmd)
2583 return 0;
2584 return 1;
2585}
2586
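/*
 * Each per-rdev sysfs attribute is described by an rdev_sysfs_entry;
 * rdev_attr_show()/rdev_attr_store() below take the mddev lock and
 * dispatch to the entry's ->show/->store methods.
 */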
2587struct rdev_sysfs_entry {
2588 struct attribute attr;
2589 ssize_t (*show)(struct md_rdev *, char *);
2590 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2591};
2592
2593static ssize_t
2594state_show(struct md_rdev *rdev, char *page)
2595{
2596 char *sep = "";
2597 size_t len = 0;
2598
2599 if (test_bit(Faulty, &rdev->flags) ||
2600 rdev->badblocks.unacked_exist) {
2601 len+= sprintf(page+len, "%sfaulty",sep);
2602 sep = ",";
2603 }
2604 if (test_bit(In_sync, &rdev->flags)) {
2605 len += sprintf(page+len, "%sin_sync",sep);
2606 sep = ",";
2607 }
2608 if (test_bit(WriteMostly, &rdev->flags)) {
2609 len += sprintf(page+len, "%swrite_mostly",sep);
2610 sep = ",";
2611 }
2612 if (test_bit(Blocked, &rdev->flags) ||
2613 (rdev->badblocks.unacked_exist
2614 && !test_bit(Faulty, &rdev->flags))) {
2615 len += sprintf(page+len, "%sblocked", sep);
2616 sep = ",";
2617 }
2618 if (!test_bit(Faulty, &rdev->flags) &&
2619 !test_bit(In_sync, &rdev->flags)) {
2620 len += sprintf(page+len, "%sspare", sep);
2621 sep = ",";
2622 }
2623 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2624 len += sprintf(page+len, "%swrite_error", sep);
2625 sep = ",";
2626 }
2627 if (test_bit(WantReplacement, &rdev->flags)) {
2628 len += sprintf(page+len, "%swant_replacement", sep);
2629 sep = ",";
2630 }
2631 if (test_bit(Replacement, &rdev->flags)) {
2632 len += sprintf(page+len, "%sreplacement", sep);
2633 sep = ",";
2634 }
2635
2636 return len+sprintf(page+len, "\n");
2637}
2638
2639static ssize_t
2640state_store(struct md_rdev *rdev, const char *buf, size_t len)
2641{
	/* can write
	 *  faulty  - simulates an error on the device
	 *  remove  - disconnects the device
	 *  writemostly - sets write_mostly
	 *  -writemostly - clears write_mostly
	 *  blocked - sets the Blocked flag
	 *  -blocked - clears the Blocked flag, possibly simulating an error
	 *  insync - sets In_sync provided the device isn't active
	 *  write_error / -write_error - sets/clears WriteErrorSeen
	 *  want_replacement / -want_replacement, replacement / -replacement
	 */
2653 int err = -EINVAL;
2654 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2655 md_error(rdev->mddev, rdev);
2656 if (test_bit(Faulty, &rdev->flags))
2657 err = 0;
2658 else
2659 err = -EBUSY;
2660 } else if (cmd_match(buf, "remove")) {
2661 if (rdev->raid_disk >= 0)
2662 err = -EBUSY;
2663 else {
2664 struct mddev *mddev = rdev->mddev;
2665 kick_rdev_from_array(rdev);
2666 if (mddev->pers)
2667 md_update_sb(mddev, 1);
2668 md_new_event(mddev);
2669 err = 0;
2670 }
2671 } else if (cmd_match(buf, "writemostly")) {
2672 set_bit(WriteMostly, &rdev->flags);
2673 err = 0;
2674 } else if (cmd_match(buf, "-writemostly")) {
2675 clear_bit(WriteMostly, &rdev->flags);
2676 err = 0;
2677 } else if (cmd_match(buf, "blocked")) {
2678 set_bit(Blocked, &rdev->flags);
2679 err = 0;
2680 } else if (cmd_match(buf, "-blocked")) {
2681 if (!test_bit(Faulty, &rdev->flags) &&
2682 rdev->badblocks.unacked_exist) {
			/* The metadata handler doesn't understand bad blocks,
			 * so we need to fail the device.
			 */
2686 md_error(rdev->mddev, rdev);
2687 }
2688 clear_bit(Blocked, &rdev->flags);
2689 clear_bit(BlockedBadBlocks, &rdev->flags);
2690 wake_up(&rdev->blocked_wait);
2691 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2692 md_wakeup_thread(rdev->mddev->thread);
2693
2694 err = 0;
2695 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2696 set_bit(In_sync, &rdev->flags);
2697 err = 0;
2698 } else if (cmd_match(buf, "write_error")) {
2699 set_bit(WriteErrorSeen, &rdev->flags);
2700 err = 0;
2701 } else if (cmd_match(buf, "-write_error")) {
2702 clear_bit(WriteErrorSeen, &rdev->flags);
2703 err = 0;
2704 } else if (cmd_match(buf, "want_replacement")) {
		/* Any non-replacement device with an active slot can be
		 * marked want_replacement at any time; we then need to
		 * check whether recovery is needed.
		 */
2709 if (rdev->raid_disk >= 0 &&
2710 !test_bit(Replacement, &rdev->flags))
2711 set_bit(WantReplacement, &rdev->flags);
2712 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2713 md_wakeup_thread(rdev->mddev->thread);
2714 err = 0;
2715 } else if (cmd_match(buf, "-want_replacement")) {
		/* Clearing want_replacement is always allowed. */
2719 err = 0;
2720 clear_bit(WantReplacement, &rdev->flags);
2721 } else if (cmd_match(buf, "replacement")) {
		/* A device can only be marked as a replacement while the
		 * array has not yet been started.  Once it is running,
		 * replacements are chosen automatically from spares or
		 * by assigning 'slot'.
		 */
2726 if (rdev->mddev->pers)
2727 err = -EBUSY;
2728 else {
2729 set_bit(Replacement, &rdev->flags);
2730 err = 0;
2731 }
2732 } else if (cmd_match(buf, "-replacement")) {
		/* Similarly, Replacement can only be cleared before the array starts */
2734 if (rdev->mddev->pers)
2735 err = -EBUSY;
2736 else {
2737 clear_bit(Replacement, &rdev->flags);
2738 err = 0;
2739 }
2740 }
2741 if (!err)
2742 sysfs_notify_dirent_safe(rdev->sysfs_state);
2743 return err ? err : len;
2744}
2745static struct rdev_sysfs_entry rdev_state =
2746__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2747
2748static ssize_t
2749errors_show(struct md_rdev *rdev, char *page)
2750{
2751 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2752}
2753
2754static ssize_t
2755errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2756{
2757 char *e;
2758 unsigned long n = simple_strtoul(buf, &e, 10);
2759 if (*buf && (*e == 0 || *e == '\n')) {
2760 atomic_set(&rdev->corrected_errors, n);
2761 return len;
2762 }
2763 return -EINVAL;
2764}
2765static struct rdev_sysfs_entry rdev_errors =
2766__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2767
2768static ssize_t
2769slot_show(struct md_rdev *rdev, char *page)
2770{
2771 if (rdev->raid_disk < 0)
2772 return sprintf(page, "none\n");
2773 else
2774 return sprintf(page, "%d\n", rdev->raid_disk);
2775}
2776
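/*
 * 'slot' selects which raid_disk the device occupies: writing "none"
 * removes it from an active array via ->hot_remove_disk(), writing a
 * number hot-adds it (or simply records the slot if the array is not
 * running yet).
 */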
2777static ssize_t
2778slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2779{
2780 char *e;
2781 int err;
2782 int slot = simple_strtoul(buf, &e, 10);
2783 if (strncmp(buf, "none", 4)==0)
2784 slot = -1;
2785 else if (e==buf || (*e && *e!= '\n'))
2786 return -EINVAL;
2787 if (rdev->mddev->pers && slot == -1) {
		/* Setting 'slot' to 'none' on an active array requires the
		 * personality's help (->hot_remove_disk) and removing the
		 * 'rd%d' sysfs link.  This normally happens automatically;
		 * doing it by hand is mainly useful when the metadata is
		 * managed externally.
		 */
2795 if (rdev->raid_disk == -1)
2796 return -EEXIST;
		/* the personality does all the needed checks */
2798 if (rdev->mddev->pers->hot_remove_disk == NULL)
2799 return -EINVAL;
2800 err = rdev->mddev->pers->
2801 hot_remove_disk(rdev->mddev, rdev);
2802 if (err)
2803 return err;
2804 sysfs_unlink_rdev(rdev->mddev, rdev);
2805 rdev->raid_disk = -1;
2806 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2807 md_wakeup_thread(rdev->mddev->thread);
2808 } else if (rdev->mddev->pers) {
		/* Activating a spare: assign it a slot and ask the
		 * personality to hot-add it.
		 */
2813 if (rdev->raid_disk != -1)
2814 return -EBUSY;
2815
2816 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2817 return -EBUSY;
2818
2819 if (rdev->mddev->pers->hot_add_disk == NULL)
2820 return -EINVAL;
2821
2822 if (slot >= rdev->mddev->raid_disks &&
2823 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2824 return -ENOSPC;
2825
2826 rdev->raid_disk = slot;
2827 if (test_bit(In_sync, &rdev->flags))
2828 rdev->saved_raid_disk = slot;
2829 else
2830 rdev->saved_raid_disk = -1;
2831 clear_bit(In_sync, &rdev->flags);
2832 err = rdev->mddev->pers->
2833 hot_add_disk(rdev->mddev, rdev);
2834 if (err) {
2835 rdev->raid_disk = -1;
2836 return err;
2837 } else
2838 sysfs_notify_dirent_safe(rdev->sysfs_state);
2839 if (sysfs_link_rdev(rdev->mddev, rdev))
2840			; /* failure to create the sysfs link is not fatal */
2841
2842 } else {
2843 if (slot >= rdev->mddev->raid_disks &&
2844 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2845 return -ENOSPC;
2846 rdev->raid_disk = slot;
		/* array not running: just record the slot and assume the device works */
2848 clear_bit(Faulty, &rdev->flags);
2849 clear_bit(WriteMostly, &rdev->flags);
2850 set_bit(In_sync, &rdev->flags);
2851 sysfs_notify_dirent_safe(rdev->sysfs_state);
2852 }
2853 return len;
2854}
2855
2856
2857static struct rdev_sysfs_entry rdev_slot =
2858__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2859
2860static ssize_t
2861offset_show(struct md_rdev *rdev, char *page)
2862{
2863 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2864}
2865
2866static ssize_t
2867offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2868{
2869 unsigned long long offset;
2870 if (strict_strtoull(buf, 10, &offset) < 0)
2871 return -EINVAL;
2872 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2873 return -EBUSY;
2874 if (rdev->sectors && rdev->mddev->external)
		/* The offset must be set before the size, so that the
		 * overlap checks in rdev_size_store() stay sane. */
2877 return -EBUSY;
2878 rdev->data_offset = offset;
2879 rdev->new_data_offset = offset;
2880 return len;
2881}
2882
2883static struct rdev_sysfs_entry rdev_offset =
2884__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2885
2886static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2887{
2888 return sprintf(page, "%llu\n",
2889 (unsigned long long)rdev->new_data_offset);
2890}
2891
2892static ssize_t new_offset_store(struct md_rdev *rdev,
2893 const char *buf, size_t len)
2894{
2895 unsigned long long new_offset;
2896 struct mddev *mddev = rdev->mddev;
2897
2898 if (strict_strtoull(buf, 10, &new_offset) < 0)
2899 return -EINVAL;
2900
2901 if (mddev->sync_thread)
2902 return -EBUSY;
2903 if (new_offset == rdev->data_offset)
		/* resetting to the current offset is always permitted */
2905 ;
2906 else if (new_offset > rdev->data_offset) {
		/* the data must still fit within the device */
2908 if (new_offset - rdev->data_offset
2909 + mddev->dev_sectors > rdev->sectors)
2910 return -E2BIG;
2911 }
	/* The metadata code worries about the other space details. */

	/* Decreasing the offset is inconsistent with a backwards
	 * reshape.
	 */
2917 if (new_offset < rdev->data_offset &&
2918 mddev->reshape_backwards)
2919 return -EINVAL;
	/* Increasing the offset is inconsistent with a forwards reshape;
	 * reshape_direction should be set to 'backwards' first.
	 */
2924 if (new_offset > rdev->data_offset &&
2925 !mddev->reshape_backwards)
2926 return -EINVAL;
2927
2928 if (mddev->pers && mddev->persistent &&
2929 !super_types[mddev->major_version]
2930 .allow_new_offset(rdev, new_offset))
2931 return -E2BIG;
2932 rdev->new_data_offset = new_offset;
2933 if (new_offset > rdev->data_offset)
2934 mddev->reshape_backwards = 1;
2935 else if (new_offset < rdev->data_offset)
2936 mddev->reshape_backwards = 0;
2937
2938 return len;
2939}
2940static struct rdev_sysfs_entry rdev_new_offset =
2941__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2942
2943static ssize_t
2944rdev_size_show(struct md_rdev *rdev, char *page)
2945{
2946 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2947}
2948
2949static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2950{
	/* check whether the two start/length ranges overlap */
2952 if (s1+l1 <= s2)
2953 return 0;
2954 if (s2+l2 <= s1)
2955 return 0;
2956 return 1;
2957}
2958
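/*
 * Parse a size given in 1K blocks and convert it to sectors,
 * rejecting values that would overflow a sector_t.
 */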
2959static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2960{
2961 unsigned long long blocks;
2962 sector_t new;
2963
2964 if (strict_strtoull(buf, 10, &blocks) < 0)
2965 return -EINVAL;
2966
2967 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2968 return -EINVAL;
2969
2970 new = blocks * 2;
2971 if (new != blocks * 2)
2972 return -EINVAL;
2973
2974 *sectors = new;
2975 return 0;
2976}
2977
2978static ssize_t
2979rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2980{
2981 struct mddev *my_mddev = rdev->mddev;
2982 sector_t oldsectors = rdev->sectors;
2983 sector_t sectors;
2984
2985	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2986 return -EINVAL;
2987 if (rdev->data_offset != rdev->new_data_offset)
2988 return -EINVAL;
2989 if (my_mddev->pers && rdev->raid_disk >= 0) {
2990 if (my_mddev->persistent) {
2991 sectors = super_types[my_mddev->major_version].
2992 rdev_size_change(rdev, sectors);
2993 if (!sectors)
2994 return -EBUSY;
2995 } else if (!sectors)
2996 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2997 rdev->data_offset;
2998 }
2999 if (sectors < my_mddev->dev_sectors)
3000 return -EINVAL;
3001
3002 rdev->sectors = sectors;
3003 if (sectors > oldsectors && my_mddev->external) {
		/* Need to check that no other rdev sharing this ->bdev
		 * overlaps the new range.  This is not a hard guarantee,
		 * it just helps to avoid dangerous mistakes.
		 */
3009 struct mddev *mddev;
3010 int overlap = 0;
3011 struct list_head *tmp;
3012
3013 mddev_unlock(my_mddev);
3014 for_each_mddev(mddev, tmp) {
3015 struct md_rdev *rdev2;
3016
3017 mddev_lock(mddev);
3018 rdev_for_each(rdev2, mddev)
3019 if (rdev->bdev == rdev2->bdev &&
3020 rdev != rdev2 &&
3021 overlaps(rdev->data_offset, rdev->sectors,
3022 rdev2->data_offset,
3023 rdev2->sectors)) {
3024 overlap = 1;
3025 break;
3026 }
3027 mddev_unlock(mddev);
3028 if (overlap) {
3029 mddev_put(mddev);
3030 break;
3031 }
3032 }
3033 mddev_lock(my_mddev);
3034 if (overlap) {
			/* Someone else could have slipped in a size change
			 * here, but doing so would be silly.  Put oldsectors
			 * back because we know it is safe.
			 */
3041 rdev->sectors = oldsectors;
3042 return -EBUSY;
3043 }
3044 }
3045 return len;
3046}
3047
3048static struct rdev_sysfs_entry rdev_size =
3049__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3050
3051
3052static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3053{
3054 unsigned long long recovery_start = rdev->recovery_offset;
3055
3056 if (test_bit(In_sync, &rdev->flags) ||
3057 recovery_start == MaxSector)
3058 return sprintf(page, "none\n");
3059
3060 return sprintf(page, "%llu\n", recovery_start);
3061}
3062
3063static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3064{
3065 unsigned long long recovery_start;
3066
3067 if (cmd_match(buf, "none"))
3068 recovery_start = MaxSector;
3069 else if (strict_strtoull(buf, 10, &recovery_start))
3070 return -EINVAL;
3071
3072 if (rdev->mddev->pers &&
3073 rdev->raid_disk >= 0)
3074 return -EBUSY;
3075
3076 rdev->recovery_offset = recovery_start;
3077 if (recovery_start == MaxSector)
3078 set_bit(In_sync, &rdev->flags);
3079 else
3080 clear_bit(In_sync, &rdev->flags);
3081 return len;
3082}
3083
3084static struct rdev_sysfs_entry rdev_recovery_start =
3085__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3086
3087
3088static ssize_t
3089badblocks_show(struct badblocks *bb, char *page, int unack);
3090static ssize_t
3091badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3092
3093static ssize_t bb_show(struct md_rdev *rdev, char *page)
3094{
3095 return badblocks_show(&rdev->badblocks, page, 0);
3096}
3097static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3098{
3099 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3100
3101 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3102 wake_up(&rdev->blocked_wait);
3103 return rv;
3104}
3105static struct rdev_sysfs_entry rdev_bad_blocks =
3106__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3107
3108
3109static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3110{
3111 return badblocks_show(&rdev->badblocks, page, 1);
3112}
3113static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3114{
3115 return badblocks_store(&rdev->badblocks, page, len, 1);
3116}
3117static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3118__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3119
3120static struct attribute *rdev_default_attrs[] = {
3121 &rdev_state.attr,
3122 &rdev_errors.attr,
3123 &rdev_slot.attr,
3124 &rdev_offset.attr,
3125 &rdev_new_offset.attr,
3126 &rdev_size.attr,
3127 &rdev_recovery_start.attr,
3128 &rdev_bad_blocks.attr,
3129 &rdev_unack_bad_blocks.attr,
3130 NULL,
3131};
3132static ssize_t
3133rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3134{
3135 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3136 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3137 struct mddev *mddev = rdev->mddev;
3138 ssize_t rv;
3139
3140 if (!entry->show)
3141 return -EIO;
3142
3143 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3144 if (!rv) {
3145 if (rdev->mddev == NULL)
3146 rv = -EBUSY;
3147 else
3148 rv = entry->show(rdev, page);
3149 mddev_unlock(mddev);
3150 }
3151 return rv;
3152}
3153
3154static ssize_t
3155rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3156 const char *page, size_t length)
3157{
3158 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3159 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3160 ssize_t rv;
3161 struct mddev *mddev = rdev->mddev;
3162
3163 if (!entry->store)
3164 return -EIO;
3165 if (!capable(CAP_SYS_ADMIN))
3166 return -EACCES;
3167 rv = mddev ? mddev_lock(mddev): -EBUSY;
3168 if (!rv) {
3169 if (rdev->mddev == NULL)
3170 rv = -EBUSY;
3171 else
3172 rv = entry->store(rdev, page, length);
3173 mddev_unlock(mddev);
3174 }
3175 return rv;
3176}
3177
3178static void rdev_free(struct kobject *ko)
3179{
3180 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3181 kfree(rdev);
3182}
3183static const struct sysfs_ops rdev_sysfs_ops = {
3184 .show = rdev_attr_show,
3185 .store = rdev_attr_store,
3186};
3187static struct kobj_type rdev_ktype = {
3188 .release = rdev_free,
3189 .sysfs_ops = &rdev_sysfs_ops,
3190 .default_attrs = rdev_default_attrs,
3191};
3192
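/*
 * Initialise a freshly allocated rdev to a sane "not in any array"
 * state and allocate the single page used to hold its bad-block list.
 */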
3193int md_rdev_init(struct md_rdev *rdev)
3194{
3195 rdev->desc_nr = -1;
3196 rdev->saved_raid_disk = -1;
3197 rdev->raid_disk = -1;
3198 rdev->flags = 0;
3199 rdev->data_offset = 0;
3200 rdev->new_data_offset = 0;
3201 rdev->sb_events = 0;
3202 rdev->last_read_error.tv_sec = 0;
3203 rdev->last_read_error.tv_nsec = 0;
3204 rdev->sb_loaded = 0;
3205 rdev->bb_page = NULL;
3206 atomic_set(&rdev->nr_pending, 0);
3207 atomic_set(&rdev->read_errors, 0);
3208 atomic_set(&rdev->corrected_errors, 0);
3209
3210 INIT_LIST_HEAD(&rdev->same_set);
3211 init_waitqueue_head(&rdev->blocked_wait);
3212
	/* Reserve space for the bad-block list.  This is allocated even
	 * for arrays that cannot use it.
	 */
3217 rdev->badblocks.count = 0;
3218 rdev->badblocks.shift = 0;
3219 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3220 seqlock_init(&rdev->badblocks.lock);
3221 if (rdev->badblocks.page == NULL)
3222 return -ENOMEM;
3223
3224 return 0;
3225}
3226EXPORT_SYMBOL_GPL(md_rdev_init);
3227
/*
 * Import a device.  If 'super_format' >= 0, sanity check the superblock.
 *
 * The import fails if:
 *
 *   - the device is nonexistent (zero size), or
 *   - the device has no valid superblock.
 */
3237static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3238{
3239 char b[BDEVNAME_SIZE];
3240 int err;
3241 struct md_rdev *rdev;
3242 sector_t size;
3243
3244 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3245 if (!rdev) {
3246 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3247 return ERR_PTR(-ENOMEM);
3248 }
3249
3250 err = md_rdev_init(rdev);
3251 if (err)
3252 goto abort_free;
3253 err = alloc_disk_sb(rdev);
3254 if (err)
3255 goto abort_free;
3256
3257 err = lock_rdev(rdev, newdev, super_format == -2);
3258 if (err)
3259 goto abort_free;
3260
3261 kobject_init(&rdev->kobj, &rdev_ktype);
3262
3263 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3264 if (!size) {
3265 printk(KERN_WARNING
3266 "md: %s has zero or unknown size, marking faulty!\n",
3267 bdevname(rdev->bdev,b));
3268 err = -EINVAL;
3269 goto abort_free;
3270 }
3271
3272 if (super_format >= 0) {
3273 err = super_types[super_format].
3274 load_super(rdev, NULL, super_minor);
3275 if (err == -EINVAL) {
3276 printk(KERN_WARNING
3277 "md: %s does not have a valid v%d.%d "
3278 "superblock, not importing!\n",
3279 bdevname(rdev->bdev,b),
3280 super_format, super_minor);
3281 goto abort_free;
3282 }
3283 if (err < 0) {
3284 printk(KERN_WARNING
3285 "md: could not read %s's sb, not importing!\n",
3286 bdevname(rdev->bdev,b));
3287 goto abort_free;
3288 }
3289 }
3290 if (super_format == -1)
		/* non-persistent metadata (or 0.90 hot-add): no bad block log */
3292 rdev->badblocks.shift = -1;
3293
3294 return rdev;
3295
3296abort_free:
3297 if (rdev->bdev)
3298 unlock_rdev(rdev);
3299 md_rdev_clear(rdev);
3300 kfree(rdev);
3301 return ERR_PTR(err);
3302}
3303
/*
 * Check a full RAID array for plausibility
 */
3309static void analyze_sbs(struct mddev * mddev)
3310{
3311 int i;
3312 struct md_rdev *rdev, *freshest, *tmp;
3313 char b[BDEVNAME_SIZE];
3314
3315 freshest = NULL;
3316 rdev_for_each_safe(rdev, tmp, mddev)
3317 switch (super_types[mddev->major_version].
3318 load_super(rdev, freshest, mddev->minor_version)) {
3319 case 1:
3320 freshest = rdev;
3321 break;
3322 case 0:
3323 break;
3324 default:
3325 printk( KERN_ERR \
3326 "md: fatal superblock inconsistency in %s"
3327 " -- removing from array\n",
3328 bdevname(rdev->bdev,b));
3329 kick_rdev_from_array(rdev);
3330 }
3331
3332
3333 super_types[mddev->major_version].
3334 validate_super(mddev, freshest);
3335
3336 i = 0;
3337 rdev_for_each_safe(rdev, tmp, mddev) {
3338 if (mddev->max_disks &&
3339 (rdev->desc_nr >= mddev->max_disks ||
3340 i > mddev->max_disks)) {
3341 printk(KERN_WARNING
3342 "md: %s: %s: only %d devices permitted\n",
3343 mdname(mddev), bdevname(rdev->bdev, b),
3344 mddev->max_disks);
3345 kick_rdev_from_array(rdev);
3346 continue;
3347 }
3348 if (rdev != freshest)
3349 if (super_types[mddev->major_version].
3350 validate_super(mddev, rdev)) {
3351 printk(KERN_WARNING "md: kicking non-fresh %s"
3352 " from array!\n",
3353 bdevname(rdev->bdev,b));
3354 kick_rdev_from_array(rdev);
3355 continue;
3356 }
3357 if (mddev->level == LEVEL_MULTIPATH) {
3358 rdev->desc_nr = i++;
3359 rdev->raid_disk = rdev->desc_nr;
3360 set_bit(In_sync, &rdev->flags);
3361 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3362 rdev->raid_disk = -1;
3363 clear_bit(In_sync, &rdev->flags);
3364 }
3365 }
3366}
3367
/* Read a fixed-point number.
 * Numbers in sysfs attributes should be in "standard" units where
 * possible, so time should be in seconds; however we internally use a
 * much smaller unit such as milliseconds or jiffies.
 * This function takes a decimal number with an optional fractional
 * component and produces an integer which is the result of multiplying
 * that number by 10^'scale', all without floating-point arithmetic.
 */
3378int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3379{
3380 unsigned long result = 0;
3381 long decimals = -1;
3382 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3383 if (*cp == '.')
3384 decimals = 0;
3385 else if (decimals < scale) {
3386 unsigned int value;
3387 value = *cp - '0';
3388 result = result * 10 + value;
3389 if (decimals >= 0)
3390 decimals++;
3391 }
3392 cp++;
3393 }
3394 if (*cp == '\n')
3395 cp++;
3396 if (*cp)
3397 return -EINVAL;
3398 if (decimals < 0)
3399 decimals = 0;
3400 while (decimals < scale) {
3401 result *= 10;
3402 decimals ++;
3403 }
3404 *res = result;
3405 return 0;
3406}
3407
3408
3409static void md_safemode_timeout(unsigned long data);
3410
3411static ssize_t
3412safe_delay_show(struct mddev *mddev, char *page)
3413{
3414 int msec = (mddev->safemode_delay*1000)/HZ;
3415 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3416}
3417static ssize_t
3418safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3419{
3420 unsigned long msec;
3421
3422 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3423 return -EINVAL;
3424 if (msec == 0)
3425 mddev->safemode_delay = 0;
3426 else {
3427 unsigned long old_delay = mddev->safemode_delay;
3428 mddev->safemode_delay = (msec*HZ)/1000;
3429 if (mddev->safemode_delay == 0)
3430 mddev->safemode_delay = 1;
3431 if (mddev->safemode_delay < old_delay)
3432 md_safemode_timeout((unsigned long)mddev);
3433 }
3434 return len;
3435}
3436static struct md_sysfs_entry md_safe_delay =
3437__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3438
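/*
 * 'level' reports the current personality and, when written, attempts an
 * online takeover to a different personality (see level_store() below).
 */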
3439static ssize_t
3440level_show(struct mddev *mddev, char *page)
3441{
3442 struct md_personality *p = mddev->pers;
3443 if (p)
3444 return sprintf(page, "%s\n", p->name);
3445 else if (mddev->clevel[0])
3446 return sprintf(page, "%s\n", mddev->clevel);
3447 else if (mddev->level != LEVEL_NONE)
3448 return sprintf(page, "%d\n", mddev->level);
3449 else
3450 return 0;
3451}
3452
3453static ssize_t
3454level_store(struct mddev *mddev, const char *buf, size_t len)
3455{
3456 char clevel[16];
3457 ssize_t rv = len;
3458 struct md_personality *pers;
3459 long level;
3460 void *priv;
3461 struct md_rdev *rdev;
3462
3463 if (mddev->pers == NULL) {
3464 if (len == 0)
3465 return 0;
3466 if (len >= sizeof(mddev->clevel))
3467 return -ENOSPC;
3468 strncpy(mddev->clevel, buf, len);
3469 if (mddev->clevel[len-1] == '\n')
3470 len--;
3471 mddev->clevel[len] = 0;
3472 mddev->level = LEVEL_NONE;
3473 return rv;
3474 }
3475
	/* Request to change the personality.  Need to ensure:
	 *  - the array is not engaged in resync/recovery/reshape
	 *  - the old personality can be suspended
	 *  - the new personality will accept (take over) the array
	 */
3482 if (mddev->sync_thread ||
3483 mddev->reshape_position != MaxSector ||
3484 mddev->sysfs_active)
3485 return -EBUSY;
3486
3487 if (!mddev->pers->quiesce) {
3488 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3489 mdname(mddev), mddev->pers->name);
3490 return -EINVAL;
3491 }
3492
	/* Now find the new personality */
3494 if (len == 0 || len >= sizeof(clevel))
3495 return -EINVAL;
3496 strncpy(clevel, buf, len);
3497 if (clevel[len-1] == '\n')
3498 len--;
3499 clevel[len] = 0;
3500 if (strict_strtol(clevel, 10, &level))
3501 level = LEVEL_NONE;
3502
3503 if (request_module("md-%s", clevel) != 0)
3504 request_module("md-level-%s", clevel);
3505 spin_lock(&pers_lock);
3506 pers = find_pers(level, clevel);
3507 if (!pers || !try_module_get(pers->owner)) {
3508 spin_unlock(&pers_lock);
3509 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3510 return -EINVAL;
3511 }
3512 spin_unlock(&pers_lock);
3513
3514 if (pers == mddev->pers) {
3515
3516 module_put(pers->owner);
3517 return rv;
3518 }
3519 if (!pers->takeover) {
3520 module_put(pers->owner);
3521 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3522 mdname(mddev), clevel);
3523 return -EINVAL;
3524 }
3525
3526 rdev_for_each(rdev, mddev)
3527 rdev->new_raid_disk = rdev->raid_disk;
3528
	/* ->takeover must set new_* and/or delta_disks if it succeeds,
	 * and may set them when it fails.
	 */
3532 priv = pers->takeover(mddev);
3533 if (IS_ERR(priv)) {
3534 mddev->new_level = mddev->level;
3535 mddev->new_layout = mddev->layout;
3536 mddev->new_chunk_sectors = mddev->chunk_sectors;
3537 mddev->raid_disks -= mddev->delta_disks;
3538 mddev->delta_disks = 0;
3539 mddev->reshape_backwards = 0;
3540 module_put(pers->owner);
3541 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3542 mdname(mddev), clevel);
3543 return PTR_ERR(priv);
3544 }
3545
3546
3547 mddev_suspend(mddev);
3548 mddev->pers->stop(mddev);
3549
3550 if (mddev->pers->sync_request == NULL &&
3551 pers->sync_request != NULL) {
3552
3553 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3554 printk(KERN_WARNING
3555 "md: cannot register extra attributes for %s\n",
3556 mdname(mddev));
3557 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
3558 }
3559 if (mddev->pers->sync_request != NULL &&
3560 pers->sync_request == NULL) {
3561
3562 if (mddev->to_remove == NULL)
3563 mddev->to_remove = &md_redundancy_group;
3564 }
3565
3566 if (mddev->pers->sync_request == NULL &&
3567 mddev->external) {
		/* We are converting from a non-redundant array, and the
		 * metadata is managed externally, so make sure writes
		 * won't block waiting for a metadata update from outside
		 * the kernel.
		 */
3575 mddev->in_sync = 0;
3576 mddev->safemode_delay = 0;
3577 mddev->safemode = 0;
3578 }
3579
3580 rdev_for_each(rdev, mddev) {
3581 if (rdev->raid_disk < 0)
3582 continue;
3583 if (rdev->new_raid_disk >= mddev->raid_disks)
3584 rdev->new_raid_disk = -1;
3585 if (rdev->new_raid_disk == rdev->raid_disk)
3586 continue;
3587 sysfs_unlink_rdev(mddev, rdev);
3588 }
3589 rdev_for_each(rdev, mddev) {
3590 if (rdev->raid_disk < 0)
3591 continue;
3592 if (rdev->new_raid_disk == rdev->raid_disk)
3593 continue;
3594 rdev->raid_disk = rdev->new_raid_disk;
3595 if (rdev->raid_disk < 0)
3596 clear_bit(In_sync, &rdev->flags);
3597 else {
3598 if (sysfs_link_rdev(mddev, rdev))
3599 printk(KERN_WARNING "md: cannot register rd%d"
3600 " for %s after level change\n",
3601 rdev->raid_disk, mdname(mddev));
3602 }
3603 }
3604
3605 module_put(mddev->pers->owner);
3606 mddev->pers = pers;
3607 mddev->private = priv;
3608 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3609 mddev->level = mddev->new_level;
3610 mddev->layout = mddev->new_layout;
3611 mddev->chunk_sectors = mddev->new_chunk_sectors;
3612 mddev->delta_disks = 0;
3613 mddev->reshape_backwards = 0;
3614 mddev->degraded = 0;
3615 if (mddev->pers->sync_request == NULL) {
		/* This is now an array without redundancy, so it must
		 * always be considered in_sync.
		 */
3619 mddev->in_sync = 1;
3620 del_timer_sync(&mddev->safemode_timer);
3621 }
3622 pers->run(mddev);
3623 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3624 mddev_resume(mddev);
3625 sysfs_notify(&mddev->kobj, NULL, "level");
3626 md_new_event(mddev);
3627 return rv;
3628}
3629
3630static struct md_sysfs_entry md_level =
3631__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3632
3633
3634static ssize_t
3635layout_show(struct mddev *mddev, char *page)
3636{
3637
3638 if (mddev->reshape_position != MaxSector &&
3639 mddev->layout != mddev->new_layout)
3640 return sprintf(page, "%d (%d)\n",
3641 mddev->new_layout, mddev->layout);
3642 return sprintf(page, "%d\n", mddev->layout);
3643}
3644
3645static ssize_t
3646layout_store(struct mddev *mddev, const char *buf, size_t len)
3647{
3648 char *e;
3649 unsigned long n = simple_strtoul(buf, &e, 10);
3650
3651 if (!*buf || (*e && *e != '\n'))
3652 return -EINVAL;
3653
3654 if (mddev->pers) {
3655 int err;
3656 if (mddev->pers->check_reshape == NULL)
3657 return -EBUSY;
3658 mddev->new_layout = n;
3659 err = mddev->pers->check_reshape(mddev);
3660 if (err) {
3661 mddev->new_layout = mddev->layout;
3662 return err;
3663 }
3664 } else {
3665 mddev->new_layout = n;
3666 if (mddev->reshape_position == MaxSector)
3667 mddev->layout = n;
3668 }
3669 return len;
3670}
3671static struct md_sysfs_entry md_layout =
3672__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3673
3674
3675static ssize_t
3676raid_disks_show(struct mddev *mddev, char *page)
3677{
3678 if (mddev->raid_disks == 0)
3679 return 0;
3680 if (mddev->reshape_position != MaxSector &&
3681 mddev->delta_disks != 0)
3682 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3683 mddev->raid_disks - mddev->delta_disks);
3684 return sprintf(page, "%d\n", mddev->raid_disks);
3685}
3686
3687static int update_raid_disks(struct mddev *mddev, int raid_disks);
3688
3689static ssize_t
3690raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3691{
3692 char *e;
3693 int rv = 0;
3694 unsigned long n = simple_strtoul(buf, &e, 10);
3695
3696 if (!*buf || (*e && *e != '\n'))
3697 return -EINVAL;
3698
3699 if (mddev->pers)
3700 rv = update_raid_disks(mddev, n);
3701 else if (mddev->reshape_position != MaxSector) {
3702 struct md_rdev *rdev;
3703 int olddisks = mddev->raid_disks - mddev->delta_disks;
3704
3705 rdev_for_each(rdev, mddev) {
3706 if (olddisks < n &&
3707 rdev->data_offset < rdev->new_data_offset)
3708 return -EINVAL;
3709 if (olddisks > n &&
3710 rdev->data_offset > rdev->new_data_offset)
3711 return -EINVAL;
3712 }
3713 mddev->delta_disks = n - olddisks;
3714 mddev->raid_disks = n;
3715 mddev->reshape_backwards = (mddev->delta_disks < 0);
3716 } else
3717 mddev->raid_disks = n;
3718 return rv ? rv : len;
3719}
3720static struct md_sysfs_entry md_raid_disks =
3721__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3722
3723static ssize_t
3724chunk_size_show(struct mddev *mddev, char *page)
3725{
3726 if (mddev->reshape_position != MaxSector &&
3727 mddev->chunk_sectors != mddev->new_chunk_sectors)
3728 return sprintf(page, "%d (%d)\n",
3729 mddev->new_chunk_sectors << 9,
3730 mddev->chunk_sectors << 9);
3731 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3732}
3733
3734static ssize_t
3735chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3736{
3737 char *e;
3738 unsigned long n = simple_strtoul(buf, &e, 10);
3739
3740 if (!*buf || (*e && *e != '\n'))
3741 return -EINVAL;
3742
3743 if (mddev->pers) {
3744 int err;
3745 if (mddev->pers->check_reshape == NULL)
3746 return -EBUSY;
3747 mddev->new_chunk_sectors = n >> 9;
3748 err = mddev->pers->check_reshape(mddev);
3749 if (err) {
3750 mddev->new_chunk_sectors = mddev->chunk_sectors;
3751 return err;
3752 }
3753 } else {
3754 mddev->new_chunk_sectors = n >> 9;
3755 if (mddev->reshape_position == MaxSector)
3756 mddev->chunk_sectors = n >> 9;
3757 }
3758 return len;
3759}
3760static struct md_sysfs_entry md_chunk_size =
3761__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3762
3763static ssize_t
3764resync_start_show(struct mddev *mddev, char *page)
3765{
3766 if (mddev->recovery_cp == MaxSector)
3767 return sprintf(page, "none\n");
3768 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3769}
3770
3771static ssize_t
3772resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3773{
3774 char *e;
3775 unsigned long long n = simple_strtoull(buf, &e, 10);
3776
3777 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3778 return -EBUSY;
3779 if (cmd_match(buf, "none"))
3780 n = MaxSector;
3781 else if (!*buf || (*e && *e != '\n'))
3782 return -EINVAL;
3783
3784 mddev->recovery_cp = n;
3785 if (mddev->pers)
3786 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3787 return len;
3788}
3789static struct md_sysfs_entry md_resync_start =
3790__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3791
/*
 * The array state reported/accepted here can be:
 *
 *  clear
 *    No devices, no size, no level.
 *    Writing this stops an active array and clears its configuration.
 *  inactive
 *    May have some settings and devices, but the array is not active.
 *    Writing this stops an active array but keeps its configuration.
 *  suspended
 *    Not currently settable through this file.
 *  readonly
 *    No resync can happen and no superblocks get written; write
 *    requests fail.  Writing this makes a running array read-only,
 *    or starts an inactive one read-only.
 *  read-auto
 *    Like readonly, but the array switches itself back to read-write
 *    on the first write request.
 *  clean
 *    Active with no pending writes (in_sync).  Writing this marks a
 *    running array in_sync, provided no writes are pending.
 *  active
 *    Fully active: IO and resync can be happening.  Writing this
 *    clears a pending 'write-pending' state, or starts an inactive
 *    array read-write.
 *  write-pending
 *    Clean, but writes are blocked waiting for 'active' to be written.
 *  active-idle
 *    Like active, but no writes have been seen for a while (safemode).
 *
 *  write-pending and active-idle are reported but cannot be written.
 */
3828enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3829 write_pending, active_idle, bad_word};
3830static char *array_states[] = {
3831 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3832 "write-pending", "active-idle", NULL };
3833
3834static int match_word(const char *word, char **list)
3835{
3836 int n;
3837 for (n=0; list[n]; n++)
3838 if (cmd_match(word, list[n]))
3839 break;
3840 return n;
3841}
3842
3843static ssize_t
3844array_state_show(struct mddev *mddev, char *page)
3845{
3846 enum array_state st = inactive;
3847
3848 if (mddev->pers)
3849 switch(mddev->ro) {
3850 case 1:
3851 st = readonly;
3852 break;
3853 case 2:
3854 st = read_auto;
3855 break;
3856 case 0:
3857 if (mddev->in_sync)
3858 st = clean;
3859 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3860 st = write_pending;
3861 else if (mddev->safemode)
3862 st = active_idle;
3863 else
3864 st = active;
3865 }
3866 else {
3867 if (list_empty(&mddev->disks) &&
3868 mddev->raid_disks == 0 &&
3869 mddev->dev_sectors == 0)
3870 st = clear;
3871 else
3872 st = inactive;
3873 }
3874 return sprintf(page, "%s\n", array_states[st]);
3875}
3876
3877static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3878static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3879static int do_md_run(struct mddev * mddev);
3880static int restart_array(struct mddev *mddev);
3881
3882static ssize_t
3883array_state_store(struct mddev *mddev, const char *buf, size_t len)
3884{
3885 int err = -EINVAL;
3886 enum array_state st = match_word(buf, array_states);
3887 switch(st) {
3888 case bad_word:
3889 break;
3890 case clear:
		/* stop an active array and clear its configuration */
3892 err = do_md_stop(mddev, 0, NULL);
3893 break;
3894 case inactive:
		/* stop an active array but keep its configuration */
3896 if (mddev->pers)
3897 err = do_md_stop(mddev, 2, NULL);
3898 else
3899 err = 0;
3900 break;
3901 case suspended:
3902 break;
3903 case readonly:
3904 if (mddev->pers)
3905 err = md_set_readonly(mddev, NULL);
3906 else {
3907 mddev->ro = 1;
3908 set_disk_ro(mddev->gendisk, 1);
3909 err = do_md_run(mddev);
3910 }
3911 break;
3912 case read_auto:
3913 if (mddev->pers) {
3914 if (mddev->ro == 0)
3915 err = md_set_readonly(mddev, NULL);
3916 else if (mddev->ro == 1)
3917 err = restart_array(mddev);
3918 if (err == 0) {
3919 mddev->ro = 2;
3920 set_disk_ro(mddev->gendisk, 0);
3921 }
3922 } else {
3923 mddev->ro = 2;
3924 err = do_md_run(mddev);
3925 }
3926 break;
3927 case clean:
3928 if (mddev->pers) {
3929 restart_array(mddev);
3930 spin_lock_irq(&mddev->write_lock);
3931 if (atomic_read(&mddev->writes_pending) == 0) {
3932 if (mddev->in_sync == 0) {
3933 mddev->in_sync = 1;
3934 if (mddev->safemode == 1)
3935 mddev->safemode = 0;
3936 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3937 }
3938 err = 0;
3939 } else
3940 err = -EBUSY;
3941 spin_unlock_irq(&mddev->write_lock);
3942 } else
3943 err = -EINVAL;
3944 break;
3945 case active:
3946 if (mddev->pers) {
3947 restart_array(mddev);
3948 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3949 wake_up(&mddev->sb_wait);
3950 err = 0;
3951 } else {
3952 mddev->ro = 0;
3953 set_disk_ro(mddev->gendisk, 0);
3954 err = do_md_run(mddev);
3955 }
3956 break;
3957 case write_pending:
3958 case active_idle:
		/* these states cannot be set explicitly */
3960 break;
3961 }
3962 if (err)
3963 return err;
3964 else {
3965 if (mddev->hold_active == UNTIL_IOCTL)
3966 mddev->hold_active = 0;
3967 sysfs_notify_dirent_safe(mddev->sysfs_state);
3968 return len;
3969 }
3970}
3971static struct md_sysfs_entry md_array_state =
3972__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3973
3974static ssize_t
3975max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3976 return sprintf(page, "%d\n",
3977 atomic_read(&mddev->max_corr_read_errors));
3978}
3979
3980static ssize_t
3981max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3982{
3983 char *e;
3984 unsigned long n = simple_strtoul(buf, &e, 10);
3985
3986 if (*buf && (*e == 0 || *e == '\n')) {
3987 atomic_set(&mddev->max_corr_read_errors, n);
3988 return len;
3989 }
3990 return -EINVAL;
3991}
3992
3993static struct md_sysfs_entry max_corr_read_errors =
3994__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3995 max_corrected_read_errors_store);
3996
3997static ssize_t
3998null_show(struct mddev *mddev, char *page)
3999{
4000 return -EINVAL;
4001}
4002
4003static ssize_t
4004new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4005{
	/* buf must be "%d:%d\n", giving major and minor numbers.
	 * The new device is added to the array.
	 * If the array has a persistent superblock, we read the
	 * superblock to initialise info and check validity.
	 * Otherwise, the only checking done is that in
	 * bind_rdev_to_array, which mainly checks size.
	 */
4013 char *e;
4014 int major = simple_strtoul(buf, &e, 10);
4015 int minor;
4016 dev_t dev;
4017 struct md_rdev *rdev;
4018 int err;
4019
4020 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4021 return -EINVAL;
4022 minor = simple_strtoul(e+1, &e, 10);
4023 if (*e && *e != '\n')
4024 return -EINVAL;
4025 dev = MKDEV(major, minor);
4026 if (major != MAJOR(dev) ||
4027 minor != MINOR(dev))
4028 return -EOVERFLOW;
4029
4030
4031 if (mddev->persistent) {
4032 rdev = md_import_device(dev, mddev->major_version,
4033 mddev->minor_version);
4034 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4035 struct md_rdev *rdev0
4036 = list_entry(mddev->disks.next,
4037 struct md_rdev, same_set);
4038 err = super_types[mddev->major_version]
4039 .load_super(rdev, rdev0, mddev->minor_version);
4040 if (err < 0)
4041 goto out;
4042 }
4043 } else if (mddev->external)
4044 rdev = md_import_device(dev, -2, -1);
4045 else
4046 rdev = md_import_device(dev, -1, -1);
4047
4048 if (IS_ERR(rdev))
4049 return PTR_ERR(rdev);
4050 err = bind_rdev_to_array(rdev, mddev);
4051 out:
4052 if (err)
4053 export_rdev(rdev);
4054 return err ? err : len;
4055}
4056
4057static struct md_sysfs_entry md_new_device =
4058__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4059
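/*
 * 'bitmap_set_bits' accepts a whitespace-separated list of chunk numbers
 * or "first-last" ranges and marks those bitmap chunks dirty.
 */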
4060static ssize_t
4061bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4062{
4063 char *end;
4064 unsigned long chunk, end_chunk;
4065
4066 if (!mddev->bitmap)
4067 goto out;
4068
4069 while (*buf) {
4070 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4071 if (buf == end) break;
4072 if (*end == '-') {
4073 buf = end + 1;
4074 end_chunk = simple_strtoul(buf, &end, 0);
4075 if (buf == end) break;
4076 }
4077 if (*end && !isspace(*end)) break;
4078 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4079 buf = skip_spaces(end);
4080 }
4081 bitmap_unplug(mddev->bitmap);
4082out:
4083 return len;
4084}
4085
4086static struct md_sysfs_entry md_bitmap =
4087__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4088
4089static ssize_t
4090size_show(struct mddev *mddev, char *page)
4091{
4092 return sprintf(page, "%llu\n",
4093 (unsigned long long)mddev->dev_sectors / 2);
4094}
4095
4096static int update_size(struct mddev *mddev, sector_t num_sectors);
4097
4098static ssize_t
4099size_store(struct mddev *mddev, const char *buf, size_t len)
4100{
	/* If the array is inactive, we can reduce the component size, but
	 * not increase it (except from 0).
	 * If the array is active, we can try an on-line resize.
	 */
4105 sector_t sectors;
4106	int err = strict_blocks_to_sectors(buf, &sectors);
4107
4108 if (err < 0)
4109 return err;
4110 if (mddev->pers) {
4111 err = update_size(mddev, sectors);
4112 md_update_sb(mddev, 1);
4113 } else {
4114 if (mddev->dev_sectors == 0 ||
4115 mddev->dev_sectors > sectors)
4116 mddev->dev_sectors = sectors;
4117 else
4118 err = -ENOSPC;
4119 }
4120 return err ? err : len;
4121}
4122
4123static struct md_sysfs_entry md_size =
4124__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4125

/* Metadata version.
 * This is one of
 *   'none' for arrays with no persistent metadata,
 *   'external:<type>' for externally managed metadata,
 *   or N.M for internally known formats.
 */
4133static ssize_t
4134metadata_show(struct mddev *mddev, char *page)
4135{
4136 if (mddev->persistent)
4137 return sprintf(page, "%d.%d\n",
4138 mddev->major_version, mddev->minor_version);
4139 else if (mddev->external)
4140 return sprintf(page, "external:%s\n", mddev->metadata_type);
4141 else
4142 return sprintf(page, "none\n");
4143}
4144
4145static ssize_t
4146metadata_store(struct mddev *mddev, const char *buf, size_t len)
4147{
4148 int major, minor;
4149 char *e;
	/* Changing the details of 'external' metadata is always permitted.
	 * Otherwise there must be no devices attached to the array.
	 */
4154 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4155 ;
4156 else if (!list_empty(&mddev->disks))
4157 return -EBUSY;
4158
4159 if (cmd_match(buf, "none")) {
4160 mddev->persistent = 0;
4161 mddev->external = 0;
4162 mddev->major_version = 0;
4163 mddev->minor_version = 90;
4164 return len;
4165 }
4166 if (strncmp(buf, "external:", 9) == 0) {
4167 size_t namelen = len-9;
4168 if (namelen >= sizeof(mddev->metadata_type))
4169 namelen = sizeof(mddev->metadata_type)-1;
4170 strncpy(mddev->metadata_type, buf+9, namelen);
4171 mddev->metadata_type[namelen] = 0;
4172 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4173 mddev->metadata_type[--namelen] = 0;
4174 mddev->persistent = 0;
4175 mddev->external = 1;
4176 mddev->major_version = 0;
4177 mddev->minor_version = 90;
4178 return len;
4179 }
4180 major = simple_strtoul(buf, &e, 10);
4181 if (e==buf || *e != '.')
4182 return -EINVAL;
4183 buf = e+1;
4184 minor = simple_strtoul(buf, &e, 10);
4185 if (e==buf || (*e && *e != '\n') )
4186 return -EINVAL;
4187 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4188 return -ENOENT;
4189 mddev->major_version = major;
4190 mddev->minor_version = minor;
4191 mddev->persistent = 1;
4192 mddev->external = 0;
4193 return len;
4194}
4195
4196static struct md_sysfs_entry md_metadata =
4197__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4198
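/*
 * 'sync_action' reports/controls the current sync activity:
 * idle, frozen, resync, recover, check, repair or reshape
 * (see action_store() below for the exact semantics of each).
 */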
4199static ssize_t
4200action_show(struct mddev *mddev, char *page)
4201{
4202 char *type = "idle";
4203 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4204 type = "frozen";
4205 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4206 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4207 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4208 type = "reshape";
4209 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4210 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4211 type = "resync";
4212 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4213 type = "check";
4214 else
4215 type = "repair";
4216 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4217 type = "recover";
4218 }
4219 return sprintf(page, "%s\n", type);
4220}
4221
4222static void reap_sync_thread(struct mddev *mddev);
4223
4224static ssize_t
4225action_store(struct mddev *mddev, const char *page, size_t len)
4226{
4227 if (!mddev->pers || !mddev->pers->sync_request)
4228 return -EINVAL;
4229
4230 if (cmd_match(page, "frozen"))
4231 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4232 else
4233 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4234
4235 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4236 if (mddev->sync_thread) {
4237 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4238 reap_sync_thread(mddev);
4239 }
4240 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4241 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4242 return -EBUSY;
4243 else if (cmd_match(page, "resync"))
4244 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4245 else if (cmd_match(page, "recover")) {
4246 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4247 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4248 } else if (cmd_match(page, "reshape")) {
4249 int err;
4250 if (mddev->pers->start_reshape == NULL)
4251 return -EINVAL;
4252 err = mddev->pers->start_reshape(mddev);
4253 if (err)
4254 return err;
4255 sysfs_notify(&mddev->kobj, NULL, "degraded");
4256 } else {
4257 if (cmd_match(page, "check"))
4258 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4259 else if (!cmd_match(page, "repair"))
4260 return -EINVAL;
4261 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4262 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4263 }
4264 if (mddev->ro == 2) {
		/* A write to sync_action is enough to justify
		 * canceling read-auto mode.
		 */
4268 mddev->ro = 0;
4269 md_wakeup_thread(mddev->sync_thread);
4270 }
4271 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4272 md_wakeup_thread(mddev->thread);
4273 sysfs_notify_dirent_safe(mddev->sysfs_action);
4274 return len;
4275}
4276
4277static ssize_t
4278mismatch_cnt_show(struct mddev *mddev, char *page)
4279{
4280 return sprintf(page, "%llu\n",
4281 (unsigned long long)
4282 atomic64_read(&mddev->resync_mismatches));
4283}
4284
4285static struct md_sysfs_entry md_scan_mode =
4286__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4287
4288
4289static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4290
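/*
 * sync_speed_min/sync_speed_max set a per-array ("local") resync speed
 * limit; writing "system" clears it so the global default applies again.
 */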
4291static ssize_t
4292sync_min_show(struct mddev *mddev, char *page)
4293{
4294 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4295 mddev->sync_speed_min ? "local": "system");
4296}
4297
4298static ssize_t
4299sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4300{
4301 int min;
4302 char *e;
4303 if (strncmp(buf, "system", 6)==0) {
4304 mddev->sync_speed_min = 0;
4305 return len;
4306 }
4307 min = simple_strtoul(buf, &e, 10);
4308 if (buf == e || (*e && *e != '\n') || min <= 0)
4309 return -EINVAL;
4310 mddev->sync_speed_min = min;
4311 return len;
4312}
4313
4314static struct md_sysfs_entry md_sync_min =
4315__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4316
4317static ssize_t
4318sync_max_show(struct mddev *mddev, char *page)
4319{
4320 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4321 mddev->sync_speed_max ? "local": "system");
4322}
4323
4324static ssize_t
4325sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4326{
4327 int max;
4328 char *e;
4329 if (strncmp(buf, "system", 6)==0) {
4330 mddev->sync_speed_max = 0;
4331 return len;
4332 }
4333 max = simple_strtoul(buf, &e, 10);
4334 if (buf == e || (*e && *e != '\n') || max <= 0)
4335 return -EINVAL;
4336 mddev->sync_speed_max = max;
4337 return len;
4338}
4339
4340static struct md_sysfs_entry md_sync_max =
4341__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4342
4343static ssize_t
4344degraded_show(struct mddev *mddev, char *page)
4345{
4346 return sprintf(page, "%d\n", mddev->degraded);
4347}
4348static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4349
4350static ssize_t
4351sync_force_parallel_show(struct mddev *mddev, char *page)
4352{
4353 return sprintf(page, "%d\n", mddev->parallel_resync);
4354}
4355
4356static ssize_t
4357sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4358{
4359 long n;
4360
4361 if (strict_strtol(buf, 10, &n))
4362 return -EINVAL;
4363
4364 if (n != 0 && n != 1)
4365 return -EINVAL;
4366
4367 mddev->parallel_resync = n;
4368
4369 if (mddev->sync_thread)
4370 wake_up(&resync_wait);
4371
4372 return len;
4373}
4374
4375
4376static struct md_sysfs_entry md_sync_force_parallel =
4377__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4378 sync_force_parallel_show, sync_force_parallel_store);
4379
4380static ssize_t
4381sync_speed_show(struct mddev *mddev, char *page)
4382{
4383 unsigned long resync, dt, db;
4384 if (mddev->curr_resync == 0)
4385 return sprintf(page, "none\n");
4386 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4387 dt = (jiffies - mddev->resync_mark) / HZ;
4388 if (!dt) dt++;
4389 db = resync - mddev->resync_mark_cnt;
4390 return sprintf(page, "%lu\n", db/dt/2);
4391}
4392
4393static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4394
4395static ssize_t
4396sync_completed_show(struct mddev *mddev, char *page)
4397{
4398 unsigned long long max_sectors, resync;
4399
4400 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4401 return sprintf(page, "none\n");
4402
4403 if (mddev->curr_resync == 1 ||
4404 mddev->curr_resync == 2)
4405 return sprintf(page, "delayed\n");
4406
4407 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4408 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4409 max_sectors = mddev->resync_max_sectors;
4410 else
4411 max_sectors = mddev->dev_sectors;
4412
4413 resync = mddev->curr_resync_completed;
4414 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4415}
4416
4417static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4418
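/*
 * sync_min/sync_max bound the region (in sectors) that a resync or
 * check will cover; both must be multiples of the chunk size.
 */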
4419static ssize_t
4420min_sync_show(struct mddev *mddev, char *page)
4421{
4422 return sprintf(page, "%llu\n",
4423 (unsigned long long)mddev->resync_min);
4424}
4425static ssize_t
4426min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4427{
4428 unsigned long long min;
4429 if (strict_strtoull(buf, 10, &min))
4430 return -EINVAL;
4431 if (min > mddev->resync_max)
4432 return -EINVAL;
4433 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4434 return -EBUSY;
4435
	/* Must be a multiple of chunk_size */
4437 if (mddev->chunk_sectors) {
4438 sector_t temp = min;
4439 if (sector_div(temp, mddev->chunk_sectors))
4440 return -EINVAL;
4441 }
4442 mddev->resync_min = min;
4443
4444 return len;
4445}
4446
4447static struct md_sysfs_entry md_min_sync =
4448__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4449
4450static ssize_t
4451max_sync_show(struct mddev *mddev, char *page)
4452{
4453 if (mddev->resync_max == MaxSector)
4454 return sprintf(page, "max\n");
4455 else
4456 return sprintf(page, "%llu\n",
4457 (unsigned long long)mddev->resync_max);
4458}
4459static ssize_t
4460max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4461{
4462 if (strncmp(buf, "max", 3) == 0)
4463 mddev->resync_max = MaxSector;
4464 else {
4465 unsigned long long max;
4466 if (strict_strtoull(buf, 10, &max))
4467 return -EINVAL;
4468 if (max < mddev->resync_min)
4469 return -EINVAL;
4470 if (max < mddev->resync_max &&
4471 mddev->ro == 0 &&
4472 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4473 return -EBUSY;
4474
		/* Must be a multiple of chunk_size */
4476 if (mddev->chunk_sectors) {
4477 sector_t temp = max;
4478 if (sector_div(temp, mddev->chunk_sectors))
4479 return -EINVAL;
4480 }
4481 mddev->resync_max = max;
4482 }
4483 wake_up(&mddev->recovery_wait);
4484 return len;
4485}
4486
4487static struct md_sysfs_entry md_max_sync =
4488__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4489
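/*
 * suspend_lo/suspend_hi define a sector range in which writes are
 * suspended; the personality's ->quiesce() method is used to wait for
 * pending IO whenever the range is expanded.
 */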
4490static ssize_t
4491suspend_lo_show(struct mddev *mddev, char *page)
4492{
4493 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4494}
4495
4496static ssize_t
4497suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4498{
4499 char *e;
4500 unsigned long long new = simple_strtoull(buf, &e, 10);
4501 unsigned long long old = mddev->suspend_lo;
4502
4503 if (mddev->pers == NULL ||
4504 mddev->pers->quiesce == NULL)
4505 return -EINVAL;
4506 if (buf == e || (*e && *e != '\n'))
4507 return -EINVAL;
4508
4509 mddev->suspend_lo = new;
4510 if (new >= old)
		/* Shrinking the suspended region */
4512 mddev->pers->quiesce(mddev, 2);
4513 else {
		/* Expanding the suspended region - need to wait */
4515 mddev->pers->quiesce(mddev, 1);
4516 mddev->pers->quiesce(mddev, 0);
4517 }
4518 return len;
4519}
4520static struct md_sysfs_entry md_suspend_lo =
4521__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4522
4523
4524static ssize_t
4525suspend_hi_show(struct mddev *mddev, char *page)
4526{
4527 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4528}
4529
4530static ssize_t
4531suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4532{
4533 char *e;
4534 unsigned long long new = simple_strtoull(buf, &e, 10);
4535 unsigned long long old = mddev->suspend_hi;
4536
4537 if (mddev->pers == NULL ||
4538 mddev->pers->quiesce == NULL)
4539 return -EINVAL;
4540 if (buf == e || (*e && *e != '\n'))
4541 return -EINVAL;
4542
4543 mddev->suspend_hi = new;
4544 if (new <= old)
4545
4546 mddev->pers->quiesce(mddev, 2);
4547 else {
4548
4549 mddev->pers->quiesce(mddev, 1);
4550 mddev->pers->quiesce(mddev, 0);
4551 }
4552 return len;
4553}
4554static struct md_sysfs_entry md_suspend_hi =
4555__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4556
4557static ssize_t
4558reshape_position_show(struct mddev *mddev, char *page)
4559{
4560 if (mddev->reshape_position != MaxSector)
4561 return sprintf(page, "%llu\n",
4562 (unsigned long long)mddev->reshape_position);
4563 strcpy(page, "none\n");
4564 return 5;
4565}
4566
4567static ssize_t
4568reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4569{
4570 struct md_rdev *rdev;
4571 char *e;
4572 unsigned long long new = simple_strtoull(buf, &e, 10);
4573 if (mddev->pers)
4574 return -EBUSY;
4575 if (buf == e || (*e && *e != '\n'))
4576 return -EINVAL;
4577 mddev->reshape_position = new;
4578 mddev->delta_disks = 0;
4579 mddev->reshape_backwards = 0;
4580 mddev->new_level = mddev->level;
4581 mddev->new_layout = mddev->layout;
4582 mddev->new_chunk_sectors = mddev->chunk_sectors;
4583 rdev_for_each(rdev, mddev)
4584 rdev->new_data_offset = rdev->data_offset;
4585 return len;
4586}
4587
4588static struct md_sysfs_entry md_reshape_position =
4589__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4590 reshape_position_store);
4591
4592static ssize_t
4593reshape_direction_show(struct mddev *mddev, char *page)
4594{
4595 return sprintf(page, "%s\n",
4596 mddev->reshape_backwards ? "backwards" : "forwards");
4597}
4598
4599static ssize_t
4600reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4601{
4602 int backwards = 0;
4603 if (cmd_match(buf, "forwards"))
4604 backwards = 0;
4605 else if (cmd_match(buf, "backwards"))
4606 backwards = 1;
4607 else
4608 return -EINVAL;
4609 if (mddev->reshape_backwards == backwards)
4610 return len;
4611
4612
4613 if (mddev->delta_disks)
4614 return -EBUSY;
4615
4616 if (mddev->persistent &&
4617 mddev->major_version == 0)
4618 return -EINVAL;
4619
4620 mddev->reshape_backwards = backwards;
4621 return len;
4622}
4623
4624static struct md_sysfs_entry md_reshape_direction =
4625__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4626 reshape_direction_store);
4627
4628static ssize_t
4629array_size_show(struct mddev *mddev, char *page)
4630{
4631 if (mddev->external_size)
4632 return sprintf(page, "%llu\n",
4633 (unsigned long long)mddev->array_sectors/2);
4634 else
4635 return sprintf(page, "default\n");
4636}
4637
4638static ssize_t
4639array_size_store(struct mddev *mddev, const char *buf, size_t len)
4640{
4641 sector_t sectors;
4642
4643 if (strncmp(buf, "default", 7) == 0) {
4644 if (mddev->pers)
4645 sectors = mddev->pers->size(mddev, 0, 0);
4646 else
4647 sectors = mddev->array_sectors;
4648
4649 mddev->external_size = 0;
4650 } else {
4651 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4652 return -EINVAL;
4653 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4654 return -E2BIG;
4655
4656 mddev->external_size = 1;
4657 }
4658
4659 mddev->array_sectors = sectors;
4660 if (mddev->pers) {
4661 set_capacity(mddev->gendisk, mddev->array_sectors);
4662 revalidate_disk(mddev->gendisk);
4663 }
4664 return len;
4665}
4666
4667static struct md_sysfs_entry md_array_size =
4668__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4669 array_size_store);
4670
4671static struct attribute *md_default_attrs[] = {
4672 &md_level.attr,
4673 &md_layout.attr,
4674 &md_raid_disks.attr,
4675 &md_chunk_size.attr,
4676 &md_size.attr,
4677 &md_resync_start.attr,
4678 &md_metadata.attr,
4679 &md_new_device.attr,
4680 &md_safe_delay.attr,
4681 &md_array_state.attr,
4682 &md_reshape_position.attr,
4683 &md_reshape_direction.attr,
4684 &md_array_size.attr,
4685 &max_corr_read_errors.attr,
4686 NULL,
4687};
4688
4689static struct attribute *md_redundancy_attrs[] = {
4690 &md_scan_mode.attr,
4691 &md_mismatches.attr,
4692 &md_sync_min.attr,
4693 &md_sync_max.attr,
4694 &md_sync_speed.attr,
4695 &md_sync_force_parallel.attr,
4696 &md_sync_completed.attr,
4697 &md_min_sync.attr,
4698 &md_max_sync.attr,
4699 &md_suspend_lo.attr,
4700 &md_suspend_hi.attr,
4701 &md_bitmap.attr,
4702 &md_degraded.attr,
4703 NULL,
4704};
4705static struct attribute_group md_redundancy_group = {
4706 .name = NULL,
4707 .attrs = md_redundancy_attrs,
4708};
4709
4710
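/*
 * sysfs show/store dispatchers: pin the mddev (unless it is already being
 * deleted), take the reconfig mutex, then call the attribute's handler.
 */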
4711static ssize_t
4712md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4713{
4714 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4715 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4716 ssize_t rv;
4717
4718 if (!entry->show)
4719 return -EIO;
4720 spin_lock(&all_mddevs_lock);
4721 if (list_empty(&mddev->all_mddevs)) {
4722 spin_unlock(&all_mddevs_lock);
4723 return -EBUSY;
4724 }
4725 mddev_get(mddev);
4726 spin_unlock(&all_mddevs_lock);
4727
4728 rv = mddev_lock(mddev);
4729 if (!rv) {
4730 rv = entry->show(mddev, page);
4731 mddev_unlock(mddev);
4732 }
4733 mddev_put(mddev);
4734 return rv;
4735}
4736
4737static ssize_t
4738md_attr_store(struct kobject *kobj, struct attribute *attr,
4739 const char *page, size_t length)
4740{
4741 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4742 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4743 ssize_t rv;
4744
4745 if (!entry->store)
4746 return -EIO;
4747 if (!capable(CAP_SYS_ADMIN))
4748 return -EACCES;
4749 spin_lock(&all_mddevs_lock);
4750 if (list_empty(&mddev->all_mddevs)) {
4751 spin_unlock(&all_mddevs_lock);
4752 return -EBUSY;
4753 }
4754 mddev_get(mddev);
4755 spin_unlock(&all_mddevs_lock);
4756 rv = mddev_lock(mddev);
4757 if (!rv) {
4758 rv = entry->store(mddev, page, length);
4759 mddev_unlock(mddev);
4760 }
4761 mddev_put(mddev);
4762 return rv;
4763}
4764
4765static void md_free(struct kobject *ko)
4766{
4767 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4768
4769 if (mddev->sysfs_state)
4770 sysfs_put(mddev->sysfs_state);
4771
4772 if (mddev->gendisk) {
4773 del_gendisk(mddev->gendisk);
4774 put_disk(mddev->gendisk);
4775 }
4776 if (mddev->queue)
4777 blk_cleanup_queue(mddev->queue);
4778
4779 kfree(mddev);
4780}
4781
4782static const struct sysfs_ops md_sysfs_ops = {
4783 .show = md_attr_show,
4784 .store = md_attr_store,
4785};
4786static struct kobj_type md_ktype = {
4787 .release = md_free,
4788 .sysfs_ops = &md_sysfs_ops,
4789 .default_attrs = md_default_attrs,
4790};
4791
4792int mdp_major = 0;
4793
4794static void mddev_delayed_delete(struct work_struct *ws)
4795{
4796 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4797
4798 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4799 kobject_del(&mddev->kobj);
4800 kobject_put(&mddev->kobj);
4801}
4802
4803static int md_alloc(dev_t dev, char *name)
4804{
4805 static DEFINE_MUTEX(disks_mutex);
4806 struct mddev *mddev = mddev_find(dev);
4807 struct gendisk *disk;
4808 int partitioned;
4809 int shift;
4810 int unit;
4811 int error;
4812
4813 if (!mddev)
4814 return -ENODEV;
4815
4816 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4817 shift = partitioned ? MdpMinorShift : 0;
4818 unit = MINOR(mddev->unit) >> shift;
4819
4820
4821
4822
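 /*
  * Wait for any pending delayed deletion of this device to complete
  * before (re)creating it.
  */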
4823 flush_workqueue(md_misc_wq);
4824
4825 mutex_lock(&disks_mutex);
4826 error = -EEXIST;
4827 if (mddev->gendisk)
4828 goto abort;
4829
4830 if (name) {
4831
4832
4833 struct mddev *mddev2;
4834 spin_lock(&all_mddevs_lock);
4835
4836 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4837 if (mddev2->gendisk &&
4838 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4839 spin_unlock(&all_mddevs_lock);
4840 goto abort;
4841 }
4842 spin_unlock(&all_mddevs_lock);
4843 }
4844
4845 error = -ENOMEM;
4846 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4847 if (!mddev->queue)
4848 goto abort;
4849 mddev->queue->queuedata = mddev;
4850
4851 blk_queue_make_request(mddev->queue, md_make_request);
4852 blk_set_stacking_limits(&mddev->queue->limits);
4853
4854 disk = alloc_disk(1 << shift);
4855 if (!disk) {
4856 blk_cleanup_queue(mddev->queue);
4857 mddev->queue = NULL;
4858 goto abort;
4859 }
4860 disk->major = MAJOR(mddev->unit);
4861 disk->first_minor = unit << shift;
4862 if (name)
4863 strcpy(disk->disk_name, name);
4864 else if (partitioned)
4865 sprintf(disk->disk_name, "md_d%d", unit);
4866 else
4867 sprintf(disk->disk_name, "md%d", unit);
4868 disk->fops = &md_fops;
4869 disk->private_data = mddev;
4870 disk->queue = mddev->queue;
4871 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4872
4873
4874
4875
4876 disk->flags |= GENHD_FL_EXT_DEVT;
4877 mddev->gendisk = disk;
4878
4879
4880
4881 mutex_lock(&mddev->open_mutex);
4882 add_disk(disk);
4883
4884 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4885 &disk_to_dev(disk)->kobj, "%s", "md");
4886 if (error) {
4887
4888
4889
4890 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4891 disk->disk_name);
4892 error = 0;
4893 }
4894 if (mddev->kobj.sd &&
4895 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4896 printk(KERN_DEBUG "pointless warning\n");
4897 mutex_unlock(&mddev->open_mutex);
4898 abort:
4899 mutex_unlock(&disks_mutex);
4900 if (!error && mddev->kobj.sd) {
4901 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4902 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4903 }
4904 mddev_put(mddev);
4905 return error;
4906}
4907
4908static struct kobject *md_probe(dev_t dev, int *part, void *data)
4909{
4910 md_alloc(dev, NULL);
4911 return NULL;
4912}
4913
4914static int add_named_array(const char *val, struct kernel_param *kp)
4915{
4916
4917
4918
4919
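 /*
  * 'val' must be a name of the form "md_<something>"; strip any trailing
  * newline and create an array with that name.
  */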
4920 int len = strlen(val);
4921 char buf[DISK_NAME_LEN];
4922
4923 while (len && val[len-1] == '\n')
4924 len--;
4925 if (len >= DISK_NAME_LEN)
4926 return -E2BIG;
4927 strlcpy(buf, val, len+1);
4928 if (strncmp(buf, "md_", 3) != 0)
4929 return -EINVAL;
4930 return md_alloc(0, buf);
4931}
4932
4933static void md_safemode_timeout(unsigned long data)
4934{
4935 struct mddev *mddev = (struct mddev *) data;
4936
4937 if (!atomic_read(&mddev->writes_pending)) {
4938 mddev->safemode = 1;
4939 if (mddev->external)
4940 sysfs_notify_dirent_safe(mddev->sysfs_state);
4941 }
4942 md_wakeup_thread(mddev->thread);
4943}
4944
4945static int start_dirty_degraded;
4946
4947int md_run(struct mddev *mddev)
4948{
4949 int err;
4950 struct md_rdev *rdev;
4951 struct md_personality *pers;
4952
4953 if (list_empty(&mddev->disks))
4954
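 /* cannot run an array with no devices */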
4955 return -EINVAL;
4956
4957 if (mddev->pers)
4958 return -EBUSY;
4959
4960 if (mddev->sysfs_active)
4961 return -EBUSY;
4962
4963
4964
4965
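 /*
  * Analyse all the superblocks if the array has not been assembled yet.
  */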
4966 if (!mddev->raid_disks) {
4967 if (!mddev->persistent)
4968 return -EINVAL;
4969 analyze_sbs(mddev);
4970 }
4971
4972 if (mddev->level != LEVEL_NONE)
4973 request_module("md-level-%d", mddev->level);
4974 else if (mddev->clevel[0])
4975 request_module("md-%s", mddev->clevel);
4976
4977
4978
4979
4980
4981
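 /*
  * Flush and drop cached buffers on every member device; from now on
  * the only valid access path is through the md device itself.
  */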
4982 rdev_for_each(rdev, mddev) {
4983 if (test_bit(Faulty, &rdev->flags))
4984 continue;
4985 sync_blockdev(rdev->bdev);
4986 invalidate_bdev(rdev->bdev);
4987
4988
4989
4990
4991
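 /*
  * Sanity check: the data area and the superblock must not overlap
  * (devices with separate metadata need no check).
  */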
4992 if (rdev->meta_bdev) {
4993 ;
4994 } else if (rdev->data_offset < rdev->sb_start) {
4995 if (mddev->dev_sectors &&
4996 rdev->data_offset + mddev->dev_sectors
4997 > rdev->sb_start) {
4998 printk("md: %s: data overlaps metadata\n",
4999 mdname(mddev));
5000 return -EINVAL;
5001 }
5002 } else {
5003 if (rdev->sb_start + rdev->sb_size/512
5004 > rdev->data_offset) {
5005 printk("md: %s: metadata overlaps data\n",
5006 mdname(mddev));
5007 return -EINVAL;
5008 }
5009 }
5010 sysfs_notify_dirent_safe(rdev->sysfs_state);
5011 }
5012
5013 if (mddev->bio_set == NULL)
5014 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5015
5016 spin_lock(&pers_lock);
5017 pers = find_pers(mddev->level, mddev->clevel);
5018 if (!pers || !try_module_get(pers->owner)) {
5019 spin_unlock(&pers_lock);
5020 if (mddev->level != LEVEL_NONE)
5021 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5022 mddev->level);
5023 else
5024 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5025 mddev->clevel);
5026 return -EINVAL;
5027 }
5028 mddev->pers = pers;
5029 spin_unlock(&pers_lock);
5030 if (mddev->level != pers->level) {
5031 mddev->level = pers->level;
5032 mddev->new_level = pers->level;
5033 }
5034 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5035
5036 if (mddev->reshape_position != MaxSector &&
5037 pers->start_reshape == NULL) {
5038
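 /* this personality cannot handle reshaping */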
5039 mddev->pers = NULL;
5040 module_put(pers->owner);
5041 return -EINVAL;
5042 }
5043
5044 if (pers->sync_request) {
5045
5046
5047
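 /*
  * Warn if any two member devices share the same underlying physical
  * disk - redundancy would be compromised.
  */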
5048 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5049 struct md_rdev *rdev2;
5050 int warned = 0;
5051
5052 rdev_for_each(rdev, mddev)
5053 rdev_for_each(rdev2, mddev) {
5054 if (rdev < rdev2 &&
5055 rdev->bdev->bd_contains ==
5056 rdev2->bdev->bd_contains) {
5057 printk(KERN_WARNING
5058 "%s: WARNING: %s appears to be"
5059 " on the same physical disk as"
5060 " %s.\n",
5061 mdname(mddev),
5062 bdevname(rdev->bdev,b),
5063 bdevname(rdev2->bdev,b2));
5064 warned = 1;
5065 }
5066 }
5067
5068 if (warned)
5069 printk(KERN_WARNING
5070 "True protection against single-disk"
5071 " failure might be compromised.\n");
5072 }
5073
5074 mddev->recovery = 0;
5075
5076 mddev->resync_max_sectors = mddev->dev_sectors;
5077
5078 mddev->ok_start_degraded = start_dirty_degraded;
5079
5080 if (start_readonly && mddev->ro == 0)
5081 mddev->ro = 2;
5082
5083 err = mddev->pers->run(mddev);
5084 if (err)
5085 printk(KERN_ERR "md: pers->run() failed ...\n");
5086 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5087 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5088 " but 'external_size' not in effect?\n", __func__);
5089 printk(KERN_ERR
5090 "md: invalid array_size %llu > default size %llu\n",
5091 (unsigned long long)mddev->array_sectors / 2,
5092 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5093 err = -EINVAL;
5094 mddev->pers->stop(mddev);
5095 }
5096 if (err == 0 && mddev->pers->sync_request &&
5097 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5098 err = bitmap_create(mddev);
5099 if (err) {
5100 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5101 mdname(mddev), err);
5102 mddev->pers->stop(mddev);
5103 }
5104 }
5105 if (err) {
5106 module_put(mddev->pers->owner);
5107 mddev->pers = NULL;
5108 bitmap_destroy(mddev);
5109 return err;
5110 }
5111 if (mddev->pers->sync_request) {
5112 if (mddev->kobj.sd &&
5113 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5114 printk(KERN_WARNING
5115 "md: cannot register extra attributes for %s\n",
5116 mdname(mddev));
5117 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5118 } else if (mddev->ro == 2)
5119 mddev->ro = 0;
5120
5121 atomic_set(&mddev->writes_pending,0);
5122 atomic_set(&mddev->max_corr_read_errors,
5123 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5124 mddev->safemode = 0;
5125 mddev->safemode_timer.function = md_safemode_timeout;
5126 mddev->safemode_timer.data = (unsigned long) mddev;
5127 mddev->safemode_delay = (200 * HZ)/1000 +1;
5128 mddev->in_sync = 1;
5129 smp_wmb();
5130 mddev->ready = 1;
5131 rdev_for_each(rdev, mddev)
5132 if (rdev->raid_disk >= 0)
5133 if (sysfs_link_rdev(mddev, rdev))
5134 ;
5135
5136 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5137
5138 if (mddev->flags)
5139 md_update_sb(mddev, 0);
5140
5141 md_new_event(mddev);
5142 sysfs_notify_dirent_safe(mddev->sysfs_state);
5143 sysfs_notify_dirent_safe(mddev->sysfs_action);
5144 sysfs_notify(&mddev->kobj, NULL, "degraded");
5145 return 0;
5146}
5147EXPORT_SYMBOL_GPL(md_run);
5148
5149static int do_md_run(struct mddev *mddev)
5150{
5151 int err;
5152
5153 err = md_run(mddev);
5154 if (err)
5155 goto out;
5156 err = bitmap_load(mddev);
5157 if (err) {
5158 bitmap_destroy(mddev);
5159 goto out;
5160 }
5161
5162 md_wakeup_thread(mddev->thread);
5163 md_wakeup_thread(mddev->sync_thread);
5164
5165 set_capacity(mddev->gendisk, mddev->array_sectors);
5166 revalidate_disk(mddev->gendisk);
5167 mddev->changed = 1;
5168 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5169out:
5170 return err;
5171}
5172
5173static int restart_array(struct mddev *mddev)
5174{
5175 struct gendisk *disk = mddev->gendisk;
5176
5177
5178 if (list_empty(&mddev->disks))
5179 return -ENXIO;
5180 if (!mddev->pers)
5181 return -EINVAL;
5182 if (!mddev->ro)
5183 return -EBUSY;
5184 mddev->safemode = 0;
5185 mddev->ro = 0;
5186 set_disk_ro(disk, 0);
5187 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5188 mdname(mddev));
5189
5190 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5191 md_wakeup_thread(mddev->thread);
5192 md_wakeup_thread(mddev->sync_thread);
5193 sysfs_notify_dirent_safe(mddev->sysfs_state);
5194 return 0;
5195}
5196
5197
5198
5199static int deny_bitmap_write_access(struct file * file)
5200{
5201 struct inode *inode = file->f_mapping->host;
5202
5203 spin_lock(&inode->i_lock);
5204 if (atomic_read(&inode->i_writecount) > 1) {
5205 spin_unlock(&inode->i_lock);
5206 return -ETXTBSY;
5207 }
5208 atomic_set(&inode->i_writecount, -1);
5209 spin_unlock(&inode->i_lock);
5210
5211 return 0;
5212}
5213
5214void restore_bitmap_write_access(struct file *file)
5215{
5216 struct inode *inode = file->f_mapping->host;
5217
5218 spin_lock(&inode->i_lock);
5219 atomic_set(&inode->i_writecount, 1);
5220 spin_unlock(&inode->i_lock);
5221}
5222
5223static void md_clean(struct mddev *mddev)
5224{
5225 mddev->array_sectors = 0;
5226 mddev->external_size = 0;
5227 mddev->dev_sectors = 0;
5228 mddev->raid_disks = 0;
5229 mddev->recovery_cp = 0;
5230 mddev->resync_min = 0;
5231 mddev->resync_max = MaxSector;
5232 mddev->reshape_position = MaxSector;
5233 mddev->external = 0;
5234 mddev->persistent = 0;
5235 mddev->level = LEVEL_NONE;
5236 mddev->clevel[0] = 0;
5237 mddev->flags = 0;
5238 mddev->ro = 0;
5239 mddev->metadata_type[0] = 0;
5240 mddev->chunk_sectors = 0;
5241 mddev->ctime = mddev->utime = 0;
5242 mddev->layout = 0;
5243 mddev->max_disks = 0;
5244 mddev->events = 0;
5245 mddev->can_decrease_events = 0;
5246 mddev->delta_disks = 0;
5247 mddev->reshape_backwards = 0;
5248 mddev->new_level = LEVEL_NONE;
5249 mddev->new_layout = 0;
5250 mddev->new_chunk_sectors = 0;
5251 mddev->curr_resync = 0;
5252 atomic64_set(&mddev->resync_mismatches, 0);
5253 mddev->suspend_lo = mddev->suspend_hi = 0;
5254 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5255 mddev->recovery = 0;
5256 mddev->in_sync = 0;
5257 mddev->changed = 0;
5258 mddev->degraded = 0;
5259 mddev->safemode = 0;
5260 mddev->merge_check_needed = 0;
5261 mddev->bitmap_info.offset = 0;
5262 mddev->bitmap_info.default_offset = 0;
5263 mddev->bitmap_info.default_space = 0;
5264 mddev->bitmap_info.chunksize = 0;
5265 mddev->bitmap_info.daemon_sleep = 0;
5266 mddev->bitmap_info.max_write_behind = 0;
5267}
5268
5269static void __md_stop_writes(struct mddev *mddev)
5270{
5271 if (mddev->sync_thread) {
5272 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5274 reap_sync_thread(mddev);
5275 }
5276
5277 del_timer_sync(&mddev->safemode_timer);
5278
5279 bitmap_flush(mddev);
5280 md_super_wait(mddev);
5281
5282 if (!mddev->in_sync || mddev->flags) {
5283
5284 mddev->in_sync = 1;
5285 md_update_sb(mddev, 1);
5286 }
5287}
5288
5289void md_stop_writes(struct mddev *mddev)
5290{
5291 mddev_lock(mddev);
5292 __md_stop_writes(mddev);
5293 mddev_unlock(mddev);
5294}
5295EXPORT_SYMBOL_GPL(md_stop_writes);
5296
5297static void __md_stop(struct mddev *mddev)
5298{
5299 mddev->ready = 0;
5300 mddev->pers->stop(mddev);
5301 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5302 mddev->to_remove = &md_redundancy_group;
5303 module_put(mddev->pers->owner);
5304 mddev->pers = NULL;
5305 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5306}
5307
5308void md_stop(struct mddev *mddev)
5309{
5310
5311
5312
5313 __md_stop(mddev);
5314 bitmap_destroy(mddev);
5315 if (mddev->bio_set)
5316 bioset_free(mddev->bio_set);
5317}
5318
5319EXPORT_SYMBOL_GPL(md_stop);
5320
5321static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5322{
5323 int err = 0;
5324 mutex_lock(&mddev->open_mutex);
5325 if (atomic_read(&mddev->openers) > !!bdev) {
5326 printk("md: %s still in use.\n",mdname(mddev));
5327 err = -EBUSY;
5328 goto out;
5329 }
5330 if (bdev)
5331 sync_blockdev(bdev);
5332 if (mddev->pers) {
5333 __md_stop_writes(mddev);
5334
5335 err = -ENXIO;
5336 if (mddev->ro==1)
5337 goto out;
5338 mddev->ro = 1;
5339 set_disk_ro(mddev->gendisk, 1);
5340 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5341 sysfs_notify_dirent_safe(mddev->sysfs_state);
5342 err = 0;
5343 }
5344out:
5345 mutex_unlock(&mddev->open_mutex);
5346 return err;
5347}
5348
5349
5350
5351
5352
5353static int do_md_stop(struct mddev * mddev, int mode,
5354 struct block_device *bdev)
5355{
5356 struct gendisk *disk = mddev->gendisk;
5357 struct md_rdev *rdev;
5358
5359 mutex_lock(&mddev->open_mutex);
5360 if (atomic_read(&mddev->openers) > !!bdev ||
5361 mddev->sysfs_active) {
5362 printk("md: %s still in use.\n",mdname(mddev));
5363 mutex_unlock(&mddev->open_mutex);
5364 return -EBUSY;
5365 }
5366 if (bdev)
5367
5368
5369
5370
5371
5372 sync_blockdev(bdev);
5373
5374 if (mddev->pers) {
5375 if (mddev->ro)
5376 set_disk_ro(disk, 0);
5377
5378 __md_stop_writes(mddev);
5379 __md_stop(mddev);
5380 mddev->queue->merge_bvec_fn = NULL;
5381 mddev->queue->backing_dev_info.congested_fn = NULL;
5382
5383
5384 sysfs_notify_dirent_safe(mddev->sysfs_state);
5385
5386 rdev_for_each(rdev, mddev)
5387 if (rdev->raid_disk >= 0)
5388 sysfs_unlink_rdev(mddev, rdev);
5389
5390 set_capacity(disk, 0);
5391 mutex_unlock(&mddev->open_mutex);
5392 mddev->changed = 1;
5393 revalidate_disk(disk);
5394
5395 if (mddev->ro)
5396 mddev->ro = 0;
5397 } else
5398 mutex_unlock(&mddev->open_mutex);
5399
5400
5401
5402 if (mode == 0) {
5403 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5404
5405 bitmap_destroy(mddev);
5406 if (mddev->bitmap_info.file) {
5407 restore_bitmap_write_access(mddev->bitmap_info.file);
5408 fput(mddev->bitmap_info.file);
5409 mddev->bitmap_info.file = NULL;
5410 }
5411 mddev->bitmap_info.offset = 0;
5412
5413 export_array(mddev);
5414
5415 md_clean(mddev);
5416 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5417 if (mddev->hold_active == UNTIL_STOP)
5418 mddev->hold_active = 0;
5419 }
5420 blk_integrity_unregister(disk);
5421 md_new_event(mddev);
5422 sysfs_notify_dirent_safe(mddev->sysfs_state);
5423 return 0;
5424}
5425
5426#ifndef MODULE
5427static void autorun_array(struct mddev *mddev)
5428{
5429 struct md_rdev *rdev;
5430 int err;
5431
5432 if (list_empty(&mddev->disks))
5433 return;
5434
5435 printk(KERN_INFO "md: running: ");
5436
5437 rdev_for_each(rdev, mddev) {
5438 char b[BDEVNAME_SIZE];
5439 printk("<%s>", bdevname(rdev->bdev,b));
5440 }
5441 printk("\n");
5442
5443 err = do_md_run(mddev);
5444 if (err) {
5445 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5446 do_md_stop(mddev, 0, NULL);
5447 }
5448}
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462static void autorun_devices(int part)
5463{
5464 struct md_rdev *rdev0, *rdev, *tmp;
5465 struct mddev *mddev;
5466 char b[BDEVNAME_SIZE];
5467
5468 printk(KERN_INFO "md: autorun ...\n");
5469 while (!list_empty(&pending_raid_disks)) {
5470 int unit;
5471 dev_t dev;
5472 LIST_HEAD(candidates);
5473 rdev0 = list_entry(pending_raid_disks.next,
5474 struct md_rdev, same_set);
5475
5476 printk(KERN_INFO "md: considering %s ...\n",
5477 bdevname(rdev0->bdev,b));
5478 INIT_LIST_HEAD(&candidates);
5479 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5480 if (super_90_load(rdev, rdev0, 0) >= 0) {
5481 printk(KERN_INFO "md: adding %s ...\n",
5482 bdevname(rdev->bdev,b));
5483 list_move(&rdev->same_set, &candidates);
5484 }
5485
5486
5487
5488
5489
5490 if (part) {
5491 dev = MKDEV(mdp_major,
5492 rdev0->preferred_minor << MdpMinorShift);
5493 unit = MINOR(dev) >> MdpMinorShift;
5494 } else {
5495 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5496 unit = MINOR(dev);
5497 }
5498 if (rdev0->preferred_minor != unit) {
5499 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5500 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5501 break;
5502 }
5503
5504 md_probe(dev, NULL, NULL);
5505 mddev = mddev_find(dev);
5506 if (!mddev || !mddev->gendisk) {
5507 if (mddev)
5508 mddev_put(mddev);
5509 printk(KERN_ERR
5510 "md: cannot allocate memory for md drive.\n");
5511 break;
5512 }
5513 if (mddev_lock(mddev))
5514 printk(KERN_WARNING "md: %s locked, cannot run\n",
5515 mdname(mddev));
5516 else if (mddev->raid_disks || mddev->major_version
5517 || !list_empty(&mddev->disks)) {
5518 printk(KERN_WARNING
5519 "md: %s already running, cannot run %s\n",
5520 mdname(mddev), bdevname(rdev0->bdev,b));
5521 mddev_unlock(mddev);
5522 } else {
5523 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5524 mddev->persistent = 1;
5525 rdev_for_each_list(rdev, tmp, &candidates) {
5526 list_del_init(&rdev->same_set);
5527 if (bind_rdev_to_array(rdev, mddev))
5528 export_rdev(rdev);
5529 }
5530 autorun_array(mddev);
5531 mddev_unlock(mddev);
5532 }
5533
5534
5535
5536 rdev_for_each_list(rdev, tmp, &candidates) {
5537 list_del_init(&rdev->same_set);
5538 export_rdev(rdev);
5539 }
5540 mddev_put(mddev);
5541 }
5542 printk(KERN_INFO "md: ... autorun DONE.\n");
5543}
5544#endif
5545
5546static int get_version(void __user * arg)
5547{
5548 mdu_version_t ver;
5549
5550 ver.major = MD_MAJOR_VERSION;
5551 ver.minor = MD_MINOR_VERSION;
5552 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5553
5554 if (copy_to_user(arg, &ver, sizeof(ver)))
5555 return -EFAULT;
5556
5557 return 0;
5558}
5559
5560static int get_array_info(struct mddev * mddev, void __user * arg)
5561{
5562 mdu_array_info_t info;
5563 int nr,working,insync,failed,spare;
5564 struct md_rdev *rdev;
5565
5566 nr = working = insync = failed = spare = 0;
5567 rcu_read_lock();
5568 rdev_for_each_rcu(rdev, mddev) {
5569 nr++;
5570 if (test_bit(Faulty, &rdev->flags))
5571 failed++;
5572 else {
5573 working++;
5574 if (test_bit(In_sync, &rdev->flags))
5575 insync++;
5576 else
5577 spare++;
5578 }
5579 }
5580 rcu_read_unlock();
5581
5582 info.major_version = mddev->major_version;
5583 info.minor_version = mddev->minor_version;
5584 info.patch_version = MD_PATCHLEVEL_VERSION;
5585 info.ctime = mddev->ctime;
5586 info.level = mddev->level;
5587 info.size = mddev->dev_sectors / 2;
5588 if (info.size != mddev->dev_sectors / 2)
5589 info.size = -1;
5590 info.nr_disks = nr;
5591 info.raid_disks = mddev->raid_disks;
5592 info.md_minor = mddev->md_minor;
5593 info.not_persistent= !mddev->persistent;
5594
5595 info.utime = mddev->utime;
5596 info.state = 0;
5597 if (mddev->in_sync)
5598 info.state = (1<<MD_SB_CLEAN);
5599 if (mddev->bitmap && mddev->bitmap_info.offset)
5600 info.state = (1<<MD_SB_BITMAP_PRESENT);
5601 info.active_disks = insync;
5602 info.working_disks = working;
5603 info.failed_disks = failed;
5604 info.spare_disks = spare;
5605
5606 info.layout = mddev->layout;
5607 info.chunk_size = mddev->chunk_sectors << 9;
5608
5609 if (copy_to_user(arg, &info, sizeof(info)))
5610 return -EFAULT;
5611
5612 return 0;
5613}
5614
5615static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5616{
5617 mdu_bitmap_file_t *file = NULL;
5618 char *ptr, *buf = NULL;
5619 int err = -ENOMEM;
5620
5621 if (md_allow_write(mddev))
5622 file = kmalloc(sizeof(*file), GFP_NOIO);
5623 else
5624 file = kmalloc(sizeof(*file), GFP_KERNEL);
5625
5626 if (!file)
5627 goto out;
5628
5629
5630 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5631 file->pathname[0] = '\0';
5632 goto copy_out;
5633 }
5634
5635 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5636 if (!buf)
5637 goto out;
5638
5639 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5640 buf, sizeof(file->pathname));
5641 if (IS_ERR(ptr))
5642 goto out;
5643
5644 strcpy(file->pathname, ptr);
5645
5646copy_out:
5647 err = 0;
5648 if (copy_to_user(arg, file, sizeof(*file)))
5649 err = -EFAULT;
5650out:
5651 kfree(buf);
5652 kfree(file);
5653 return err;
5654}
5655
5656static int get_disk_info(struct mddev * mddev, void __user * arg)
5657{
5658 mdu_disk_info_t info;
5659 struct md_rdev *rdev;
5660
5661 if (copy_from_user(&info, arg, sizeof(info)))
5662 return -EFAULT;
5663
5664 rcu_read_lock();
5665 rdev = find_rdev_nr_rcu(mddev, info.number);
5666 if (rdev) {
5667 info.major = MAJOR(rdev->bdev->bd_dev);
5668 info.minor = MINOR(rdev->bdev->bd_dev);
5669 info.raid_disk = rdev->raid_disk;
5670 info.state = 0;
5671 if (test_bit(Faulty, &rdev->flags))
5672 info.state |= (1<<MD_DISK_FAULTY);
5673 else if (test_bit(In_sync, &rdev->flags)) {
5674 info.state |= (1<<MD_DISK_ACTIVE);
5675 info.state |= (1<<MD_DISK_SYNC);
5676 }
5677 if (test_bit(WriteMostly, &rdev->flags))
5678 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5679 } else {
5680 info.major = info.minor = 0;
5681 info.raid_disk = -1;
5682 info.state = (1<<MD_DISK_REMOVED);
5683 }
5684 rcu_read_unlock();
5685
5686 if (copy_to_user(arg, &info, sizeof(info)))
5687 return -EFAULT;
5688
5689 return 0;
5690}
5691
5692static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5693{
5694 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5695 struct md_rdev *rdev;
5696 dev_t dev = MKDEV(info->major,info->minor);
5697
5698 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5699 return -EOVERFLOW;
5700
5701 if (!mddev->raid_disks) {
5702 int err;
5703
5704 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5705 if (IS_ERR(rdev)) {
5706 printk(KERN_WARNING
5707 "md: md_import_device returned %ld\n",
5708 PTR_ERR(rdev));
5709 return PTR_ERR(rdev);
5710 }
5711 if (!list_empty(&mddev->disks)) {
5712 struct md_rdev *rdev0
5713 = list_entry(mddev->disks.next,
5714 struct md_rdev, same_set);
5715 err = super_types[mddev->major_version]
5716 .load_super(rdev, rdev0, mddev->minor_version);
5717 if (err < 0) {
5718 printk(KERN_WARNING
5719 "md: %s has different UUID to %s\n",
5720 bdevname(rdev->bdev,b),
5721 bdevname(rdev0->bdev,b2));
5722 export_rdev(rdev);
5723 return -EINVAL;
5724 }
5725 }
5726 err = bind_rdev_to_array(rdev, mddev);
5727 if (err)
5728 export_rdev(rdev);
5729 return err;
5730 }
5731
5732
5733
5734
5735
5736
5737 if (mddev->pers) {
5738 int err;
5739 if (!mddev->pers->hot_add_disk) {
5740 printk(KERN_WARNING
5741 "%s: personality does not support diskops!\n",
5742 mdname(mddev));
5743 return -EINVAL;
5744 }
5745 if (mddev->persistent)
5746 rdev = md_import_device(dev, mddev->major_version,
5747 mddev->minor_version);
5748 else
5749 rdev = md_import_device(dev, -1, -1);
5750 if (IS_ERR(rdev)) {
5751 printk(KERN_WARNING
5752 "md: md_import_device returned %ld\n",
5753 PTR_ERR(rdev));
5754 return PTR_ERR(rdev);
5755 }
5756
5757 if (!mddev->persistent) {
5758 if (info->state & (1<<MD_DISK_SYNC) &&
5759 info->raid_disk < mddev->raid_disks) {
5760 rdev->raid_disk = info->raid_disk;
5761 set_bit(In_sync, &rdev->flags);
5762 } else
5763 rdev->raid_disk = -1;
5764 } else
5765 super_types[mddev->major_version].
5766 validate_super(mddev, rdev);
5767 if ((info->state & (1<<MD_DISK_SYNC)) &&
5768 rdev->raid_disk != info->raid_disk) {
5769
5770
5771
5772 export_rdev(rdev);
5773 return -EINVAL;
5774 }
5775
5776 if (test_bit(In_sync, &rdev->flags))
5777 rdev->saved_raid_disk = rdev->raid_disk;
5778 else
5779 rdev->saved_raid_disk = -1;
5780
5781 clear_bit(In_sync, &rdev->flags);
5782 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5783 set_bit(WriteMostly, &rdev->flags);
5784 else
5785 clear_bit(WriteMostly, &rdev->flags);
5786
5787 rdev->raid_disk = -1;
5788 err = bind_rdev_to_array(rdev, mddev);
5789 if (!err && !mddev->pers->hot_remove_disk) {
5790
5791
5792
5793
5794 super_types[mddev->major_version].
5795 validate_super(mddev, rdev);
5796 err = mddev->pers->hot_add_disk(mddev, rdev);
5797 if (err)
5798 unbind_rdev_from_array(rdev);
5799 }
5800 if (err)
5801 export_rdev(rdev);
5802 else
5803 sysfs_notify_dirent_safe(rdev->sysfs_state);
5804
5805 md_update_sb(mddev, 1);
5806 if (mddev->degraded)
5807 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5808 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5809 if (!err)
5810 md_new_event(mddev);
5811 md_wakeup_thread(mddev->thread);
5812 return err;
5813 }
5814
5815
5816
5817
5818 if (mddev->major_version != 0) {
5819 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5820 mdname(mddev));
5821 return -EINVAL;
5822 }
5823
5824 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5825 int err;
5826 rdev = md_import_device(dev, -1, 0);
5827 if (IS_ERR(rdev)) {
5828 printk(KERN_WARNING
5829 "md: error, md_import_device() returned %ld\n",
5830 PTR_ERR(rdev));
5831 return PTR_ERR(rdev);
5832 }
5833 rdev->desc_nr = info->number;
5834 if (info->raid_disk < mddev->raid_disks)
5835 rdev->raid_disk = info->raid_disk;
5836 else
5837 rdev->raid_disk = -1;
5838
5839 if (rdev->raid_disk < mddev->raid_disks)
5840 if (info->state & (1<<MD_DISK_SYNC))
5841 set_bit(In_sync, &rdev->flags);
5842
5843 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5844 set_bit(WriteMostly, &rdev->flags);
5845
5846 if (!mddev->persistent) {
5847 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5848 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5849 } else
5850 rdev->sb_start = calc_dev_sboffset(rdev);
5851 rdev->sectors = rdev->sb_start;
5852
5853 err = bind_rdev_to_array(rdev, mddev);
5854 if (err) {
5855 export_rdev(rdev);
5856 return err;
5857 }
5858 }
5859
5860 return 0;
5861}
5862
5863static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5864{
5865 char b[BDEVNAME_SIZE];
5866 struct md_rdev *rdev;
5867
5868 rdev = find_rdev(mddev, dev);
5869 if (!rdev)
5870 return -ENXIO;
5871
5872 if (rdev->raid_disk >= 0)
5873 goto busy;
5874
5875 kick_rdev_from_array(rdev);
5876 md_update_sb(mddev, 1);
5877 md_new_event(mddev);
5878
5879 return 0;
5880busy:
5881 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5882 bdevname(rdev->bdev,b), mdname(mddev));
5883 return -EBUSY;
5884}
5885
5886static int hot_add_disk(struct mddev * mddev, dev_t dev)
5887{
5888 char b[BDEVNAME_SIZE];
5889 int err;
5890 struct md_rdev *rdev;
5891
5892 if (!mddev->pers)
5893 return -ENODEV;
5894
5895 if (mddev->major_version != 0) {
5896 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5897 " version-0 superblocks.\n",
5898 mdname(mddev));
5899 return -EINVAL;
5900 }
5901 if (!mddev->pers->hot_add_disk) {
5902 printk(KERN_WARNING
5903 "%s: personality does not support diskops!\n",
5904 mdname(mddev));
5905 return -EINVAL;
5906 }
5907
5908 rdev = md_import_device(dev, -1, 0);
5909 if (IS_ERR(rdev)) {
5910 printk(KERN_WARNING
5911 "md: error, md_import_device() returned %ld\n",
5912 PTR_ERR(rdev));
5913 return -EINVAL;
5914 }
5915
5916 if (mddev->persistent)
5917 rdev->sb_start = calc_dev_sboffset(rdev);
5918 else
5919 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5920
5921 rdev->sectors = rdev->sb_start;
5922
5923 if (test_bit(Faulty, &rdev->flags)) {
5924 printk(KERN_WARNING
5925 "md: can not hot-add faulty %s disk to %s!\n",
5926 bdevname(rdev->bdev,b), mdname(mddev));
5927 err = -EINVAL;
5928 goto abort_export;
5929 }
5930 clear_bit(In_sync, &rdev->flags);
5931 rdev->desc_nr = -1;
5932 rdev->saved_raid_disk = -1;
5933 err = bind_rdev_to_array(rdev, mddev);
5934 if (err)
5935 goto abort_export;
5936
5937
5938
5939
5940
5941
5942 rdev->raid_disk = -1;
5943
5944 md_update_sb(mddev, 1);
5945
5946
5947
5948
5949
5950 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5951 md_wakeup_thread(mddev->thread);
5952 md_new_event(mddev);
5953 return 0;
5954
5955abort_export:
5956 export_rdev(rdev);
5957 return err;
5958}
5959
5960static int set_bitmap_file(struct mddev *mddev, int fd)
5961{
5962 int err;
5963
5964 if (mddev->pers) {
5965 if (!mddev->pers->quiesce)
5966 return -EBUSY;
5967 if (mddev->recovery || mddev->sync_thread)
5968 return -EBUSY;
5969
5970 }
5971
5972
5973 if (fd >= 0) {
5974 if (mddev->bitmap)
5975 return -EEXIST;
5976 mddev->bitmap_info.file = fget(fd);
5977
5978 if (mddev->bitmap_info.file == NULL) {
5979 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5980 mdname(mddev));
5981 return -EBADF;
5982 }
5983
5984 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5985 if (err) {
5986 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5987 mdname(mddev));
5988 fput(mddev->bitmap_info.file);
5989 mddev->bitmap_info.file = NULL;
5990 return err;
5991 }
5992 mddev->bitmap_info.offset = 0;
5993 } else if (mddev->bitmap == NULL)
5994 return -ENOENT;
5995 err = 0;
5996 if (mddev->pers) {
5997 mddev->pers->quiesce(mddev, 1);
5998 if (fd >= 0) {
5999 err = bitmap_create(mddev);
6000 if (!err)
6001 err = bitmap_load(mddev);
6002 }
6003 if (fd < 0 || err) {
6004 bitmap_destroy(mddev);
6005 fd = -1;
6006 }
6007 mddev->pers->quiesce(mddev, 0);
6008 }
6009 if (fd < 0) {
6010 if (mddev->bitmap_info.file) {
6011 restore_bitmap_write_access(mddev->bitmap_info.file);
6012 fput(mddev->bitmap_info.file);
6013 }
6014 mddev->bitmap_info.file = NULL;
6015 }
6016
6017 return err;
6018}
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
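/*
 * set_array_info() is used both to create a new array (raid_disks > 0;
 * the array shape is taken from 'info') and to prepare for assembling an
 * existing one (raid_disks == 0; only the superblock version fields are
 * used).
 */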
6033static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6034{
6035
6036 if (info->raid_disks == 0) {
6037
6038 if (info->major_version < 0 ||
6039 info->major_version >= ARRAY_SIZE(super_types) ||
6040 super_types[info->major_version].name == NULL) {
6041
6042 printk(KERN_INFO
6043 "md: superblock version %d not known\n",
6044 info->major_version);
6045 return -EINVAL;
6046 }
6047 mddev->major_version = info->major_version;
6048 mddev->minor_version = info->minor_version;
6049 mddev->patch_version = info->patch_version;
6050 mddev->persistent = !info->not_persistent;
6051
6052
6053
6054 mddev->ctime = get_seconds();
6055 return 0;
6056 }
6057 mddev->major_version = MD_MAJOR_VERSION;
6058 mddev->minor_version = MD_MINOR_VERSION;
6059 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6060 mddev->ctime = get_seconds();
6061
6062 mddev->level = info->level;
6063 mddev->clevel[0] = 0;
6064 mddev->dev_sectors = 2 * (sector_t)info->size;
6065 mddev->raid_disks = info->raid_disks;
6066
6067
6068
6069 if (info->state & (1<<MD_SB_CLEAN))
6070 mddev->recovery_cp = MaxSector;
6071 else
6072 mddev->recovery_cp = 0;
6073 mddev->persistent = ! info->not_persistent;
6074 mddev->external = 0;
6075
6076 mddev->layout = info->layout;
6077 mddev->chunk_sectors = info->chunk_size >> 9;
6078
6079 mddev->max_disks = MD_SB_DISKS;
6080
6081 if (mddev->persistent)
6082 mddev->flags = 0;
6083 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6084
6085 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6086 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6087 mddev->bitmap_info.offset = 0;
6088
6089 mddev->reshape_position = MaxSector;
6090
6091
6092
6093
6094 get_random_bytes(mddev->uuid, 16);
6095
6096 mddev->new_level = mddev->level;
6097 mddev->new_chunk_sectors = mddev->chunk_sectors;
6098 mddev->new_layout = mddev->layout;
6099 mddev->delta_disks = 0;
6100 mddev->reshape_backwards = 0;
6101
6102 return 0;
6103}
6104
6105void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6106{
6107 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6108
6109 if (mddev->external_size)
6110 return;
6111
6112 mddev->array_sectors = array_sectors;
6113}
6114EXPORT_SYMBOL(md_set_array_sectors);
6115
6116static int update_size(struct mddev *mddev, sector_t num_sectors)
6117{
6118 struct md_rdev *rdev;
6119 int rv;
6120 int fit = (num_sectors == 0);
6121
6122 if (mddev->pers->resize == NULL)
6123 return -EINVAL;
6124
6125
6126
6127
6128
6129
6130
6131
6132
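 /*
  * num_sectors is the per-device size to use; zero means "use the largest
  * size that fits on every member device".  Resizing is refused while a
  * resync/reshape thread is running.
  */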
6133 if (mddev->sync_thread)
6134 return -EBUSY;
6135
6136 rdev_for_each(rdev, mddev) {
6137 sector_t avail = rdev->sectors;
6138
6139 if (fit && (num_sectors == 0 || num_sectors > avail))
6140 num_sectors = avail;
6141 if (avail < num_sectors)
6142 return -ENOSPC;
6143 }
6144 rv = mddev->pers->resize(mddev, num_sectors);
6145 if (!rv)
6146 revalidate_disk(mddev->gendisk);
6147 return rv;
6148}
6149
6150static int update_raid_disks(struct mddev *mddev, int raid_disks)
6151{
6152 int rv;
6153 struct md_rdev *rdev;
6154
6155 if (mddev->pers->check_reshape == NULL)
6156 return -EINVAL;
6157 if (raid_disks <= 0 ||
6158 (mddev->max_disks && raid_disks >= mddev->max_disks))
6159 return -EINVAL;
6160 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6161 return -EBUSY;
6162
6163 rdev_for_each(rdev, mddev) {
6164 if (mddev->raid_disks < raid_disks &&
6165 rdev->data_offset < rdev->new_data_offset)
6166 return -EINVAL;
6167 if (mddev->raid_disks > raid_disks &&
6168 rdev->data_offset > rdev->new_data_offset)
6169 return -EINVAL;
6170 }
6171
6172 mddev->delta_disks = raid_disks - mddev->raid_disks;
6173 if (mddev->delta_disks < 0)
6174 mddev->reshape_backwards = 1;
6175 else if (mddev->delta_disks > 0)
6176 mddev->reshape_backwards = 0;
6177
6178 rv = mddev->pers->check_reshape(mddev);
6179 if (rv < 0) {
6180 mddev->delta_disks = 0;
6181 mddev->reshape_backwards = 0;
6182 }
6183 return rv;
6184}
6185
6186
6187
6188
6189
6190
6191
6192
6193
6194
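/*
 * update_array_info() reconfigures an active array.  Only one of size,
 * raid_disks, layout or bitmap presence may change per call; all other
 * fields must match the current configuration.
 */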
6195static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6196{
6197 int rv = 0;
6198 int cnt = 0;
6199 int state = 0;
6200
6201
6202 if (mddev->bitmap && mddev->bitmap_info.offset)
6203 state |= (1 << MD_SB_BITMAP_PRESENT);
6204
6205 if (mddev->major_version != info->major_version ||
6206 mddev->minor_version != info->minor_version ||
6207
6208 mddev->ctime != info->ctime ||
6209 mddev->level != info->level ||
6210
6211 !mddev->persistent != info->not_persistent||
6212 mddev->chunk_sectors != info->chunk_size >> 9 ||
6213
6214 ((state^info->state) & 0xfffffe00)
6215 )
6216 return -EINVAL;
6217
6218 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6219 cnt++;
6220 if (mddev->raid_disks != info->raid_disks)
6221 cnt++;
6222 if (mddev->layout != info->layout)
6223 cnt++;
6224 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6225 cnt++;
6226 if (cnt == 0)
6227 return 0;
6228 if (cnt > 1)
6229 return -EINVAL;
6230
6231 if (mddev->layout != info->layout) {
6232
6233
6234
6235
6236 if (mddev->pers->check_reshape == NULL)
6237 return -EINVAL;
6238 else {
6239 mddev->new_layout = info->layout;
6240 rv = mddev->pers->check_reshape(mddev);
6241 if (rv)
6242 mddev->new_layout = mddev->layout;
6243 return rv;
6244 }
6245 }
6246 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6247 rv = update_size(mddev, (sector_t)info->size * 2);
6248
6249 if (mddev->raid_disks != info->raid_disks)
6250 rv = update_raid_disks(mddev, info->raid_disks);
6251
6252 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6253 if (mddev->pers->quiesce == NULL)
6254 return -EINVAL;
6255 if (mddev->recovery || mddev->sync_thread)
6256 return -EBUSY;
6257 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6258
6259 if (mddev->bitmap)
6260 return -EEXIST;
6261 if (mddev->bitmap_info.default_offset == 0)
6262 return -EINVAL;
6263 mddev->bitmap_info.offset =
6264 mddev->bitmap_info.default_offset;
6265 mddev->bitmap_info.space =
6266 mddev->bitmap_info.default_space;
6267 mddev->pers->quiesce(mddev, 1);
6268 rv = bitmap_create(mddev);
6269 if (!rv)
6270 rv = bitmap_load(mddev);
6271 if (rv)
6272 bitmap_destroy(mddev);
6273 mddev->pers->quiesce(mddev, 0);
6274 } else {
6275
6276 if (!mddev->bitmap)
6277 return -ENOENT;
6278 if (mddev->bitmap->storage.file)
6279 return -EINVAL;
6280 mddev->pers->quiesce(mddev, 1);
6281 bitmap_destroy(mddev);
6282 mddev->pers->quiesce(mddev, 0);
6283 mddev->bitmap_info.offset = 0;
6284 }
6285 }
6286 md_update_sb(mddev, 1);
6287 return rv;
6288}
6289
6290static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6291{
6292 struct md_rdev *rdev;
6293 int err = 0;
6294
6295 if (mddev->pers == NULL)
6296 return -ENODEV;
6297
6298 rcu_read_lock();
6299 rdev = find_rdev_rcu(mddev, dev);
6300 if (!rdev)
6301 err = -ENODEV;
6302 else {
6303 md_error(mddev, rdev);
6304 if (!test_bit(Faulty, &rdev->flags))
6305 err = -EBUSY;
6306 }
6307 rcu_read_unlock();
6308 return err;
6309}
6310
6311
6312
6313
6314
6315
6316
6317static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6318{
6319 struct mddev *mddev = bdev->bd_disk->private_data;
6320
6321 geo->heads = 2;
6322 geo->sectors = 4;
6323 geo->cylinders = mddev->array_sectors / 8;
6324 return 0;
6325}
6326
6327static int md_ioctl(struct block_device *bdev, fmode_t mode,
6328 unsigned int cmd, unsigned long arg)
6329{
6330 int err = 0;
6331 void __user *argp = (void __user *)arg;
6332 struct mddev *mddev = NULL;
6333 int ro;
6334
6335 switch (cmd) {
6336 case RAID_VERSION:
6337 case GET_ARRAY_INFO:
6338 case GET_DISK_INFO:
6339 break;
6340 default:
6341 if (!capable(CAP_SYS_ADMIN))
6342 return -EACCES;
6343 }
6344
6345
6346
6347
6348
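 /*
  * Commands that act on the driver as a whole rather than on a
  * particular array.
  */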
6349 switch (cmd)
6350 {
6351 case RAID_VERSION:
6352 err = get_version(argp);
6353 goto done;
6354
6355 case PRINT_RAID_DEBUG:
6356 err = 0;
6357 md_print_devices();
6358 goto done;
6359
6360#ifndef MODULE
6361 case RAID_AUTORUN:
6362 err = 0;
6363 autostart_arrays(arg);
6364 goto done;
6365#endif
6366 default:;
6367 }
6368
6369
6370
6371
6372
6373 mddev = bdev->bd_disk->private_data;
6374
6375 if (!mddev) {
6376 BUG();
6377 goto abort;
6378 }
6379
6380
6381 switch (cmd) {
6382 case GET_ARRAY_INFO:
6383 if (!mddev->raid_disks && !mddev->external)
6384 err = -ENODEV;
6385 else
6386 err = get_array_info(mddev, argp);
6387 goto abort;
6388
6389 case GET_DISK_INFO:
6390 if (!mddev->raid_disks && !mddev->external)
6391 err = -ENODEV;
6392 else
6393 err = get_disk_info(mddev, argp);
6394 goto abort;
6395
6396 case SET_DISK_FAULTY:
6397 err = set_disk_faulty(mddev, new_decode_dev(arg));
6398 goto abort;
6399 }
6400
6401 err = mddev_lock(mddev);
6402 if (err) {
6403 printk(KERN_INFO
6404 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6405 err, cmd);
6406 goto abort;
6407 }
6408
6409 switch (cmd)
6410 {
6411 case SET_ARRAY_INFO:
6412 {
6413 mdu_array_info_t info;
6414 if (!arg)
6415 memset(&info, 0, sizeof(info));
6416 else if (copy_from_user(&info, argp, sizeof(info))) {
6417 err = -EFAULT;
6418 goto abort_unlock;
6419 }
6420 if (mddev->pers) {
6421 err = update_array_info(mddev, &info);
6422 if (err) {
6423 printk(KERN_WARNING "md: couldn't update"
6424 " array info. %d\n", err);
6425 goto abort_unlock;
6426 }
6427 goto done_unlock;
6428 }
6429 if (!list_empty(&mddev->disks)) {
6430 printk(KERN_WARNING
6431 "md: array %s already has disks!\n",
6432 mdname(mddev));
6433 err = -EBUSY;
6434 goto abort_unlock;
6435 }
6436 if (mddev->raid_disks) {
6437 printk(KERN_WARNING
6438 "md: array %s already initialised!\n",
6439 mdname(mddev));
6440 err = -EBUSY;
6441 goto abort_unlock;
6442 }
6443 err = set_array_info(mddev, &info);
6444 if (err) {
6445 printk(KERN_WARNING "md: couldn't set"
6446 " array info. %d\n", err);
6447 goto abort_unlock;
6448 }
6449 }
6450 goto done_unlock;
6451
6452 default:;
6453 }
6454
6455
6456
6457
6458
6459
6460 if ((!mddev->raid_disks && !mddev->external)
6461 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6462 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6463 && cmd != GET_BITMAP_FILE) {
6464 err = -ENODEV;
6465 goto abort_unlock;
6466 }
6467
6468
6469
6470
6471 switch (cmd)
6472 {
6473 case GET_BITMAP_FILE:
6474 err = get_bitmap_file(mddev, argp);
6475 goto done_unlock;
6476
6477 case RESTART_ARRAY_RW:
6478 err = restart_array(mddev);
6479 goto done_unlock;
6480
6481 case STOP_ARRAY:
6482 err = do_md_stop(mddev, 0, bdev);
6483 goto done_unlock;
6484
6485 case STOP_ARRAY_RO:
6486 err = md_set_readonly(mddev, bdev);
6487 goto done_unlock;
6488
6489 case BLKROSET:
6490 if (get_user(ro, (int __user *)(arg))) {
6491 err = -EFAULT;
6492 goto done_unlock;
6493 }
6494 err = -EINVAL;
6495
6496
6497
6498
6499 if (ro)
6500 goto done_unlock;
6501
6502
6503 if (mddev->ro != 1)
6504 goto done_unlock;
6505
6506
6507
6508
6509 if (mddev->pers) {
6510 err = restart_array(mddev);
6511 if (err == 0) {
6512 mddev->ro = 2;
6513 set_disk_ro(mddev->gendisk, 0);
6514 }
6515 }
6516 goto done_unlock;
6517 }
6518
6519
6520
6521
6522
6523
6524
6525
6526 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6527 if (mddev->ro == 2) {
6528 mddev->ro = 0;
6529 sysfs_notify_dirent_safe(mddev->sysfs_state);
6530 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6531 md_wakeup_thread(mddev->thread);
6532 } else {
6533 err = -EROFS;
6534 goto abort_unlock;
6535 }
6536 }
6537
6538 switch (cmd)
6539 {
6540 case ADD_NEW_DISK:
6541 {
6542 mdu_disk_info_t info;
6543 if (copy_from_user(&info, argp, sizeof(info)))
6544 err = -EFAULT;
6545 else
6546 err = add_new_disk(mddev, &info);
6547 goto done_unlock;
6548 }
6549
6550 case HOT_REMOVE_DISK:
6551 err = hot_remove_disk(mddev, new_decode_dev(arg));
6552 goto done_unlock;
6553
6554 case HOT_ADD_DISK:
6555 err = hot_add_disk(mddev, new_decode_dev(arg));
6556 goto done_unlock;
6557
6558 case RUN_ARRAY:
6559 err = do_md_run(mddev);
6560 goto done_unlock;
6561
6562 case SET_BITMAP_FILE:
6563 err = set_bitmap_file(mddev, (int)arg);
6564 goto done_unlock;
6565
6566 default:
6567 err = -EINVAL;
6568 goto abort_unlock;
6569 }
6570
6571done_unlock:
6572abort_unlock:
6573 if (mddev->hold_active == UNTIL_IOCTL &&
6574 err != -EINVAL)
6575 mddev->hold_active = 0;
6576 mddev_unlock(mddev);
6577
6578 return err;
6579done:
6580 if (err)
6581 MD_BUG();
6582abort:
6583 return err;
6584}
6585#ifdef CONFIG_COMPAT
6586static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6587 unsigned int cmd, unsigned long arg)
6588{
6589 switch (cmd) {
6590 case HOT_REMOVE_DISK:
6591 case HOT_ADD_DISK:
6592 case SET_DISK_FAULTY:
6593 case SET_BITMAP_FILE:
6594
6595 break;
6596 default:
6597 arg = (unsigned long)compat_ptr(arg);
6598 break;
6599 }
6600
6601 return md_ioctl(bdev, mode, cmd, arg);
6602}
6603#endif
6604
6605static int md_open(struct block_device *bdev, fmode_t mode)
6606{
6607
6608
6609
6610
6611 struct mddev *mddev = mddev_find(bdev->bd_dev);
6612 int err;
6613
6614 if (!mddev)
6615 return -ENODEV;
6616
6617 if (mddev->gendisk != bdev->bd_disk) {
6618
6619
6620
6621 mddev_put(mddev);
6622
6623 flush_workqueue(md_misc_wq);
6624
6625 return -ERESTARTSYS;
6626 }
6627 BUG_ON(mddev != bdev->bd_disk->private_data);
6628
6629 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6630 goto out;
6631
6632 err = 0;
6633 atomic_inc(&mddev->openers);
6634 mutex_unlock(&mddev->open_mutex);
6635
6636 check_disk_change(bdev);
6637 out:
6638 return err;
6639}
6640
6641static int md_release(struct gendisk *disk, fmode_t mode)
6642{
6643 struct mddev *mddev = disk->private_data;
6644
6645 BUG_ON(!mddev);
6646 atomic_dec(&mddev->openers);
6647 mddev_put(mddev);
6648
6649 return 0;
6650}
6651
6652static int md_media_changed(struct gendisk *disk)
6653{
6654 struct mddev *mddev = disk->private_data;
6655
6656 return mddev->changed;
6657}
6658
6659static int md_revalidate(struct gendisk *disk)
6660{
6661 struct mddev *mddev = disk->private_data;
6662
6663 mddev->changed = 0;
6664 return 0;
6665}
6666static const struct block_device_operations md_fops =
6667{
6668 .owner = THIS_MODULE,
6669 .open = md_open,
6670 .release = md_release,
6671 .ioctl = md_ioctl,
6672#ifdef CONFIG_COMPAT
6673 .compat_ioctl = md_compat_ioctl,
6674#endif
6675 .getgeo = md_getgeo,
6676 .media_changed = md_media_changed,
6677 .revalidate_disk= md_revalidate,
6678};
6679
6680static int md_thread(void * arg)
6681{
6682 struct md_thread *thread = arg;
6683
6684
6685
6686
6687
6688
6689
6690
6691
6692
6693
6694
6695
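 /*
  * md helper thread: sleep until THREAD_WAKEUP is set (or the timeout
  * expires), then call the thread's run() handler.  SIGKILL is allowed
  * but simply flushed, so the thread cannot be killed prematurely.
  */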
6696 allow_signal(SIGKILL);
6697 while (!kthread_should_stop()) {
6698
6699
6700
6701
6702
6703
6704 if (signal_pending(current))
6705 flush_signals(current);
6706
6707 wait_event_interruptible_timeout
6708 (thread->wqueue,
6709 test_bit(THREAD_WAKEUP, &thread->flags)
6710 || kthread_should_stop(),
6711 thread->timeout);
6712
6713 clear_bit(THREAD_WAKEUP, &thread->flags);
6714 if (!kthread_should_stop())
6715 thread->run(thread);
6716 }
6717
6718 return 0;
6719}
6720
6721void md_wakeup_thread(struct md_thread *thread)
6722{
6723 if (thread) {
6724 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6725 set_bit(THREAD_WAKEUP, &thread->flags);
6726 wake_up(&thread->wqueue);
6727 }
6728}
6729
6730struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6731 struct mddev *mddev, const char *name)
6732{
6733 struct md_thread *thread;
6734
6735 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6736 if (!thread)
6737 return NULL;
6738
6739 init_waitqueue_head(&thread->wqueue);
6740
6741 thread->run = run;
6742 thread->mddev = mddev;
6743 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6744 thread->tsk = kthread_run(md_thread, thread,
6745 "%s_%s",
6746 mdname(thread->mddev),
6747 name);
6748 if (IS_ERR(thread->tsk)) {
6749 kfree(thread);
6750 return NULL;
6751 }
6752 return thread;
6753}
6754
6755void md_unregister_thread(struct md_thread **threadp)
6756{
6757 struct md_thread *thread = *threadp;
6758 if (!thread)
6759 return;
6760 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6761
6762
6763
6764 spin_lock(&pers_lock);
6765 *threadp = NULL;
6766 spin_unlock(&pers_lock);
6767
6768 kthread_stop(thread->tsk);
6769 kfree(thread);
6770}
6771
6772void md_error(struct mddev *mddev, struct md_rdev *rdev)
6773{
6774 if (!mddev) {
6775 MD_BUG();
6776 return;
6777 }
6778
6779 if (!rdev || test_bit(Faulty, &rdev->flags))
6780 return;
6781
6782 if (!mddev->pers || !mddev->pers->error_handler)
6783 return;
6784 mddev->pers->error_handler(mddev,rdev);
6785 if (mddev->degraded)
6786 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6787 sysfs_notify_dirent_safe(rdev->sysfs_state);
6788 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6789 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6790 md_wakeup_thread(mddev->thread);
6791 if (mddev->event_work.func)
6792 queue_work(md_misc_wq, &mddev->event_work);
6793 md_new_event_inintr(mddev);
6794}
6795
6796
6797
6798static void status_unused(struct seq_file *seq)
6799{
6800 int i = 0;
6801 struct md_rdev *rdev;
6802
6803 seq_printf(seq, "unused devices: ");
6804
6805 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6806 char b[BDEVNAME_SIZE];
6807 i++;
6808 seq_printf(seq, "%s ",
6809 bdevname(rdev->bdev,b));
6810 }
6811 if (!i)
6812 seq_printf(seq, "<none>");
6813
6814 seq_printf(seq, "\n");
6815}
6816
6817
6818static void status_resync(struct seq_file *seq, struct mddev * mddev)
6819{
6820 sector_t max_sectors, resync, res;
6821 unsigned long dt, db;
6822 sector_t rt;
6823 int scale;
6824 unsigned int per_milli;
6825
6826 if (mddev->curr_resync <= 3)
6827 resync = 0;
6828 else
6829 resync = mddev->curr_resync
6830 - atomic_read(&mddev->recovery_active);
6831
6832 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6833 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6834 max_sectors = mddev->resync_max_sectors;
6835 else
6836 max_sectors = mddev->dev_sectors;
6837
6838
6839
6840
6841 if (!max_sectors) {
6842 MD_BUG();
6843 return;
6844 }
6845
6846
6847
6848
6849
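 /*
  * Pick 'scale' so that (resync>>scale)*1000 fits in a sector_t and
  * (max_sectors>>scale) fits in a u32, as sector_div() requires.
  */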
6850 scale = 10;
6851 if (sizeof(sector_t) > sizeof(unsigned long)) {
6852 while ( max_sectors/2 > (1ULL<<(scale+32)))
6853 scale++;
6854 }
6855 res = (resync>>scale)*1000;
6856 sector_div(res, (u32)((max_sectors>>scale)+1));
6857
6858 per_milli = res;
6859 {
6860 int i, x = per_milli/50, y = 20-x;
6861 seq_printf(seq, "[");
6862 for (i = 0; i < x; i++)
6863 seq_printf(seq, "=");
6864 seq_printf(seq, ">");
6865 for (i = 0; i < y; i++)
6866 seq_printf(seq, ".");
6867 seq_printf(seq, "] ");
6868 }
6869 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6870 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6871 "reshape" :
6872 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6873 "check" :
6874 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6875 "resync" : "recovery"))),
6876 per_milli/10, per_milli % 10,
6877 (unsigned long long) resync/2,
6878 (unsigned long long) max_sectors/2);
6879
	/*
	 * dt: seconds elapsed since the last rate mark
	 * db: sectors completed since that mark
	 * rt: estimated time remaining
	 *
	 * rt is a sector_t, which may be only 32 bits wide, so divide
	 * before multiplying to avoid overflow; the divisor (db) is
	 * scaled down by 32 to keep precision and the final ">>= 5"
	 * undoes that scaling.
	 */
6894 dt = ((jiffies - mddev->resync_mark) / HZ);
6895 if (!dt) dt++;
6896 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6897 - mddev->resync_mark_cnt;
6898
6899 rt = max_sectors - resync;
6900 sector_div(rt, db/32+1);
6901 rt *= dt;
6902 rt >>= 5;
6903
6904 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6905 ((unsigned long)rt % 60)/6);
6906
6907 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6908}
6909
6910static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6911{
6912 struct list_head *tmp;
6913 loff_t l = *pos;
6914 struct mddev *mddev;
6915
6916 if (l >= 0x10000)
6917 return NULL;
6918 if (!l--)
		/* first record is the header line */
6920 return (void*)1;
6921
6922 spin_lock(&all_mddevs_lock);
6923 list_for_each(tmp,&all_mddevs)
6924 if (!l--) {
6925 mddev = list_entry(tmp, struct mddev, all_mddevs);
6926 mddev_get(mddev);
6927 spin_unlock(&all_mddevs_lock);
6928 return mddev;
6929 }
6930 spin_unlock(&all_mddevs_lock);
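	/* past the last array: emit one final record for the unused-devices line */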
6931 if (!l--)
6932 return (void*)2;
6933 return NULL;
6934}
6935
6936static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6937{
6938 struct list_head *tmp;
6939 struct mddev *next_mddev, *mddev = v;
6940
6941 ++*pos;
6942 if (v == (void*)2)
6943 return NULL;
6944
6945 spin_lock(&all_mddevs_lock);
6946 if (v == (void*)1)
6947 tmp = all_mddevs.next;
6948 else
6949 tmp = mddev->all_mddevs.next;
6950 if (tmp != &all_mddevs)
6951 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
6952 else {
6953 next_mddev = (void*)2;
6954 *pos = 0x10000;
6955 }
6956 spin_unlock(&all_mddevs_lock);
6957
6958 if (v != (void*)1)
6959 mddev_put(mddev);
6960 return next_mddev;
6961
6962}
6963
6964static void md_seq_stop(struct seq_file *seq, void *v)
6965{
6966 struct mddev *mddev = v;
6967
6968 if (mddev && v != (void*)1 && v != (void*)2)
6969 mddev_put(mddev);
6970}
6971
6972static int md_seq_show(struct seq_file *seq, void *v)
6973{
6974 struct mddev *mddev = v;
6975 sector_t sectors;
6976 struct md_rdev *rdev;
6977
6978 if (v == (void*)1) {
6979 struct md_personality *pers;
6980 seq_printf(seq, "Personalities : ");
6981 spin_lock(&pers_lock);
6982 list_for_each_entry(pers, &pers_list, list)
6983 seq_printf(seq, "[%s] ", pers->name);
6984
6985 spin_unlock(&pers_lock);
6986 seq_printf(seq, "\n");
6987 seq->poll_event = atomic_read(&md_event_count);
6988 return 0;
6989 }
6990 if (v == (void*)2) {
6991 status_unused(seq);
6992 return 0;
6993 }
6994
6995 if (mddev_lock(mddev) < 0)
6996 return -EINTR;
6997
6998 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
6999 seq_printf(seq, "%s : %sactive", mdname(mddev),
7000 mddev->pers ? "" : "in");
7001 if (mddev->pers) {
7002 if (mddev->ro==1)
7003 seq_printf(seq, " (read-only)");
7004 if (mddev->ro==2)
7005 seq_printf(seq, " (auto-read-only)");
7006 seq_printf(seq, " %s", mddev->pers->name);
7007 }
7008
7009 sectors = 0;
7010 rdev_for_each(rdev, mddev) {
7011 char b[BDEVNAME_SIZE];
7012 seq_printf(seq, " %s[%d]",
7013 bdevname(rdev->bdev,b), rdev->desc_nr);
7014 if (test_bit(WriteMostly, &rdev->flags))
7015 seq_printf(seq, "(W)");
7016 if (test_bit(Faulty, &rdev->flags)) {
7017 seq_printf(seq, "(F)");
7018 continue;
7019 }
7020 if (rdev->raid_disk < 0)
7021 seq_printf(seq, "(S)");
7022 if (test_bit(Replacement, &rdev->flags))
7023 seq_printf(seq, "(R)");
7024 sectors += rdev->sectors;
7025 }
7026
7027 if (!list_empty(&mddev->disks)) {
7028 if (mddev->pers)
7029 seq_printf(seq, "\n %llu blocks",
7030 (unsigned long long)
7031 mddev->array_sectors / 2);
7032 else
7033 seq_printf(seq, "\n %llu blocks",
7034 (unsigned long long)sectors / 2);
7035 }
7036 if (mddev->persistent) {
7037 if (mddev->major_version != 0 ||
7038 mddev->minor_version != 90) {
7039 seq_printf(seq," super %d.%d",
7040 mddev->major_version,
7041 mddev->minor_version);
7042 }
7043 } else if (mddev->external)
7044 seq_printf(seq, " super external:%s",
7045 mddev->metadata_type);
7046 else
7047 seq_printf(seq, " super non-persistent");
7048
7049 if (mddev->pers) {
7050 mddev->pers->status(seq, mddev);
7051 seq_printf(seq, "\n ");
7052 if (mddev->pers->sync_request) {
7053 if (mddev->curr_resync > 2) {
7054 status_resync(seq, mddev);
7055 seq_printf(seq, "\n ");
7056 } else if (mddev->curr_resync >= 1)
7057 seq_printf(seq, "\tresync=DELAYED\n ");
7058 else if (mddev->recovery_cp < MaxSector)
7059 seq_printf(seq, "\tresync=PENDING\n ");
7060 }
7061 } else
7062 seq_printf(seq, "\n ");
7063
7064 bitmap_status(seq, mddev->bitmap);
7065
7066 seq_printf(seq, "\n");
7067 }
7068 mddev_unlock(mddev);
7069
7070 return 0;
7071}
7072
7073static const struct seq_operations md_seq_ops = {
7074 .start = md_seq_start,
7075 .next = md_seq_next,
7076 .stop = md_seq_stop,
7077 .show = md_seq_show,
7078};
7079
7080static int md_seq_open(struct inode *inode, struct file *file)
7081{
7082 struct seq_file *seq;
7083 int error;
7084
7085 error = seq_open(file, &md_seq_ops);
7086 if (error)
7087 return error;
7088
7089 seq = file->private_data;
7090 seq->poll_event = atomic_read(&md_event_count);
7091 return error;
7092}
7093
7094static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7095{
7096 struct seq_file *seq = filp->private_data;
7097 int mask;
7098
7099 poll_wait(filp, &md_event_waiters, wait);
	/* always allow read */
7102 mask = POLLIN | POLLRDNORM;
7103
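	/* flag an exceptional condition when new md events have occurred */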
7104 if (seq->poll_event != atomic_read(&md_event_count))
7105 mask |= POLLERR | POLLPRI;
7106 return mask;
7107}
7108
7109static const struct file_operations md_seq_fops = {
7110 .owner = THIS_MODULE,
7111 .open = md_seq_open,
7112 .read = seq_read,
7113 .llseek = seq_lseek,
7114 .release = seq_release_private,
7115 .poll = mdstat_poll,
7116};
7117
7118int register_md_personality(struct md_personality *p)
7119{
7120 spin_lock(&pers_lock);
7121 list_add_tail(&p->list, &pers_list);
7122 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7123 spin_unlock(&pers_lock);
7124 return 0;
7125}
7126
7127int unregister_md_personality(struct md_personality *p)
7128{
7129 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7130 spin_lock(&pers_lock);
7131 list_del_init(&p->list);
7132 spin_unlock(&pers_lock);
7133 return 0;
7134}
7135
7136static int is_mddev_idle(struct mddev *mddev, int init)
7137{
7138 struct md_rdev * rdev;
7139 int idle;
7140 int curr_events;
7141
7142 idle = 1;
7143 rcu_read_lock();
7144 rdev_for_each_rcu(rdev, mddev) {
7145 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7146 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7147 (int)part_stat_read(&disk->part0, sectors[1]) -
7148 atomic_read(&disk->sync_io);
7149
		/* Sync I/O is added to sync_io when a request is issued,
		 * but only reaches the disk stats when it completes, so
		 * during a resync curr_events tends to lag behind.
		 * Normal I/O bumps the disk stats without touching
		 * sync_io, so curr_events eventually moves ahead of
		 * last_events.  Once it has moved by more than a small
		 * margin (64 sectors) we declare the array non-idle so
		 * the resync throttles back; otherwise last_events is
		 * left alone so completing resync I/O does not make the
		 * array look busy.
		 */
7171 if (init || curr_events - rdev->last_events > 64) {
7172 rdev->last_events = curr_events;
7173 idle = 0;
7174 }
7175 }
7176 rcu_read_unlock();
7177 return idle;
7178}
7179
7180void md_done_sync(struct mddev *mddev, int blocks, int ok)
7181{
	/* another 'blocks' sectors (512 bytes each) have been synced */
7183 atomic_sub(blocks, &mddev->recovery_active);
7184 wake_up(&mddev->recovery_wait);
7185 if (!ok) {
7186 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7187 md_wakeup_thread(mddev->thread);
7188
7189 }
7190}
7191
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. the 'active' flag in
 * the superblock) before writing, schedule the superblock update and
 * wait for it to complete.
 */
7198void md_write_start(struct mddev *mddev, struct bio *bi)
7199{
7200 int did_change = 0;
7201 if (bio_data_dir(bi) != WRITE)
7202 return;
7203
7204 BUG_ON(mddev->ro == 1);
7205 if (mddev->ro == 2) {
		/* need to switch to read/write */
7207 mddev->ro = 0;
7208 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7209 md_wakeup_thread(mddev->thread);
7210 md_wakeup_thread(mddev->sync_thread);
7211 did_change = 1;
7212 }
7213 atomic_inc(&mddev->writes_pending);
7214 if (mddev->safemode == 1)
7215 mddev->safemode = 0;
7216 if (mddev->in_sync) {
7217 spin_lock_irq(&mddev->write_lock);
7218 if (mddev->in_sync) {
7219 mddev->in_sync = 0;
7220 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7221 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7222 md_wakeup_thread(mddev->thread);
7223 did_change = 1;
7224 }
7225 spin_unlock_irq(&mddev->write_lock);
7226 }
7227 if (did_change)
7228 sysfs_notify_dirent_safe(mddev->sysfs_state);
7229 wait_event(mddev->sb_wait,
7230 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7231}
7232
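/* md_write_end(mddev)
 * The write counted by md_write_start() has completed.  If it was the
 * last outstanding write, arm the safemode timer (or wake the md thread
 * when in safemode 2) so the array can be marked clean once it is idle.
 */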
7233void md_write_end(struct mddev *mddev)
7234{
7235 if (atomic_dec_and_test(&mddev->writes_pending)) {
7236 if (mddev->safemode == 2)
7237 md_wakeup_thread(mddev->thread);
7238 else if (mddev->safemode_delay)
7239 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7240 }
7241}
7242
/* md_allow_write(mddev)
 * Ensure the array is marked 'active' so that writes may proceed
 * without first blocking on a metadata update.
 * Must be called with the mddev locked.
 * Returns 0 if writes may proceed immediately, or -EAGAIN if a
 * superblock update is still pending (external-metadata arrays).
 */
7252int md_allow_write(struct mddev *mddev)
7253{
7254 if (!mddev->pers)
7255 return 0;
7256 if (mddev->ro)
7257 return 0;
7258 if (!mddev->pers->sync_request)
7259 return 0;
7260
7261 spin_lock_irq(&mddev->write_lock);
7262 if (mddev->in_sync) {
7263 mddev->in_sync = 0;
7264 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7265 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7266 if (mddev->safemode_delay &&
7267 mddev->safemode == 0)
7268 mddev->safemode = 1;
7269 spin_unlock_irq(&mddev->write_lock);
7270 md_update_sb(mddev, 0);
7271 sysfs_notify_dirent_safe(mddev->sysfs_state);
7272 } else
7273 spin_unlock_irq(&mddev->write_lock);
7274
7275 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7276 return -EAGAIN;
7277 else
7278 return 0;
7279}
7280EXPORT_SYMBOL_GPL(md_allow_write);
7281
7282#define SYNC_MARKS 10
7283#define SYNC_MARK_STEP (3*HZ)
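/* Keep SYNC_MARKS timestamped progress marks, one taken every
 * SYNC_MARK_STEP, to derive a rolling estimate of the sync speed.
 */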
7284void md_do_sync(struct md_thread *thread)
7285{
7286 struct mddev *mddev = thread->mddev;
7287 struct mddev *mddev2;
7288 unsigned int currspeed = 0,
7289 window;
7290 sector_t max_sectors,j, io_sectors;
7291 unsigned long mark[SYNC_MARKS];
7292 sector_t mark_cnt[SYNC_MARKS];
7293 int last_mark,m;
7294 struct list_head *tmp;
7295 sector_t last_check;
7296 int skipped = 0;
7297 struct md_rdev *rdev;
7298 char *desc;
7299 struct blk_plug plug;
7300
7301
7302 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7303 return;
7304 if (mddev->ro)
7305 return;
7306
7307 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7308 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
7309 desc = "data-check";
7310 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7311 desc = "requested-resync";
7312 else
7313 desc = "resync";
7314 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7315 desc = "reshape";
7316 else
7317 desc = "recovery";
7318
	/* curr_resync is overloaded while we sort out conflicts with other
	 * arrays that share one of our component devices:
	 *   0     == not engaged in resync at all
	 *   2     == checking that no conflicting resync is running
	 *   1     == like 2, but we have yielded to let the other array go
	 *   other == actively resyncing, value is the current sector
	 * Before really starting, set curr_resync to 2 and make sure no
	 * conflicting array has a curr_resync at or above ours; if one
	 * does, wait for it to finish.
	 */
7335 do {
7336 mddev->curr_resync = 2;
7337
7338 try_again:
7339 if (kthread_should_stop())
7340 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7341
7342 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7343 goto skip;
7344 for_each_mddev(mddev2, tmp) {
7345 if (mddev2 == mddev)
7346 continue;
7347 if (!mddev->parallel_resync
7348 && mddev2->curr_resync
7349 && match_mddev_units(mddev, mddev2)) {
7350 DEFINE_WAIT(wq);
7351 if (mddev < mddev2 && mddev->curr_resync == 2) {
7352
7353 mddev->curr_resync = 1;
7354 wake_up(&resync_wait);
7355 }
7356 if (mddev > mddev2 && mddev->curr_resync == 1)
				/* no need to wait here, we can wait the
				 * next time round when curr_resync == 2
				 */
7360 continue;
7361
			/* wait for the conflicting resync to progress
			 * past us or finish
			 */
7365 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7366 if (!kthread_should_stop() &&
7367 mddev2->curr_resync >= mddev->curr_resync) {
7368 printk(KERN_INFO "md: delaying %s of %s"
7369 " until %s has finished (they"
7370 " share one or more physical units)\n",
7371 desc, mdname(mddev), mdname(mddev2));
7372 mddev_put(mddev2);
7373 if (signal_pending(current))
7374 flush_signals(current);
7375 schedule();
7376 finish_wait(&resync_wait, &wq);
7377 goto try_again;
7378 }
7379 finish_wait(&resync_wait, &wq);
7380 }
7381 }
7382 } while (mddev->curr_resync < 2);
7383
7384 j = 0;
7385 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
		/* resync follows the size requested by the personality,
		 * which defaults to the physical size of the devices
		 */
7389 max_sectors = mddev->resync_max_sectors;
7390 atomic64_set(&mddev->resync_mismatches, 0);
7391
7392 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7393 j = mddev->resync_min;
7394 else if (!mddev->bitmap)
7395 j = mddev->recovery_cp;
7396
7397 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7398 max_sectors = mddev->resync_max_sectors;
7399 else {
		/* recovery follows the physical size of the devices */
7401 max_sectors = mddev->dev_sectors;
7402 j = MaxSector;
7403 rcu_read_lock();
7404 rdev_for_each_rcu(rdev, mddev)
7405 if (rdev->raid_disk >= 0 &&
7406 !test_bit(Faulty, &rdev->flags) &&
7407 !test_bit(In_sync, &rdev->flags) &&
7408 rdev->recovery_offset < j)
7409 j = rdev->recovery_offset;
7410 rcu_read_unlock();
7411 }
7412
7413 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7414 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7415 " %d KB/sec/disk.\n", speed_min(mddev));
7416 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7417 "(but not more than %d KB/sec) for %s.\n",
7418 speed_max(mddev), desc);
7419
7420 is_mddev_idle(mddev, 1);
7421
7422 io_sectors = 0;
7423 for (m = 0; m < SYNC_MARKS; m++) {
7424 mark[m] = jiffies;
7425 mark_cnt[m] = io_sectors;
7426 }
7427 last_mark = 0;
7428 mddev->resync_mark = mark[last_mark];
7429 mddev->resync_mark_cnt = mark_cnt[last_mark];
7430
	/*
	 * Tune reconstruction: progress is accounted in a 'window' of
	 * 32 pages worth of sectors.
	 */
7434 window = 32*(PAGE_SIZE/512);
7435 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7436 window/2, (unsigned long long)max_sectors/2);
7437
7438 atomic_set(&mddev->recovery_active, 0);
7439 last_check = 0;
7440
7441 if (j>2) {
7442 printk(KERN_INFO
7443 "md: resuming %s of %s from checkpoint.\n",
7444 desc, mdname(mddev));
7445 mddev->curr_resync = j;
7446 } else
7447 mddev->curr_resync = 3;
7448 mddev->curr_resync_completed = j;
7449 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7450 md_new_event(mddev);
7451
7452 blk_start_plug(&plug);
7453 while (j < max_sectors) {
7454 sector_t sectors;
7455
7456 skipped = 0;
7457
7458 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7459 ((mddev->curr_resync > mddev->curr_resync_completed &&
7460 (mddev->curr_resync - mddev->curr_resync_completed)
7461 > (max_sectors >> 4)) ||
7462 (j - mddev->curr_resync_completed)*2
7463 >= mddev->resync_max - mddev->curr_resync_completed
7464 )) {
			/* time to update curr_resync_completed */
7466 wait_event(mddev->recovery_wait,
7467 atomic_read(&mddev->recovery_active) == 0);
7468 mddev->curr_resync_completed = j;
7469 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7470 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7471 }
7472
7473 while (j >= mddev->resync_max && !kthread_should_stop()) {
			/* resync_max can be changed at any time, so we may
			 * wait here for a long while; wait interruptibly
			 * and flush any pending signals first.
			 */
7478 flush_signals(current);
7479 wait_event_interruptible(mddev->recovery_wait,
7480 mddev->resync_max > j
7481 || kthread_should_stop());
7482 }
7483
7484 if (kthread_should_stop())
7485 goto interrupted;
7486
7487 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7488 currspeed < speed_min(mddev));
7489 if (sectors == 0) {
7490 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7491 goto out;
7492 }
7493
7494 if (!skipped) {
7495 io_sectors += sectors;
7496 atomic_add(sectors, &mddev->recovery_active);
7497 }
7498
7499 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7500 break;
7501
7502 j += sectors;
7503 if (j > 2)
7504 mddev->curr_resync = j;
7505 mddev->curr_mark_cnt = io_sectors;
7506 if (last_check == 0)
			/* this is the earliest that rebuild will be
			 * visible in /proc/mdstat
			 */
7510 md_new_event(mddev);
7511
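		/* only update the rate marks every 'window' sectors */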
7512 if (last_check + window > io_sectors || j == max_sectors)
7513 continue;
7514
7515 last_check = io_sectors;
7516 repeat:
7517 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7518
7519 int next = (last_mark+1) % SYNC_MARKS;
7520
7521 mddev->resync_mark = mark[next];
7522 mddev->resync_mark_cnt = mark_cnt[next];
7523 mark[next] = jiffies;
7524 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7525 last_mark = next;
7526 }
7527
7528
7529 if (kthread_should_stop())
7530 goto interrupted;
7531
		/*
		 * Throttle: if we are running faster than the requested
		 * minimum speed and either exceed the maximum speed or
		 * the array is not otherwise idle, sleep briefly and
		 * re-check.
		 */
7541 cond_resched();
7542
7543 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7544 /((jiffies-mddev->resync_mark)/HZ +1) +1;
7545
7546 if (currspeed > speed_min(mddev)) {
7547 if ((currspeed > speed_max(mddev)) ||
7548 !is_mddev_idle(mddev, 0)) {
7549 msleep(500);
7550 goto repeat;
7551 }
7552 }
7553 }
7554 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
7555
7556
7557
7558 out:
7559 blk_finish_plug(&plug);
7560 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
	/* tell the personality that we are finished */
7563 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7564
7565 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7566 mddev->curr_resync > 2) {
7567 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7568 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7569 if (mddev->curr_resync >= mddev->recovery_cp) {
7570 printk(KERN_INFO
7571 "md: checkpointing %s of %s.\n",
7572 desc, mdname(mddev));
7573 mddev->recovery_cp =
7574 mddev->curr_resync_completed;
7575 }
7576 } else
7577 mddev->recovery_cp = MaxSector;
7578 } else {
7579 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7580 mddev->curr_resync = MaxSector;
7581 rcu_read_lock();
7582 rdev_for_each_rcu(rdev, mddev)
7583 if (rdev->raid_disk >= 0 &&
7584 mddev->delta_disks >= 0 &&
7585 !test_bit(Faulty, &rdev->flags) &&
7586 !test_bit(In_sync, &rdev->flags) &&
7587 rdev->recovery_offset < mddev->curr_resync)
7588 rdev->recovery_offset = mddev->curr_resync;
7589 rcu_read_unlock();
7590 }
7591 }
7592 skip:
7593 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7594
7595 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		/* we completed, so any min/max settings can be forgotten */
7597 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7598 mddev->resync_min = 0;
7599 mddev->resync_max = MaxSector;
7600 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7601 mddev->resync_min = mddev->curr_resync_completed;
7602 mddev->curr_resync = 0;
7603 wake_up(&resync_wait);
7604 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7605 md_wakeup_thread(mddev->thread);
7606 return;
7607
7608 interrupted:
7609
7610
7611
7612 printk(KERN_INFO
7613 "md: md_do_sync() got signal ... exiting\n");
7614 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7615 goto out;
7616
7617}
7618EXPORT_SYMBOL_GPL(md_do_sync);
7619
7620static int remove_and_add_spares(struct mddev *mddev)
7621{
7622 struct md_rdev *rdev;
7623 int spares = 0;
7624 int removed = 0;
7625
7626 rdev_for_each(rdev, mddev)
7627 if (rdev->raid_disk >= 0 &&
7628 !test_bit(Blocked, &rdev->flags) &&
7629 (test_bit(Faulty, &rdev->flags) ||
7630 ! test_bit(In_sync, &rdev->flags)) &&
7631 atomic_read(&rdev->nr_pending)==0) {
7632 if (mddev->pers->hot_remove_disk(
7633 mddev, rdev) == 0) {
7634 sysfs_unlink_rdev(mddev, rdev);
7635 rdev->raid_disk = -1;
7636 removed++;
7637 }
7638 }
7639 if (removed)
7640 sysfs_notify(&mddev->kobj, NULL,
7641 "degraded");
7642
7643
7644 rdev_for_each(rdev, mddev) {
7645 if (rdev->raid_disk >= 0 &&
7646 !test_bit(In_sync, &rdev->flags) &&
7647 !test_bit(Faulty, &rdev->flags))
7648 spares++;
7649 if (rdev->raid_disk < 0
7650 && !test_bit(Faulty, &rdev->flags)) {
7651 rdev->recovery_offset = 0;
7652 if (mddev->pers->
7653 hot_add_disk(mddev, rdev) == 0) {
7654 if (sysfs_link_rdev(mddev, rdev))
					/* failure here is OK */;
7656 spares++;
7657 md_new_event(mddev);
7658 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7659 }
7660 }
7661 }
7662 if (removed)
7663 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7664 return spares;
7665}
7666
7667static void reap_sync_thread(struct mddev *mddev)
7668{
7669 struct md_rdev *rdev;
7670
	/* resync has finished, collect the result */
7672 md_unregister_thread(&mddev->sync_thread);
7673 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7674 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* success: activate any spares that are now in_sync */
7677 if (mddev->pers->spare_active(mddev)) {
7678 sysfs_notify(&mddev->kobj, NULL,
7679 "degraded");
7680 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7681 }
7682 }
7683 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7684 mddev->pers->finish_reshape)
7685 mddev->pers->finish_reshape(mddev);
7686
	/* If the array is no longer degraded, or a device is now in_sync,
	 * its saved_raid_disk information is stale and must be cleared.
	 */
7693 rdev_for_each(rdev, mddev)
7694 if (!mddev->degraded ||
7695 test_bit(In_sync, &rdev->flags))
7696 rdev->saved_raid_disk = -1;
7697
7698 md_update_sb(mddev, 1);
7699 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7700 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7701 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7702 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7703 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7704
7705 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7706 sysfs_notify_dirent_safe(mddev->sysfs_action);
7707 md_new_event(mddev);
7708 if (mddev->event_work.func)
7709 queue_work(md_misc_wq, &mddev->event_work);
7710}
7711
/*
 * This routine is regularly called by all per-raid-array threads to deal
 * with generic issues like resync and super-block updates.
 * It does not do any resync itself; instead it forks off a separate
 * sync_thread when one is needed.
 *
 * When a resync is needed we set MD_RECOVERY_RUNNING in ->recovery and
 * start a thread at ->sync_thread.  When that thread finishes it sets
 * MD_RECOVERY_DONE and wakes us up, and the thread is reaped here.
 *
 * The overall flow is:
 *  1/ if the superblock needs updating, update it.
 *  2/ if a recovery thread is running, don't do anything else.
 *  3/ if recovery has finished, clean up and possibly mark spares active.
 *  4/ remove any faulty devices that are no longer in use.
 *  5/ if the array is degraded, try to add spares.
 *  6/ if spares were added or the array is out of sync, start a resync
 *     thread.
 */
7734void md_check_recovery(struct mddev *mddev)
7735{
7736 if (mddev->suspended)
7737 return;
7738
7739 if (mddev->bitmap)
7740 bitmap_daemon_work(mddev);
7741
7742 if (signal_pending(current)) {
7743 if (mddev->pers->sync_request && !mddev->external) {
7744 printk(KERN_INFO "md: %s in immediate safe mode\n",
7745 mdname(mddev));
7746 mddev->safemode = 2;
7747 }
7748 flush_signals(current);
7749 }
7750
7751 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7752 return;
7753 if ( ! (
7754 (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
7755 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7756 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7757 (mddev->external == 0 && mddev->safemode == 1) ||
7758 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7759 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7760 ))
7761 return;
7762
7763 if (mddev_trylock(mddev)) {
7764 int spares = 0;
7765
7766 if (mddev->ro) {
			/* On a read-only array we can only remove
			 * failed devices.
			 */
7770 struct md_rdev *rdev;
7771 rdev_for_each(rdev, mddev)
7772 if (rdev->raid_disk >= 0 &&
7773 !test_bit(Blocked, &rdev->flags) &&
7774 test_bit(Faulty, &rdev->flags) &&
7775 atomic_read(&rdev->nr_pending)==0) {
7776 if (mddev->pers->hot_remove_disk(
7777 mddev, rdev) == 0) {
7778 sysfs_unlink_rdev(mddev, rdev);
7779 rdev->raid_disk = -1;
7780 }
7781 }
7782 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7783 goto unlock;
7784 }
7785
7786 if (!mddev->external) {
7787 int did_change = 0;
7788 spin_lock_irq(&mddev->write_lock);
7789 if (mddev->safemode &&
7790 !atomic_read(&mddev->writes_pending) &&
7791 !mddev->in_sync &&
7792 mddev->recovery_cp == MaxSector) {
7793 mddev->in_sync = 1;
7794 did_change = 1;
7795 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7796 }
7797 if (mddev->safemode == 1)
7798 mddev->safemode = 0;
7799 spin_unlock_irq(&mddev->write_lock);
7800 if (did_change)
7801 sysfs_notify_dirent_safe(mddev->sysfs_state);
7802 }
7803
7804 if (mddev->flags)
7805 md_update_sb(mddev, 0);
7806
7807 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7808 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
7810 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7811 goto unlock;
7812 }
7813 if (mddev->sync_thread) {
7814 reap_sync_thread(mddev);
7815 goto unlock;
7816 }
7817
		/* Set RUNNING before clearing NEEDED to avoid any
		 * transient appearance of the array being idle.
		 */
7820 mddev->curr_resync_completed = 0;
7821 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
		/* Clear bits that don't mean anything here but might be
		 * left over from a previous attempt.
		 */
7825 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7826 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7827
7828 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7829 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7830 goto unlock;
7831
		/* No recovery is running: remove any failed drives, then
		 * add spares if possible, and decide what kind of sync
		 * action (reshape, recovery, resync/check) to start.
		 */
7838 if (mddev->reshape_position != MaxSector) {
7839 if (mddev->pers->check_reshape == NULL ||
7840 mddev->pers->check_reshape(mddev) != 0)
7841
7842 goto unlock;
7843 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7844 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7845 } else if ((spares = remove_and_add_spares(mddev))) {
7846 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7847 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7848 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7849 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7850 } else if (mddev->recovery_cp < MaxSector) {
7851 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7852 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7853 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7854
7855 goto unlock;
7856
7857 if (mddev->pers->sync_request) {
7858 if (spares) {
			/* Device(s) were added to an array that may keep
			 * its bitmap on every device, so make sure all
			 * bitmap pages get written out.
			 */
7863 bitmap_write_all(mddev->bitmap);
7864 }
7865 mddev->sync_thread = md_register_thread(md_do_sync,
7866 mddev,
7867 "resync");
7868 if (!mddev->sync_thread) {
7869 printk(KERN_ERR "%s: could not start resync"
7870 " thread...\n",
7871 mdname(mddev));
7872
7873 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7874 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7875 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7876 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7877 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7878 } else
7879 md_wakeup_thread(mddev->sync_thread);
7880 sysfs_notify_dirent_safe(mddev->sysfs_action);
7881 md_new_event(mddev);
7882 }
7883 unlock:
7884 if (!mddev->sync_thread) {
7885 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7886 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7887 &mddev->recovery))
7888 if (mddev->sysfs_action)
7889 sysfs_notify_dirent_safe(mddev->sysfs_action);
7890 }
7891 mddev_unlock(mddev);
7892 }
7893}
7894
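/* Wait (for at most five seconds) for an rdev to leave the Blocked or
 * BlockedBadBlocks state, then drop one pending reference on the rdev.
 */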
7895void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7896{
7897 sysfs_notify_dirent_safe(rdev->sysfs_state);
7898 wait_event_timeout(rdev->blocked_wait,
7899 !test_bit(Blocked, &rdev->flags) &&
7900 !test_bit(BlockedBadBlocks, &rdev->flags),
7901 msecs_to_jiffies(5000));
7902 rdev_dec_pending(rdev, mddev);
7903}
7904EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7905
7906void md_finish_reshape(struct mddev *mddev)
7907{
	/* called by the personality module when a reshape completes */
7909 struct md_rdev *rdev;
7910
7911 rdev_for_each(rdev, mddev) {
7912 if (rdev->data_offset > rdev->new_data_offset)
7913 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7914 else
7915 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7916 rdev->data_offset = rdev->new_data_offset;
7917 }
7918}
7919EXPORT_SYMBOL(md_finish_reshape);
7920
/* Bad block management.
 * We can record which sectors of a device are known to be bad, so that
 * just those sectors (or the stripe containing them) can be failed
 * rather than the whole device.
 *
 * Each table entry is a u64 packing the start, the length and an
 * 'acknowledged' flag (meaning the range has been recorded in the
 * on-disk metadata), accessed via BB_OFFSET/BB_LEN/BB_ACK and built
 * with BB_MAKE.  An optional 'shift' lets larger blocks be tracked so
 * that larger devices can be covered.
 *
 * The table is protected by a seqlock, so readers such as
 * md_is_badblock() may occasionally need to retry.
 *
 * md_is_badblock() checks whether any sector in the given range is bad
 * and returns:
 *   0  if no known bad blocks overlap the range
 *   1  if bad blocks overlap and all of them are acknowledged
 *  -1  if some overlapping bad blocks are not yet acknowledged
 * The first overlapping bad range is reported through *first_bad and
 * *bad_sectors.
 */
7947int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
7948 sector_t *first_bad, int *bad_sectors)
7949{
7950 int hi;
7951 int lo;
7952 u64 *p = bb->page;
7953 int rv;
7954 sector_t target = s + sectors;
7955 unsigned seq;
7956
7957 if (bb->shift > 0) {
		/* round the start down, and the end up */
7959 s >>= bb->shift;
7960 target += (1<<bb->shift) - 1;
7961 target >>= bb->shift;
7962 sectors = target - s;
7963 }
7964
7965
7966retry:
7967 seq = read_seqbegin(&bb->lock);
7968 lo = 0;
7969 rv = 0;
7970 hi = bb->count;
7971
	/* Binary search for the last entry that starts before 'target'
	 * (the end of the range being checked); only that entry and
	 * earlier ones can overlap the range.
	 */
7980 while (hi - lo > 1) {
7981 int mid = (lo + hi) / 2;
7982 sector_t a = BB_OFFSET(p[mid]);
7983 if (a < target)
7984
7985
7986 lo = mid;
7987 else
7988
7989 hi = mid;
7990 }
7991
7992 if (hi > lo) {
		/* Walk back over every range that ends after 's' and see
		 * whether it overlaps and whether it is acknowledged.
		 */
7996 while (lo >= 0 &&
7997 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
7998 if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end and finishes after
				 * the start, so it overlaps
				 */
8002 if (rv != -1 && BB_ACK(p[lo]))
8003 rv = 1;
8004 else
8005 rv = -1;
8006 *first_bad = BB_OFFSET(p[lo]);
8007 *bad_sectors = BB_LEN(p[lo]);
8008 }
8009 lo--;
8010 }
8011 }
8012
8013 if (read_seqretry(&bb->lock, seq))
8014 goto retry;
8015
8016 return rv;
8017}
8018EXPORT_SYMBOL_GPL(md_is_badblock);
8019
/*
 * Add a range of bad blocks to the table.
 * This might extend the table, or might contract it if two adjacent
 * ranges can be merged.
 * We binary-search to find the 'insertion' point, then decide how best
 * to handle it.
 * Returns 1 on success, or 0 if the full range could not be recorded.
 */
8027static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8028 int acknowledged)
8029{
8030 u64 *p;
8031 int lo, hi;
8032 int rv = 1;
8033
8034 if (bb->shift < 0)
		/* bad block logging is disabled */
8036 return 0;
8037
8038 if (bb->shift) {
		/* round the start down, and the end up */
8040 sector_t next = s + sectors;
8041 s >>= bb->shift;
8042 next += (1<<bb->shift) - 1;
8043 next >>= bb->shift;
8044 sectors = next - s;
8045 }
8046
8047 write_seqlock_irq(&bb->lock);
8048
8049 p = bb->page;
8050 lo = 0;
8051 hi = bb->count;
8052
8053 while (hi - lo > 1) {
8054 int mid = (lo + hi) / 2;
8055 sector_t a = BB_OFFSET(p[mid]);
8056 if (a <= s)
8057 lo = mid;
8058 else
8059 hi = mid;
8060 }
8061 if (hi > lo && BB_OFFSET(p[lo]) > s)
8062 hi = lo;
8063
8064 if (hi > lo) {
		/* we found a range that might merge with the start of
		 * our new range, so do a quick check
		 */
8068 sector_t a = BB_OFFSET(p[lo]);
8069 sector_t e = a + BB_LEN(p[lo]);
8070 int ack = BB_ACK(p[lo]);
8071 if (e >= s) {
8072
8073 if (s == a && s + sectors >= e)
8074
8075 ack = acknowledged;
8076 else
8077 ack = ack && acknowledged;
8078
8079 if (e < s + sectors)
8080 e = s + sectors;
8081 if (e - a <= BB_MAX_LEN) {
8082 p[lo] = BB_MAKE(a, e-a, ack);
8083 s = e;
8084 } else {
				/* does not all fit in one entry, so make
				 * p[lo] maximal and continue with the rest
				 */
8088 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8089 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8090 s = a + BB_MAX_LEN;
8091 }
8092 sectors = e - s;
8093 }
8094 }
8095 if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's';
		 * maybe we can merge with the start of that range
		 */
8098 sector_t a = BB_OFFSET(p[hi]);
8099 sector_t e = a + BB_LEN(p[hi]);
8100 int ack = BB_ACK(p[hi]);
8101 if (a <= s + sectors) {
8102
8103 if (e <= s + sectors) {
8104
8105 e = s + sectors;
8106 ack = acknowledged;
8107 } else
8108 ack = ack && acknowledged;
8109
8110 a = s;
8111 if (e - a <= BB_MAX_LEN) {
8112 p[hi] = BB_MAKE(a, e-a, ack);
8113 s = e;
8114 } else {
8115 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8116 s = a + BB_MAX_LEN;
8117 }
8118 sectors = e - s;
8119 lo = hi;
8120 hi++;
8121 }
8122 }
8123 if (sectors == 0 && hi < bb->count) {
		/* we might now be able to combine p[lo] and p[hi]
		 * into a single entry
		 */
8126 sector_t a = BB_OFFSET(p[hi]);
8127 int lolen = BB_LEN(p[lo]);
8128 int hilen = BB_LEN(p[hi]);
8129 int newlen = lolen + hilen - (s - a);
8130 if (s >= a && newlen < BB_MAX_LEN) {
8131
8132 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8133 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8134 memmove(p + hi, p + hi + 1,
8135 (bb->count - hi - 1) * 8);
8136 bb->count--;
8137 }
8138 }
8139 while (sectors) {
		/* didn't merge (it all); need to add new entries just
		 * before 'hi' until everything is recorded
		 */
8142 if (bb->count >= MD_MAX_BADBLOCKS) {
8143
8144 rv = 0;
8145 break;
8146 } else {
8147 int this_sectors = sectors;
8148 memmove(p + hi + 1, p + hi,
8149 (bb->count - hi) * 8);
8150 bb->count++;
8151
8152 if (this_sectors > BB_MAX_LEN)
8153 this_sectors = BB_MAX_LEN;
8154 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8155 sectors -= this_sectors;
8156 s += this_sectors;
8157 }
8158 }
8159
8160 bb->changed = 1;
8161 if (!acknowledged)
8162 bb->unacked_exist = 1;
8163 write_sequnlock_irq(&bb->lock);
8164
8165 return rv;
8166}
8167
8168int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8169 int is_new)
8170{
8171 int rv;
8172 if (is_new)
8173 s += rdev->new_data_offset;
8174 else
8175 s += rdev->data_offset;
8176 rv = md_set_badblocks(&rdev->badblocks,
8177 s, sectors, 0);
8178 if (rv) {
8179
8180 sysfs_notify_dirent_safe(rdev->sysfs_state);
8181 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8182 md_wakeup_thread(rdev->mddev->thread);
8183 }
8184 return rv;
8185}
8186EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8187
/*
 * Remove a range of bad blocks from the table.
 * This may involve splitting an existing range, which needs an extra
 * table entry; if the table is already full the clear request is
 * silently dropped rather than failed.
 */
8194static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8195{
8196 u64 *p;
8197 int lo, hi;
8198 sector_t target = s + sectors;
8199 int rv = 0;
8200
8201 if (bb->shift > 0) {
		/* When clearing we round the start up and the end down,
		 * so we never clear a block that was not entirely covered
		 * by the request.
		 */
8208 s += (1<<bb->shift) - 1;
8209 s >>= bb->shift;
8210 target >>= bb->shift;
8211 sectors = target - s;
8212 }
8213
8214 write_seqlock_irq(&bb->lock);
8215
8216 p = bb->page;
8217 lo = 0;
8218 hi = bb->count;
8219
8220 while (hi - lo > 1) {
8221 int mid = (lo + hi) / 2;
8222 sector_t a = BB_OFFSET(p[mid]);
8223 if (a < target)
8224 lo = mid;
8225 else
8226 hi = mid;
8227 }
8228 if (hi > lo) {
		/* p[lo] is the last range that could overlap the target;
		 * earlier ranges could too, but only this one can extend
		 * beyond the end of the target range.
		 */
8233 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8234
8235 int ack = BB_ACK(p[lo]);
8236 sector_t a = BB_OFFSET(p[lo]);
8237 sector_t end = a + BB_LEN(p[lo]);
8238
8239 if (a < s) {
8240
8241 if (bb->count >= MD_MAX_BADBLOCKS) {
8242 rv = 0;
8243 goto out;
8244 }
8245 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8246 bb->count++;
8247 p[lo] = BB_MAKE(a, s-a, ack);
8248 lo++;
8249 }
8250 p[lo] = BB_MAKE(target, end - target, ack);
8251
8252 hi = lo;
8253 lo--;
8254 }
8255 while (lo >= 0 &&
8256 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8257
8258 if (BB_OFFSET(p[lo]) < s) {
8259
8260 int ack = BB_ACK(p[lo]);
8261 sector_t start = BB_OFFSET(p[lo]);
8262 p[lo] = BB_MAKE(start, s - start, ack);
8263
8264 break;
8265 }
8266 lo--;
8267 }
		/* 'lo' is now strictly before the cleared range and 'hi'
		 * strictly after it; anything in between can be discarded.
		 */
8271 if (hi - lo > 1) {
8272 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8273 bb->count -= (hi - lo - 1);
8274 }
8275 }
8276
8277 bb->changed = 1;
8278out:
8279 write_sequnlock_irq(&bb->lock);
8280 return rv;
8281}
8282
8283int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8284 int is_new)
8285{
8286 if (is_new)
8287 s += rdev->new_data_offset;
8288 else
8289 s += rdev->data_offset;
8290 return md_clear_badblocks(&rdev->badblocks,
8291 s, sectors);
8292}
8293EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8294
/*
 * Acknowledge all bad blocks in the table.
 * This only succeeds if ->changed is clear, i.e. there are no entries
 * still waiting to be written out to the metadata.
 */
8300void md_ack_all_badblocks(struct badblocks *bb)
8301{
8302 if (bb->page == NULL || bb->changed)
		/* no point even trying */
8304 return;
8305 write_seqlock_irq(&bb->lock);
8306
8307 if (bb->changed == 0 && bb->unacked_exist) {
8308 u64 *p = bb->page;
8309 int i;
8310 for (i = 0; i < bb->count ; i++) {
8311 if (!BB_ACK(p[i])) {
8312 sector_t start = BB_OFFSET(p[i]);
8313 int len = BB_LEN(p[i]);
8314 p[i] = BB_MAKE(start, len, 1);
8315 }
8316 }
8317 bb->unacked_exist = 0;
8318 }
8319 write_sequnlock_irq(&bb->lock);
8320}
8321EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8322
/*
 * sysfs access to the bad-blocks list.
 * badblocks_show() emits "sector length" pairs, one per line; with
 * 'unack' set, only ranges that have not yet been acknowledged are
 * listed.
 * badblocks_store() parses "sector length" and records the range,
 * marking it acknowledged unless 'unack' is set.
 */
8335static ssize_t
8336badblocks_show(struct badblocks *bb, char *page, int unack)
8337{
8338 size_t len;
8339 int i;
8340 u64 *p = bb->page;
8341 unsigned seq;
8342
8343 if (bb->shift < 0)
8344 return 0;
8345
8346retry:
8347 seq = read_seqbegin(&bb->lock);
8348
8349 len = 0;
8350 i = 0;
8351
8352 while (len < PAGE_SIZE && i < bb->count) {
8353 sector_t s = BB_OFFSET(p[i]);
8354 unsigned int length = BB_LEN(p[i]);
8355 int ack = BB_ACK(p[i]);
8356 i++;
8357
8358 if (unack && ack)
8359 continue;
8360
8361 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8362 (unsigned long long)s << bb->shift,
8363 length << bb->shift);
8364 }
8365 if (unack && len == 0)
8366 bb->unacked_exist = 0;
8367
8368 if (read_seqretry(&bb->lock, seq))
8369 goto retry;
8370
8371 return len;
8372}
8373
8374#define DO_DEBUG 1
8375
8376static ssize_t
8377badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8378{
8379 unsigned long long sector;
8380 int length;
8381 char newline;
8382#ifdef DO_DEBUG
	/* When DO_DEBUG is defined, writing "-sector length" clears the
	 * range instead of adding it; this is purely for testing.
	 */
8386 int clear = 0;
8387 if (page[0] == '-') {
8388 clear = 1;
8389 page++;
8390 }
8391#endif
8392
	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
8394 case 3:
8395 if (newline != '\n')
8396 return -EINVAL;
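		/* fall through: a trailing newline is fine, still check the length */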
8397 case 2:
8398 if (length <= 0)
8399 return -EINVAL;
8400 break;
8401 default:
8402 return -EINVAL;
8403 }
8404
8405#ifdef DO_DEBUG
8406 if (clear) {
8407 md_clear_badblocks(bb, sector, length);
8408 return len;
8409 }
8410#endif
8411 if (md_set_badblocks(bb, sector, length, !unack))
8412 return len;
8413 else
8414 return -ENOSPC;
8415}
8416
8417static int md_notify_reboot(struct notifier_block *this,
8418 unsigned long code, void *x)
8419{
8420 struct list_head *tmp;
8421 struct mddev *mddev;
8422 int need_delay = 0;
8423
8424 for_each_mddev(mddev, tmp) {
8425 if (mddev_trylock(mddev)) {
8426 if (mddev->pers)
8427 __md_stop_writes(mddev);
8428 mddev->safemode = 2;
8429 mddev_unlock(mddev);
8430 }
8431 need_delay = 1;
8432 }
8433
	/*
	 * Give the stopped arrays a moment to settle before the reboot
	 * or halt proceeds.
	 */
8439 if (need_delay)
8440 mdelay(1000*1);
8441
8442 return NOTIFY_DONE;
8443}
8444
8445static struct notifier_block md_notifier = {
8446 .notifier_call = md_notify_reboot,
8447 .next = NULL,
8448 .priority = INT_MAX,
8449};
8450
8451static void md_geninit(void)
8452{
8453 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8454
8455 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8456}
8457
8458static int __init md_init(void)
8459{
8460 int ret = -ENOMEM;
8461
8462 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8463 if (!md_wq)
8464 goto err_wq;
8465
8466 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8467 if (!md_misc_wq)
8468 goto err_misc_wq;
8469
8470 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8471 goto err_md;
8472
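	/* "mdp" (partitionable md) gets a dynamically allocated major number */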
8473 if ((ret = register_blkdev(0, "mdp")) < 0)
8474 goto err_mdp;
8475 mdp_major = ret;
8476
8477 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8478 md_probe, NULL, NULL);
8479 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8480 md_probe, NULL, NULL);
8481
8482 register_reboot_notifier(&md_notifier);
8483 raid_table_header = register_sysctl_table(raid_root_table);
8484
8485 md_geninit();
8486 return 0;
8487
8488err_mdp:
8489 unregister_blkdev(MD_MAJOR, "md");
8490err_md:
8491 destroy_workqueue(md_misc_wq);
8492err_misc_wq:
8493 destroy_workqueue(md_wq);
8494err_wq:
8495 return ret;
8496}
8497
8498#ifndef MODULE
8499
/*
 * When md is built in (not a module), devices tagged for autodetection
 * at boot are collected on this list and assembled by
 * autostart_arrays().
 */
8505static LIST_HEAD(all_detected_devices);
8506struct detected_devices_node {
8507 struct list_head list;
8508 dev_t dev;
8509};
8510
8511void md_autodetect_dev(dev_t dev)
8512{
8513 struct detected_devices_node *node_detected_dev;
8514
8515 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8516 if (node_detected_dev) {
8517 node_detected_dev->dev = dev;
8518 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8519 } else {
8520 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8521 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8522 }
8523}
8524
8525
8526static void autostart_arrays(int part)
8527{
8528 struct md_rdev *rdev;
8529 struct detected_devices_node *node_detected_dev;
8530 dev_t dev;
8531 int i_scanned, i_passed;
8532
8533 i_scanned = 0;
8534 i_passed = 0;
8535
8536 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8537
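	/* drain the list built by md_autodetect_dev() and import each device */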
8538 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8539 i_scanned++;
8540 node_detected_dev = list_entry(all_detected_devices.next,
8541 struct detected_devices_node, list);
8542 list_del(&node_detected_dev->list);
8543 dev = node_detected_dev->dev;
8544 kfree(node_detected_dev);
8545 rdev = md_import_device(dev,0, 90);
8546 if (IS_ERR(rdev))
8547 continue;
8548
8549 if (test_bit(Faulty, &rdev->flags)) {
8550 MD_BUG();
8551 continue;
8552 }
8553 set_bit(AutoDetected, &rdev->flags);
8554 list_add(&rdev->same_set, &pending_raid_disks);
8555 i_passed++;
8556 }
8557
8558 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8559 i_scanned, i_passed);
8560
8561 autorun_devices(part);
8562}
8563
8564#endif
8565
8566static __exit void md_exit(void)
8567{
8568 struct mddev *mddev;
8569 struct list_head *tmp;
8570
8571 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8572 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8573
8574 unregister_blkdev(MD_MAJOR,"md");
8575 unregister_blkdev(mdp_major, "mdp");
8576 unregister_reboot_notifier(&md_notifier);
8577 unregister_sysctl_table(raid_table_header);
8578 remove_proc_entry("mdstat", NULL);
8579 for_each_mddev(mddev, tmp) {
8580 export_array(mddev);
8581 mddev->hold_active = 0;
8582 }
8583 destroy_workqueue(md_misc_wq);
8584 destroy_workqueue(md_wq);
8585}
8586
8587subsys_initcall(md_init);
8588module_exit(md_exit)
8589
8590static int get_ro(char *buffer, struct kernel_param *kp)
8591{
8592 return sprintf(buffer, "%d", start_readonly);
8593}
8594static int set_ro(const char *val, struct kernel_param *kp)
8595{
8596 char *e;
8597 int num = simple_strtoul(val, &e, 10);
8598 if (*val && (*e == '\0' || *e == '\n')) {
8599 start_readonly = num;
8600 return 0;
8601 }
8602 return -EINVAL;
8603}
8604
8605module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8606module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8607
8608module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8609
8610EXPORT_SYMBOL(register_md_personality);
8611EXPORT_SYMBOL(unregister_md_personality);
8612EXPORT_SYMBOL(md_error);
8613EXPORT_SYMBOL(md_done_sync);
8614EXPORT_SYMBOL(md_write_start);
8615EXPORT_SYMBOL(md_write_end);
8616EXPORT_SYMBOL(md_register_thread);
8617EXPORT_SYMBOL(md_unregister_thread);
8618EXPORT_SYMBOL(md_wakeup_thread);
8619EXPORT_SYMBOL(md_check_recovery);
8620MODULE_LICENSE("GPL");
8621MODULE_DESCRIPTION("MD RAID framework");
8622MODULE_ALIAS("md");
8623MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8624