1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
66static LIST_HEAD(pers_list);
67static DEFINE_SPINLOCK(pers_lock);
68
69static void md_print_devices(void);
70
71static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq;
74
75#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
76
77
78
79
80
81
82#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
83
84
85
86
87
88
89
90
91
92
93
94
95
96static int sysctl_speed_limit_min = 1000;
97static int sysctl_speed_limit_max = 200000;
98static inline int speed_min(struct mddev *mddev)
99{
100 return mddev->sync_speed_min ?
101 mddev->sync_speed_min : sysctl_speed_limit_min;
102}
103
104static inline int speed_max(struct mddev *mddev)
105{
106 return mddev->sync_speed_max ?
107 mddev->sync_speed_max : sysctl_speed_limit_max;
108}
109
110static struct ctl_table_header *raid_table_header;
111
112static ctl_table raid_table[] = {
113 {
114 .procname = "speed_limit_min",
115 .data = &sysctl_speed_limit_min,
116 .maxlen = sizeof(int),
117 .mode = S_IRUGO|S_IWUSR,
118 .proc_handler = proc_dointvec,
119 },
120 {
121 .procname = "speed_limit_max",
122 .data = &sysctl_speed_limit_max,
123 .maxlen = sizeof(int),
124 .mode = S_IRUGO|S_IWUSR,
125 .proc_handler = proc_dointvec,
126 },
127 { }
128};
129
130static ctl_table raid_dir_table[] = {
131 {
132 .procname = "raid",
133 .maxlen = 0,
134 .mode = S_IRUGO|S_IXUGO,
135 .child = raid_table,
136 },
137 { }
138};
139
140static ctl_table raid_root_table[] = {
141 {
142 .procname = "dev",
143 .maxlen = 0,
144 .mode = 0555,
145 .child = raid_dir_table,
146 },
147 { }
148};
149
150static const struct block_device_operations md_fops;
151
152static int start_readonly;
153
154
155
156
157
158struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
159 struct mddev *mddev)
160{
161 struct bio *b;
162
163 if (!mddev || !mddev->bio_set)
164 return bio_alloc(gfp_mask, nr_iovecs);
165
166 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
167 if (!b)
168 return NULL;
169 return b;
170}
171EXPORT_SYMBOL_GPL(bio_alloc_mddev);
172
173struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
174 struct mddev *mddev)
175{
176 if (!mddev || !mddev->bio_set)
177 return bio_clone(bio, gfp_mask);
178
179 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
180}
181EXPORT_SYMBOL_GPL(bio_clone_mddev);
182
183void md_trim_bio(struct bio *bio, int offset, int size)
184{
185
186
187
188
189 int i;
190 struct bio_vec *bvec;
191 int sofar = 0;
192
193 size <<= 9;
194 if (offset == 0 && size == bio->bi_size)
195 return;
196
197 bio->bi_sector += offset;
198 bio->bi_size = size;
199 offset <<= 9;
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201
202 while (bio->bi_idx < bio->bi_vcnt &&
203 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
204
205 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
206 bio->bi_idx++;
207 }
208 if (bio->bi_idx < bio->bi_vcnt) {
209 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
210 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
211 }
212
213 if (bio->bi_idx) {
214 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
215 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
216 bio->bi_vcnt -= bio->bi_idx;
217 bio->bi_idx = 0;
218 }
219
220 bio_for_each_segment(bvec, bio, i) {
221 if (sofar + bvec->bv_len > size)
222 bvec->bv_len = size - sofar;
223 if (bvec->bv_len == 0) {
224 bio->bi_vcnt = i;
225 break;
226 }
227 sofar += bvec->bv_len;
228 }
229}
230EXPORT_SYMBOL_GPL(md_trim_bio);
231
232
233
234
235
236
237
238
239
240
241
242static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
243static atomic_t md_event_count;
244void md_new_event(struct mddev *mddev)
245{
246 atomic_inc(&md_event_count);
247 wake_up(&md_event_waiters);
248}
249EXPORT_SYMBOL_GPL(md_new_event);
250
251
252
253
254static void md_new_event_inintr(struct mddev *mddev)
255{
256 atomic_inc(&md_event_count);
257 wake_up(&md_event_waiters);
258}
259
260
261
262
263
264static LIST_HEAD(all_mddevs);
265static DEFINE_SPINLOCK(all_mddevs_lock);
266
267
268
269
270
271
272
273
274
275#define for_each_mddev(_mddev,_tmp) \
276 \
277 for (({ spin_lock(&all_mddevs_lock); \
278 _tmp = all_mddevs.next; \
279 _mddev = NULL;}); \
280 ({ if (_tmp != &all_mddevs) \
281 mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
282 spin_unlock(&all_mddevs_lock); \
283 if (_mddev) mddev_put(_mddev); \
284 _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
285 _tmp != &all_mddevs;}); \
286 ({ spin_lock(&all_mddevs_lock); \
287 _tmp = _tmp->next;}) \
288 )
289
290
291
292
293
294
295
296
297
298static void md_make_request(struct request_queue *q, struct bio *bio)
299{
300 const int rw = bio_data_dir(bio);
301 struct mddev *mddev = q->queuedata;
302 int cpu;
303 unsigned int sectors;
304
305 if (mddev == NULL || mddev->pers == NULL
306 || !mddev->ready) {
307 bio_io_error(bio);
308 return;
309 }
310 smp_rmb();
311 rcu_read_lock();
312 if (mddev->suspended) {
313 DEFINE_WAIT(__wait);
314 for (;;) {
315 prepare_to_wait(&mddev->sb_wait, &__wait,
316 TASK_UNINTERRUPTIBLE);
317 if (!mddev->suspended)
318 break;
319 rcu_read_unlock();
320 schedule();
321 rcu_read_lock();
322 }
323 finish_wait(&mddev->sb_wait, &__wait);
324 }
325 atomic_inc(&mddev->active_io);
326 rcu_read_unlock();
327
328
329
330
331
332 sectors = bio_sectors(bio);
333 mddev->pers->make_request(mddev, bio);
334
335 cpu = part_stat_lock();
336 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
337 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
338 part_stat_unlock();
339
340 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
341 wake_up(&mddev->sb_wait);
342}
343
344
345
346
347
348
349
350void mddev_suspend(struct mddev *mddev)
351{
352 BUG_ON(mddev->suspended);
353 mddev->suspended = 1;
354 synchronize_rcu();
355 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
356 mddev->pers->quiesce(mddev, 1);
357
358 del_timer_sync(&mddev->safemode_timer);
359}
360EXPORT_SYMBOL_GPL(mddev_suspend);
361
362void mddev_resume(struct mddev *mddev)
363{
364 mddev->suspended = 0;
365 wake_up(&mddev->sb_wait);
366 mddev->pers->quiesce(mddev, 0);
367
368 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
369 md_wakeup_thread(mddev->thread);
370 md_wakeup_thread(mddev->sync_thread);
371}
372EXPORT_SYMBOL_GPL(mddev_resume);
373
374int mddev_congested(struct mddev *mddev, int bits)
375{
376 return mddev->suspended;
377}
378EXPORT_SYMBOL(mddev_congested);
379
380
381
382
383
384static void md_end_flush(struct bio *bio, int err)
385{
386 struct md_rdev *rdev = bio->bi_private;
387 struct mddev *mddev = rdev->mddev;
388
389 rdev_dec_pending(rdev, mddev);
390
391 if (atomic_dec_and_test(&mddev->flush_pending)) {
392
393 queue_work(md_wq, &mddev->flush_work);
394 }
395 bio_put(bio);
396}
397
398static void md_submit_flush_data(struct work_struct *ws);
399
400static void submit_flushes(struct work_struct *ws)
401{
402 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
403 struct md_rdev *rdev;
404
405 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
406 atomic_set(&mddev->flush_pending, 1);
407 rcu_read_lock();
408 rdev_for_each_rcu(rdev, mddev)
409 if (rdev->raid_disk >= 0 &&
410 !test_bit(Faulty, &rdev->flags)) {
411
412
413
414
415 struct bio *bi;
416 atomic_inc(&rdev->nr_pending);
417 atomic_inc(&rdev->nr_pending);
418 rcu_read_unlock();
419 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
420 bi->bi_end_io = md_end_flush;
421 bi->bi_private = rdev;
422 bi->bi_bdev = rdev->bdev;
423 atomic_inc(&mddev->flush_pending);
424 submit_bio(WRITE_FLUSH, bi);
425 rcu_read_lock();
426 rdev_dec_pending(rdev, mddev);
427 }
428 rcu_read_unlock();
429 if (atomic_dec_and_test(&mddev->flush_pending))
430 queue_work(md_wq, &mddev->flush_work);
431}
432
433static void md_submit_flush_data(struct work_struct *ws)
434{
435 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
436 struct bio *bio = mddev->flush_bio;
437
438 if (bio->bi_size == 0)
439
440 bio_endio(bio, 0);
441 else {
442 bio->bi_rw &= ~REQ_FLUSH;
443 mddev->pers->make_request(mddev, bio);
444 }
445
446 mddev->flush_bio = NULL;
447 wake_up(&mddev->sb_wait);
448}
449
450void md_flush_request(struct mddev *mddev, struct bio *bio)
451{
452 spin_lock_irq(&mddev->write_lock);
453 wait_event_lock_irq(mddev->sb_wait,
454 !mddev->flush_bio,
455 mddev->write_lock);
456 mddev->flush_bio = bio;
457 spin_unlock_irq(&mddev->write_lock);
458
459 INIT_WORK(&mddev->flush_work, submit_flushes);
460 queue_work(md_wq, &mddev->flush_work);
461}
462EXPORT_SYMBOL(md_flush_request);
463
464void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
465{
466 struct mddev *mddev = cb->data;
467 md_wakeup_thread(mddev->thread);
468 kfree(cb);
469}
470EXPORT_SYMBOL(md_unplug);
471
472static inline struct mddev *mddev_get(struct mddev *mddev)
473{
474 atomic_inc(&mddev->active);
475 return mddev;
476}
477
478static void mddev_delayed_delete(struct work_struct *ws);
479
480static void mddev_put(struct mddev *mddev)
481{
482 struct bio_set *bs = NULL;
483
484 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
485 return;
486 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
487 mddev->ctime == 0 && !mddev->hold_active) {
488
489
490 list_del_init(&mddev->all_mddevs);
491 bs = mddev->bio_set;
492 mddev->bio_set = NULL;
493 if (mddev->gendisk) {
494
495
496
497
498
499 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
500 queue_work(md_misc_wq, &mddev->del_work);
501 } else
502 kfree(mddev);
503 }
504 spin_unlock(&all_mddevs_lock);
505 if (bs)
506 bioset_free(bs);
507}
508
509void mddev_init(struct mddev *mddev)
510{
511 mutex_init(&mddev->open_mutex);
512 mutex_init(&mddev->reconfig_mutex);
513 mutex_init(&mddev->bitmap_info.mutex);
514 INIT_LIST_HEAD(&mddev->disks);
515 INIT_LIST_HEAD(&mddev->all_mddevs);
516 init_timer(&mddev->safemode_timer);
517 atomic_set(&mddev->active, 1);
518 atomic_set(&mddev->openers, 0);
519 atomic_set(&mddev->active_io, 0);
520 spin_lock_init(&mddev->write_lock);
521 atomic_set(&mddev->flush_pending, 0);
522 init_waitqueue_head(&mddev->sb_wait);
523 init_waitqueue_head(&mddev->recovery_wait);
524 mddev->reshape_position = MaxSector;
525 mddev->reshape_backwards = 0;
526 mddev->resync_min = 0;
527 mddev->resync_max = MaxSector;
528 mddev->level = LEVEL_NONE;
529}
530EXPORT_SYMBOL_GPL(mddev_init);
531
532static struct mddev * mddev_find(dev_t unit)
533{
534 struct mddev *mddev, *new = NULL;
535
536 if (unit && MAJOR(unit) != MD_MAJOR)
537 unit &= ~((1<<MdpMinorShift)-1);
538
539 retry:
540 spin_lock(&all_mddevs_lock);
541
542 if (unit) {
543 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
544 if (mddev->unit == unit) {
545 mddev_get(mddev);
546 spin_unlock(&all_mddevs_lock);
547 kfree(new);
548 return mddev;
549 }
550
551 if (new) {
552 list_add(&new->all_mddevs, &all_mddevs);
553 spin_unlock(&all_mddevs_lock);
554 new->hold_active = UNTIL_IOCTL;
555 return new;
556 }
557 } else if (new) {
558
559 static int next_minor = 512;
560 int start = next_minor;
561 int is_free = 0;
562 int dev = 0;
563 while (!is_free) {
564 dev = MKDEV(MD_MAJOR, next_minor);
565 next_minor++;
566 if (next_minor > MINORMASK)
567 next_minor = 0;
568 if (next_minor == start) {
569
570 spin_unlock(&all_mddevs_lock);
571 kfree(new);
572 return NULL;
573 }
574
575 is_free = 1;
576 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
577 if (mddev->unit == dev) {
578 is_free = 0;
579 break;
580 }
581 }
582 new->unit = dev;
583 new->md_minor = MINOR(dev);
584 new->hold_active = UNTIL_STOP;
585 list_add(&new->all_mddevs, &all_mddevs);
586 spin_unlock(&all_mddevs_lock);
587 return new;
588 }
589 spin_unlock(&all_mddevs_lock);
590
591 new = kzalloc(sizeof(*new), GFP_KERNEL);
592 if (!new)
593 return NULL;
594
595 new->unit = unit;
596 if (MAJOR(unit) == MD_MAJOR)
597 new->md_minor = MINOR(unit);
598 else
599 new->md_minor = MINOR(unit) >> MdpMinorShift;
600
601 mddev_init(new);
602
603 goto retry;
604}
605
606static inline int mddev_lock(struct mddev * mddev)
607{
608 return mutex_lock_interruptible(&mddev->reconfig_mutex);
609}
610
611static inline int mddev_is_locked(struct mddev *mddev)
612{
613 return mutex_is_locked(&mddev->reconfig_mutex);
614}
615
616static inline int mddev_trylock(struct mddev * mddev)
617{
618 return mutex_trylock(&mddev->reconfig_mutex);
619}
620
621static struct attribute_group md_redundancy_group;
622
623static void mddev_unlock(struct mddev * mddev)
624{
625 if (mddev->to_remove) {
626
627
628
629
630
631
632
633
634
635
636
637
638 struct attribute_group *to_remove = mddev->to_remove;
639 mddev->to_remove = NULL;
640 mddev->sysfs_active = 1;
641 mutex_unlock(&mddev->reconfig_mutex);
642
643 if (mddev->kobj.sd) {
644 if (to_remove != &md_redundancy_group)
645 sysfs_remove_group(&mddev->kobj, to_remove);
646 if (mddev->pers == NULL ||
647 mddev->pers->sync_request == NULL) {
648 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
649 if (mddev->sysfs_action)
650 sysfs_put(mddev->sysfs_action);
651 mddev->sysfs_action = NULL;
652 }
653 }
654 mddev->sysfs_active = 0;
655 } else
656 mutex_unlock(&mddev->reconfig_mutex);
657
658
659
660
661 spin_lock(&pers_lock);
662 md_wakeup_thread(mddev->thread);
663 spin_unlock(&pers_lock);
664}
665
666static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
667{
668 struct md_rdev *rdev;
669
670 rdev_for_each(rdev, mddev)
671 if (rdev->desc_nr == nr)
672 return rdev;
673
674 return NULL;
675}
676
677static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
678{
679 struct md_rdev *rdev;
680
681 rdev_for_each_rcu(rdev, mddev)
682 if (rdev->desc_nr == nr)
683 return rdev;
684
685 return NULL;
686}
687
688static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
689{
690 struct md_rdev *rdev;
691
692 rdev_for_each(rdev, mddev)
693 if (rdev->bdev->bd_dev == dev)
694 return rdev;
695
696 return NULL;
697}
698
699static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
700{
701 struct md_rdev *rdev;
702
703 rdev_for_each_rcu(rdev, mddev)
704 if (rdev->bdev->bd_dev == dev)
705 return rdev;
706
707 return NULL;
708}
709
710static struct md_personality *find_pers(int level, char *clevel)
711{
712 struct md_personality *pers;
713 list_for_each_entry(pers, &pers_list, list) {
714 if (level != LEVEL_NONE && pers->level == level)
715 return pers;
716 if (strcmp(pers->name, clevel)==0)
717 return pers;
718 }
719 return NULL;
720}
721
722
723static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
724{
725 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
726 return MD_NEW_SIZE_SECTORS(num_sectors);
727}
728
729static int alloc_disk_sb(struct md_rdev * rdev)
730{
731 if (rdev->sb_page)
732 MD_BUG();
733
734 rdev->sb_page = alloc_page(GFP_KERNEL);
735 if (!rdev->sb_page) {
736 printk(KERN_ALERT "md: out of memory.\n");
737 return -ENOMEM;
738 }
739
740 return 0;
741}
742
743void md_rdev_clear(struct md_rdev *rdev)
744{
745 if (rdev->sb_page) {
746 put_page(rdev->sb_page);
747 rdev->sb_loaded = 0;
748 rdev->sb_page = NULL;
749 rdev->sb_start = 0;
750 rdev->sectors = 0;
751 }
752 if (rdev->bb_page) {
753 put_page(rdev->bb_page);
754 rdev->bb_page = NULL;
755 }
756 kfree(rdev->badblocks.page);
757 rdev->badblocks.page = NULL;
758}
759EXPORT_SYMBOL_GPL(md_rdev_clear);
760
761static void super_written(struct bio *bio, int error)
762{
763 struct md_rdev *rdev = bio->bi_private;
764 struct mddev *mddev = rdev->mddev;
765
766 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
767 printk("md: super_written gets error=%d, uptodate=%d\n",
768 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
769 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
770 md_error(mddev, rdev);
771 }
772
773 if (atomic_dec_and_test(&mddev->pending_writes))
774 wake_up(&mddev->sb_wait);
775 bio_put(bio);
776}
777
778void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
779 sector_t sector, int size, struct page *page)
780{
781
782
783
784
785
786
787 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
788
789 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
790 bio->bi_sector = sector;
791 bio_add_page(bio, page, size, 0);
792 bio->bi_private = rdev;
793 bio->bi_end_io = super_written;
794
795 atomic_inc(&mddev->pending_writes);
796 submit_bio(WRITE_FLUSH_FUA, bio);
797}
798
799void md_super_wait(struct mddev *mddev)
800{
801
802 DEFINE_WAIT(wq);
803 for(;;) {
804 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
805 if (atomic_read(&mddev->pending_writes)==0)
806 break;
807 schedule();
808 }
809 finish_wait(&mddev->sb_wait, &wq);
810}
811
812static void bi_complete(struct bio *bio, int error)
813{
814 complete((struct completion*)bio->bi_private);
815}
816
817int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
818 struct page *page, int rw, bool metadata_op)
819{
820 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
821 struct completion event;
822 int ret;
823
824 rw |= REQ_SYNC;
825
826 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
827 rdev->meta_bdev : rdev->bdev;
828 if (metadata_op)
829 bio->bi_sector = sector + rdev->sb_start;
830 else if (rdev->mddev->reshape_position != MaxSector &&
831 (rdev->mddev->reshape_backwards ==
832 (sector >= rdev->mddev->reshape_position)))
833 bio->bi_sector = sector + rdev->new_data_offset;
834 else
835 bio->bi_sector = sector + rdev->data_offset;
836 bio_add_page(bio, page, size, 0);
837 init_completion(&event);
838 bio->bi_private = &event;
839 bio->bi_end_io = bi_complete;
840 submit_bio(rw, bio);
841 wait_for_completion(&event);
842
843 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
844 bio_put(bio);
845 return ret;
846}
847EXPORT_SYMBOL_GPL(sync_page_io);
848
849static int read_disk_sb(struct md_rdev * rdev, int size)
850{
851 char b[BDEVNAME_SIZE];
852 if (!rdev->sb_page) {
853 MD_BUG();
854 return -EINVAL;
855 }
856 if (rdev->sb_loaded)
857 return 0;
858
859
860 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
861 goto fail;
862 rdev->sb_loaded = 1;
863 return 0;
864
865fail:
866 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
867 bdevname(rdev->bdev,b));
868 return -EINVAL;
869}
870
871static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
872{
873 return sb1->set_uuid0 == sb2->set_uuid0 &&
874 sb1->set_uuid1 == sb2->set_uuid1 &&
875 sb1->set_uuid2 == sb2->set_uuid2 &&
876 sb1->set_uuid3 == sb2->set_uuid3;
877}
878
879static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
880{
881 int ret;
882 mdp_super_t *tmp1, *tmp2;
883
884 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
885 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
886
887 if (!tmp1 || !tmp2) {
888 ret = 0;
889 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
890 goto abort;
891 }
892
893 *tmp1 = *sb1;
894 *tmp2 = *sb2;
895
896
897
898
899 tmp1->nr_disks = 0;
900 tmp2->nr_disks = 0;
901
902 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
903abort:
904 kfree(tmp1);
905 kfree(tmp2);
906 return ret;
907}
908
909
910static u32 md_csum_fold(u32 csum)
911{
912 csum = (csum & 0xffff) + (csum >> 16);
913 return (csum & 0xffff) + (csum >> 16);
914}
915
916static unsigned int calc_sb_csum(mdp_super_t * sb)
917{
918 u64 newcsum = 0;
919 u32 *sb32 = (u32*)sb;
920 int i;
921 unsigned int disk_csum, csum;
922
923 disk_csum = sb->sb_csum;
924 sb->sb_csum = 0;
925
926 for (i = 0; i < MD_SB_BYTES/4 ; i++)
927 newcsum += sb32[i];
928 csum = (newcsum & 0xffffffff) + (newcsum>>32);
929
930
931#ifdef CONFIG_ALPHA
932
933
934
935
936
937
938
939
940 sb->sb_csum = md_csum_fold(disk_csum);
941#else
942 sb->sb_csum = disk_csum;
943#endif
944 return csum;
945}
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978struct super_type {
979 char *name;
980 struct module *owner;
981 int (*load_super)(struct md_rdev *rdev,
982 struct md_rdev *refdev,
983 int minor_version);
984 int (*validate_super)(struct mddev *mddev,
985 struct md_rdev *rdev);
986 void (*sync_super)(struct mddev *mddev,
987 struct md_rdev *rdev);
988 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
989 sector_t num_sectors);
990 int (*allow_new_offset)(struct md_rdev *rdev,
991 unsigned long long new_offset);
992};
993
994
995
996
997
998
999
1000
1001
1002int md_check_no_bitmap(struct mddev *mddev)
1003{
1004 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1005 return 0;
1006 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1007 mdname(mddev), mddev->pers->name);
1008 return 1;
1009}
1010EXPORT_SYMBOL(md_check_no_bitmap);
1011
1012
1013
1014
1015static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1016{
1017 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1018 mdp_super_t *sb;
1019 int ret;
1020
1021
1022
1023
1024
1025
1026
1027 rdev->sb_start = calc_dev_sboffset(rdev);
1028
1029 ret = read_disk_sb(rdev, MD_SB_BYTES);
1030 if (ret) return ret;
1031
1032 ret = -EINVAL;
1033
1034 bdevname(rdev->bdev, b);
1035 sb = page_address(rdev->sb_page);
1036
1037 if (sb->md_magic != MD_SB_MAGIC) {
1038 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1039 b);
1040 goto abort;
1041 }
1042
1043 if (sb->major_version != 0 ||
1044 sb->minor_version < 90 ||
1045 sb->minor_version > 91) {
1046 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1047 sb->major_version, sb->minor_version,
1048 b);
1049 goto abort;
1050 }
1051
1052 if (sb->raid_disks <= 0)
1053 goto abort;
1054
1055 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1056 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1057 b);
1058 goto abort;
1059 }
1060
1061 rdev->preferred_minor = sb->md_minor;
1062 rdev->data_offset = 0;
1063 rdev->new_data_offset = 0;
1064 rdev->sb_size = MD_SB_BYTES;
1065 rdev->badblocks.shift = -1;
1066
1067 if (sb->level == LEVEL_MULTIPATH)
1068 rdev->desc_nr = -1;
1069 else
1070 rdev->desc_nr = sb->this_disk.number;
1071
1072 if (!refdev) {
1073 ret = 1;
1074 } else {
1075 __u64 ev1, ev2;
1076 mdp_super_t *refsb = page_address(refdev->sb_page);
1077 if (!uuid_equal(refsb, sb)) {
1078 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1079 b, bdevname(refdev->bdev,b2));
1080 goto abort;
1081 }
1082 if (!sb_equal(refsb, sb)) {
1083 printk(KERN_WARNING "md: %s has same UUID"
1084 " but different superblock to %s\n",
1085 b, bdevname(refdev->bdev, b2));
1086 goto abort;
1087 }
1088 ev1 = md_event(sb);
1089 ev2 = md_event(refsb);
1090 if (ev1 > ev2)
1091 ret = 1;
1092 else
1093 ret = 0;
1094 }
1095 rdev->sectors = rdev->sb_start;
1096
1097
1098
1099
1100 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1101 rdev->sectors = (2ULL << 32) - 2;
1102
1103 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1104
1105 ret = -EINVAL;
1106
1107 abort:
1108 return ret;
1109}
1110
1111
1112
1113
1114static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1115{
1116 mdp_disk_t *desc;
1117 mdp_super_t *sb = page_address(rdev->sb_page);
1118 __u64 ev1 = md_event(sb);
1119
1120 rdev->raid_disk = -1;
1121 clear_bit(Faulty, &rdev->flags);
1122 clear_bit(In_sync, &rdev->flags);
1123 clear_bit(WriteMostly, &rdev->flags);
1124
1125 if (mddev->raid_disks == 0) {
1126 mddev->major_version = 0;
1127 mddev->minor_version = sb->minor_version;
1128 mddev->patch_version = sb->patch_version;
1129 mddev->external = 0;
1130 mddev->chunk_sectors = sb->chunk_size >> 9;
1131 mddev->ctime = sb->ctime;
1132 mddev->utime = sb->utime;
1133 mddev->level = sb->level;
1134 mddev->clevel[0] = 0;
1135 mddev->layout = sb->layout;
1136 mddev->raid_disks = sb->raid_disks;
1137 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1138 mddev->events = ev1;
1139 mddev->bitmap_info.offset = 0;
1140 mddev->bitmap_info.space = 0;
1141
1142 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1143 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1144 mddev->reshape_backwards = 0;
1145
1146 if (mddev->minor_version >= 91) {
1147 mddev->reshape_position = sb->reshape_position;
1148 mddev->delta_disks = sb->delta_disks;
1149 mddev->new_level = sb->new_level;
1150 mddev->new_layout = sb->new_layout;
1151 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1152 if (mddev->delta_disks < 0)
1153 mddev->reshape_backwards = 1;
1154 } else {
1155 mddev->reshape_position = MaxSector;
1156 mddev->delta_disks = 0;
1157 mddev->new_level = mddev->level;
1158 mddev->new_layout = mddev->layout;
1159 mddev->new_chunk_sectors = mddev->chunk_sectors;
1160 }
1161
1162 if (sb->state & (1<<MD_SB_CLEAN))
1163 mddev->recovery_cp = MaxSector;
1164 else {
1165 if (sb->events_hi == sb->cp_events_hi &&
1166 sb->events_lo == sb->cp_events_lo) {
1167 mddev->recovery_cp = sb->recovery_cp;
1168 } else
1169 mddev->recovery_cp = 0;
1170 }
1171
1172 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1173 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1174 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1175 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1176
1177 mddev->max_disks = MD_SB_DISKS;
1178
1179 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1180 mddev->bitmap_info.file == NULL) {
1181 mddev->bitmap_info.offset =
1182 mddev->bitmap_info.default_offset;
1183 mddev->bitmap_info.space =
1184 mddev->bitmap_info.space;
1185 }
1186
1187 } else if (mddev->pers == NULL) {
1188
1189
1190 ++ev1;
1191 if (sb->disks[rdev->desc_nr].state & (
1192 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1193 if (ev1 < mddev->events)
1194 return -EINVAL;
1195 } else if (mddev->bitmap) {
1196
1197
1198
1199 if (ev1 < mddev->bitmap->events_cleared)
1200 return 0;
1201 } else {
1202 if (ev1 < mddev->events)
1203
1204 return 0;
1205 }
1206
1207 if (mddev->level != LEVEL_MULTIPATH) {
1208 desc = sb->disks + rdev->desc_nr;
1209
1210 if (desc->state & (1<<MD_DISK_FAULTY))
1211 set_bit(Faulty, &rdev->flags);
1212 else if (desc->state & (1<<MD_DISK_SYNC)
1213) {
1214 set_bit(In_sync, &rdev->flags);
1215 rdev->raid_disk = desc->raid_disk;
1216 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1217
1218
1219
1220 if (mddev->minor_version >= 91) {
1221 rdev->recovery_offset = 0;
1222 rdev->raid_disk = desc->raid_disk;
1223 }
1224 }
1225 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1226 set_bit(WriteMostly, &rdev->flags);
1227 } else
1228 set_bit(In_sync, &rdev->flags);
1229 return 0;
1230}
1231
1232
1233
1234
1235static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1236{
1237 mdp_super_t *sb;
1238 struct md_rdev *rdev2;
1239 int next_spare = mddev->raid_disks;
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252 int i;
1253 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1254
1255 rdev->sb_size = MD_SB_BYTES;
1256
1257 sb = page_address(rdev->sb_page);
1258
1259 memset(sb, 0, sizeof(*sb));
1260
1261 sb->md_magic = MD_SB_MAGIC;
1262 sb->major_version = mddev->major_version;
1263 sb->patch_version = mddev->patch_version;
1264 sb->gvalid_words = 0;
1265 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1266 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1267 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1268 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1269
1270 sb->ctime = mddev->ctime;
1271 sb->level = mddev->level;
1272 sb->size = mddev->dev_sectors / 2;
1273 sb->raid_disks = mddev->raid_disks;
1274 sb->md_minor = mddev->md_minor;
1275 sb->not_persistent = 0;
1276 sb->utime = mddev->utime;
1277 sb->state = 0;
1278 sb->events_hi = (mddev->events>>32);
1279 sb->events_lo = (u32)mddev->events;
1280
1281 if (mddev->reshape_position == MaxSector)
1282 sb->minor_version = 90;
1283 else {
1284 sb->minor_version = 91;
1285 sb->reshape_position = mddev->reshape_position;
1286 sb->new_level = mddev->new_level;
1287 sb->delta_disks = mddev->delta_disks;
1288 sb->new_layout = mddev->new_layout;
1289 sb->new_chunk = mddev->new_chunk_sectors << 9;
1290 }
1291 mddev->minor_version = sb->minor_version;
1292 if (mddev->in_sync)
1293 {
1294 sb->recovery_cp = mddev->recovery_cp;
1295 sb->cp_events_hi = (mddev->events>>32);
1296 sb->cp_events_lo = (u32)mddev->events;
1297 if (mddev->recovery_cp == MaxSector)
1298 sb->state = (1<< MD_SB_CLEAN);
1299 } else
1300 sb->recovery_cp = 0;
1301
1302 sb->layout = mddev->layout;
1303 sb->chunk_size = mddev->chunk_sectors << 9;
1304
1305 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1306 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1307
1308 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1309 rdev_for_each(rdev2, mddev) {
1310 mdp_disk_t *d;
1311 int desc_nr;
1312 int is_active = test_bit(In_sync, &rdev2->flags);
1313
1314 if (rdev2->raid_disk >= 0 &&
1315 sb->minor_version >= 91)
1316
1317
1318
1319
1320 is_active = 1;
1321 if (rdev2->raid_disk < 0 ||
1322 test_bit(Faulty, &rdev2->flags))
1323 is_active = 0;
1324 if (is_active)
1325 desc_nr = rdev2->raid_disk;
1326 else
1327 desc_nr = next_spare++;
1328 rdev2->desc_nr = desc_nr;
1329 d = &sb->disks[rdev2->desc_nr];
1330 nr_disks++;
1331 d->number = rdev2->desc_nr;
1332 d->major = MAJOR(rdev2->bdev->bd_dev);
1333 d->minor = MINOR(rdev2->bdev->bd_dev);
1334 if (is_active)
1335 d->raid_disk = rdev2->raid_disk;
1336 else
1337 d->raid_disk = rdev2->desc_nr;
1338 if (test_bit(Faulty, &rdev2->flags))
1339 d->state = (1<<MD_DISK_FAULTY);
1340 else if (is_active) {
1341 d->state = (1<<MD_DISK_ACTIVE);
1342 if (test_bit(In_sync, &rdev2->flags))
1343 d->state |= (1<<MD_DISK_SYNC);
1344 active++;
1345 working++;
1346 } else {
1347 d->state = 0;
1348 spare++;
1349 working++;
1350 }
1351 if (test_bit(WriteMostly, &rdev2->flags))
1352 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1353 }
1354
1355 for (i=0 ; i < mddev->raid_disks ; i++) {
1356 mdp_disk_t *d = &sb->disks[i];
1357 if (d->state == 0 && d->number == 0) {
1358 d->number = i;
1359 d->raid_disk = i;
1360 d->state = (1<<MD_DISK_REMOVED);
1361 d->state |= (1<<MD_DISK_FAULTY);
1362 failed++;
1363 }
1364 }
1365 sb->nr_disks = nr_disks;
1366 sb->active_disks = active;
1367 sb->working_disks = working;
1368 sb->failed_disks = failed;
1369 sb->spare_disks = spare;
1370
1371 sb->this_disk = sb->disks[rdev->desc_nr];
1372 sb->sb_csum = calc_sb_csum(sb);
1373}
1374
1375
1376
1377
1378static unsigned long long
1379super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1380{
1381 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1382 return 0;
1383 if (rdev->mddev->bitmap_info.offset)
1384 return 0;
1385 rdev->sb_start = calc_dev_sboffset(rdev);
1386 if (!num_sectors || num_sectors > rdev->sb_start)
1387 num_sectors = rdev->sb_start;
1388
1389
1390
1391 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1392 num_sectors = (2ULL << 32) - 2;
1393 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1394 rdev->sb_page);
1395 md_super_wait(rdev->mddev);
1396 return num_sectors;
1397}
1398
1399static int
1400super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
1401{
1402
1403 return new_offset == 0;
1404}
1405
1406
1407
1408
1409
1410static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1411{
1412 __le32 disk_csum;
1413 u32 csum;
1414 unsigned long long newcsum;
1415 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1416 __le32 *isuper = (__le32*)sb;
1417
1418 disk_csum = sb->sb_csum;
1419 sb->sb_csum = 0;
1420 newcsum = 0;
1421 for (; size >= 4; size -= 4)
1422 newcsum += le32_to_cpu(*isuper++);
1423
1424 if (size == 2)
1425 newcsum += le16_to_cpu(*(__le16*) isuper);
1426
1427 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1428 sb->sb_csum = disk_csum;
1429 return cpu_to_le32(csum);
1430}
1431
1432static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
1433 int acknowledged);
1434static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1435{
1436 struct mdp_superblock_1 *sb;
1437 int ret;
1438 sector_t sb_start;
1439 sector_t sectors;
1440 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1441 int bmask;
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451 switch(minor_version) {
1452 case 0:
1453 sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
1454 sb_start -= 8*2;
1455 sb_start &= ~(sector_t)(4*2-1);
1456 break;
1457 case 1:
1458 sb_start = 0;
1459 break;
1460 case 2:
1461 sb_start = 8;
1462 break;
1463 default:
1464 return -EINVAL;
1465 }
1466 rdev->sb_start = sb_start;
1467
1468
1469
1470
1471 ret = read_disk_sb(rdev, 4096);
1472 if (ret) return ret;
1473
1474
1475 sb = page_address(rdev->sb_page);
1476
1477 if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
1478 sb->major_version != cpu_to_le32(1) ||
1479 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
1480 le64_to_cpu(sb->super_offset) != rdev->sb_start ||
1481 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
1482 return -EINVAL;
1483
1484 if (calc_sb_1_csum(sb) != sb->sb_csum) {
1485 printk("md: invalid superblock checksum on %s\n",
1486 bdevname(rdev->bdev,b));
1487 return -EINVAL;
1488 }
1489 if (le64_to_cpu(sb->data_size) < 10) {
1490 printk("md: data_size too small on %s\n",
1491 bdevname(rdev->bdev,b));
1492 return -EINVAL;
1493 }
1494 if (sb->pad0 ||
1495 sb->pad3[0] ||
1496 memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
1497
1498 return -EINVAL;
1499
1500 rdev->preferred_minor = 0xffff;
1501 rdev->data_offset = le64_to_cpu(sb->data_offset);
1502 rdev->new_data_offset = rdev->data_offset;
1503 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
1504 (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
1505 rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
1506 atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
1507
1508 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
1509 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1510 if (rdev->sb_size & bmask)
1511 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1512
1513 if (minor_version
1514 && rdev->data_offset < sb_start + (rdev->sb_size/512))
1515 return -EINVAL;
1516 if (minor_version
1517 && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
1518 return -EINVAL;
1519
1520 if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
1521 rdev->desc_nr = -1;
1522 else
1523 rdev->desc_nr = le32_to_cpu(sb->dev_number);
1524
1525 if (!rdev->bb_page) {
1526 rdev->bb_page = alloc_page(GFP_KERNEL);
1527 if (!rdev->bb_page)
1528 return -ENOMEM;
1529 }
1530 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
1531 rdev->badblocks.count == 0) {
1532
1533
1534
1535 s32 offset;
1536 sector_t bb_sector;
1537 u64 *bbp;
1538 int i;
1539 int sectors = le16_to_cpu(sb->bblog_size);
1540 if (sectors > (PAGE_SIZE / 512))
1541 return -EINVAL;
1542 offset = le32_to_cpu(sb->bblog_offset);
1543 if (offset == 0)
1544 return -EINVAL;
1545 bb_sector = (long long)offset;
1546 if (!sync_page_io(rdev, bb_sector, sectors << 9,
1547 rdev->bb_page, READ, true))
1548 return -EIO;
1549 bbp = (u64 *)page_address(rdev->bb_page);
1550 rdev->badblocks.shift = sb->bblog_shift;
1551 for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
1552 u64 bb = le64_to_cpu(*bbp);
1553 int count = bb & (0x3ff);
1554 u64 sector = bb >> 10;
1555 sector <<= sb->bblog_shift;
1556 count <<= sb->bblog_shift;
1557 if (bb + 1 == 0)
1558 break;
1559 if (md_set_badblocks(&rdev->badblocks,
1560 sector, count, 1) == 0)
1561 return -EINVAL;
1562 }
1563 } else if (sb->bblog_offset == 0)
1564 rdev->badblocks.shift = -1;
1565
1566 if (!refdev) {
1567 ret = 1;
1568 } else {
1569 __u64 ev1, ev2;
1570 struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
1571
1572 if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
1573 sb->level != refsb->level ||
1574 sb->layout != refsb->layout ||
1575 sb->chunksize != refsb->chunksize) {
1576 printk(KERN_WARNING "md: %s has strangely different"
1577 " superblock to %s\n",
1578 bdevname(rdev->bdev,b),
1579 bdevname(refdev->bdev,b2));
1580 return -EINVAL;
1581 }
1582 ev1 = le64_to_cpu(sb->events);
1583 ev2 = le64_to_cpu(refsb->events);
1584
1585 if (ev1 > ev2)
1586 ret = 1;
1587 else
1588 ret = 0;
1589 }
1590 if (minor_version) {
1591 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
1592 sectors -= rdev->data_offset;
1593 } else
1594 sectors = rdev->sb_start;
1595 if (sectors < le64_to_cpu(sb->data_size))
1596 return -EINVAL;
1597 rdev->sectors = le64_to_cpu(sb->data_size);
1598 return ret;
1599}
1600
1601static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1602{
1603 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1604 __u64 ev1 = le64_to_cpu(sb->events);
1605
1606 rdev->raid_disk = -1;
1607 clear_bit(Faulty, &rdev->flags);
1608 clear_bit(In_sync, &rdev->flags);
1609 clear_bit(WriteMostly, &rdev->flags);
1610
1611 if (mddev->raid_disks == 0) {
1612 mddev->major_version = 1;
1613 mddev->patch_version = 0;
1614 mddev->external = 0;
1615 mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
1616 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1617 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
1618 mddev->level = le32_to_cpu(sb->level);
1619 mddev->clevel[0] = 0;
1620 mddev->layout = le32_to_cpu(sb->layout);
1621 mddev->raid_disks = le32_to_cpu(sb->raid_disks);
1622 mddev->dev_sectors = le64_to_cpu(sb->size);
1623 mddev->events = ev1;
1624 mddev->bitmap_info.offset = 0;
1625 mddev->bitmap_info.space = 0;
1626
1627
1628
1629 mddev->bitmap_info.default_offset = 1024 >> 9;
1630 mddev->bitmap_info.default_space = (4096-1024) >> 9;
1631 mddev->reshape_backwards = 0;
1632
1633 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
1634 memcpy(mddev->uuid, sb->set_uuid, 16);
1635
1636 mddev->max_disks = (4096-256)/2;
1637
1638 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
1639 mddev->bitmap_info.file == NULL) {
1640 mddev->bitmap_info.offset =
1641 (__s32)le32_to_cpu(sb->bitmap_offset);
1642
1643
1644
1645
1646
1647 if (mddev->minor_version > 0)
1648 mddev->bitmap_info.space = 0;
1649 else if (mddev->bitmap_info.offset > 0)
1650 mddev->bitmap_info.space =
1651 8 - mddev->bitmap_info.offset;
1652 else
1653 mddev->bitmap_info.space =
1654 -mddev->bitmap_info.offset;
1655 }
1656
1657 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
1658 mddev->reshape_position = le64_to_cpu(sb->reshape_position);
1659 mddev->delta_disks = le32_to_cpu(sb->delta_disks);
1660 mddev->new_level = le32_to_cpu(sb->new_level);
1661 mddev->new_layout = le32_to_cpu(sb->new_layout);
1662 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
1663 if (mddev->delta_disks < 0 ||
1664 (mddev->delta_disks == 0 &&
1665 (le32_to_cpu(sb->feature_map)
1666 & MD_FEATURE_RESHAPE_BACKWARDS)))
1667 mddev->reshape_backwards = 1;
1668 } else {
1669 mddev->reshape_position = MaxSector;
1670 mddev->delta_disks = 0;
1671 mddev->new_level = mddev->level;
1672 mddev->new_layout = mddev->layout;
1673 mddev->new_chunk_sectors = mddev->chunk_sectors;
1674 }
1675
1676 } else if (mddev->pers == NULL) {
1677
1678
1679 ++ev1;
1680 if (rdev->desc_nr >= 0 &&
1681 rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
1682 le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
1683 if (ev1 < mddev->events)
1684 return -EINVAL;
1685 } else if (mddev->bitmap) {
1686
1687
1688
1689 if (ev1 < mddev->bitmap->events_cleared)
1690 return 0;
1691 } else {
1692 if (ev1 < mddev->events)
1693
1694 return 0;
1695 }
1696 if (mddev->level != LEVEL_MULTIPATH) {
1697 int role;
1698 if (rdev->desc_nr < 0 ||
1699 rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
1700 role = 0xffff;
1701 rdev->desc_nr = -1;
1702 } else
1703 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
1704 switch(role) {
1705 case 0xffff:
1706 break;
1707 case 0xfffe:
1708 set_bit(Faulty, &rdev->flags);
1709 break;
1710 default:
1711 if ((le32_to_cpu(sb->feature_map) &
1712 MD_FEATURE_RECOVERY_OFFSET))
1713 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1714 else
1715 set_bit(In_sync, &rdev->flags);
1716 rdev->raid_disk = role;
1717 break;
1718 }
1719 if (sb->devflags & WriteMostly1)
1720 set_bit(WriteMostly, &rdev->flags);
1721 if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
1722 set_bit(Replacement, &rdev->flags);
1723 } else
1724 set_bit(In_sync, &rdev->flags);
1725
1726 return 0;
1727}
1728
1729static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1730{
1731 struct mdp_superblock_1 *sb;
1732 struct md_rdev *rdev2;
1733 int max_dev, i;
1734
1735
1736 sb = page_address(rdev->sb_page);
1737
1738 sb->feature_map = 0;
1739 sb->pad0 = 0;
1740 sb->recovery_offset = cpu_to_le64(0);
1741 memset(sb->pad3, 0, sizeof(sb->pad3));
1742
1743 sb->utime = cpu_to_le64((__u64)mddev->utime);
1744 sb->events = cpu_to_le64(mddev->events);
1745 if (mddev->in_sync)
1746 sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
1747 else
1748 sb->resync_offset = cpu_to_le64(0);
1749
1750 sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
1751
1752 sb->raid_disks = cpu_to_le32(mddev->raid_disks);
1753 sb->size = cpu_to_le64(mddev->dev_sectors);
1754 sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
1755 sb->level = cpu_to_le32(mddev->level);
1756 sb->layout = cpu_to_le32(mddev->layout);
1757
1758 if (test_bit(WriteMostly, &rdev->flags))
1759 sb->devflags |= WriteMostly1;
1760 else
1761 sb->devflags &= ~WriteMostly1;
1762 sb->data_offset = cpu_to_le64(rdev->data_offset);
1763 sb->data_size = cpu_to_le64(rdev->sectors);
1764
1765 if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
1766 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
1767 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1768 }
1769
1770 if (rdev->raid_disk >= 0 &&
1771 !test_bit(In_sync, &rdev->flags)) {
1772 sb->feature_map |=
1773 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1774 sb->recovery_offset =
1775 cpu_to_le64(rdev->recovery_offset);
1776 }
1777 if (test_bit(Replacement, &rdev->flags))
1778 sb->feature_map |=
1779 cpu_to_le32(MD_FEATURE_REPLACEMENT);
1780
1781 if (mddev->reshape_position != MaxSector) {
1782 sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
1783 sb->reshape_position = cpu_to_le64(mddev->reshape_position);
1784 sb->new_layout = cpu_to_le32(mddev->new_layout);
1785 sb->delta_disks = cpu_to_le32(mddev->delta_disks);
1786 sb->new_level = cpu_to_le32(mddev->new_level);
1787 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
1788 if (mddev->delta_disks == 0 &&
1789 mddev->reshape_backwards)
1790 sb->feature_map
1791 |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
1792 if (rdev->new_data_offset != rdev->data_offset) {
1793 sb->feature_map
1794 |= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
1795 sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
1796 - rdev->data_offset));
1797 }
1798 }
1799
1800 if (rdev->badblocks.count == 0)
1801 ;
1802 else if (sb->bblog_offset == 0)
1803
1804 md_error(mddev, rdev);
1805 else {
1806 struct badblocks *bb = &rdev->badblocks;
1807 u64 *bbp = (u64 *)page_address(rdev->bb_page);
1808 u64 *p = bb->page;
1809 sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
1810 if (bb->changed) {
1811 unsigned seq;
1812
1813retry:
1814 seq = read_seqbegin(&bb->lock);
1815
1816 memset(bbp, 0xff, PAGE_SIZE);
1817
1818 for (i = 0 ; i < bb->count ; i++) {
1819 u64 internal_bb = p[i];
1820 u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
1821 | BB_LEN(internal_bb));
1822 bbp[i] = cpu_to_le64(store_bb);
1823 }
1824 bb->changed = 0;
1825 if (read_seqretry(&bb->lock, seq))
1826 goto retry;
1827
1828 bb->sector = (rdev->sb_start +
1829 (int)le32_to_cpu(sb->bblog_offset));
1830 bb->size = le16_to_cpu(sb->bblog_size);
1831 }
1832 }
1833
1834 max_dev = 0;
1835 rdev_for_each(rdev2, mddev)
1836 if (rdev2->desc_nr+1 > max_dev)
1837 max_dev = rdev2->desc_nr+1;
1838
1839 if (max_dev > le32_to_cpu(sb->max_dev)) {
1840 int bmask;
1841 sb->max_dev = cpu_to_le32(max_dev);
1842 rdev->sb_size = max_dev * 2 + 256;
1843 bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
1844 if (rdev->sb_size & bmask)
1845 rdev->sb_size = (rdev->sb_size | bmask) + 1;
1846 } else
1847 max_dev = le32_to_cpu(sb->max_dev);
1848
1849 for (i=0; i<max_dev;i++)
1850 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1851
1852 rdev_for_each(rdev2, mddev) {
1853 i = rdev2->desc_nr;
1854 if (test_bit(Faulty, &rdev2->flags))
1855 sb->dev_roles[i] = cpu_to_le16(0xfffe);
1856 else if (test_bit(In_sync, &rdev2->flags))
1857 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1858 else if (rdev2->raid_disk >= 0)
1859 sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
1860 else
1861 sb->dev_roles[i] = cpu_to_le16(0xffff);
1862 }
1863
1864 sb->sb_csum = calc_sb_1_csum(sb);
1865}
1866
1867static unsigned long long
1868super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1869{
1870 struct mdp_superblock_1 *sb;
1871 sector_t max_sectors;
1872 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1873 return 0;
1874 if (rdev->data_offset != rdev->new_data_offset)
1875 return 0;
1876 if (rdev->sb_start < rdev->data_offset) {
1877
1878 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1879 max_sectors -= rdev->data_offset;
1880 if (!num_sectors || num_sectors > max_sectors)
1881 num_sectors = max_sectors;
1882 } else if (rdev->mddev->bitmap_info.offset) {
1883
1884 return 0;
1885 } else {
1886
1887 sector_t sb_start;
1888 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1889 sb_start &= ~(sector_t)(4*2 - 1);
1890 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1891 if (!num_sectors || num_sectors > max_sectors)
1892 num_sectors = max_sectors;
1893 rdev->sb_start = sb_start;
1894 }
1895 sb = page_address(rdev->sb_page);
1896 sb->data_size = cpu_to_le64(num_sectors);
1897 sb->super_offset = rdev->sb_start;
1898 sb->sb_csum = calc_sb_1_csum(sb);
1899 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1900 rdev->sb_page);
1901 md_super_wait(rdev->mddev);
1902 return num_sectors;
1903
1904}
1905
1906static int
1907super_1_allow_new_offset(struct md_rdev *rdev,
1908 unsigned long long new_offset)
1909{
1910
1911 struct bitmap *bitmap;
1912 if (new_offset >= rdev->data_offset)
1913 return 1;
1914
1915
1916
1917 if (rdev->mddev->minor_version == 0)
1918 return 1;
1919
1920
1921
1922
1923
1924
1925
1926 if (rdev->sb_start + (32+4)*2 > new_offset)
1927 return 0;
1928 bitmap = rdev->mddev->bitmap;
1929 if (bitmap && !rdev->mddev->bitmap_info.file &&
1930 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1931 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1932 return 0;
1933 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1934 return 0;
1935
1936 return 1;
1937}
1938
1939static struct super_type super_types[] = {
1940 [0] = {
1941 .name = "0.90.0",
1942 .owner = THIS_MODULE,
1943 .load_super = super_90_load,
1944 .validate_super = super_90_validate,
1945 .sync_super = super_90_sync,
1946 .rdev_size_change = super_90_rdev_size_change,
1947 .allow_new_offset = super_90_allow_new_offset,
1948 },
1949 [1] = {
1950 .name = "md-1",
1951 .owner = THIS_MODULE,
1952 .load_super = super_1_load,
1953 .validate_super = super_1_validate,
1954 .sync_super = super_1_sync,
1955 .rdev_size_change = super_1_rdev_size_change,
1956 .allow_new_offset = super_1_allow_new_offset,
1957 },
1958};
1959
1960static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1961{
1962 if (mddev->sync_super) {
1963 mddev->sync_super(mddev, rdev);
1964 return;
1965 }
1966
1967 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1968
1969 super_types[mddev->major_version].sync_super(mddev, rdev);
1970}
1971
1972static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1973{
1974 struct md_rdev *rdev, *rdev2;
1975
1976 rcu_read_lock();
1977 rdev_for_each_rcu(rdev, mddev1)
1978 rdev_for_each_rcu(rdev2, mddev2)
1979 if (rdev->bdev->bd_contains ==
1980 rdev2->bdev->bd_contains) {
1981 rcu_read_unlock();
1982 return 1;
1983 }
1984 rcu_read_unlock();
1985 return 0;
1986}
1987
1988static LIST_HEAD(pending_raid_disks);
1989
1990
1991
1992
1993
1994
1995
1996
1997int md_integrity_register(struct mddev *mddev)
1998{
1999 struct md_rdev *rdev, *reference = NULL;
2000
2001 if (list_empty(&mddev->disks))
2002 return 0;
2003 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2004 return 0;
2005 rdev_for_each(rdev, mddev) {
2006
2007 if (test_bit(Faulty, &rdev->flags))
2008 continue;
2009 if (rdev->raid_disk < 0)
2010 continue;
2011 if (!reference) {
2012
2013 reference = rdev;
2014 continue;
2015 }
2016
2017 if (blk_integrity_compare(reference->bdev->bd_disk,
2018 rdev->bdev->bd_disk) < 0)
2019 return -EINVAL;
2020 }
2021 if (!reference || !bdev_get_integrity(reference->bdev))
2022 return 0;
2023
2024
2025
2026
2027 if (blk_integrity_register(mddev->gendisk,
2028 bdev_get_integrity(reference->bdev)) != 0) {
2029 printk(KERN_ERR "md: failed to register integrity for %s\n",
2030 mdname(mddev));
2031 return -EINVAL;
2032 }
2033 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2034 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2035 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2036 mdname(mddev));
2037 return -EINVAL;
2038 }
2039 return 0;
2040}
2041EXPORT_SYMBOL(md_integrity_register);
2042
2043
2044void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2045{
2046 struct blk_integrity *bi_rdev;
2047 struct blk_integrity *bi_mddev;
2048
2049 if (!mddev->gendisk)
2050 return;
2051
2052 bi_rdev = bdev_get_integrity(rdev->bdev);
2053 bi_mddev = blk_get_integrity(mddev->gendisk);
2054
2055 if (!bi_mddev)
2056 return;
2057 if (rdev->raid_disk < 0)
2058 return;
2059 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2060 rdev->bdev->bd_disk) >= 0)
2061 return;
2062 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2063 blk_integrity_unregister(mddev->gendisk);
2064}
2065EXPORT_SYMBOL(md_integrity_add_rdev);
2066
2067static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2068{
2069 char b[BDEVNAME_SIZE];
2070 struct kobject *ko;
2071 char *s;
2072 int err;
2073
2074 if (rdev->mddev) {
2075 MD_BUG();
2076 return -EINVAL;
2077 }
2078
2079
2080 if (find_rdev(mddev, rdev->bdev->bd_dev))
2081 return -EEXIST;
2082
2083
2084 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2085 rdev->sectors < mddev->dev_sectors)) {
2086 if (mddev->pers) {
2087
2088
2089
2090
2091 if (mddev->level > 0)
2092 return -ENOSPC;
2093 } else
2094 mddev->dev_sectors = rdev->sectors;
2095 }
2096
2097
2098
2099
2100
2101 if (rdev->desc_nr < 0) {
2102 int choice = 0;
2103 if (mddev->pers) choice = mddev->raid_disks;
2104 while (find_rdev_nr(mddev, choice))
2105 choice++;
2106 rdev->desc_nr = choice;
2107 } else {
2108 if (find_rdev_nr(mddev, rdev->desc_nr))
2109 return -EBUSY;
2110 }
2111 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2112 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2113 mdname(mddev), mddev->max_disks);
2114 return -EBUSY;
2115 }
2116 bdevname(rdev->bdev,b);
2117 while ( (s=strchr(b, '/')) != NULL)
2118 *s = '!';
2119
2120 rdev->mddev = mddev;
2121 printk(KERN_INFO "md: bind<%s>\n", b);
2122
2123 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2124 goto fail;
2125
2126 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2127 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2128 ;
2129 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2130
2131 list_add_rcu(&rdev->same_set, &mddev->disks);
2132 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2133
2134
2135 mddev->recovery_disabled++;
2136
2137 return 0;
2138
2139 fail:
2140 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2141 b, mdname(mddev));
2142 return err;
2143}
2144
2145static void md_delayed_delete(struct work_struct *ws)
2146{
2147 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2148 kobject_del(&rdev->kobj);
2149 kobject_put(&rdev->kobj);
2150}
2151
2152static void unbind_rdev_from_array(struct md_rdev * rdev)
2153{
2154 char b[BDEVNAME_SIZE];
2155 if (!rdev->mddev) {
2156 MD_BUG();
2157 return;
2158 }
2159 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2160 list_del_rcu(&rdev->same_set);
2161 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2162 rdev->mddev = NULL;
2163 sysfs_remove_link(&rdev->kobj, "block");
2164 sysfs_put(rdev->sysfs_state);
2165 rdev->sysfs_state = NULL;
2166 rdev->badblocks.count = 0;
2167
2168
2169
2170
2171 synchronize_rcu();
2172 INIT_WORK(&rdev->del_work, md_delayed_delete);
2173 kobject_get(&rdev->kobj);
2174 queue_work(md_misc_wq, &rdev->del_work);
2175}
2176
2177
2178
2179
2180
2181
2182static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2183{
2184 int err = 0;
2185 struct block_device *bdev;
2186 char b[BDEVNAME_SIZE];
2187
2188 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2189 shared ? (struct md_rdev *)lock_rdev : rdev);
2190 if (IS_ERR(bdev)) {
2191 printk(KERN_ERR "md: could not open %s.\n",
2192 __bdevname(dev, b));
2193 return PTR_ERR(bdev);
2194 }
2195 rdev->bdev = bdev;
2196 return err;
2197}
2198
2199static void unlock_rdev(struct md_rdev *rdev)
2200{
2201 struct block_device *bdev = rdev->bdev;
2202 rdev->bdev = NULL;
2203 if (!bdev)
2204 MD_BUG();
2205 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2206}
2207
2208void md_autodetect_dev(dev_t dev);
2209
2210static void export_rdev(struct md_rdev * rdev)
2211{
2212 char b[BDEVNAME_SIZE];
2213 printk(KERN_INFO "md: export_rdev(%s)\n",
2214 bdevname(rdev->bdev,b));
2215 if (rdev->mddev)
2216 MD_BUG();
2217 md_rdev_clear(rdev);
2218#ifndef MODULE
2219 if (test_bit(AutoDetected, &rdev->flags))
2220 md_autodetect_dev(rdev->bdev->bd_dev);
2221#endif
2222 unlock_rdev(rdev);
2223 kobject_put(&rdev->kobj);
2224}
2225
2226static void kick_rdev_from_array(struct md_rdev * rdev)
2227{
2228 unbind_rdev_from_array(rdev);
2229 export_rdev(rdev);
2230}
2231
2232static void export_array(struct mddev *mddev)
2233{
2234 struct md_rdev *rdev, *tmp;
2235
2236 rdev_for_each_safe(rdev, tmp, mddev) {
2237 if (!rdev->mddev) {
2238 MD_BUG();
2239 continue;
2240 }
2241 kick_rdev_from_array(rdev);
2242 }
2243 if (!list_empty(&mddev->disks))
2244 MD_BUG();
2245 mddev->raid_disks = 0;
2246 mddev->major_version = 0;
2247}
2248
2249static void print_desc(mdp_disk_t *desc)
2250{
2251 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2252 desc->major,desc->minor,desc->raid_disk,desc->state);
2253}
2254
2255static void print_sb_90(mdp_super_t *sb)
2256{
2257 int i;
2258
2259 printk(KERN_INFO
2260 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2261 sb->major_version, sb->minor_version, sb->patch_version,
2262 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2263 sb->ctime);
2264 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2265 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2266 sb->md_minor, sb->layout, sb->chunk_size);
2267 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2268 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2269 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2270 sb->failed_disks, sb->spare_disks,
2271 sb->sb_csum, (unsigned long)sb->events_lo);
2272
2273 printk(KERN_INFO);
2274 for (i = 0; i < MD_SB_DISKS; i++) {
2275 mdp_disk_t *desc;
2276
2277 desc = sb->disks + i;
2278 if (desc->number || desc->major || desc->minor ||
2279 desc->raid_disk || (desc->state && (desc->state != 4))) {
2280 printk(" D %2d: ", i);
2281 print_desc(desc);
2282 }
2283 }
2284 printk(KERN_INFO "md: THIS: ");
2285 print_desc(&sb->this_disk);
2286}
2287
2288static void print_sb_1(struct mdp_superblock_1 *sb)
2289{
2290 __u8 *uuid;
2291
2292 uuid = sb->set_uuid;
2293 printk(KERN_INFO
2294 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2295 "md: Name: \"%s\" CT:%llu\n",
2296 le32_to_cpu(sb->major_version),
2297 le32_to_cpu(sb->feature_map),
2298 uuid,
2299 sb->set_name,
2300 (unsigned long long)le64_to_cpu(sb->ctime)
2301 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2302
2303 uuid = sb->device_uuid;
2304 printk(KERN_INFO
2305 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2306 " RO:%llu\n"
2307 "md: Dev:%08x UUID: %pU\n"
2308 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2309 "md: (MaxDev:%u) \n",
2310 le32_to_cpu(sb->level),
2311 (unsigned long long)le64_to_cpu(sb->size),
2312 le32_to_cpu(sb->raid_disks),
2313 le32_to_cpu(sb->layout),
2314 le32_to_cpu(sb->chunksize),
2315 (unsigned long long)le64_to_cpu(sb->data_offset),
2316 (unsigned long long)le64_to_cpu(sb->data_size),
2317 (unsigned long long)le64_to_cpu(sb->super_offset),
2318 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2319 le32_to_cpu(sb->dev_number),
2320 uuid,
2321 sb->devflags,
2322 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2323 (unsigned long long)le64_to_cpu(sb->events),
2324 (unsigned long long)le64_to_cpu(sb->resync_offset),
2325 le32_to_cpu(sb->sb_csum),
2326 le32_to_cpu(sb->max_dev)
2327 );
2328}
2329
2330static void print_rdev(struct md_rdev *rdev, int major_version)
2331{
2332 char b[BDEVNAME_SIZE];
2333 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2334 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2335 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2336 rdev->desc_nr);
2337 if (rdev->sb_loaded) {
2338 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2339 switch (major_version) {
2340 case 0:
2341 print_sb_90(page_address(rdev->sb_page));
2342 break;
2343 case 1:
2344 print_sb_1(page_address(rdev->sb_page));
2345 break;
2346 }
2347 } else
2348 printk(KERN_INFO "md: no rdev superblock!\n");
2349}
2350
2351static void md_print_devices(void)
2352{
2353 struct list_head *tmp;
2354 struct md_rdev *rdev;
2355 struct mddev *mddev;
2356 char b[BDEVNAME_SIZE];
2357
2358 printk("\n");
2359 printk("md: **********************************\n");
2360 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2361 printk("md: **********************************\n");
2362 for_each_mddev(mddev, tmp) {
2363
2364 if (mddev->bitmap)
2365 bitmap_print_sb(mddev->bitmap);
2366 else
2367 printk("%s: ", mdname(mddev));
2368 rdev_for_each(rdev, mddev)
2369 printk("<%s>", bdevname(rdev->bdev,b));
2370 printk("\n");
2371
2372 rdev_for_each(rdev, mddev)
2373 print_rdev(rdev, mddev->major_version);
2374 }
2375 printk("md: **********************************\n");
2376 printk("\n");
2377}
2378
2379
2380static void sync_sbs(struct mddev * mddev, int nospares)
2381{
	/* Refresh each rdev's in-memory superblock image.  When 'nospares'
	 * is set, skip spares whose recorded event count already matches,
	 * or is only one behind, so their superblocks need not be rewritten.
	 */
2388 struct md_rdev *rdev;
2389 rdev_for_each(rdev, mddev) {
2390 if (rdev->sb_events == mddev->events ||
2391 (nospares &&
2392 rdev->raid_disk < 0 &&
2393 rdev->sb_events+1 == mddev->events)) {
			/* this device's superblock does not need to be rewritten */
2395 rdev->sb_loaded = 2;
2396 } else {
2397 sync_super(mddev, rdev);
2398 rdev->sb_loaded = 1;
2399 }
2400 }
2401}
2402
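/*
 * Write the updated superblocks out to every device in the array.
 * Repeats if the array state changed, or further updates were requested,
 * while the write was in progress.
 */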
2403static void md_update_sb(struct mddev * mddev, int force_change)
2404{
2405 struct md_rdev *rdev;
2406 int sync_req;
2407 int nospares = 0;
2408 int any_badblocks_changed = 0;
2409
2410repeat:
	/* first make sure each individual recovery_offset is up to date */
2412 rdev_for_each(rdev, mddev) {
2413 if (rdev->raid_disk >= 0 &&
2414 mddev->delta_disks >= 0 &&
2415 !test_bit(In_sync, &rdev->flags) &&
2416 mddev->curr_resync_completed > rdev->recovery_offset)
2417 rdev->recovery_offset = mddev->curr_resync_completed;
2418
2419 }
2420 if (!mddev->persistent) {
2421 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2422 clear_bit(MD_CHANGE_DEVS, &mddev->flags);
2423 if (!mddev->external) {
2424 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2425 rdev_for_each(rdev, mddev) {
2426 if (rdev->badblocks.changed) {
2427 rdev->badblocks.changed = 0;
2428 md_ack_all_badblocks(&rdev->badblocks);
2429 md_error(mddev, rdev);
2430 }
2431 clear_bit(Blocked, &rdev->flags);
2432 clear_bit(BlockedBadBlocks, &rdev->flags);
2433 wake_up(&rdev->blocked_wait);
2434 }
2435 }
2436 wake_up(&mddev->sb_wait);
2437 return;
2438 }
2439
2440 spin_lock_irq(&mddev->write_lock);
2441
2442 mddev->utime = get_seconds();
2443
2444 if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
2445 force_change = 1;
2446 if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/* just a clean <-> dirty transition; the spare superblocks
		 * can often be left alone */
2451 nospares = 1;
2452 if (force_change)
2453 nospares = 0;
2454 if (mddev->degraded)
		/* If the array is degraded, skipping spares is both dangerous
		 * and pointless: a removed device could still look up to date
		 * and be re-added without a resync, and any usable spare will
		 * soon be rebuilt anyway. */
2464 nospares = 0;
2465
2466 sync_req = mddev->in_sync;

	/* If this is just a dirty <-> clean transition and the array is clean,
	 * the event count may be rolled back instead of moved forward. */
2470 if (nospares
2471 && (mddev->in_sync && mddev->recovery_cp == MaxSector)
2472 && mddev->can_decrease_events
2473 && mddev->events != 1) {
2474 mddev->events--;
2475 mddev->can_decrease_events = 0;
2476 } else {
2477
2478 mddev->events ++;
2479 mddev->can_decrease_events = nospares;
2480 }
2481
2482 if (!mddev->events) {
		/* The 64-bit event counter should never wrap; reaching zero
		 * here can only mean a bug. */
2488 MD_BUG();
2489 mddev->events --;
2490 }
2491
2492 rdev_for_each(rdev, mddev) {
2493 if (rdev->badblocks.changed)
2494 any_badblocks_changed++;
2495 if (test_bit(Faulty, &rdev->flags))
2496 set_bit(FaultRecorded, &rdev->flags);
2497 }
2498
2499 sync_sbs(mddev, nospares);
2500 spin_unlock_irq(&mddev->write_lock);
2501
2502 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2503 mdname(mddev), mddev->in_sync);
2504
2505 bitmap_update_sb(mddev->bitmap);
2506 rdev_for_each(rdev, mddev) {
2507 char b[BDEVNAME_SIZE];
2508
2509 if (rdev->sb_loaded != 1)
2510 continue;
2511
2512 if (!test_bit(Faulty, &rdev->flags) &&
2513 rdev->saved_raid_disk == -1) {
2514 md_super_write(mddev,rdev,
2515 rdev->sb_start, rdev->sb_size,
2516 rdev->sb_page);
2517 pr_debug("md: (write) %s's sb offset: %llu\n",
2518 bdevname(rdev->bdev, b),
2519 (unsigned long long)rdev->sb_start);
2520 rdev->sb_events = mddev->events;
2521 if (rdev->badblocks.size) {
2522 md_super_write(mddev, rdev,
2523 rdev->badblocks.sector,
2524 rdev->badblocks.size << 9,
2525 rdev->bb_page);
2526 rdev->badblocks.size = 0;
2527 }
2528
2529 } else if (test_bit(Faulty, &rdev->flags))
2530 pr_debug("md: %s (skipping faulty)\n",
2531 bdevname(rdev->bdev, b));
2532 else
2533 pr_debug("(skipping incremental s/r ");
2534
2535 if (mddev->level == LEVEL_MULTIPATH)
			/* only one superblock needs to be written for multipath */
2537 break;
2538 }
2539 md_super_wait(mddev);
2540
	/* If in_sync changed, or further updates were requested while the
	 * superblocks were being written, they must be written out again. */
2542 spin_lock_irq(&mddev->write_lock);
2543 if (mddev->in_sync != sync_req ||
2544 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2545
2546 spin_unlock_irq(&mddev->write_lock);
2547 goto repeat;
2548 }
2549 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2550 spin_unlock_irq(&mddev->write_lock);
2551 wake_up(&mddev->sb_wait);
2552 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2553 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
2554
2555 rdev_for_each(rdev, mddev) {
2556 if (test_and_clear_bit(FaultRecorded, &rdev->flags))
2557 clear_bit(Blocked, &rdev->flags);
2558
2559 if (any_badblocks_changed)
2560 md_ack_all_badblocks(&rdev->badblocks);
2561 clear_bit(BlockedBadBlocks, &rdev->flags);
2562 wake_up(&rdev->blocked_wait);
2563 }
2564}
2565
2566
2567
2568
2569static int cmd_match(const char *cmd, const char *str)
2570{
	/* Check whether 'cmd', as written to a sysfs file, matches 'str'.
	 * They must either be identical, or 'cmd' may have a trailing
	 * newline that 'str' lacks.
	 */
2575 while (*cmd && *str && *cmd == *str) {
2576 cmd++;
2577 str++;
2578 }
2579 if (*cmd == '\n')
2580 cmd++;
2581 if (*str || *cmd)
2582 return 0;
2583 return 1;
2584}
2585
2586struct rdev_sysfs_entry {
2587 struct attribute attr;
2588 ssize_t (*show)(struct md_rdev *, char *);
2589 ssize_t (*store)(struct md_rdev *, const char *, size_t);
2590};
2591
2592static ssize_t
2593state_show(struct md_rdev *rdev, char *page)
2594{
2595 char *sep = "";
2596 size_t len = 0;
2597
2598 if (test_bit(Faulty, &rdev->flags) ||
2599 rdev->badblocks.unacked_exist) {
2600 len+= sprintf(page+len, "%sfaulty",sep);
2601 sep = ",";
2602 }
2603 if (test_bit(In_sync, &rdev->flags)) {
2604 len += sprintf(page+len, "%sin_sync",sep);
2605 sep = ",";
2606 }
2607 if (test_bit(WriteMostly, &rdev->flags)) {
2608 len += sprintf(page+len, "%swrite_mostly",sep);
2609 sep = ",";
2610 }
2611 if (test_bit(Blocked, &rdev->flags) ||
2612 (rdev->badblocks.unacked_exist
2613 && !test_bit(Faulty, &rdev->flags))) {
2614 len += sprintf(page+len, "%sblocked", sep);
2615 sep = ",";
2616 }
2617 if (!test_bit(Faulty, &rdev->flags) &&
2618 !test_bit(In_sync, &rdev->flags)) {
2619 len += sprintf(page+len, "%sspare", sep);
2620 sep = ",";
2621 }
2622 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2623 len += sprintf(page+len, "%swrite_error", sep);
2624 sep = ",";
2625 }
2626 if (test_bit(WantReplacement, &rdev->flags)) {
2627 len += sprintf(page+len, "%swant_replacement", sep);
2628 sep = ",";
2629 }
2630 if (test_bit(Replacement, &rdev->flags)) {
2631 len += sprintf(page+len, "%sreplacement", sep);
2632 sep = ",";
2633 }
2634
2635 return len+sprintf(page+len, "\n");
2636}
2637
2638static ssize_t
2639state_store(struct md_rdev *rdev, const char *buf, size_t len)
2640{
	/* Accepted values (optionally newline terminated):
	 *  faulty            - simulate an I/O error on this device
	 *  remove            - disconnect the device from the array
	 *  writemostly       - set the WriteMostly flag
	 *  -writemostly      - clear the WriteMostly flag
	 *  blocked           - set the Blocked flag
	 *  -blocked          - clear Blocked/BlockedBadBlocks and wake waiters
	 *  insync            - mark in-sync (only while not assigned a slot)
	 *  write_error       - set the WriteErrorSeen flag
	 *  -write_error      - clear the WriteErrorSeen flag
	 *  want_replacement  - ask for this device to be replaced
	 *  -want_replacement - cancel that request
	 *  replacement       - mark as a replacement device (inactive array only)
	 *  -replacement      - clear the Replacement flag (inactive array only)
	 */
2652 int err = -EINVAL;
2653 if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
2654 md_error(rdev->mddev, rdev);
2655 if (test_bit(Faulty, &rdev->flags))
2656 err = 0;
2657 else
2658 err = -EBUSY;
2659 } else if (cmd_match(buf, "remove")) {
2660 if (rdev->raid_disk >= 0)
2661 err = -EBUSY;
2662 else {
2663 struct mddev *mddev = rdev->mddev;
2664 kick_rdev_from_array(rdev);
2665 if (mddev->pers)
2666 md_update_sb(mddev, 1);
2667 md_new_event(mddev);
2668 err = 0;
2669 }
2670 } else if (cmd_match(buf, "writemostly")) {
2671 set_bit(WriteMostly, &rdev->flags);
2672 err = 0;
2673 } else if (cmd_match(buf, "-writemostly")) {
2674 clear_bit(WriteMostly, &rdev->flags);
2675 err = 0;
2676 } else if (cmd_match(buf, "blocked")) {
2677 set_bit(Blocked, &rdev->flags);
2678 err = 0;
2679 } else if (cmd_match(buf, "-blocked")) {
2680 if (!test_bit(Faulty, &rdev->flags) &&
2681 rdev->badblocks.unacked_exist) {
			/* unacknowledged bad blocks remain on a non-faulty
			 * device, so fail it rather than just unblocking */
2685 md_error(rdev->mddev, rdev);
2686 }
2687 clear_bit(Blocked, &rdev->flags);
2688 clear_bit(BlockedBadBlocks, &rdev->flags);
2689 wake_up(&rdev->blocked_wait);
2690 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2691 md_wakeup_thread(rdev->mddev->thread);
2692
2693 err = 0;
2694 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2695 set_bit(In_sync, &rdev->flags);
2696 err = 0;
2697 } else if (cmd_match(buf, "write_error")) {
2698 set_bit(WriteErrorSeen, &rdev->flags);
2699 err = 0;
2700 } else if (cmd_match(buf, "-write_error")) {
2701 clear_bit(WriteErrorSeen, &rdev->flags);
2702 err = 0;
2703 } else if (cmd_match(buf, "want_replacement")) {
		/* Any active, non-replacement device may be marked as wanting
		 * replacement; recovery then needs to be re-checked. */
2708 if (rdev->raid_disk >= 0 &&
2709 !test_bit(Replacement, &rdev->flags))
2710 set_bit(WantReplacement, &rdev->flags);
2711 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2712 md_wakeup_thread(rdev->mddev->thread);
2713 err = 0;
2714 } else if (cmd_match(buf, "-want_replacement")) {
		/* clearing 'want_replacement' is always allowed */
2718 err = 0;
2719 clear_bit(WantReplacement, &rdev->flags);
2720 } else if (cmd_match(buf, "replacement")) {
		/* Replacement can only be set while the array is not running. */
2725 if (rdev->mddev->pers)
2726 err = -EBUSY;
2727 else {
2728 set_bit(Replacement, &rdev->flags);
2729 err = 0;
2730 }
2731 } else if (cmd_match(buf, "-replacement")) {
		/* similarly, Replacement can only be cleared before the array starts */
2733 if (rdev->mddev->pers)
2734 err = -EBUSY;
2735 else {
2736 clear_bit(Replacement, &rdev->flags);
2737 err = 0;
2738 }
2739 }
2740 if (!err)
2741 sysfs_notify_dirent_safe(rdev->sysfs_state);
2742 return err ? err : len;
2743}
2744static struct rdev_sysfs_entry rdev_state =
2745__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2746
2747static ssize_t
2748errors_show(struct md_rdev *rdev, char *page)
2749{
2750 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2751}
2752
2753static ssize_t
2754errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2755{
2756 char *e;
2757 unsigned long n = simple_strtoul(buf, &e, 10);
2758 if (*buf && (*e == 0 || *e == '\n')) {
2759 atomic_set(&rdev->corrected_errors, n);
2760 return len;
2761 }
2762 return -EINVAL;
2763}
2764static struct rdev_sysfs_entry rdev_errors =
2765__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2766
2767static ssize_t
2768slot_show(struct md_rdev *rdev, char *page)
2769{
2770 if (rdev->raid_disk < 0)
2771 return sprintf(page, "none\n");
2772 else
2773 return sprintf(page, "%d\n", rdev->raid_disk);
2774}
2775
2776static ssize_t
2777slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2778{
2779 char *e;
2780 int err;
2781 int slot = simple_strtoul(buf, &e, 10);
2782 if (strncmp(buf, "none", 4)==0)
2783 slot = -1;
2784 else if (e==buf || (*e && *e!= '\n'))
2785 return -EINVAL;
2786 if (rdev->mddev->pers && slot == -1) {
		/* Writing 'none' while the array is running requests removal of
		 * this device from its current slot; the personality's
		 * hot_remove_disk() decides whether that is currently possible. */
2794 if (rdev->raid_disk == -1)
2795 return -EEXIST;
2796
2797 if (rdev->mddev->pers->hot_remove_disk == NULL)
2798 return -EINVAL;
2799 err = rdev->mddev->pers->
2800 hot_remove_disk(rdev->mddev, rdev);
2801 if (err)
2802 return err;
2803 sysfs_unlink_rdev(rdev->mddev, rdev);
2804 rdev->raid_disk = -1;
2805 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2806 md_wakeup_thread(rdev->mddev->thread);
2807 } else if (rdev->mddev->pers) {
		/* Setting a slot on a running array hot-adds this device in
		 * that slot, provided no recovery is in progress. */
2812 if (rdev->raid_disk != -1)
2813 return -EBUSY;
2814
2815 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2816 return -EBUSY;
2817
2818 if (rdev->mddev->pers->hot_add_disk == NULL)
2819 return -EINVAL;
2820
2821 if (slot >= rdev->mddev->raid_disks &&
2822 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2823 return -ENOSPC;
2824
2825 rdev->raid_disk = slot;
2826 if (test_bit(In_sync, &rdev->flags))
2827 rdev->saved_raid_disk = slot;
2828 else
2829 rdev->saved_raid_disk = -1;
2830 clear_bit(In_sync, &rdev->flags);
2831 err = rdev->mddev->pers->
2832 hot_add_disk(rdev->mddev, rdev);
2833 if (err) {
2834 rdev->raid_disk = -1;
2835 return err;
2836 } else
2837 sysfs_notify_dirent_safe(rdev->sysfs_state);
		if (sysfs_link_rdev(rdev->mddev, rdev))
			/* failure to create the sysfs link is not fatal */;
2840
2841 } else {
2842 if (slot >= rdev->mddev->raid_disks &&
2843 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2844 return -ENOSPC;
2845 rdev->raid_disk = slot;
		/* the device is assumed to be working */
2847 clear_bit(Faulty, &rdev->flags);
2848 clear_bit(WriteMostly, &rdev->flags);
2849 set_bit(In_sync, &rdev->flags);
2850 sysfs_notify_dirent_safe(rdev->sysfs_state);
2851 }
2852 return len;
2853}
2854
2855
2856static struct rdev_sysfs_entry rdev_slot =
2857__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2858
2859static ssize_t
2860offset_show(struct md_rdev *rdev, char *page)
2861{
2862 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2863}
2864
2865static ssize_t
2866offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2867{
2868 unsigned long long offset;
2869 if (strict_strtoull(buf, 10, &offset) < 0)
2870 return -EINVAL;
2871 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2872 return -EBUSY;
2873 if (rdev->sectors && rdev->mddev->external)
		/* once a size has been set on an externally-managed device
		 * the offset may no longer be changed */
2876 return -EBUSY;
2877 rdev->data_offset = offset;
2878 rdev->new_data_offset = offset;
2879 return len;
2880}
2881
2882static struct rdev_sysfs_entry rdev_offset =
2883__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2884
2885static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2886{
2887 return sprintf(page, "%llu\n",
2888 (unsigned long long)rdev->new_data_offset);
2889}
2890
2891static ssize_t new_offset_store(struct md_rdev *rdev,
2892 const char *buf, size_t len)
2893{
2894 unsigned long long new_offset;
2895 struct mddev *mddev = rdev->mddev;
2896
2897 if (strict_strtoull(buf, 10, &new_offset) < 0)
2898 return -EINVAL;
2899
2900 if (mddev->sync_thread)
2901 return -EBUSY;
2902 if (new_offset == rdev->data_offset)
		/* resetting to the current offset is always allowed */
2904 ;
2905 else if (new_offset > rdev->data_offset) {
		/* the data must still fit within the device */
2907 if (new_offset - rdev->data_offset
2908 + mddev->dev_sectors > rdev->sectors)
2909 return -E2BIG;
2910 }

	/* decreasing the offset is inconsistent with a backwards reshape */
2916 if (new_offset < rdev->data_offset &&
2917 mddev->reshape_backwards)
2918 return -EINVAL;

	/* increasing the offset is inconsistent with a forwards reshape;
	 * reshape_backwards must already be set */
2923 if (new_offset > rdev->data_offset &&
2924 !mddev->reshape_backwards)
2925 return -EINVAL;
2926
2927 if (mddev->pers && mddev->persistent &&
2928 !super_types[mddev->major_version]
2929 .allow_new_offset(rdev, new_offset))
2930 return -E2BIG;
2931 rdev->new_data_offset = new_offset;
2932 if (new_offset > rdev->data_offset)
2933 mddev->reshape_backwards = 1;
2934 else if (new_offset < rdev->data_offset)
2935 mddev->reshape_backwards = 0;
2936
2937 return len;
2938}
2939static struct rdev_sysfs_entry rdev_new_offset =
2940__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2941
2942static ssize_t
2943rdev_size_show(struct md_rdev *rdev, char *page)
2944{
2945 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2946}
2947
2948static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2949{
	/* return true if the two [start, start+len) ranges intersect */
2951 if (s1+l1 <= s2)
2952 return 0;
2953 if (s2+l2 <= s1)
2954 return 0;
2955 return 1;
2956}
2957
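/* Convert a count of 1K blocks written to sysfs into a sector count,
 * rejecting values that would overflow. */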
2958static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2959{
2960 unsigned long long blocks;
2961 sector_t new;
2962
2963 if (strict_strtoull(buf, 10, &blocks) < 0)
2964 return -EINVAL;
2965
	/* reject a value whose doubling would overflow 64 bits */
	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
		return -EINVAL;

	new = blocks * 2;
	if (new != blocks * 2)
		return -EINVAL; /* sector_t may be narrower than unsigned long long */
2972
2973 *sectors = new;
2974 return 0;
2975}
2976
2977static ssize_t
2978rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2979{
2980 struct mddev *my_mddev = rdev->mddev;
2981 sector_t oldsectors = rdev->sectors;
2982 sector_t sectors;
2983
	if (strict_blocks_to_sectors(buf, &sectors) < 0)
2985 return -EINVAL;
2986 if (rdev->data_offset != rdev->new_data_offset)
2987 return -EINVAL;
2988 if (my_mddev->pers && rdev->raid_disk >= 0) {
2989 if (my_mddev->persistent) {
2990 sectors = super_types[my_mddev->major_version].
2991 rdev_size_change(rdev, sectors);
2992 if (!sectors)
2993 return -EBUSY;
2994 } else if (!sectors)
2995 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2996 rdev->data_offset;
2997 }
2998 if (sectors < my_mddev->dev_sectors)
2999 return -EINVAL;
3000
3001 rdev->sectors = sectors;
3002 if (sectors > oldsectors && my_mddev->external) {
		/* Growing an rdev whose metadata is managed externally: make
		 * sure the new size does not overlap any other rdev sharing
		 * the same block device.  This is a best-effort check, not a
		 * hard guarantee. */
3008 struct mddev *mddev;
3009 int overlap = 0;
3010 struct list_head *tmp;
3011
3012 mddev_unlock(my_mddev);
3013 for_each_mddev(mddev, tmp) {
3014 struct md_rdev *rdev2;
3015
3016 mddev_lock(mddev);
3017 rdev_for_each(rdev2, mddev)
3018 if (rdev->bdev == rdev2->bdev &&
3019 rdev != rdev2 &&
3020 overlaps(rdev->data_offset, rdev->sectors,
3021 rdev2->data_offset,
3022 rdev2->sectors)) {
3023 overlap = 1;
3024 break;
3025 }
3026 mddev_unlock(mddev);
3027 if (overlap) {
3028 mddev_put(mddev);
3029 break;
3030 }
3031 }
3032 mddev_lock(my_mddev);
3033 if (overlap) {
			/* someone else is using this space; restore the old,
			 * known-safe size and report busy */
3040 rdev->sectors = oldsectors;
3041 return -EBUSY;
3042 }
3043 }
3044 return len;
3045}
3046
3047static struct rdev_sysfs_entry rdev_size =
3048__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3049
3050
3051static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3052{
3053 unsigned long long recovery_start = rdev->recovery_offset;
3054
3055 if (test_bit(In_sync, &rdev->flags) ||
3056 recovery_start == MaxSector)
3057 return sprintf(page, "none\n");
3058
3059 return sprintf(page, "%llu\n", recovery_start);
3060}
3061
3062static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3063{
3064 unsigned long long recovery_start;
3065
3066 if (cmd_match(buf, "none"))
3067 recovery_start = MaxSector;
3068 else if (strict_strtoull(buf, 10, &recovery_start))
3069 return -EINVAL;
3070
3071 if (rdev->mddev->pers &&
3072 rdev->raid_disk >= 0)
3073 return -EBUSY;
3074
3075 rdev->recovery_offset = recovery_start;
3076 if (recovery_start == MaxSector)
3077 set_bit(In_sync, &rdev->flags);
3078 else
3079 clear_bit(In_sync, &rdev->flags);
3080 return len;
3081}
3082
3083static struct rdev_sysfs_entry rdev_recovery_start =
3084__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3085
3086
3087static ssize_t
3088badblocks_show(struct badblocks *bb, char *page, int unack);
3089static ssize_t
3090badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3091
3092static ssize_t bb_show(struct md_rdev *rdev, char *page)
3093{
3094 return badblocks_show(&rdev->badblocks, page, 0);
3095}
3096static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3097{
3098 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3099
3100 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3101 wake_up(&rdev->blocked_wait);
3102 return rv;
3103}
3104static struct rdev_sysfs_entry rdev_bad_blocks =
3105__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3106
3107
3108static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3109{
3110 return badblocks_show(&rdev->badblocks, page, 1);
3111}
3112static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3113{
3114 return badblocks_store(&rdev->badblocks, page, len, 1);
3115}
3116static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3117__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3118
3119static struct attribute *rdev_default_attrs[] = {
3120 &rdev_state.attr,
3121 &rdev_errors.attr,
3122 &rdev_slot.attr,
3123 &rdev_offset.attr,
3124 &rdev_new_offset.attr,
3125 &rdev_size.attr,
3126 &rdev_recovery_start.attr,
3127 &rdev_bad_blocks.attr,
3128 &rdev_unack_bad_blocks.attr,
3129 NULL,
3130};
3131static ssize_t
3132rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3133{
3134 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3135 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3136 struct mddev *mddev = rdev->mddev;
3137 ssize_t rv;
3138
3139 if (!entry->show)
3140 return -EIO;
3141
3142 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3143 if (!rv) {
3144 if (rdev->mddev == NULL)
3145 rv = -EBUSY;
3146 else
3147 rv = entry->show(rdev, page);
3148 mddev_unlock(mddev);
3149 }
3150 return rv;
3151}
3152
3153static ssize_t
3154rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3155 const char *page, size_t length)
3156{
3157 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3158 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3159 ssize_t rv;
3160 struct mddev *mddev = rdev->mddev;
3161
3162 if (!entry->store)
3163 return -EIO;
3164 if (!capable(CAP_SYS_ADMIN))
3165 return -EACCES;
3166 rv = mddev ? mddev_lock(mddev): -EBUSY;
3167 if (!rv) {
3168 if (rdev->mddev == NULL)
3169 rv = -EBUSY;
3170 else
3171 rv = entry->store(rdev, page, length);
3172 mddev_unlock(mddev);
3173 }
3174 return rv;
3175}
3176
3177static void rdev_free(struct kobject *ko)
3178{
3179 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3180 kfree(rdev);
3181}
3182static const struct sysfs_ops rdev_sysfs_ops = {
3183 .show = rdev_attr_show,
3184 .store = rdev_attr_store,
3185};
3186static struct kobj_type rdev_ktype = {
3187 .release = rdev_free,
3188 .sysfs_ops = &rdev_sysfs_ops,
3189 .default_attrs = rdev_default_attrs,
3190};
3191
3192int md_rdev_init(struct md_rdev *rdev)
3193{
3194 rdev->desc_nr = -1;
3195 rdev->saved_raid_disk = -1;
3196 rdev->raid_disk = -1;
3197 rdev->flags = 0;
3198 rdev->data_offset = 0;
3199 rdev->new_data_offset = 0;
3200 rdev->sb_events = 0;
3201 rdev->last_read_error.tv_sec = 0;
3202 rdev->last_read_error.tv_nsec = 0;
3203 rdev->sb_loaded = 0;
3204 rdev->bb_page = NULL;
3205 atomic_set(&rdev->nr_pending, 0);
3206 atomic_set(&rdev->read_errors, 0);
3207 atomic_set(&rdev->corrected_errors, 0);
3208
3209 INIT_LIST_HEAD(&rdev->same_set);
3210 init_waitqueue_head(&rdev->blocked_wait);
3211
	/* Reserve a page up front for the bad-block list, even on arrays
	 * that may never use it. */
3216 rdev->badblocks.count = 0;
3217 rdev->badblocks.shift = 0;
3218 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3219 seqlock_init(&rdev->badblocks.lock);
3220 if (rdev->badblocks.page == NULL)
3221 return -ENOMEM;
3222
3223 return 0;
3224}
3225EXPORT_SYMBOL_GPL(md_rdev_init);
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
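/*
 * Import a device: allocate an rdev, claim the block device, and (when
 * super_format >= 0) read and sanity-check its superblock.  On failure
 * everything is unwound and an ERR_PTR is returned.
 */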
3236static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
3237{
3238 char b[BDEVNAME_SIZE];
3239 int err;
3240 struct md_rdev *rdev;
3241 sector_t size;
3242
3243 rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
3244 if (!rdev) {
3245 printk(KERN_ERR "md: could not alloc mem for new device!\n");
3246 return ERR_PTR(-ENOMEM);
3247 }
3248
3249 err = md_rdev_init(rdev);
3250 if (err)
3251 goto abort_free;
3252 err = alloc_disk_sb(rdev);
3253 if (err)
3254 goto abort_free;
3255
3256 err = lock_rdev(rdev, newdev, super_format == -2);
3257 if (err)
3258 goto abort_free;
3259
3260 kobject_init(&rdev->kobj, &rdev_ktype);
3261
3262 size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
3263 if (!size) {
3264 printk(KERN_WARNING
3265 "md: %s has zero or unknown size, marking faulty!\n",
3266 bdevname(rdev->bdev,b));
3267 err = -EINVAL;
3268 goto abort_free;
3269 }
3270
3271 if (super_format >= 0) {
3272 err = super_types[super_format].
3273 load_super(rdev, NULL, super_minor);
3274 if (err == -EINVAL) {
3275 printk(KERN_WARNING
3276 "md: %s does not have a valid v%d.%d "
3277 "superblock, not importing!\n",
3278 bdevname(rdev->bdev,b),
3279 super_format, super_minor);
3280 goto abort_free;
3281 }
3282 if (err < 0) {
3283 printk(KERN_WARNING
3284 "md: could not read %s's sb, not importing!\n",
3285 bdevname(rdev->bdev,b));
3286 goto abort_free;
3287 }
3288 }
3289 if (super_format == -1)
		/* non-persistent device: bad blocks cannot be recorded, so
		 * disable the list */
3291 rdev->badblocks.shift = -1;
3292
3293 return rdev;
3294
3295abort_free:
3296 if (rdev->bdev)
3297 unlock_rdev(rdev);
3298 md_rdev_clear(rdev);
3299 kfree(rdev);
3300 return ERR_PTR(err);
3301}
3302
3303
3304
3305
3306
3307
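/*
 * Check a full array of superblocks for consistency: pick the freshest
 * device, validate every other device against it, and kick out anything
 * that does not belong.
 */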
3308static void analyze_sbs(struct mddev * mddev)
3309{
3310 int i;
3311 struct md_rdev *rdev, *freshest, *tmp;
3312 char b[BDEVNAME_SIZE];
3313
3314 freshest = NULL;
3315 rdev_for_each_safe(rdev, tmp, mddev)
3316 switch (super_types[mddev->major_version].
3317 load_super(rdev, freshest, mddev->minor_version)) {
3318 case 1:
3319 freshest = rdev;
3320 break;
3321 case 0:
3322 break;
3323 default:
			printk(KERN_ERR
			       "md: fatal superblock inconsistency in %s"
			       " -- removing from array\n",
			       bdevname(rdev->bdev, b));
3328 kick_rdev_from_array(rdev);
3329 }
3330
3331
3332 super_types[mddev->major_version].
3333 validate_super(mddev, freshest);
3334
3335 i = 0;
3336 rdev_for_each_safe(rdev, tmp, mddev) {
3337 if (mddev->max_disks &&
3338 (rdev->desc_nr >= mddev->max_disks ||
3339 i > mddev->max_disks)) {
3340 printk(KERN_WARNING
3341 "md: %s: %s: only %d devices permitted\n",
3342 mdname(mddev), bdevname(rdev->bdev, b),
3343 mddev->max_disks);
3344 kick_rdev_from_array(rdev);
3345 continue;
3346 }
3347 if (rdev != freshest)
3348 if (super_types[mddev->major_version].
3349 validate_super(mddev, rdev)) {
3350 printk(KERN_WARNING "md: kicking non-fresh %s"
3351 " from array!\n",
3352 bdevname(rdev->bdev,b));
3353 kick_rdev_from_array(rdev);
3354 continue;
3355 }
3356 if (mddev->level == LEVEL_MULTIPATH) {
3357 rdev->desc_nr = i++;
3358 rdev->raid_disk = rdev->desc_nr;
3359 set_bit(In_sync, &rdev->flags);
3360 } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
3361 rdev->raid_disk = -1;
3362 clear_bit(In_sync, &rdev->flags);
3363 }
3364 }
3365}
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
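/*
 * Parse a decimal number with an optional fractional part and return it
 * multiplied by 10^scale, without using floating point.  For example,
 * with scale == 3, "1.5" becomes 1500.
 */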
3377int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
3378{
3379 unsigned long result = 0;
3380 long decimals = -1;
3381 while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
3382 if (*cp == '.')
3383 decimals = 0;
3384 else if (decimals < scale) {
3385 unsigned int value;
3386 value = *cp - '0';
3387 result = result * 10 + value;
3388 if (decimals >= 0)
3389 decimals++;
3390 }
3391 cp++;
3392 }
3393 if (*cp == '\n')
3394 cp++;
3395 if (*cp)
3396 return -EINVAL;
3397 if (decimals < 0)
3398 decimals = 0;
3399 while (decimals < scale) {
3400 result *= 10;
3401 decimals ++;
3402 }
3403 *res = result;
3404 return 0;
3405}
3406
3407
3408static void md_safemode_timeout(unsigned long data);
3409
3410static ssize_t
3411safe_delay_show(struct mddev *mddev, char *page)
3412{
3413 int msec = (mddev->safemode_delay*1000)/HZ;
3414 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3415}
3416static ssize_t
3417safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3418{
3419 unsigned long msec;
3420
3421 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3422 return -EINVAL;
3423 if (msec == 0)
3424 mddev->safemode_delay = 0;
3425 else {
3426 unsigned long old_delay = mddev->safemode_delay;
3427 mddev->safemode_delay = (msec*HZ)/1000;
3428 if (mddev->safemode_delay == 0)
3429 mddev->safemode_delay = 1;
3430 if (mddev->safemode_delay < old_delay)
3431 md_safemode_timeout((unsigned long)mddev);
3432 }
3433 return len;
3434}
3435static struct md_sysfs_entry md_safe_delay =
3436__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3437
3438static ssize_t
3439level_show(struct mddev *mddev, char *page)
3440{
3441 struct md_personality *p = mddev->pers;
3442 if (p)
3443 return sprintf(page, "%s\n", p->name);
3444 else if (mddev->clevel[0])
3445 return sprintf(page, "%s\n", mddev->clevel);
3446 else if (mddev->level != LEVEL_NONE)
3447 return sprintf(page, "%d\n", mddev->level);
3448 else
3449 return 0;
3450}
3451
3452static ssize_t
3453level_store(struct mddev *mddev, const char *buf, size_t len)
3454{
3455 char clevel[16];
3456 ssize_t rv = len;
3457 struct md_personality *pers;
3458 long level;
3459 void *priv;
3460 struct md_rdev *rdev;
3461
3462 if (mddev->pers == NULL) {
3463 if (len == 0)
3464 return 0;
3465 if (len >= sizeof(mddev->clevel))
3466 return -ENOSPC;
3467 strncpy(mddev->clevel, buf, len);
3468 if (mddev->clevel[len-1] == '\n')
3469 len--;
3470 mddev->clevel[len] = 0;
3471 mddev->level = LEVEL_NONE;
3472 return rv;
3473 }
3474
	/* Changing the personality of a running array requires that no
	 * resync/reshape is in progress and that nothing else is busy
	 * reconfiguring it through sysfs. */
3481 if (mddev->sync_thread ||
3482 mddev->reshape_position != MaxSector ||
3483 mddev->sysfs_active)
3484 return -EBUSY;
3485
3486 if (!mddev->pers->quiesce) {
3487 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3488 mdname(mddev), mddev->pers->name);
3489 return -EINVAL;
3490 }
3491
3492
3493 if (len == 0 || len >= sizeof(clevel))
3494 return -EINVAL;
3495 strncpy(clevel, buf, len);
3496 if (clevel[len-1] == '\n')
3497 len--;
3498 clevel[len] = 0;
3499 if (strict_strtol(clevel, 10, &level))
3500 level = LEVEL_NONE;
3501
3502 if (request_module("md-%s", clevel) != 0)
3503 request_module("md-level-%s", clevel);
3504 spin_lock(&pers_lock);
3505 pers = find_pers(level, clevel);
3506 if (!pers || !try_module_get(pers->owner)) {
3507 spin_unlock(&pers_lock);
3508 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3509 return -EINVAL;
3510 }
3511 spin_unlock(&pers_lock);
3512
3513 if (pers == mddev->pers) {
3514
3515 module_put(pers->owner);
3516 return rv;
3517 }
3518 if (!pers->takeover) {
3519 module_put(pers->owner);
3520 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3521 mdname(mddev), clevel);
3522 return -EINVAL;
3523 }
3524
3525 rdev_for_each(rdev, mddev)
3526 rdev->new_raid_disk = rdev->raid_disk;
3527
	/* ask the new personality whether it can take over this array */
3531 priv = pers->takeover(mddev);
3532 if (IS_ERR(priv)) {
3533 mddev->new_level = mddev->level;
3534 mddev->new_layout = mddev->layout;
3535 mddev->new_chunk_sectors = mddev->chunk_sectors;
3536 mddev->raid_disks -= mddev->delta_disks;
3537 mddev->delta_disks = 0;
3538 mddev->reshape_backwards = 0;
3539 module_put(pers->owner);
3540 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3541 mdname(mddev), clevel);
3542 return PTR_ERR(priv);
3543 }
3544
3545
3546 mddev_suspend(mddev);
3547 mddev->pers->stop(mddev);
3548
3549 if (mddev->pers->sync_request == NULL &&
3550 pers->sync_request != NULL) {
		/* gaining redundancy: register the sync/recovery attributes */
3552 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3553 printk(KERN_WARNING
3554 "md: cannot register extra attributes for %s\n",
3555 mdname(mddev));
3556 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
3557 }
3558 if (mddev->pers->sync_request != NULL &&
3559 pers->sync_request == NULL) {
		/* losing redundancy: the extra attributes need to be removed */
3561 if (mddev->to_remove == NULL)
3562 mddev->to_remove = &md_redundancy_group;
3563 }
3564
3565 if (mddev->pers->sync_request == NULL &&
3566 mddev->external) {
		/* The old personality had no redundancy and metadata is
		 * managed externally; make sure writes cannot block on a
		 * metadata update by clearing in_sync and disabling safemode. */
3574 mddev->in_sync = 0;
3575 mddev->safemode_delay = 0;
3576 mddev->safemode = 0;
3577 }
3578
3579 rdev_for_each(rdev, mddev) {
3580 if (rdev->raid_disk < 0)
3581 continue;
3582 if (rdev->new_raid_disk >= mddev->raid_disks)
3583 rdev->new_raid_disk = -1;
3584 if (rdev->new_raid_disk == rdev->raid_disk)
3585 continue;
3586 sysfs_unlink_rdev(mddev, rdev);
3587 }
3588 rdev_for_each(rdev, mddev) {
3589 if (rdev->raid_disk < 0)
3590 continue;
3591 if (rdev->new_raid_disk == rdev->raid_disk)
3592 continue;
3593 rdev->raid_disk = rdev->new_raid_disk;
3594 if (rdev->raid_disk < 0)
3595 clear_bit(In_sync, &rdev->flags);
3596 else {
3597 if (sysfs_link_rdev(mddev, rdev))
3598 printk(KERN_WARNING "md: cannot register rd%d"
3599 " for %s after level change\n",
3600 rdev->raid_disk, mdname(mddev));
3601 }
3602 }
3603
3604 module_put(mddev->pers->owner);
3605 mddev->pers = pers;
3606 mddev->private = priv;
3607 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3608 mddev->level = mddev->new_level;
3609 mddev->layout = mddev->new_layout;
3610 mddev->chunk_sectors = mddev->new_chunk_sectors;
3611 mddev->delta_disks = 0;
3612 mddev->reshape_backwards = 0;
3613 mddev->degraded = 0;
3614 if (mddev->pers->sync_request == NULL) {
		/* an array without redundancy is always considered in_sync */
3618 mddev->in_sync = 1;
3619 del_timer_sync(&mddev->safemode_timer);
3620 }
3621 pers->run(mddev);
3622 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3623 mddev_resume(mddev);
3624 sysfs_notify(&mddev->kobj, NULL, "level");
3625 md_new_event(mddev);
3626 return rv;
3627}
3628
3629static struct md_sysfs_entry md_level =
3630__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3631
3632
3633static ssize_t
3634layout_show(struct mddev *mddev, char *page)
3635{
3636
3637 if (mddev->reshape_position != MaxSector &&
3638 mddev->layout != mddev->new_layout)
3639 return sprintf(page, "%d (%d)\n",
3640 mddev->new_layout, mddev->layout);
3641 return sprintf(page, "%d\n", mddev->layout);
3642}
3643
3644static ssize_t
3645layout_store(struct mddev *mddev, const char *buf, size_t len)
3646{
3647 char *e;
3648 unsigned long n = simple_strtoul(buf, &e, 10);
3649
3650 if (!*buf || (*e && *e != '\n'))
3651 return -EINVAL;
3652
3653 if (mddev->pers) {
3654 int err;
3655 if (mddev->pers->check_reshape == NULL)
3656 return -EBUSY;
3657 mddev->new_layout = n;
3658 err = mddev->pers->check_reshape(mddev);
3659 if (err) {
3660 mddev->new_layout = mddev->layout;
3661 return err;
3662 }
3663 } else {
3664 mddev->new_layout = n;
3665 if (mddev->reshape_position == MaxSector)
3666 mddev->layout = n;
3667 }
3668 return len;
3669}
3670static struct md_sysfs_entry md_layout =
3671__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3672
3673
3674static ssize_t
3675raid_disks_show(struct mddev *mddev, char *page)
3676{
3677 if (mddev->raid_disks == 0)
3678 return 0;
3679 if (mddev->reshape_position != MaxSector &&
3680 mddev->delta_disks != 0)
3681 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3682 mddev->raid_disks - mddev->delta_disks);
3683 return sprintf(page, "%d\n", mddev->raid_disks);
3684}
3685
3686static int update_raid_disks(struct mddev *mddev, int raid_disks);
3687
3688static ssize_t
3689raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3690{
3691 char *e;
3692 int rv = 0;
3693 unsigned long n = simple_strtoul(buf, &e, 10);
3694
3695 if (!*buf || (*e && *e != '\n'))
3696 return -EINVAL;
3697
3698 if (mddev->pers)
3699 rv = update_raid_disks(mddev, n);
3700 else if (mddev->reshape_position != MaxSector) {
3701 struct md_rdev *rdev;
3702 int olddisks = mddev->raid_disks - mddev->delta_disks;
3703
3704 rdev_for_each(rdev, mddev) {
3705 if (olddisks < n &&
3706 rdev->data_offset < rdev->new_data_offset)
3707 return -EINVAL;
3708 if (olddisks > n &&
3709 rdev->data_offset > rdev->new_data_offset)
3710 return -EINVAL;
3711 }
3712 mddev->delta_disks = n - olddisks;
3713 mddev->raid_disks = n;
3714 mddev->reshape_backwards = (mddev->delta_disks < 0);
3715 } else
3716 mddev->raid_disks = n;
3717 return rv ? rv : len;
3718}
3719static struct md_sysfs_entry md_raid_disks =
3720__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3721
3722static ssize_t
3723chunk_size_show(struct mddev *mddev, char *page)
3724{
3725 if (mddev->reshape_position != MaxSector &&
3726 mddev->chunk_sectors != mddev->new_chunk_sectors)
3727 return sprintf(page, "%d (%d)\n",
3728 mddev->new_chunk_sectors << 9,
3729 mddev->chunk_sectors << 9);
3730 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3731}
3732
3733static ssize_t
3734chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3735{
3736 char *e;
3737 unsigned long n = simple_strtoul(buf, &e, 10);
3738
3739 if (!*buf || (*e && *e != '\n'))
3740 return -EINVAL;
3741
3742 if (mddev->pers) {
3743 int err;
3744 if (mddev->pers->check_reshape == NULL)
3745 return -EBUSY;
3746 mddev->new_chunk_sectors = n >> 9;
3747 err = mddev->pers->check_reshape(mddev);
3748 if (err) {
3749 mddev->new_chunk_sectors = mddev->chunk_sectors;
3750 return err;
3751 }
3752 } else {
3753 mddev->new_chunk_sectors = n >> 9;
3754 if (mddev->reshape_position == MaxSector)
3755 mddev->chunk_sectors = n >> 9;
3756 }
3757 return len;
3758}
3759static struct md_sysfs_entry md_chunk_size =
3760__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3761
3762static ssize_t
3763resync_start_show(struct mddev *mddev, char *page)
3764{
3765 if (mddev->recovery_cp == MaxSector)
3766 return sprintf(page, "none\n");
3767 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3768}
3769
3770static ssize_t
3771resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3772{
3773 char *e;
3774 unsigned long long n = simple_strtoull(buf, &e, 10);
3775
3776 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3777 return -EBUSY;
3778 if (cmd_match(buf, "none"))
3779 n = MaxSector;
3780 else if (!*buf || (*e && *e != '\n'))
3781 return -EINVAL;
3782
3783 mddev->recovery_cp = n;
3784 if (mddev->pers)
3785 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3786 return len;
3787}
3788static struct md_sysfs_entry md_resync_start =
3789__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3790
/*
 * Array states, as reported and set through the 'array_state' attribute:
 *
 *  clear         - no devices, no size, no level
 *  inactive      - may have some settings, but the array is not active
 *  suspended     - reserved; cannot currently be written
 *  readonly      - no resync, no superblock writes; write requests fail
 *  read-auto     - read-only until a write request arrives
 *  clean         - active, with no pending writes
 *  active        - fully active; I/O and resync may be happening
 *  write-pending - clean, but writes are blocked until 'active' is written
 *  active-idle   - like active, but no writes have been seen recently (safemode)
 */
3827enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
3828 write_pending, active_idle, bad_word};
3829static char *array_states[] = {
3830 "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
3831 "write-pending", "active-idle", NULL };
3832
3833static int match_word(const char *word, char **list)
3834{
3835 int n;
3836 for (n=0; list[n]; n++)
3837 if (cmd_match(word, list[n]))
3838 break;
3839 return n;
3840}
3841
3842static ssize_t
3843array_state_show(struct mddev *mddev, char *page)
3844{
3845 enum array_state st = inactive;
3846
3847 if (mddev->pers)
3848 switch(mddev->ro) {
3849 case 1:
3850 st = readonly;
3851 break;
3852 case 2:
3853 st = read_auto;
3854 break;
3855 case 0:
3856 if (mddev->in_sync)
3857 st = clean;
3858 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3859 st = write_pending;
3860 else if (mddev->safemode)
3861 st = active_idle;
3862 else
3863 st = active;
3864 }
3865 else {
3866 if (list_empty(&mddev->disks) &&
3867 mddev->raid_disks == 0 &&
3868 mddev->dev_sectors == 0)
3869 st = clear;
3870 else
3871 st = inactive;
3872 }
3873 return sprintf(page, "%s\n", array_states[st]);
3874}
3875
3876static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3877static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3878static int do_md_run(struct mddev * mddev);
3879static int restart_array(struct mddev *mddev);
3880
3881static ssize_t
3882array_state_store(struct mddev *mddev, const char *buf, size_t len)
3883{
3884 int err = -EINVAL;
3885 enum array_state st = match_word(buf, array_states);
3886 switch(st) {
3887 case bad_word:
3888 break;
3889 case clear:
		/* stop an active array and release it completely */
3891 err = do_md_stop(mddev, 0, NULL);
3892 break;
3893 case inactive:
		/* stop the array but keep it assembled */
3895 if (mddev->pers)
3896 err = do_md_stop(mddev, 2, NULL);
3897 else
3898 err = 0;
3899 break;
3900 case suspended:
3901 break;
3902 case readonly:
3903 if (mddev->pers)
3904 err = md_set_readonly(mddev, NULL);
3905 else {
3906 mddev->ro = 1;
3907 set_disk_ro(mddev->gendisk, 1);
3908 err = do_md_run(mddev);
3909 }
3910 break;
3911 case read_auto:
3912 if (mddev->pers) {
3913 if (mddev->ro == 0)
3914 err = md_set_readonly(mddev, NULL);
3915 else if (mddev->ro == 1)
3916 err = restart_array(mddev);
3917 if (err == 0) {
3918 mddev->ro = 2;
3919 set_disk_ro(mddev->gendisk, 0);
3920 }
3921 } else {
3922 mddev->ro = 2;
3923 err = do_md_run(mddev);
3924 }
3925 break;
3926 case clean:
3927 if (mddev->pers) {
3928 restart_array(mddev);
3929 spin_lock_irq(&mddev->write_lock);
3930 if (atomic_read(&mddev->writes_pending) == 0) {
3931 if (mddev->in_sync == 0) {
3932 mddev->in_sync = 1;
3933 if (mddev->safemode == 1)
3934 mddev->safemode = 0;
3935 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3936 }
3937 err = 0;
3938 } else
3939 err = -EBUSY;
3940 spin_unlock_irq(&mddev->write_lock);
3941 } else
3942 err = -EINVAL;
3943 break;
3944 case active:
3945 if (mddev->pers) {
3946 restart_array(mddev);
3947 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3948 wake_up(&mddev->sb_wait);
3949 err = 0;
3950 } else {
3951 mddev->ro = 0;
3952 set_disk_ro(mddev->gendisk, 0);
3953 err = do_md_run(mddev);
3954 }
3955 break;
3956 case write_pending:
3957 case active_idle:
		/* these states cannot be set directly */
3959 break;
3960 }
3961 if (err)
3962 return err;
3963 else {
3964 if (mddev->hold_active == UNTIL_IOCTL)
3965 mddev->hold_active = 0;
3966 sysfs_notify_dirent_safe(mddev->sysfs_state);
3967 return len;
3968 }
3969}
3970static struct md_sysfs_entry md_array_state =
3971__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3972
3973static ssize_t
3974max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3975 return sprintf(page, "%d\n",
3976 atomic_read(&mddev->max_corr_read_errors));
3977}
3978
3979static ssize_t
3980max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3981{
3982 char *e;
3983 unsigned long n = simple_strtoul(buf, &e, 10);
3984
3985 if (*buf && (*e == 0 || *e == '\n')) {
3986 atomic_set(&mddev->max_corr_read_errors, n);
3987 return len;
3988 }
3989 return -EINVAL;
3990}
3991
3992static struct md_sysfs_entry max_corr_read_errors =
3993__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3994 max_corrected_read_errors_store);
3995
3996static ssize_t
3997null_show(struct mddev *mddev, char *page)
3998{
3999 return -EINVAL;
4000}
4001
4002static ssize_t
4003new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4004{
	/* The buffer must contain "major:minor" identifying the device to add.
	 * The new device is added to the array; if the array uses persistent
	 * superblocks, the device's superblock is loaded and checked against
	 * the existing members first. */
4012 char *e;
4013 int major = simple_strtoul(buf, &e, 10);
4014 int minor;
4015 dev_t dev;
4016 struct md_rdev *rdev;
4017 int err;
4018
4019 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4020 return -EINVAL;
4021 minor = simple_strtoul(e+1, &e, 10);
4022 if (*e && *e != '\n')
4023 return -EINVAL;
4024 dev = MKDEV(major, minor);
4025 if (major != MAJOR(dev) ||
4026 minor != MINOR(dev))
4027 return -EOVERFLOW;
4028
4029
4030 if (mddev->persistent) {
4031 rdev = md_import_device(dev, mddev->major_version,
4032 mddev->minor_version);
4033 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4034 struct md_rdev *rdev0
4035 = list_entry(mddev->disks.next,
4036 struct md_rdev, same_set);
4037 err = super_types[mddev->major_version]
4038 .load_super(rdev, rdev0, mddev->minor_version);
4039 if (err < 0)
4040 goto out;
4041 }
4042 } else if (mddev->external)
4043 rdev = md_import_device(dev, -2, -1);
4044 else
4045 rdev = md_import_device(dev, -1, -1);
4046
4047 if (IS_ERR(rdev))
4048 return PTR_ERR(rdev);
4049 err = bind_rdev_to_array(rdev, mddev);
4050 out:
4051 if (err)
4052 export_rdev(rdev);
4053 return err ? err : len;
4054}
4055
4056static struct md_sysfs_entry md_new_device =
4057__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4058
4059static ssize_t
4060bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4061{
4062 char *end;
4063 unsigned long chunk, end_chunk;
4064
4065 if (!mddev->bitmap)
4066 goto out;
4067
4068 while (*buf) {
4069 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4070 if (buf == end) break;
4071 if (*end == '-') {
4072 buf = end + 1;
4073 end_chunk = simple_strtoul(buf, &end, 0);
4074 if (buf == end) break;
4075 }
4076 if (*end && !isspace(*end)) break;
4077 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4078 buf = skip_spaces(end);
4079 }
4080 bitmap_unplug(mddev->bitmap);
4081out:
4082 return len;
4083}
4084
4085static struct md_sysfs_entry md_bitmap =
4086__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4087
4088static ssize_t
4089size_show(struct mddev *mddev, char *page)
4090{
4091 return sprintf(page, "%llu\n",
4092 (unsigned long long)mddev->dev_sectors / 2);
4093}
4094
4095static int update_size(struct mddev *mddev, sector_t num_sectors);
4096
4097static ssize_t
4098size_store(struct mddev *mddev, const char *buf, size_t len)
4099{
	/* If the array is inactive, the component size may be reduced, but not
	 * increased (except from 0).  If the array is active, an online resize
	 * is attempted. */
4104 sector_t sectors;
	int err = strict_blocks_to_sectors(buf, &sectors);
4106
4107 if (err < 0)
4108 return err;
4109 if (mddev->pers) {
4110 err = update_size(mddev, sectors);
4111 md_update_sb(mddev, 1);
4112 } else {
4113 if (mddev->dev_sectors == 0 ||
4114 mddev->dev_sectors > sectors)
4115 mddev->dev_sectors = sectors;
4116 else
4117 err = -ENOSPC;
4118 }
4119 return err ? err : len;
4120}
4121
4122static struct md_sysfs_entry md_size =
4123__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4124
4125
4126
4127
4128
4129
4130
4131
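/*
 * metadata_version:
 *   "none"          - non-persistent superblocks
 *   "external:NAME" - metadata is managed outside the kernel
 *   "MAJOR.MINOR"   - one of the internally supported superblock formats
 */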
4132static ssize_t
4133metadata_show(struct mddev *mddev, char *page)
4134{
4135 if (mddev->persistent)
4136 return sprintf(page, "%d.%d\n",
4137 mddev->major_version, mddev->minor_version);
4138 else if (mddev->external)
4139 return sprintf(page, "external:%s\n", mddev->metadata_type);
4140 else
4141 return sprintf(page, "none\n");
4142}
4143
4144static ssize_t
4145metadata_store(struct mddev *mddev, const char *buf, size_t len)
4146{
4147 int major, minor;
4148 char *e;
4149
	/* Changing the details of 'external' metadata is always permitted;
	 * otherwise the array must have no devices attached. */
4153 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4154 ;
4155 else if (!list_empty(&mddev->disks))
4156 return -EBUSY;
4157
4158 if (cmd_match(buf, "none")) {
4159 mddev->persistent = 0;
4160 mddev->external = 0;
4161 mddev->major_version = 0;
4162 mddev->minor_version = 90;
4163 return len;
4164 }
4165 if (strncmp(buf, "external:", 9) == 0) {
4166 size_t namelen = len-9;
4167 if (namelen >= sizeof(mddev->metadata_type))
4168 namelen = sizeof(mddev->metadata_type)-1;
4169 strncpy(mddev->metadata_type, buf+9, namelen);
4170 mddev->metadata_type[namelen] = 0;
4171 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4172 mddev->metadata_type[--namelen] = 0;
4173 mddev->persistent = 0;
4174 mddev->external = 1;
4175 mddev->major_version = 0;
4176 mddev->minor_version = 90;
4177 return len;
4178 }
4179 major = simple_strtoul(buf, &e, 10);
4180 if (e==buf || *e != '.')
4181 return -EINVAL;
4182 buf = e+1;
4183 minor = simple_strtoul(buf, &e, 10);
4184 if (e==buf || (*e && *e != '\n') )
4185 return -EINVAL;
4186 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4187 return -ENOENT;
4188 mddev->major_version = major;
4189 mddev->minor_version = minor;
4190 mddev->persistent = 1;
4191 mddev->external = 0;
4192 return len;
4193}
4194
4195static struct md_sysfs_entry md_metadata =
4196__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4197
4198static ssize_t
4199action_show(struct mddev *mddev, char *page)
4200{
4201 char *type = "idle";
4202 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4203 type = "frozen";
4204 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4205 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4206 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4207 type = "reshape";
4208 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4209 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4210 type = "resync";
4211 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4212 type = "check";
4213 else
4214 type = "repair";
4215 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4216 type = "recover";
4217 }
4218 return sprintf(page, "%s\n", type);
4219}
4220
4221static void reap_sync_thread(struct mddev *mddev);
4222
4223static ssize_t
4224action_store(struct mddev *mddev, const char *page, size_t len)
4225{
4226 if (!mddev->pers || !mddev->pers->sync_request)
4227 return -EINVAL;
4228
4229 if (cmd_match(page, "frozen"))
4230 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4231 else
4232 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4233
4234 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4235 if (mddev->sync_thread) {
4236 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4237 reap_sync_thread(mddev);
4238 }
4239 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4240 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4241 return -EBUSY;
4242 else if (cmd_match(page, "resync"))
4243 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4244 else if (cmd_match(page, "recover")) {
4245 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4246 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4247 } else if (cmd_match(page, "reshape")) {
4248 int err;
4249 if (mddev->pers->start_reshape == NULL)
4250 return -EINVAL;
4251 err = mddev->pers->start_reshape(mddev);
4252 if (err)
4253 return err;
4254 sysfs_notify(&mddev->kobj, NULL, "degraded");
4255 } else {
4256 if (cmd_match(page, "check"))
4257 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4258 else if (!cmd_match(page, "repair"))
4259 return -EINVAL;
4260 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4261 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4262 }
4263 if (mddev->ro == 2) {
		/* a write to sync_action is enough to justify canceling
		 * read-auto mode */
4267 mddev->ro = 0;
4268 md_wakeup_thread(mddev->sync_thread);
4269 }
4270 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4271 md_wakeup_thread(mddev->thread);
4272 sysfs_notify_dirent_safe(mddev->sysfs_action);
4273 return len;
4274}
4275
4276static ssize_t
4277mismatch_cnt_show(struct mddev *mddev, char *page)
4278{
4279 return sprintf(page, "%llu\n",
4280 (unsigned long long)
4281 atomic64_read(&mddev->resync_mismatches));
4282}
4283
4284static struct md_sysfs_entry md_scan_mode =
4285__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4286
4287
4288static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4289
4290static ssize_t
4291sync_min_show(struct mddev *mddev, char *page)
4292{
4293 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4294 mddev->sync_speed_min ? "local": "system");
4295}
4296
4297static ssize_t
4298sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4299{
4300 int min;
4301 char *e;
4302 if (strncmp(buf, "system", 6)==0) {
4303 mddev->sync_speed_min = 0;
4304 return len;
4305 }
4306 min = simple_strtoul(buf, &e, 10);
4307 if (buf == e || (*e && *e != '\n') || min <= 0)
4308 return -EINVAL;
4309 mddev->sync_speed_min = min;
4310 return len;
4311}
4312
4313static struct md_sysfs_entry md_sync_min =
4314__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4315
4316static ssize_t
4317sync_max_show(struct mddev *mddev, char *page)
4318{
4319 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4320 mddev->sync_speed_max ? "local": "system");
4321}
4322
4323static ssize_t
4324sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4325{
4326 int max;
4327 char *e;
4328 if (strncmp(buf, "system", 6)==0) {
4329 mddev->sync_speed_max = 0;
4330 return len;
4331 }
4332 max = simple_strtoul(buf, &e, 10);
4333 if (buf == e || (*e && *e != '\n') || max <= 0)
4334 return -EINVAL;
4335 mddev->sync_speed_max = max;
4336 return len;
4337}
4338
4339static struct md_sysfs_entry md_sync_max =
4340__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4341
4342static ssize_t
4343degraded_show(struct mddev *mddev, char *page)
4344{
4345 return sprintf(page, "%d\n", mddev->degraded);
4346}
4347static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4348
4349static ssize_t
4350sync_force_parallel_show(struct mddev *mddev, char *page)
4351{
4352 return sprintf(page, "%d\n", mddev->parallel_resync);
4353}
4354
4355static ssize_t
4356sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4357{
4358 long n;
4359
4360 if (strict_strtol(buf, 10, &n))
4361 return -EINVAL;
4362
4363 if (n != 0 && n != 1)
4364 return -EINVAL;
4365
4366 mddev->parallel_resync = n;
4367
4368 if (mddev->sync_thread)
4369 wake_up(&resync_wait);
4370
4371 return len;
4372}
4373
4374
4375static struct md_sysfs_entry md_sync_force_parallel =
4376__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4377 sync_force_parallel_show, sync_force_parallel_store);
4378
4379static ssize_t
4380sync_speed_show(struct mddev *mddev, char *page)
4381{
4382 unsigned long resync, dt, db;
4383 if (mddev->curr_resync == 0)
4384 return sprintf(page, "none\n");
4385 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4386 dt = (jiffies - mddev->resync_mark) / HZ;
4387 if (!dt) dt++;
4388 db = resync - mddev->resync_mark_cnt;
4389 return sprintf(page, "%lu\n", db/dt/2);
4390}
4391
4392static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4393
4394static ssize_t
4395sync_completed_show(struct mddev *mddev, char *page)
4396{
4397 unsigned long long max_sectors, resync;
4398
4399 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4400 return sprintf(page, "none\n");
4401
4402 if (mddev->curr_resync == 1 ||
4403 mddev->curr_resync == 2)
4404 return sprintf(page, "delayed\n");
4405
4406 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4407 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4408 max_sectors = mddev->resync_max_sectors;
4409 else
4410 max_sectors = mddev->dev_sectors;
4411
4412 resync = mddev->curr_resync_completed;
4413 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4414}
4415
4416static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4417
4418static ssize_t
4419min_sync_show(struct mddev *mddev, char *page)
4420{
4421 return sprintf(page, "%llu\n",
4422 (unsigned long long)mddev->resync_min);
4423}
4424static ssize_t
4425min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4426{
4427 unsigned long long min;
4428 if (strict_strtoull(buf, 10, &min))
4429 return -EINVAL;
4430 if (min > mddev->resync_max)
4431 return -EINVAL;
4432 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4433 return -EBUSY;
4434
	/* the minimum must be a multiple of the chunk size */
4436 if (mddev->chunk_sectors) {
4437 sector_t temp = min;
4438 if (sector_div(temp, mddev->chunk_sectors))
4439 return -EINVAL;
4440 }
4441 mddev->resync_min = min;
4442
4443 return len;
4444}
4445
4446static struct md_sysfs_entry md_min_sync =
4447__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4448
4449static ssize_t
4450max_sync_show(struct mddev *mddev, char *page)
4451{
4452 if (mddev->resync_max == MaxSector)
4453 return sprintf(page, "max\n");
4454 else
4455 return sprintf(page, "%llu\n",
4456 (unsigned long long)mddev->resync_max);
4457}
4458static ssize_t
4459max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4460{
4461 if (strncmp(buf, "max", 3) == 0)
4462 mddev->resync_max = MaxSector;
4463 else {
4464 unsigned long long max;
4465 if (strict_strtoull(buf, 10, &max))
4466 return -EINVAL;
4467 if (max < mddev->resync_min)
4468 return -EINVAL;
4469 if (max < mddev->resync_max &&
4470 mddev->ro == 0 &&
4471 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4472 return -EBUSY;
4473
		/* the maximum must be a multiple of the chunk size */
4475 if (mddev->chunk_sectors) {
4476 sector_t temp = max;
4477 if (sector_div(temp, mddev->chunk_sectors))
4478 return -EINVAL;
4479 }
4480 mddev->resync_max = max;
4481 }
4482 wake_up(&mddev->recovery_wait);
4483 return len;
4484}
4485
4486static struct md_sysfs_entry md_max_sync =
4487__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4488
4489static ssize_t
4490suspend_lo_show(struct mddev *mddev, char *page)
4491{
4492 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4493}
4494
4495static ssize_t
4496suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4497{
4498 char *e;
4499 unsigned long long new = simple_strtoull(buf, &e, 10);
4500 unsigned long long old = mddev->suspend_lo;
4501
4502 if (mddev->pers == NULL ||
4503 mddev->pers->quiesce == NULL)
4504 return -EINVAL;
4505 if (buf == e || (*e && *e != '\n'))
4506 return -EINVAL;
4507
4508 mddev->suspend_lo = new;
	if (new >= old)
		/* the suspended region is shrinking */
		mddev->pers->quiesce(mddev, 2);
	else {
		/* the suspended region is growing; fully quiesce so
		 * outstanding requests drain */
		mddev->pers->quiesce(mddev, 1);
		mddev->pers->quiesce(mddev, 0);
	}
4516 }
4517 return len;
4518}
4519static struct md_sysfs_entry md_suspend_lo =
4520__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4521
4522
4523static ssize_t
4524suspend_hi_show(struct mddev *mddev, char *page)
4525{
4526 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4527}
4528
4529static ssize_t
4530suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4531{
4532 char *e;
4533 unsigned long long new = simple_strtoull(buf, &e, 10);
4534 unsigned long long old = mddev->suspend_hi;
4535
4536 if (mddev->pers == NULL ||
4537 mddev->pers->quiesce == NULL)
4538 return -EINVAL;
4539 if (buf == e || (*e && *e != '\n'))
4540 return -EINVAL;
4541
4542 mddev->suspend_hi = new;
4543 if (new <= old)
4544
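		/* lowering suspend_hi shrinks the suspended region */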
4545 mddev->pers->quiesce(mddev, 2);
4546 else {
4547
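		/* raising suspend_hi expands the suspended region: quiesce and resume so no request is left in flight inside it */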
4548 mddev->pers->quiesce(mddev, 1);
4549 mddev->pers->quiesce(mddev, 0);
4550 }
4551 return len;
4552}
4553static struct md_sysfs_entry md_suspend_hi =
4554__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4555
4556static ssize_t
4557reshape_position_show(struct mddev *mddev, char *page)
4558{
4559 if (mddev->reshape_position != MaxSector)
4560 return sprintf(page, "%llu\n",
4561 (unsigned long long)mddev->reshape_position);
4562 strcpy(page, "none\n");
4563 return 5;
4564}
4565
4566static ssize_t
4567reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4568{
4569 struct md_rdev *rdev;
4570 char *e;
4571 unsigned long long new = simple_strtoull(buf, &e, 10);
4572 if (mddev->pers)
4573 return -EBUSY;
4574 if (buf == e || (*e && *e != '\n'))
4575 return -EINVAL;
4576 mddev->reshape_position = new;
4577 mddev->delta_disks = 0;
4578 mddev->reshape_backwards = 0;
4579 mddev->new_level = mddev->level;
4580 mddev->new_layout = mddev->layout;
4581 mddev->new_chunk_sectors = mddev->chunk_sectors;
4582 rdev_for_each(rdev, mddev)
4583 rdev->new_data_offset = rdev->data_offset;
4584 return len;
4585}
4586
4587static struct md_sysfs_entry md_reshape_position =
4588__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4589 reshape_position_store);
4590
4591static ssize_t
4592reshape_direction_show(struct mddev *mddev, char *page)
4593{
4594 return sprintf(page, "%s\n",
4595 mddev->reshape_backwards ? "backwards" : "forwards");
4596}
4597
4598static ssize_t
4599reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4600{
4601 int backwards = 0;
4602 if (cmd_match(buf, "forwards"))
4603 backwards = 0;
4604 else if (cmd_match(buf, "backwards"))
4605 backwards = 1;
4606 else
4607 return -EINVAL;
4608 if (mddev->reshape_backwards == backwards)
4609 return len;
4610
4611
4612 if (mddev->delta_disks)
4613 return -EBUSY;
4614
4615 if (mddev->persistent &&
4616 mddev->major_version == 0)
4617 return -EINVAL;
4618
4619 mddev->reshape_backwards = backwards;
4620 return len;
4621}
4622
4623static struct md_sysfs_entry md_reshape_direction =
4624__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4625 reshape_direction_store);
4626
4627static ssize_t
4628array_size_show(struct mddev *mddev, char *page)
4629{
4630 if (mddev->external_size)
4631 return sprintf(page, "%llu\n",
4632 (unsigned long long)mddev->array_sectors/2);
4633 else
4634 return sprintf(page, "default\n");
4635}
4636
4637static ssize_t
4638array_size_store(struct mddev *mddev, const char *buf, size_t len)
4639{
4640 sector_t sectors;
4641
4642 if (strncmp(buf, "default", 7) == 0) {
4643 if (mddev->pers)
4644 sectors = mddev->pers->size(mddev, 0, 0);
4645 else
4646 sectors = mddev->array_sectors;
4647
4648 mddev->external_size = 0;
4649 } else {
		if (strict_blocks_to_sectors(buf, &sectors) < 0)
4651 return -EINVAL;
4652 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4653 return -E2BIG;
4654
4655 mddev->external_size = 1;
4656 }
4657
4658 mddev->array_sectors = sectors;
4659 if (mddev->pers) {
4660 set_capacity(mddev->gendisk, mddev->array_sectors);
4661 revalidate_disk(mddev->gendisk);
4662 }
4663 return len;
4664}
4665
4666static struct md_sysfs_entry md_array_size =
4667__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4668 array_size_store);
4669
4670static struct attribute *md_default_attrs[] = {
4671 &md_level.attr,
4672 &md_layout.attr,
4673 &md_raid_disks.attr,
4674 &md_chunk_size.attr,
4675 &md_size.attr,
4676 &md_resync_start.attr,
4677 &md_metadata.attr,
4678 &md_new_device.attr,
4679 &md_safe_delay.attr,
4680 &md_array_state.attr,
4681 &md_reshape_position.attr,
4682 &md_reshape_direction.attr,
4683 &md_array_size.attr,
4684 &max_corr_read_errors.attr,
4685 NULL,
4686};
4687
4688static struct attribute *md_redundancy_attrs[] = {
4689 &md_scan_mode.attr,
4690 &md_mismatches.attr,
4691 &md_sync_min.attr,
4692 &md_sync_max.attr,
4693 &md_sync_speed.attr,
4694 &md_sync_force_parallel.attr,
4695 &md_sync_completed.attr,
4696 &md_min_sync.attr,
4697 &md_max_sync.attr,
4698 &md_suspend_lo.attr,
4699 &md_suspend_hi.attr,
4700 &md_bitmap.attr,
4701 &md_degraded.attr,
4702 NULL,
4703};
4704static struct attribute_group md_redundancy_group = {
4705 .name = NULL,
4706 .attrs = md_redundancy_attrs,
4707};
4708
4709
4710static ssize_t
4711md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4712{
4713 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4714 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4715 ssize_t rv;
4716
4717 if (!entry->show)
4718 return -EIO;
4719 spin_lock(&all_mddevs_lock);
4720 if (list_empty(&mddev->all_mddevs)) {
4721 spin_unlock(&all_mddevs_lock);
4722 return -EBUSY;
4723 }
4724 mddev_get(mddev);
4725 spin_unlock(&all_mddevs_lock);
4726
4727 rv = mddev_lock(mddev);
4728 if (!rv) {
4729 rv = entry->show(mddev, page);
4730 mddev_unlock(mddev);
4731 }
4732 mddev_put(mddev);
4733 return rv;
4734}
4735
4736static ssize_t
4737md_attr_store(struct kobject *kobj, struct attribute *attr,
4738 const char *page, size_t length)
4739{
4740 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4741 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4742 ssize_t rv;
4743
4744 if (!entry->store)
4745 return -EIO;
4746 if (!capable(CAP_SYS_ADMIN))
4747 return -EACCES;
4748 spin_lock(&all_mddevs_lock);
4749 if (list_empty(&mddev->all_mddevs)) {
4750 spin_unlock(&all_mddevs_lock);
4751 return -EBUSY;
4752 }
4753 mddev_get(mddev);
4754 spin_unlock(&all_mddevs_lock);
4755 if (entry->store == new_dev_store)
4756 flush_workqueue(md_misc_wq);
4757 rv = mddev_lock(mddev);
4758 if (!rv) {
4759 rv = entry->store(mddev, page, length);
4760 mddev_unlock(mddev);
4761 }
4762 mddev_put(mddev);
4763 return rv;
4764}
4765
4766static void md_free(struct kobject *ko)
4767{
4768 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4769
4770 if (mddev->sysfs_state)
4771 sysfs_put(mddev->sysfs_state);
4772
4773 if (mddev->gendisk) {
4774 del_gendisk(mddev->gendisk);
4775 put_disk(mddev->gendisk);
4776 }
4777 if (mddev->queue)
4778 blk_cleanup_queue(mddev->queue);
4779
4780 kfree(mddev);
4781}
4782
4783static const struct sysfs_ops md_sysfs_ops = {
4784 .show = md_attr_show,
4785 .store = md_attr_store,
4786};
4787static struct kobj_type md_ktype = {
4788 .release = md_free,
4789 .sysfs_ops = &md_sysfs_ops,
4790 .default_attrs = md_default_attrs,
4791};
4792
4793int mdp_major = 0;
4794
4795static void mddev_delayed_delete(struct work_struct *ws)
4796{
4797 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4798
4799 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4800 kobject_del(&mddev->kobj);
4801 kobject_put(&mddev->kobj);
4802}
4803
4804static int md_alloc(dev_t dev, char *name)
4805{
4806 static DEFINE_MUTEX(disks_mutex);
4807 struct mddev *mddev = mddev_find(dev);
4808 struct gendisk *disk;
4809 int partitioned;
4810 int shift;
4811 int unit;
4812 int error;
4813
4814 if (!mddev)
4815 return -ENODEV;
4816
4817 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4818 shift = partitioned ? MdpMinorShift : 0;
4819 unit = MINOR(mddev->unit) >> shift;
4820
4821
4822
4823
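	/* flush md_misc_wq so a delayed removal of an earlier instance of this device has finished before we recreate it */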
4824 flush_workqueue(md_misc_wq);
4825
4826 mutex_lock(&disks_mutex);
4827 error = -EEXIST;
4828 if (mddev->gendisk)
4829 goto abort;
4830
4831 if (name) {
4832
4833
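		/* make sure the requested name is not already used by another array */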
4834 struct mddev *mddev2;
4835 spin_lock(&all_mddevs_lock);
4836
4837 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4838 if (mddev2->gendisk &&
4839 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4840 spin_unlock(&all_mddevs_lock);
4841 goto abort;
4842 }
4843 spin_unlock(&all_mddevs_lock);
4844 }
4845
4846 error = -ENOMEM;
4847 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4848 if (!mddev->queue)
4849 goto abort;
4850 mddev->queue->queuedata = mddev;
4851
4852 blk_queue_make_request(mddev->queue, md_make_request);
4853 blk_set_stacking_limits(&mddev->queue->limits);
4854
4855 disk = alloc_disk(1 << shift);
4856 if (!disk) {
4857 blk_cleanup_queue(mddev->queue);
4858 mddev->queue = NULL;
4859 goto abort;
4860 }
4861 disk->major = MAJOR(mddev->unit);
4862 disk->first_minor = unit << shift;
4863 if (name)
4864 strcpy(disk->disk_name, name);
4865 else if (partitioned)
4866 sprintf(disk->disk_name, "md_d%d", unit);
4867 else
4868 sprintf(disk->disk_name, "md%d", unit);
4869 disk->fops = &md_fops;
4870 disk->private_data = mddev;
4871 disk->queue = mddev->queue;
4872 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4873
4874
4875
4876
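	/* use extended dev_t numbers for partitions, so the md device behaves like other disks */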
4877 disk->flags |= GENHD_FL_EXT_DEVT;
4878 mddev->gendisk = disk;
4879
4880
4881
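	/* hold open_mutex across add_disk() and sysfs registration so a racing open sees a fully initialised device */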
4882 mutex_lock(&mddev->open_mutex);
4883 add_disk(disk);
4884
4885 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4886 &disk_to_dev(disk)->kobj, "%s", "md");
4887 if (error) {
4888
4889
4890
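		/* registration of the "md" kobject failed (name in use); carry on without the extra sysfs directory */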
4891 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4892 disk->disk_name);
4893 error = 0;
4894 }
4895 if (mddev->kobj.sd &&
4896 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4897 printk(KERN_DEBUG "pointless warning\n");
4898 mutex_unlock(&mddev->open_mutex);
4899 abort:
4900 mutex_unlock(&disks_mutex);
4901 if (!error && mddev->kobj.sd) {
4902 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4903 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4904 }
4905 mddev_put(mddev);
4906 return error;
4907}
4908
4909static struct kobject *md_probe(dev_t dev, int *part, void *data)
4910{
4911 md_alloc(dev, NULL);
4912 return NULL;
4913}
4914
4915static int add_named_array(const char *val, struct kernel_param *kp)
4916{
4917
4918
4919
4920
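	/* create an array with the given "md_..." name; trailing newlines are stripped and overlong names rejected */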
4921 int len = strlen(val);
4922 char buf[DISK_NAME_LEN];
4923
4924 while (len && val[len-1] == '\n')
4925 len--;
4926 if (len >= DISK_NAME_LEN)
4927 return -E2BIG;
4928 strlcpy(buf, val, len+1);
4929 if (strncmp(buf, "md_", 3) != 0)
4930 return -EINVAL;
4931 return md_alloc(0, buf);
4932}
4933
4934static void md_safemode_timeout(unsigned long data)
4935{
4936 struct mddev *mddev = (struct mddev *) data;
4937
4938 if (!atomic_read(&mddev->writes_pending)) {
4939 mddev->safemode = 1;
4940 if (mddev->external)
4941 sysfs_notify_dirent_safe(mddev->sysfs_state);
4942 }
4943 md_wakeup_thread(mddev->thread);
4944}
4945
4946static int start_dirty_degraded;
4947
4948int md_run(struct mddev *mddev)
4949{
4950 int err;
4951 struct md_rdev *rdev;
4952 struct md_personality *pers;
4953
4954 if (list_empty(&mddev->disks))
4955
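		/* cannot run an array with no devices */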
4956 return -EINVAL;
4957
4958 if (mddev->pers)
4959 return -EBUSY;
4960
4961 if (mddev->sysfs_active)
4962 return -EBUSY;
4963
4964
4965
4966
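	/* if raid_disks is not yet known, this must be a persistent array: analyse the superblocks to configure it */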
4967 if (!mddev->raid_disks) {
4968 if (!mddev->persistent)
4969 return -EINVAL;
4970 analyze_sbs(mddev);
4971 }
4972
4973 if (mddev->level != LEVEL_NONE)
4974 request_module("md-level-%d", mddev->level);
4975 else if (mddev->clevel[0])
4976 request_module("md-%s", mddev->clevel);
4977
4978
4979
4980
4981
4982
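	/* drop cached buffers on the member devices; from now on all access goes through the md device */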
4983 rdev_for_each(rdev, mddev) {
4984 if (test_bit(Faulty, &rdev->flags))
4985 continue;
4986 sync_blockdev(rdev->bdev);
4987 invalidate_bdev(rdev->bdev);
4988
4989
4990
4991
4992
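		/* sanity check: the data area and the superblock must not overlap on any member device */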
4993 if (rdev->meta_bdev) {
4994 ;
4995 } else if (rdev->data_offset < rdev->sb_start) {
4996 if (mddev->dev_sectors &&
4997 rdev->data_offset + mddev->dev_sectors
4998 > rdev->sb_start) {
4999 printk("md: %s: data overlaps metadata\n",
5000 mdname(mddev));
5001 return -EINVAL;
5002 }
5003 } else {
5004 if (rdev->sb_start + rdev->sb_size/512
5005 > rdev->data_offset) {
5006 printk("md: %s: metadata overlaps data\n",
5007 mdname(mddev));
5008 return -EINVAL;
5009 }
5010 }
5011 sysfs_notify_dirent_safe(rdev->sysfs_state);
5012 }
5013
5014 if (mddev->bio_set == NULL)
5015 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5016
5017 spin_lock(&pers_lock);
5018 pers = find_pers(mddev->level, mddev->clevel);
5019 if (!pers || !try_module_get(pers->owner)) {
5020 spin_unlock(&pers_lock);
5021 if (mddev->level != LEVEL_NONE)
5022 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5023 mddev->level);
5024 else
5025 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5026 mddev->clevel);
5027 return -EINVAL;
5028 }
5029 mddev->pers = pers;
5030 spin_unlock(&pers_lock);
5031 if (mddev->level != pers->level) {
5032 mddev->level = pers->level;
5033 mddev->new_level = pers->level;
5034 }
5035 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5036
5037 if (mddev->reshape_position != MaxSector &&
5038 pers->start_reshape == NULL) {
5039
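		/* a reshape is pending but this personality provides no start_reshape, so it cannot be handled */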
5040 mddev->pers = NULL;
5041 module_put(pers->owner);
5042 return -EINVAL;
5043 }
5044
5045 if (pers->sync_request) {
5046
5047
5048
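		/* warn if two members share the same underlying physical disk */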
5049 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5050 struct md_rdev *rdev2;
5051 int warned = 0;
5052
5053 rdev_for_each(rdev, mddev)
5054 rdev_for_each(rdev2, mddev) {
5055 if (rdev < rdev2 &&
5056 rdev->bdev->bd_contains ==
5057 rdev2->bdev->bd_contains) {
5058 printk(KERN_WARNING
5059 "%s: WARNING: %s appears to be"
5060 " on the same physical disk as"
5061 " %s.\n",
5062 mdname(mddev),
5063 bdevname(rdev->bdev,b),
5064 bdevname(rdev2->bdev,b2));
5065 warned = 1;
5066 }
5067 }
5068
5069 if (warned)
5070 printk(KERN_WARNING
5071 "True protection against single-disk"
5072 " failure might be compromised.\n");
5073 }
5074
5075 mddev->recovery = 0;
5076
5077 mddev->resync_max_sectors = mddev->dev_sectors;
5078
5079 mddev->ok_start_degraded = start_dirty_degraded;
5080
5081 if (start_readonly && mddev->ro == 0)
5082 mddev->ro = 2;
5083
5084 err = mddev->pers->run(mddev);
5085 if (err)
5086 printk(KERN_ERR "md: pers->run() failed ...\n");
5087 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5088 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5089 " but 'external_size' not in effect?\n", __func__);
5090 printk(KERN_ERR
5091 "md: invalid array_size %llu > default size %llu\n",
5092 (unsigned long long)mddev->array_sectors / 2,
5093 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5094 err = -EINVAL;
5095 mddev->pers->stop(mddev);
5096 }
5097 if (err == 0 && mddev->pers->sync_request &&
5098 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5099 err = bitmap_create(mddev);
5100 if (err) {
5101 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5102 mdname(mddev), err);
5103 mddev->pers->stop(mddev);
5104 }
5105 }
5106 if (err) {
5107 module_put(mddev->pers->owner);
5108 mddev->pers = NULL;
5109 bitmap_destroy(mddev);
5110 return err;
5111 }
5112 if (mddev->pers->sync_request) {
5113 if (mddev->kobj.sd &&
5114 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5115 printk(KERN_WARNING
5116 "md: cannot register extra attributes for %s\n",
5117 mdname(mddev));
5118 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5119 } else if (mddev->ro == 2)
5120 mddev->ro = 0;
5121
5122 atomic_set(&mddev->writes_pending,0);
5123 atomic_set(&mddev->max_corr_read_errors,
5124 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5125 mddev->safemode = 0;
5126 mddev->safemode_timer.function = md_safemode_timeout;
5127 mddev->safemode_timer.data = (unsigned long) mddev;
5128 mddev->safemode_delay = (200 * HZ)/1000 +1;
5129 mddev->in_sync = 1;
5130 smp_wmb();
5131 mddev->ready = 1;
5132 rdev_for_each(rdev, mddev)
5133 if (rdev->raid_disk >= 0)
5134 if (sysfs_link_rdev(mddev, rdev))
5135 ;
5136
5137 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5138
5139 if (mddev->flags)
5140 md_update_sb(mddev, 0);
5141
5142 md_new_event(mddev);
5143 sysfs_notify_dirent_safe(mddev->sysfs_state);
5144 sysfs_notify_dirent_safe(mddev->sysfs_action);
5145 sysfs_notify(&mddev->kobj, NULL, "degraded");
5146 return 0;
5147}
5148EXPORT_SYMBOL_GPL(md_run);
5149
5150static int do_md_run(struct mddev *mddev)
5151{
5152 int err;
5153
5154 err = md_run(mddev);
5155 if (err)
5156 goto out;
5157 err = bitmap_load(mddev);
5158 if (err) {
5159 bitmap_destroy(mddev);
5160 goto out;
5161 }
5162
5163 md_wakeup_thread(mddev->thread);
5164 md_wakeup_thread(mddev->sync_thread);
5165
5166 set_capacity(mddev->gendisk, mddev->array_sectors);
5167 revalidate_disk(mddev->gendisk);
5168 mddev->changed = 1;
5169 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5170out:
5171 return err;
5172}
5173
5174static int restart_array(struct mddev *mddev)
5175{
5176 struct gendisk *disk = mddev->gendisk;
5177
5178
5179 if (list_empty(&mddev->disks))
5180 return -ENXIO;
5181 if (!mddev->pers)
5182 return -EINVAL;
5183 if (!mddev->ro)
5184 return -EBUSY;
5185 mddev->safemode = 0;
5186 mddev->ro = 0;
5187 set_disk_ro(disk, 0);
5188 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5189 mdname(mddev));
5190
5191 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5192 md_wakeup_thread(mddev->thread);
5193 md_wakeup_thread(mddev->sync_thread);
5194 sysfs_notify_dirent_safe(mddev->sysfs_state);
5195 return 0;
5196}
5197
5198
5199
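/* like deny_write_access(), but allows for the single reference we hold on the file ourselves */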
5200static int deny_bitmap_write_access(struct file * file)
5201{
5202 struct inode *inode = file->f_mapping->host;
5203
5204 spin_lock(&inode->i_lock);
5205 if (atomic_read(&inode->i_writecount) > 1) {
5206 spin_unlock(&inode->i_lock);
5207 return -ETXTBSY;
5208 }
5209 atomic_set(&inode->i_writecount, -1);
5210 spin_unlock(&inode->i_lock);
5211
5212 return 0;
5213}
5214
5215void restore_bitmap_write_access(struct file *file)
5216{
5217 struct inode *inode = file->f_mapping->host;
5218
5219 spin_lock(&inode->i_lock);
5220 atomic_set(&inode->i_writecount, 1);
5221 spin_unlock(&inode->i_lock);
5222}
5223
5224static void md_clean(struct mddev *mddev)
5225{
5226 mddev->array_sectors = 0;
5227 mddev->external_size = 0;
5228 mddev->dev_sectors = 0;
5229 mddev->raid_disks = 0;
5230 mddev->recovery_cp = 0;
5231 mddev->resync_min = 0;
5232 mddev->resync_max = MaxSector;
5233 mddev->reshape_position = MaxSector;
5234 mddev->external = 0;
5235 mddev->persistent = 0;
5236 mddev->level = LEVEL_NONE;
5237 mddev->clevel[0] = 0;
5238 mddev->flags = 0;
5239 mddev->ro = 0;
5240 mddev->metadata_type[0] = 0;
5241 mddev->chunk_sectors = 0;
5242 mddev->ctime = mddev->utime = 0;
5243 mddev->layout = 0;
5244 mddev->max_disks = 0;
5245 mddev->events = 0;
5246 mddev->can_decrease_events = 0;
5247 mddev->delta_disks = 0;
5248 mddev->reshape_backwards = 0;
5249 mddev->new_level = LEVEL_NONE;
5250 mddev->new_layout = 0;
5251 mddev->new_chunk_sectors = 0;
5252 mddev->curr_resync = 0;
5253 atomic64_set(&mddev->resync_mismatches, 0);
5254 mddev->suspend_lo = mddev->suspend_hi = 0;
5255 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5256 mddev->recovery = 0;
5257 mddev->in_sync = 0;
5258 mddev->changed = 0;
5259 mddev->degraded = 0;
5260 mddev->safemode = 0;
5261 mddev->merge_check_needed = 0;
5262 mddev->bitmap_info.offset = 0;
5263 mddev->bitmap_info.default_offset = 0;
5264 mddev->bitmap_info.default_space = 0;
5265 mddev->bitmap_info.chunksize = 0;
5266 mddev->bitmap_info.daemon_sleep = 0;
5267 mddev->bitmap_info.max_write_behind = 0;
5268}
5269
5270static void __md_stop_writes(struct mddev *mddev)
5271{
5272 if (mddev->sync_thread) {
5273 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5274 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5275 reap_sync_thread(mddev);
5276 }
5277
5278 del_timer_sync(&mddev->safemode_timer);
5279
5280 bitmap_flush(mddev);
5281 md_super_wait(mddev);
5282
5283 if (!mddev->in_sync || mddev->flags) {
5284
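		/* mark the array clean and write that out to the superblocks */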
5285 mddev->in_sync = 1;
5286 md_update_sb(mddev, 1);
5287 }
5288}
5289
5290void md_stop_writes(struct mddev *mddev)
5291{
5292 mddev_lock(mddev);
5293 __md_stop_writes(mddev);
5294 mddev_unlock(mddev);
5295}
5296EXPORT_SYMBOL_GPL(md_stop_writes);
5297
5298static void __md_stop(struct mddev *mddev)
5299{
5300 mddev->ready = 0;
5301 mddev->pers->stop(mddev);
5302 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5303 mddev->to_remove = &md_redundancy_group;
5304 module_put(mddev->pers->owner);
5305 mddev->pers = NULL;
5306 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5307}
5308
5309void md_stop(struct mddev *mddev)
5310{
5311
5312
5313
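	/* stop the array and free the data structures attached to it */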
5314 __md_stop(mddev);
5315 bitmap_destroy(mddev);
5316 if (mddev->bio_set)
5317 bioset_free(mddev->bio_set);
5318}
5319
5320EXPORT_SYMBOL_GPL(md_stop);
5321
5322static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5323{
5324 int err = 0;
5325 mutex_lock(&mddev->open_mutex);
5326 if (atomic_read(&mddev->openers) > !!bdev) {
5327 printk("md: %s still in use.\n",mdname(mddev));
5328 err = -EBUSY;
5329 goto out;
5330 }
5331 if (bdev)
5332 sync_blockdev(bdev);
5333 if (mddev->pers) {
5334 __md_stop_writes(mddev);
5335
5336 err = -ENXIO;
5337 if (mddev->ro==1)
5338 goto out;
5339 mddev->ro = 1;
5340 set_disk_ro(mddev->gendisk, 1);
5341 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5342 sysfs_notify_dirent_safe(mddev->sysfs_state);
5343 err = 0;
5344 }
5345out:
5346 mutex_unlock(&mddev->open_mutex);
5347 return err;
5348}
5349
5350
5351
5352
5353
5354static int do_md_stop(struct mddev * mddev, int mode,
5355 struct block_device *bdev)
5356{
5357 struct gendisk *disk = mddev->gendisk;
5358 struct md_rdev *rdev;
5359
5360 mutex_lock(&mddev->open_mutex);
5361 if (atomic_read(&mddev->openers) > !!bdev ||
5362 mddev->sysfs_active) {
5363 printk("md: %s still in use.\n",mdname(mddev));
5364 mutex_unlock(&mddev->open_mutex);
5365 return -EBUSY;
5366 }
5367 if (bdev)
5368
5369
5370
5371
5372
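		/* flush any writes that reached this block device through another opener before stopping */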
5373 sync_blockdev(bdev);
5374
5375 if (mddev->pers) {
5376 if (mddev->ro)
5377 set_disk_ro(disk, 0);
5378
5379 __md_stop_writes(mddev);
5380 __md_stop(mddev);
5381 mddev->queue->merge_bvec_fn = NULL;
5382 mddev->queue->backing_dev_info.congested_fn = NULL;
5383
5384
5385 sysfs_notify_dirent_safe(mddev->sysfs_state);
5386
5387 rdev_for_each(rdev, mddev)
5388 if (rdev->raid_disk >= 0)
5389 sysfs_unlink_rdev(mddev, rdev);
5390
5391 set_capacity(disk, 0);
5392 mutex_unlock(&mddev->open_mutex);
5393 mddev->changed = 1;
5394 revalidate_disk(disk);
5395
5396 if (mddev->ro)
5397 mddev->ro = 0;
5398 } else
5399 mutex_unlock(&mddev->open_mutex);
5400
5401
5402
5403 if (mode == 0) {
5404 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5405
5406 bitmap_destroy(mddev);
5407 if (mddev->bitmap_info.file) {
5408 restore_bitmap_write_access(mddev->bitmap_info.file);
5409 fput(mddev->bitmap_info.file);
5410 mddev->bitmap_info.file = NULL;
5411 }
5412 mddev->bitmap_info.offset = 0;
5413
5414 export_array(mddev);
5415
5416 md_clean(mddev);
5417 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5418 if (mddev->hold_active == UNTIL_STOP)
5419 mddev->hold_active = 0;
5420 }
5421 blk_integrity_unregister(disk);
5422 md_new_event(mddev);
5423 sysfs_notify_dirent_safe(mddev->sysfs_state);
5424 return 0;
5425}
5426
5427#ifndef MODULE
5428static void autorun_array(struct mddev *mddev)
5429{
5430 struct md_rdev *rdev;
5431 int err;
5432
5433 if (list_empty(&mddev->disks))
5434 return;
5435
5436 printk(KERN_INFO "md: running: ");
5437
5438 rdev_for_each(rdev, mddev) {
5439 char b[BDEVNAME_SIZE];
5440 printk("<%s>", bdevname(rdev->bdev,b));
5441 }
5442 printk("\n");
5443
5444 err = do_md_run(mddev);
5445 if (err) {
5446 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5447 do_md_stop(mddev, 0, NULL);
5448 }
5449}
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459
5460
5461
5462
5463static void autorun_devices(int part)
5464{
5465 struct md_rdev *rdev0, *rdev, *tmp;
5466 struct mddev *mddev;
5467 char b[BDEVNAME_SIZE];
5468
5469 printk(KERN_INFO "md: autorun ...\n");
5470 while (!list_empty(&pending_raid_disks)) {
5471 int unit;
5472 dev_t dev;
5473 LIST_HEAD(candidates);
5474 rdev0 = list_entry(pending_raid_disks.next,
5475 struct md_rdev, same_set);
5476
5477 printk(KERN_INFO "md: considering %s ...\n",
5478 bdevname(rdev0->bdev,b));
5479 INIT_LIST_HEAD(&candidates);
5480 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5481 if (super_90_load(rdev, rdev0, 0) >= 0) {
5482 printk(KERN_INFO "md: adding %s ...\n",
5483 bdevname(rdev->bdev,b));
5484 list_move(&rdev->same_set, &candidates);
5485 }
5486
5487
5488
5489
5490
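		/* all devices whose superblocks match rdev0 are now on 'candidates'; work out the unit number and allocate the mddev */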
5491 if (part) {
5492 dev = MKDEV(mdp_major,
5493 rdev0->preferred_minor << MdpMinorShift);
5494 unit = MINOR(dev) >> MdpMinorShift;
5495 } else {
5496 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5497 unit = MINOR(dev);
5498 }
5499 if (rdev0->preferred_minor != unit) {
5500 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5501 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5502 break;
5503 }
5504
5505 md_probe(dev, NULL, NULL);
5506 mddev = mddev_find(dev);
5507 if (!mddev || !mddev->gendisk) {
5508 if (mddev)
5509 mddev_put(mddev);
5510 printk(KERN_ERR
5511 "md: cannot allocate memory for md drive.\n");
5512 break;
5513 }
5514 if (mddev_lock(mddev))
5515 printk(KERN_WARNING "md: %s locked, cannot run\n",
5516 mdname(mddev));
5517 else if (mddev->raid_disks || mddev->major_version
5518 || !list_empty(&mddev->disks)) {
5519 printk(KERN_WARNING
5520 "md: %s already running, cannot run %s\n",
5521 mdname(mddev), bdevname(rdev0->bdev,b));
5522 mddev_unlock(mddev);
5523 } else {
5524 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5525 mddev->persistent = 1;
5526 rdev_for_each_list(rdev, tmp, &candidates) {
5527 list_del_init(&rdev->same_set);
5528 if (bind_rdev_to_array(rdev, mddev))
5529 export_rdev(rdev);
5530 }
5531 autorun_array(mddev);
5532 mddev_unlock(mddev);
5533 }
5534
5535
5536
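		/* on success the candidates list is already empty; otherwise export whatever is left */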
5537 rdev_for_each_list(rdev, tmp, &candidates) {
5538 list_del_init(&rdev->same_set);
5539 export_rdev(rdev);
5540 }
5541 mddev_put(mddev);
5542 }
5543 printk(KERN_INFO "md: ... autorun DONE.\n");
5544}
5545#endif
5546
5547static int get_version(void __user * arg)
5548{
5549 mdu_version_t ver;
5550
5551 ver.major = MD_MAJOR_VERSION;
5552 ver.minor = MD_MINOR_VERSION;
5553 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5554
5555 if (copy_to_user(arg, &ver, sizeof(ver)))
5556 return -EFAULT;
5557
5558 return 0;
5559}
5560
5561static int get_array_info(struct mddev * mddev, void __user * arg)
5562{
5563 mdu_array_info_t info;
5564 int nr,working,insync,failed,spare;
5565 struct md_rdev *rdev;
5566
5567 nr = working = insync = failed = spare = 0;
5568 rcu_read_lock();
5569 rdev_for_each_rcu(rdev, mddev) {
5570 nr++;
5571 if (test_bit(Faulty, &rdev->flags))
5572 failed++;
5573 else {
5574 working++;
5575 if (test_bit(In_sync, &rdev->flags))
5576 insync++;
5577 else
5578 spare++;
5579 }
5580 }
5581 rcu_read_unlock();
5582
5583 info.major_version = mddev->major_version;
5584 info.minor_version = mddev->minor_version;
5585 info.patch_version = MD_PATCHLEVEL_VERSION;
5586 info.ctime = mddev->ctime;
5587 info.level = mddev->level;
5588 info.size = mddev->dev_sectors / 2;
5589 if (info.size != mddev->dev_sectors / 2)
5590 info.size = -1;
5591 info.nr_disks = nr;
5592 info.raid_disks = mddev->raid_disks;
5593 info.md_minor = mddev->md_minor;
5594 info.not_persistent= !mddev->persistent;
5595
5596 info.utime = mddev->utime;
5597 info.state = 0;
5598 if (mddev->in_sync)
5599 info.state = (1<<MD_SB_CLEAN);
5600 if (mddev->bitmap && mddev->bitmap_info.offset)
5601 info.state = (1<<MD_SB_BITMAP_PRESENT);
5602 info.active_disks = insync;
5603 info.working_disks = working;
5604 info.failed_disks = failed;
5605 info.spare_disks = spare;
5606
5607 info.layout = mddev->layout;
5608 info.chunk_size = mddev->chunk_sectors << 9;
5609
5610 if (copy_to_user(arg, &info, sizeof(info)))
5611 return -EFAULT;
5612
5613 return 0;
5614}
5615
5616static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5617{
5618 mdu_bitmap_file_t *file = NULL;
5619 char *ptr, *buf = NULL;
5620 int err = -ENOMEM;
5621
5622 if (md_allow_write(mddev))
5623 file = kmalloc(sizeof(*file), GFP_NOIO);
5624 else
5625 file = kmalloc(sizeof(*file), GFP_KERNEL);
5626
5627 if (!file)
5628 goto out;
5629
5630
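	/* no file-backed bitmap: report an empty pathname */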
5631 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5632 file->pathname[0] = '\0';
5633 goto copy_out;
5634 }
5635
5636 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5637 if (!buf)
5638 goto out;
5639
5640 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5641 buf, sizeof(file->pathname));
5642 if (IS_ERR(ptr))
5643 goto out;
5644
5645 strcpy(file->pathname, ptr);
5646
5647copy_out:
5648 err = 0;
5649 if (copy_to_user(arg, file, sizeof(*file)))
5650 err = -EFAULT;
5651out:
5652 kfree(buf);
5653 kfree(file);
5654 return err;
5655}
5656
5657static int get_disk_info(struct mddev * mddev, void __user * arg)
5658{
5659 mdu_disk_info_t info;
5660 struct md_rdev *rdev;
5661
5662 if (copy_from_user(&info, arg, sizeof(info)))
5663 return -EFAULT;
5664
5665 rcu_read_lock();
5666 rdev = find_rdev_nr_rcu(mddev, info.number);
5667 if (rdev) {
5668 info.major = MAJOR(rdev->bdev->bd_dev);
5669 info.minor = MINOR(rdev->bdev->bd_dev);
5670 info.raid_disk = rdev->raid_disk;
5671 info.state = 0;
5672 if (test_bit(Faulty, &rdev->flags))
5673 info.state |= (1<<MD_DISK_FAULTY);
5674 else if (test_bit(In_sync, &rdev->flags)) {
5675 info.state |= (1<<MD_DISK_ACTIVE);
5676 info.state |= (1<<MD_DISK_SYNC);
5677 }
5678 if (test_bit(WriteMostly, &rdev->flags))
5679 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5680 } else {
5681 info.major = info.minor = 0;
5682 info.raid_disk = -1;
5683 info.state = (1<<MD_DISK_REMOVED);
5684 }
5685 rcu_read_unlock();
5686
5687 if (copy_to_user(arg, &info, sizeof(info)))
5688 return -EFAULT;
5689
5690 return 0;
5691}
5692
5693static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5694{
5695 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5696 struct md_rdev *rdev;
5697 dev_t dev = MKDEV(info->major,info->minor);
5698
5699 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5700 return -EOVERFLOW;
5701
5702 if (!mddev->raid_disks) {
5703 int err;
5704
5705 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5706 if (IS_ERR(rdev)) {
5707 printk(KERN_WARNING
5708 "md: md_import_device returned %ld\n",
5709 PTR_ERR(rdev));
5710 return PTR_ERR(rdev);
5711 }
5712 if (!list_empty(&mddev->disks)) {
5713 struct md_rdev *rdev0
5714 = list_entry(mddev->disks.next,
5715 struct md_rdev, same_set);
5716 err = super_types[mddev->major_version]
5717 .load_super(rdev, rdev0, mddev->minor_version);
5718 if (err < 0) {
5719 printk(KERN_WARNING
5720 "md: %s has different UUID to %s\n",
5721 bdevname(rdev->bdev,b),
5722 bdevname(rdev0->bdev,b2));
5723 export_rdev(rdev);
5724 return -EINVAL;
5725 }
5726 }
5727 err = bind_rdev_to_array(rdev, mddev);
5728 if (err)
5729 export_rdev(rdev);
5730 return err;
5731 }
5732
5733
5734
5735
5736
5737
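	/* with a running array, ADD_NEW_DISK hot-adds a device (typically a spare) */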
5738 if (mddev->pers) {
5739 int err;
5740 if (!mddev->pers->hot_add_disk) {
5741 printk(KERN_WARNING
5742 "%s: personality does not support diskops!\n",
5743 mdname(mddev));
5744 return -EINVAL;
5745 }
5746 if (mddev->persistent)
5747 rdev = md_import_device(dev, mddev->major_version,
5748 mddev->minor_version);
5749 else
5750 rdev = md_import_device(dev, -1, -1);
5751 if (IS_ERR(rdev)) {
5752 printk(KERN_WARNING
5753 "md: md_import_device returned %ld\n",
5754 PTR_ERR(rdev));
5755 return PTR_ERR(rdev);
5756 }
5757
5758 if (!mddev->persistent) {
5759 if (info->state & (1<<MD_DISK_SYNC) &&
5760 info->raid_disk < mddev->raid_disks) {
5761 rdev->raid_disk = info->raid_disk;
5762 set_bit(In_sync, &rdev->flags);
5763 } else
5764 rdev->raid_disk = -1;
5765 } else
5766 super_types[mddev->major_version].
5767 validate_super(mddev, rdev);
5768 if ((info->state & (1<<MD_DISK_SYNC)) &&
5769 rdev->raid_disk != info->raid_disk) {
5770
5771
5772
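			/* the validated superblock puts this device in a different slot than requested: reject the hot-add */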
5773 export_rdev(rdev);
5774 return -EINVAL;
5775 }
5776
5777 if (test_bit(In_sync, &rdev->flags))
5778 rdev->saved_raid_disk = rdev->raid_disk;
5779 else
5780 rdev->saved_raid_disk = -1;
5781
5782 clear_bit(In_sync, &rdev->flags);
5783 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5784 set_bit(WriteMostly, &rdev->flags);
5785 else
5786 clear_bit(WriteMostly, &rdev->flags);
5787
5788 rdev->raid_disk = -1;
5789 err = bind_rdev_to_array(rdev, mddev);
5790 if (!err && !mddev->pers->hot_remove_disk) {
5791
5792
5793
5794
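			/* personalities without hot_remove_disk activate newly added devices immediately */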
5795 super_types[mddev->major_version].
5796 validate_super(mddev, rdev);
5797 err = mddev->pers->hot_add_disk(mddev, rdev);
5798 if (err)
5799 unbind_rdev_from_array(rdev);
5800 }
5801 if (err)
5802 export_rdev(rdev);
5803 else
5804 sysfs_notify_dirent_safe(rdev->sysfs_state);
5805
5806 md_update_sb(mddev, 1);
5807 if (mddev->degraded)
5808 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5809 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5810 if (!err)
5811 md_new_event(mddev);
5812 md_wakeup_thread(mddev->thread);
5813 return err;
5814 }
5815
5816
5817
5818
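	/* with no personality running, ADD_NEW_DISK is only supported for major_version 0 (0.90) superblocks */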
5819 if (mddev->major_version != 0) {
5820 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5821 mdname(mddev));
5822 return -EINVAL;
5823 }
5824
5825 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5826 int err;
5827 rdev = md_import_device(dev, -1, 0);
5828 if (IS_ERR(rdev)) {
5829 printk(KERN_WARNING
5830 "md: error, md_import_device() returned %ld\n",
5831 PTR_ERR(rdev));
5832 return PTR_ERR(rdev);
5833 }
5834 rdev->desc_nr = info->number;
5835 if (info->raid_disk < mddev->raid_disks)
5836 rdev->raid_disk = info->raid_disk;
5837 else
5838 rdev->raid_disk = -1;
5839
5840 if (rdev->raid_disk < mddev->raid_disks)
5841 if (info->state & (1<<MD_DISK_SYNC))
5842 set_bit(In_sync, &rdev->flags);
5843
5844 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5845 set_bit(WriteMostly, &rdev->flags);
5846
5847 if (!mddev->persistent) {
5848 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5849 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5850 } else
5851 rdev->sb_start = calc_dev_sboffset(rdev);
5852 rdev->sectors = rdev->sb_start;
5853
5854 err = bind_rdev_to_array(rdev, mddev);
5855 if (err) {
5856 export_rdev(rdev);
5857 return err;
5858 }
5859 }
5860
5861 return 0;
5862}
5863
5864static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5865{
5866 char b[BDEVNAME_SIZE];
5867 struct md_rdev *rdev;
5868
5869 rdev = find_rdev(mddev, dev);
5870 if (!rdev)
5871 return -ENXIO;
5872
5873 if (rdev->raid_disk >= 0)
5874 goto busy;
5875
5876 kick_rdev_from_array(rdev);
5877 md_update_sb(mddev, 1);
5878 md_new_event(mddev);
5879
5880 return 0;
5881busy:
5882 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5883 bdevname(rdev->bdev,b), mdname(mddev));
5884 return -EBUSY;
5885}
5886
5887static int hot_add_disk(struct mddev * mddev, dev_t dev)
5888{
5889 char b[BDEVNAME_SIZE];
5890 int err;
5891 struct md_rdev *rdev;
5892
5893 if (!mddev->pers)
5894 return -ENODEV;
5895
5896 if (mddev->major_version != 0) {
5897 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5898 " version-0 superblocks.\n",
5899 mdname(mddev));
5900 return -EINVAL;
5901 }
5902 if (!mddev->pers->hot_add_disk) {
5903 printk(KERN_WARNING
5904 "%s: personality does not support diskops!\n",
5905 mdname(mddev));
5906 return -EINVAL;
5907 }
5908
5909 rdev = md_import_device(dev, -1, 0);
5910 if (IS_ERR(rdev)) {
5911 printk(KERN_WARNING
5912 "md: error, md_import_device() returned %ld\n",
5913 PTR_ERR(rdev));
5914 return -EINVAL;
5915 }
5916
5917 if (mddev->persistent)
5918 rdev->sb_start = calc_dev_sboffset(rdev);
5919 else
5920 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5921
5922 rdev->sectors = rdev->sb_start;
5923
5924 if (test_bit(Faulty, &rdev->flags)) {
5925 printk(KERN_WARNING
5926 "md: can not hot-add faulty %s disk to %s!\n",
5927 bdevname(rdev->bdev,b), mdname(mddev));
5928 err = -EINVAL;
5929 goto abort_export;
5930 }
5931 clear_bit(In_sync, &rdev->flags);
5932 rdev->desc_nr = -1;
5933 rdev->saved_raid_disk = -1;
5934 err = bind_rdev_to_array(rdev, mddev);
5935 if (err)
5936 goto abort_export;
5937
5938
5939
5940
5941
5942
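	/* the new device starts out as a spare, with no raid slot assigned yet */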
5943 rdev->raid_disk = -1;
5944
5945 md_update_sb(mddev, 1);
5946
5947
5948
5949
5950
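	/* kick recovery in case the new spare should be used immediately */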
5951 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5952 md_wakeup_thread(mddev->thread);
5953 md_new_event(mddev);
5954 return 0;
5955
5956abort_export:
5957 export_rdev(rdev);
5958 return err;
5959}
5960
5961static int set_bitmap_file(struct mddev *mddev, int fd)
5962{
5963 int err;
5964
5965 if (mddev->pers) {
5966 if (!mddev->pers->quiesce)
5967 return -EBUSY;
5968 if (mddev->recovery || mddev->sync_thread)
5969 return -EBUSY;
5970
5971 }
5972
5973
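	/* fd >= 0 attaches a new bitmap file, fd < 0 removes the current one */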
5974 if (fd >= 0) {
5975 if (mddev->bitmap)
5976 return -EEXIST;
5977 mddev->bitmap_info.file = fget(fd);
5978
5979 if (mddev->bitmap_info.file == NULL) {
5980 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5981 mdname(mddev));
5982 return -EBADF;
5983 }
5984
5985 err = deny_bitmap_write_access(mddev->bitmap_info.file);
5986 if (err) {
5987 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
5988 mdname(mddev));
5989 fput(mddev->bitmap_info.file);
5990 mddev->bitmap_info.file = NULL;
5991 return err;
5992 }
5993 mddev->bitmap_info.offset = 0;
5994 } else if (mddev->bitmap == NULL)
5995 return -ENOENT;
5996 err = 0;
5997 if (mddev->pers) {
5998 mddev->pers->quiesce(mddev, 1);
5999 if (fd >= 0) {
6000 err = bitmap_create(mddev);
6001 if (!err)
6002 err = bitmap_load(mddev);
6003 }
6004 if (fd < 0 || err) {
6005 bitmap_destroy(mddev);
6006 fd = -1;
6007 }
6008 mddev->pers->quiesce(mddev, 0);
6009 }
6010 if (fd < 0) {
6011 if (mddev->bitmap_info.file) {
6012 restore_bitmap_write_access(mddev->bitmap_info.file);
6013 fput(mddev->bitmap_info.file);
6014 }
6015 mddev->bitmap_info.file = NULL;
6016 }
6017
6018 return err;
6019}
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6035{
6036
6037 if (info->raid_disks == 0) {
6038
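		/* raid_disks == 0: only record which superblock version to use when loading existing superblocks */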
6039 if (info->major_version < 0 ||
6040 info->major_version >= ARRAY_SIZE(super_types) ||
6041 super_types[info->major_version].name == NULL) {
6042
6043 printk(KERN_INFO
6044 "md: superblock version %d not known\n",
6045 info->major_version);
6046 return -EINVAL;
6047 }
6048 mddev->major_version = info->major_version;
6049 mddev->minor_version = info->minor_version;
6050 mddev->patch_version = info->patch_version;
6051 mddev->persistent = !info->not_persistent;
6052
6053
6054
6055 mddev->ctime = get_seconds();
6056 return 0;
6057 }
6058 mddev->major_version = MD_MAJOR_VERSION;
6059 mddev->minor_version = MD_MINOR_VERSION;
6060 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6061 mddev->ctime = get_seconds();
6062
6063 mddev->level = info->level;
6064 mddev->clevel[0] = 0;
6065 mddev->dev_sectors = 2 * (sector_t)info->size;
6066 mddev->raid_disks = info->raid_disks;
6067
6068
6069
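	/* a clean array needs no initial resync; otherwise resync from the start */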
6070 if (info->state & (1<<MD_SB_CLEAN))
6071 mddev->recovery_cp = MaxSector;
6072 else
6073 mddev->recovery_cp = 0;
6074 mddev->persistent = ! info->not_persistent;
6075 mddev->external = 0;
6076
6077 mddev->layout = info->layout;
6078 mddev->chunk_sectors = info->chunk_size >> 9;
6079
6080 mddev->max_disks = MD_SB_DISKS;
6081
6082 if (mddev->persistent)
6083 mddev->flags = 0;
6084 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6085
6086 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6087 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6088 mddev->bitmap_info.offset = 0;
6089
6090 mddev->reshape_position = MaxSector;
6091
6092
6093
6094
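	/* generate a random 128-bit UUID for the new array */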
6095 get_random_bytes(mddev->uuid, 16);
6096
6097 mddev->new_level = mddev->level;
6098 mddev->new_chunk_sectors = mddev->chunk_sectors;
6099 mddev->new_layout = mddev->layout;
6100 mddev->delta_disks = 0;
6101 mddev->reshape_backwards = 0;
6102
6103 return 0;
6104}
6105
6106void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6107{
6108 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6109
6110 if (mddev->external_size)
6111 return;
6112
6113 mddev->array_sectors = array_sectors;
6114}
6115EXPORT_SYMBOL(md_set_array_sectors);
6116
6117static int update_size(struct mddev *mddev, sector_t num_sectors)
6118{
6119 struct md_rdev *rdev;
6120 int rv;
6121 int fit = (num_sectors == 0);
6122
6123 if (mddev->pers->resize == NULL)
6124 return -EINVAL;
6125
6126
6127
6128
6129
6130
6131
6132
6133
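	/* refuse to resize while a resync/reshape thread is running; num_sectors == 0 means use the largest size every member can hold */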
6134 if (mddev->sync_thread)
6135 return -EBUSY;
6136
6137 rdev_for_each(rdev, mddev) {
6138 sector_t avail = rdev->sectors;
6139
6140 if (fit && (num_sectors == 0 || num_sectors > avail))
6141 num_sectors = avail;
6142 if (avail < num_sectors)
6143 return -ENOSPC;
6144 }
6145 rv = mddev->pers->resize(mddev, num_sectors);
6146 if (!rv)
6147 revalidate_disk(mddev->gendisk);
6148 return rv;
6149}
6150
6151static int update_raid_disks(struct mddev *mddev, int raid_disks)
6152{
6153 int rv;
6154 struct md_rdev *rdev;
6155
6156 if (mddev->pers->check_reshape == NULL)
6157 return -EINVAL;
6158 if (raid_disks <= 0 ||
6159 (mddev->max_disks && raid_disks >= mddev->max_disks))
6160 return -EINVAL;
6161 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6162 return -EBUSY;
6163
6164 rdev_for_each(rdev, mddev) {
6165 if (mddev->raid_disks < raid_disks &&
6166 rdev->data_offset < rdev->new_data_offset)
6167 return -EINVAL;
6168 if (mddev->raid_disks > raid_disks &&
6169 rdev->data_offset > rdev->new_data_offset)
6170 return -EINVAL;
6171 }
6172
6173 mddev->delta_disks = raid_disks - mddev->raid_disks;
6174 if (mddev->delta_disks < 0)
6175 mddev->reshape_backwards = 1;
6176 else if (mddev->delta_disks > 0)
6177 mddev->reshape_backwards = 0;
6178
6179 rv = mddev->pers->check_reshape(mddev);
6180 if (rv < 0) {
6181 mddev->delta_disks = 0;
6182 mddev->reshape_backwards = 0;
6183 }
6184 return rv;
6185}
6186
6187
6188
6189
6190
6191
6192
6193
6194
6195
6196static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6197{
6198 int rv = 0;
6199 int cnt = 0;
6200 int state = 0;
6201
6202
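	/* record whether an internal bitmap is currently present, for comparison with the requested state */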
6203 if (mddev->bitmap && mddev->bitmap_info.offset)
6204 state |= (1 << MD_SB_BITMAP_PRESENT);
6205
6206 if (mddev->major_version != info->major_version ||
6207 mddev->minor_version != info->minor_version ||
6208
6209 mddev->ctime != info->ctime ||
6210 mddev->level != info->level ||
6211
6212 !mddev->persistent != info->not_persistent||
6213 mddev->chunk_sectors != info->chunk_size >> 9 ||
6214
6215 ((state^info->state) & 0xfffffe00)
6216 )
6217 return -EINVAL;
6218
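	/* count the requested changes: exactly one property may change per call */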
6219 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6220 cnt++;
6221 if (mddev->raid_disks != info->raid_disks)
6222 cnt++;
6223 if (mddev->layout != info->layout)
6224 cnt++;
6225 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6226 cnt++;
6227 if (cnt == 0)
6228 return 0;
6229 if (cnt > 1)
6230 return -EINVAL;
6231
6232 if (mddev->layout != info->layout) {
6233
6234
6235
6236
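		/* a layout change is handled entirely by the personality via check_reshape() */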
6237 if (mddev->pers->check_reshape == NULL)
6238 return -EINVAL;
6239 else {
6240 mddev->new_layout = info->layout;
6241 rv = mddev->pers->check_reshape(mddev);
6242 if (rv)
6243 mddev->new_layout = mddev->layout;
6244 return rv;
6245 }
6246 }
6247 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6248 rv = update_size(mddev, (sector_t)info->size * 2);
6249
6250 if (mddev->raid_disks != info->raid_disks)
6251 rv = update_raid_disks(mddev, info->raid_disks);
6252
6253 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6254 if (mddev->pers->quiesce == NULL)
6255 return -EINVAL;
6256 if (mddev->recovery || mddev->sync_thread)
6257 return -EBUSY;
6258 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6259
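			/* add an internal bitmap at the default offset */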
6260 if (mddev->bitmap)
6261 return -EEXIST;
6262 if (mddev->bitmap_info.default_offset == 0)
6263 return -EINVAL;
6264 mddev->bitmap_info.offset =
6265 mddev->bitmap_info.default_offset;
6266 mddev->bitmap_info.space =
6267 mddev->bitmap_info.default_space;
6268 mddev->pers->quiesce(mddev, 1);
6269 rv = bitmap_create(mddev);
6270 if (!rv)
6271 rv = bitmap_load(mddev);
6272 if (rv)
6273 bitmap_destroy(mddev);
6274 mddev->pers->quiesce(mddev, 0);
6275 } else {
6276
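			/* remove the internal bitmap (a file-backed bitmap cannot be removed this way) */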
6277 if (!mddev->bitmap)
6278 return -ENOENT;
6279 if (mddev->bitmap->storage.file)
6280 return -EINVAL;
6281 mddev->pers->quiesce(mddev, 1);
6282 bitmap_destroy(mddev);
6283 mddev->pers->quiesce(mddev, 0);
6284 mddev->bitmap_info.offset = 0;
6285 }
6286 }
6287 md_update_sb(mddev, 1);
6288 return rv;
6289}
6290
6291static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6292{
6293 struct md_rdev *rdev;
6294 int err = 0;
6295
6296 if (mddev->pers == NULL)
6297 return -ENODEV;
6298
6299 rcu_read_lock();
6300 rdev = find_rdev_rcu(mddev, dev);
6301 if (!rdev)
6302 err = -ENODEV;
6303 else {
6304 md_error(mddev, rdev);
6305 if (!test_bit(Faulty, &rdev->flags))
6306 err = -EBUSY;
6307 }
6308 rcu_read_unlock();
6309 return err;
6310}
6311
6312
6313
6314
6315
6316
6317
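/*
 * Report a fake geometry of 2 heads and 4 sectors per track, with the
 * cylinder count derived from the array size.
 */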
6318static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6319{
6320 struct mddev *mddev = bdev->bd_disk->private_data;
6321
6322 geo->heads = 2;
6323 geo->sectors = 4;
6324 geo->cylinders = mddev->array_sectors / 8;
6325 return 0;
6326}
6327
6328static int md_ioctl(struct block_device *bdev, fmode_t mode,
6329 unsigned int cmd, unsigned long arg)
6330{
6331 int err = 0;
6332 void __user *argp = (void __user *)arg;
6333 struct mddev *mddev = NULL;
6334 int ro;
6335
6336 switch (cmd) {
6337 case RAID_VERSION:
6338 case GET_ARRAY_INFO:
6339 case GET_DISK_INFO:
6340 break;
6341 default:
6342 if (!capable(CAP_SYS_ADMIN))
6343 return -EACCES;
6344 }
6345
6346
6347
6348
6349
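	/* commands that do not refer to any particular array */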
6350 switch (cmd) {
6351 case RAID_VERSION:
6352 err = get_version(argp);
6353 goto done;
6354
6355 case PRINT_RAID_DEBUG:
6356 err = 0;
6357 md_print_devices();
6358 goto done;
6359
6360#ifndef MODULE
6361 case RAID_AUTORUN:
6362 err = 0;
6363 autostart_arrays(arg);
6364 goto done;
6365#endif
6366 default:;
6367 }
6368
6369
6370
6371
6372
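	/* all remaining commands operate on the array behind this block device */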
6373 mddev = bdev->bd_disk->private_data;
6374
6375 if (!mddev) {
6376 BUG();
6377 goto abort;
6378 }
6379
6380
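	/* these commands are handled without taking the reconfig lock */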
6381 switch (cmd) {
6382 case GET_ARRAY_INFO:
6383 if (!mddev->raid_disks && !mddev->external)
6384 err = -ENODEV;
6385 else
6386 err = get_array_info(mddev, argp);
6387 goto abort;
6388
6389 case GET_DISK_INFO:
6390 if (!mddev->raid_disks && !mddev->external)
6391 err = -ENODEV;
6392 else
6393 err = get_disk_info(mddev, argp);
6394 goto abort;
6395
6396 case SET_DISK_FAULTY:
6397 err = set_disk_faulty(mddev, new_decode_dev(arg));
6398 goto abort;
6399 }
6400
6401 if (cmd == ADD_NEW_DISK)
6402
6403 flush_workqueue(md_misc_wq);
6404
6405 err = mddev_lock(mddev);
6406 if (err) {
6407 printk(KERN_INFO
6408 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6409 err, cmd);
6410 goto abort;
6411 }
6412
6413 if (cmd == SET_ARRAY_INFO) {
6414 mdu_array_info_t info;
6415 if (!arg)
6416 memset(&info, 0, sizeof(info));
6417 else if (copy_from_user(&info, argp, sizeof(info))) {
6418 err = -EFAULT;
6419 goto abort_unlock;
6420 }
6421 if (mddev->pers) {
6422 err = update_array_info(mddev, &info);
6423 if (err) {
6424 printk(KERN_WARNING "md: couldn't update"
6425 " array info. %d\n", err);
6426 goto abort_unlock;
6427 }
6428 goto done_unlock;
6429 }
6430 if (!list_empty(&mddev->disks)) {
6431 printk(KERN_WARNING
6432 "md: array %s already has disks!\n",
6433 mdname(mddev));
6434 err = -EBUSY;
6435 goto abort_unlock;
6436 }
6437 if (mddev->raid_disks) {
6438 printk(KERN_WARNING
6439 "md: array %s already initialised!\n",
6440 mdname(mddev));
6441 err = -EBUSY;
6442 goto abort_unlock;
6443 }
6444 err = set_array_info(mddev, &info);
6445 if (err) {
6446 printk(KERN_WARNING "md: couldn't set"
6447 " array info. %d\n", err);
6448 goto abort_unlock;
6449 }
6450 goto done_unlock;
6451 }
6452
6453
6454
6455
6456
6457
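	/* on an unconfigured array only commands that assemble, run, stop it, or manage its bitmap are allowed */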
6458 if ((!mddev->raid_disks && !mddev->external)
6459 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6460 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6461 && cmd != GET_BITMAP_FILE) {
6462 err = -ENODEV;
6463 goto abort_unlock;
6464 }
6465
6466
6467
6468
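	/* commands that are allowed even on a read-only array */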
6469 switch (cmd) {
6470 case GET_BITMAP_FILE:
6471 err = get_bitmap_file(mddev, argp);
6472 goto done_unlock;
6473
6474 case RESTART_ARRAY_RW:
6475 err = restart_array(mddev);
6476 goto done_unlock;
6477
6478 case STOP_ARRAY:
6479 err = do_md_stop(mddev, 0, bdev);
6480 goto done_unlock;
6481
6482 case STOP_ARRAY_RO:
6483 err = md_set_readonly(mddev, bdev);
6484 goto done_unlock;
6485
6486 case BLKROSET:
6487 if (get_user(ro, (int __user *)(arg))) {
6488 err = -EFAULT;
6489 goto done_unlock;
6490 }
6491 err = -EINVAL;
6492
6493
6494
6495
6496 if (ro)
6497 goto done_unlock;
6498
6499
6500 if (mddev->ro != 1)
6501 goto done_unlock;
6502
6503
6504
6505
6506 if (mddev->pers) {
6507 err = restart_array(mddev);
6508 if (err == 0) {
6509 mddev->ro = 2;
6510 set_disk_ro(mddev->gendisk, 0);
6511 }
6512 }
6513 goto done_unlock;
6514 }
6515
6516
6517
6518
6519
6520
6521
6522
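	/* the remaining commands need write access: drop out of auto-read-only (ro == 2) mode, or fail if the array is truly read-only */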
6523 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6524 if (mddev->ro == 2) {
6525 mddev->ro = 0;
6526 sysfs_notify_dirent_safe(mddev->sysfs_state);
6527 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6528 md_wakeup_thread(mddev->thread);
6529 } else {
6530 err = -EROFS;
6531 goto abort_unlock;
6532 }
6533 }
6534
6535 switch (cmd) {
6536 case ADD_NEW_DISK:
6537 {
6538 mdu_disk_info_t info;
6539 if (copy_from_user(&info, argp, sizeof(info)))
6540 err = -EFAULT;
6541 else
6542 err = add_new_disk(mddev, &info);
6543 goto done_unlock;
6544 }
6545
6546 case HOT_REMOVE_DISK:
6547 err = hot_remove_disk(mddev, new_decode_dev(arg));
6548 goto done_unlock;
6549
6550 case HOT_ADD_DISK:
6551 err = hot_add_disk(mddev, new_decode_dev(arg));
6552 goto done_unlock;
6553
6554 case RUN_ARRAY:
6555 err = do_md_run(mddev);
6556 goto done_unlock;
6557
6558 case SET_BITMAP_FILE:
6559 err = set_bitmap_file(mddev, (int)arg);
6560 goto done_unlock;
6561
6562 default:
6563 err = -EINVAL;
6564 goto abort_unlock;
6565 }
6566
6567done_unlock:
6568abort_unlock:
6569 if (mddev->hold_active == UNTIL_IOCTL &&
6570 err != -EINVAL)
6571 mddev->hold_active = 0;
6572 mddev_unlock(mddev);
6573
6574 return err;
6575done:
6576 if (err)
6577 MD_BUG();
6578abort:
6579 return err;
6580}
6581#ifdef CONFIG_COMPAT
6582static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6583 unsigned int cmd, unsigned long arg)
6584{
6585 switch (cmd) {
6586 case HOT_REMOVE_DISK:
6587 case HOT_ADD_DISK:
6588 case SET_DISK_FAULTY:
6589 case SET_BITMAP_FILE:
6590
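		/* these commands take a plain integer argument, so no compat_ptr() conversion is needed */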
6591 break;
6592 default:
6593 arg = (unsigned long)compat_ptr(arg);
6594 break;
6595 }
6596
6597 return md_ioctl(bdev, mode, cmd, arg);
6598}
6599#endif
6600
6601static int md_open(struct block_device *bdev, fmode_t mode)
6602{
6603
6604
6605
6606
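	/* take a reference on the mddev and make sure it still belongs to this gendisk */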
6607 struct mddev *mddev = mddev_find(bdev->bd_dev);
6608 int err;
6609
6610 if (!mddev)
6611 return -ENODEV;
6612
6613 if (mddev->gendisk != bdev->bd_disk) {
6614
6615
6616
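		/* we raced with the device being discarded: drop our reference, wait for pending removal work, and have the caller retry the open */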
6617 mddev_put(mddev);
6618
6619 flush_workqueue(md_misc_wq);
6620
6621 return -ERESTARTSYS;
6622 }
6623 BUG_ON(mddev != bdev->bd_disk->private_data);
6624
6625 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6626 goto out;
6627
6628 err = 0;
6629 atomic_inc(&mddev->openers);
6630 mutex_unlock(&mddev->open_mutex);
6631
6632 check_disk_change(bdev);
6633 out:
6634 return err;
6635}
6636
6637static int md_release(struct gendisk *disk, fmode_t mode)
6638{
6639 struct mddev *mddev = disk->private_data;
6640
6641 BUG_ON(!mddev);
6642 atomic_dec(&mddev->openers);
6643 mddev_put(mddev);
6644
6645 return 0;
6646}
6647
6648static int md_media_changed(struct gendisk *disk)
6649{
6650 struct mddev *mddev = disk->private_data;
6651
6652 return mddev->changed;
6653}
6654
6655static int md_revalidate(struct gendisk *disk)
6656{
6657 struct mddev *mddev = disk->private_data;
6658
6659 mddev->changed = 0;
6660 return 0;
6661}
6662static const struct block_device_operations md_fops =
6663{
6664 .owner = THIS_MODULE,
6665 .open = md_open,
6666 .release = md_release,
6667 .ioctl = md_ioctl,
6668#ifdef CONFIG_COMPAT
6669 .compat_ioctl = md_compat_ioctl,
6670#endif
6671 .getgeo = md_getgeo,
6672 .media_changed = md_media_changed,
6673 .revalidate_disk= md_revalidate,
6674};
6675
6676static int md_thread(void * arg)
6677{
6678 struct md_thread *thread = arg;
6679
6680
6681
6682
6683
6684
6685
6686
6687
6688
6689
6690
6691
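	/*
	 * The md thread sleeps until THREAD_WAKEUP is set or its timeout
	 * expires, then calls thread->run().  SIGKILL is allowed so the
	 * thread can be interrupted while waiting.
	 */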
6692 allow_signal(SIGKILL);
6693 while (!kthread_should_stop()) {
6694
6695
6696
6697
6698
6699
6700 if (signal_pending(current))
6701 flush_signals(current);
6702
6703 wait_event_interruptible_timeout
6704 (thread->wqueue,
6705 test_bit(THREAD_WAKEUP, &thread->flags)
6706 || kthread_should_stop(),
6707 thread->timeout);
6708
6709 clear_bit(THREAD_WAKEUP, &thread->flags);
6710 if (!kthread_should_stop())
6711 thread->run(thread);
6712 }
6713
6714 return 0;
6715}
6716
6717void md_wakeup_thread(struct md_thread *thread)
6718{
6719 if (thread) {
6720 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6721 set_bit(THREAD_WAKEUP, &thread->flags);
6722 wake_up(&thread->wqueue);
6723 }
6724}
6725
6726struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6727 struct mddev *mddev, const char *name)
6728{
6729 struct md_thread *thread;
6730
6731 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6732 if (!thread)
6733 return NULL;
6734
6735 init_waitqueue_head(&thread->wqueue);
6736
6737 thread->run = run;
6738 thread->mddev = mddev;
6739 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6740 thread->tsk = kthread_run(md_thread, thread,
6741 "%s_%s",
6742 mdname(thread->mddev),
6743 name);
6744 if (IS_ERR(thread->tsk)) {
6745 kfree(thread);
6746 return NULL;
6747 }
6748 return thread;
6749}
6750
6751void md_unregister_thread(struct md_thread **threadp)
6752{
6753 struct md_thread *thread = *threadp;
6754 if (!thread)
6755 return;
6756 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6757
6758
6759
6760 spin_lock(&pers_lock);
6761 *threadp = NULL;
6762 spin_unlock(&pers_lock);
6763
6764 kthread_stop(thread->tsk);
6765 kfree(thread);
6766}
6767
6768void md_error(struct mddev *mddev, struct md_rdev *rdev)
6769{
6770 if (!mddev) {
6771 MD_BUG();
6772 return;
6773 }
6774
6775 if (!rdev || test_bit(Faulty, &rdev->flags))
6776 return;
6777
6778 if (!mddev->pers || !mddev->pers->error_handler)
6779 return;
6780 mddev->pers->error_handler(mddev,rdev);
6781 if (mddev->degraded)
6782 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6783 sysfs_notify_dirent_safe(rdev->sysfs_state);
6784 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6785 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6786 md_wakeup_thread(mddev->thread);
6787 if (mddev->event_work.func)
6788 queue_work(md_misc_wq, &mddev->event_work);
6789 md_new_event_inintr(mddev);
6790}
6791
6792
6793
6794static void status_unused(struct seq_file *seq)
6795{
6796 int i = 0;
6797 struct md_rdev *rdev;
6798
6799 seq_printf(seq, "unused devices: ");
6800
6801 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6802 char b[BDEVNAME_SIZE];
6803 i++;
6804 seq_printf(seq, "%s ",
6805 bdevname(rdev->bdev,b));
6806 }
6807 if (!i)
6808 seq_printf(seq, "<none>");
6809
6810 seq_printf(seq, "\n");
6811}
6812
6813
6814static void status_resync(struct seq_file *seq, struct mddev * mddev)
6815{
6816 sector_t max_sectors, resync, res;
6817 unsigned long dt, db;
6818 sector_t rt;
6819 int scale;
6820 unsigned int per_milli;
6821
6822 if (mddev->curr_resync <= 3)
6823 resync = 0;
6824 else
6825 resync = mddev->curr_resync
6826 - atomic_read(&mddev->recovery_active);
6827
6828 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6829 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6830 max_sectors = mddev->resync_max_sectors;
6831 else
6832 max_sectors = mddev->dev_sectors;
6833
6834
6835
6836
6837 if (!max_sectors) {
6838 MD_BUG();
6839 return;
6840 }
6841
6842
6843
6844
6845
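	/*
	 * Shift resync and max_sectors down by 'scale' so that the
	 * per-thousand progress calculation below cannot overflow.
	 */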
6846 scale = 10;
6847 if (sizeof(sector_t) > sizeof(unsigned long)) {
6848 while ( max_sectors/2 > (1ULL<<(scale+32)))
6849 scale++;
6850 }
6851 res = (resync>>scale)*1000;
6852 sector_div(res, (u32)((max_sectors>>scale)+1));
6853
6854 per_milli = res;
6855 {
6856 int i, x = per_milli/50, y = 20-x;
6857 seq_printf(seq, "[");
6858 for (i = 0; i < x; i++)
6859 seq_printf(seq, "=");
6860 seq_printf(seq, ">");
6861 for (i = 0; i < y; i++)
6862 seq_printf(seq, ".");
6863 seq_printf(seq, "] ");
6864 }
6865 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6866 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6867 "reshape" :
6868 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6869 "check" :
6870 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6871 "resync" : "recovery"))),
6872 per_milli/10, per_milli % 10,
6873 (unsigned long long) resync/2,
6874 (unsigned long long) max_sectors/2);
6875
6876
6877 /*
6878 * dt: time from mark until now
6879 * db: blocks written from mark until now
6880 * rt: remaining time
6881 *
6882 * rt is a sector_t, so could be 32bit or 64bit.
6883 * So we divide before multiply in case it is
6884 * 32bit and close to the limit.
6885 * We scale the divisor (db) by 32 to avoid losing
6886 * precision and avoiding overflow.
6887 * We then shift rt back up by 5, because we
6888 * scaled db by 32.
6889 */
6890 dt = ((jiffies - mddev->resync_mark) / HZ);
6891 if (!dt) dt++;
6892 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6893 - mddev->resync_mark_cnt;
6894
6895 rt = max_sectors - resync;
6896 sector_div(rt, db/32+1);
6897 rt *= dt;
6898 rt >>= 5;
6899
6900 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6901 ((unsigned long)rt % 60)/6);
6902
6903 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6904}
6905
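/* The /proc/mdstat iterator uses two magic cookies in addition to real
 * mddev pointers: (void*)1 means "print the header line" and (void*)2
 * means "print the trailing list of unused devices".  Real mddevs are
 * reference counted across iterations with mddev_get()/mddev_put().
 */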
6906static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6907{
6908 struct list_head *tmp;
6909 loff_t l = *pos;
6910 struct mddev *mddev;
6911
6912 if (l >= 0x10000)
6913 return NULL;
6914 if (!l--)
6915
6916 return (void*)1;
6917
6918 spin_lock(&all_mddevs_lock);
6919 list_for_each(tmp,&all_mddevs)
6920 if (!l--) {
6921 mddev = list_entry(tmp, struct mddev, all_mddevs);
6922 mddev_get(mddev);
6923 spin_unlock(&all_mddevs_lock);
6924 return mddev;
6925 }
6926 spin_unlock(&all_mddevs_lock);
6927 if (!l--)
6928 return (void*)2;
6929 return NULL;
6930}
6931
6932static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6933{
6934 struct list_head *tmp;
6935 struct mddev *next_mddev, *mddev = v;
6936
6937 ++*pos;
6938 if (v == (void*)2)
6939 return NULL;
6940
6941 spin_lock(&all_mddevs_lock);
6942 if (v == (void*)1)
6943 tmp = all_mddevs.next;
6944 else
6945 tmp = mddev->all_mddevs.next;
6946 if (tmp != &all_mddevs)
6947 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
6948 else {
6949 next_mddev = (void*)2;
6950 *pos = 0x10000;
6951 }
6952 spin_unlock(&all_mddevs_lock);
6953
6954 if (v != (void*)1)
6955 mddev_put(mddev);
6956 return next_mddev;
6957
6958}
6959
6960static void md_seq_stop(struct seq_file *seq, void *v)
6961{
6962 struct mddev *mddev = v;
6963
6964 if (mddev && v != (void*)1 && v != (void*)2)
6965 mddev_put(mddev);
6966}
6967
6968static int md_seq_show(struct seq_file *seq, void *v)
6969{
6970 struct mddev *mddev = v;
6971 sector_t sectors;
6972 struct md_rdev *rdev;
6973
6974 if (v == (void*)1) {
6975 struct md_personality *pers;
6976 seq_printf(seq, "Personalities : ");
6977 spin_lock(&pers_lock);
6978 list_for_each_entry(pers, &pers_list, list)
6979 seq_printf(seq, "[%s] ", pers->name);
6980
6981 spin_unlock(&pers_lock);
6982 seq_printf(seq, "\n");
6983 seq->poll_event = atomic_read(&md_event_count);
6984 return 0;
6985 }
6986 if (v == (void*)2) {
6987 status_unused(seq);
6988 return 0;
6989 }
6990
6991 if (mddev_lock(mddev) < 0)
6992 return -EINTR;
6993
6994 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
6995 seq_printf(seq, "%s : %sactive", mdname(mddev),
6996 mddev->pers ? "" : "in");
6997 if (mddev->pers) {
6998 if (mddev->ro==1)
6999 seq_printf(seq, " (read-only)");
7000 if (mddev->ro==2)
7001 seq_printf(seq, " (auto-read-only)");
7002 seq_printf(seq, " %s", mddev->pers->name);
7003 }
7004
7005 sectors = 0;
7006 rdev_for_each(rdev, mddev) {
7007 char b[BDEVNAME_SIZE];
7008 seq_printf(seq, " %s[%d]",
7009 bdevname(rdev->bdev,b), rdev->desc_nr);
7010 if (test_bit(WriteMostly, &rdev->flags))
7011 seq_printf(seq, "(W)");
7012 if (test_bit(Faulty, &rdev->flags)) {
7013 seq_printf(seq, "(F)");
7014 continue;
7015 }
7016 if (rdev->raid_disk < 0)
7017 seq_printf(seq, "(S)");
7018 if (test_bit(Replacement, &rdev->flags))
7019 seq_printf(seq, "(R)");
7020 sectors += rdev->sectors;
7021 }
7022
7023 if (!list_empty(&mddev->disks)) {
7024 if (mddev->pers)
7025 seq_printf(seq, "\n %llu blocks",
7026 (unsigned long long)
7027 mddev->array_sectors / 2);
7028 else
7029 seq_printf(seq, "\n %llu blocks",
7030 (unsigned long long)sectors / 2);
7031 }
7032 if (mddev->persistent) {
7033 if (mddev->major_version != 0 ||
7034 mddev->minor_version != 90) {
7035 seq_printf(seq," super %d.%d",
7036 mddev->major_version,
7037 mddev->minor_version);
7038 }
7039 } else if (mddev->external)
7040 seq_printf(seq, " super external:%s",
7041 mddev->metadata_type);
7042 else
7043 seq_printf(seq, " super non-persistent");
7044
7045 if (mddev->pers) {
7046 mddev->pers->status(seq, mddev);
7047 seq_printf(seq, "\n ");
7048 if (mddev->pers->sync_request) {
7049 if (mddev->curr_resync > 2) {
7050 status_resync(seq, mddev);
7051 seq_printf(seq, "\n ");
7052 } else if (mddev->curr_resync >= 1)
7053 seq_printf(seq, "\tresync=DELAYED\n ");
7054 else if (mddev->recovery_cp < MaxSector)
7055 seq_printf(seq, "\tresync=PENDING\n ");
7056 }
7057 } else
7058 seq_printf(seq, "\n ");
7059
7060 bitmap_status(seq, mddev->bitmap);
7061
7062 seq_printf(seq, "\n");
7063 }
7064 mddev_unlock(mddev);
7065
7066 return 0;
7067}
7068
7069static const struct seq_operations md_seq_ops = {
7070 .start = md_seq_start,
7071 .next = md_seq_next,
7072 .stop = md_seq_stop,
7073 .show = md_seq_show,
7074};
7075
7076static int md_seq_open(struct inode *inode, struct file *file)
7077{
7078 struct seq_file *seq;
7079 int error;
7080
7081 error = seq_open(file, &md_seq_ops);
7082 if (error)
7083 return error;
7084
7085 seq = file->private_data;
7086 seq->poll_event = atomic_read(&md_event_count);
7087 return error;
7088}
7089
7090static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7091{
7092 struct seq_file *seq = filp->private_data;
7093 int mask;
7094
7095 poll_wait(filp, &md_event_waiters, wait);
7096
7097
7098 mask = POLLIN | POLLRDNORM;
7099
7100 if (seq->poll_event != atomic_read(&md_event_count))
7101 mask |= POLLERR | POLLPRI;
7102 return mask;
7103}
7104
7105static const struct file_operations md_seq_fops = {
7106 .owner = THIS_MODULE,
7107 .open = md_seq_open,
7108 .read = seq_read,
7109 .llseek = seq_lseek,
7110 .release = seq_release_private,
7111 .poll = mdstat_poll,
7112};
7113
7114int register_md_personality(struct md_personality *p)
7115{
7116 spin_lock(&pers_lock);
7117 list_add_tail(&p->list, &pers_list);
7118 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7119 spin_unlock(&pers_lock);
7120 return 0;
7121}
7122
7123int unregister_md_personality(struct md_personality *p)
7124{
7125 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7126 spin_lock(&pers_lock);
7127 list_del_init(&p->list);
7128 spin_unlock(&pers_lock);
7129 return 0;
7130}
7131
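/* is_mddev_idle() estimates whether the array is seeing I/O other than
 * the resync itself, by comparing each member's total sector count from
 * disk_stats against the sectors submitted for sync (disk->sync_io).
 * It returns 0 ("not idle") if any device shows recent non-sync
 * activity; md_do_sync() uses this to throttle resync speed.
 */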
7132static int is_mddev_idle(struct mddev *mddev, int init)
7133{
7134 struct md_rdev * rdev;
7135 int idle;
7136 int curr_events;
7137
7138 idle = 1;
7139 rcu_read_lock();
7140 rdev_for_each_rcu(rdev, mddev) {
7141 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7142 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7143 (int)part_stat_read(&disk->part0, sectors[1]) -
7144 atomic_read(&disk->sync_io);
7145 /* sync IO will cause sync_io to increase before the disk_stats
7146 * as sync_io is counted when a request starts, and
7147 * disk_stats is counted when it completes.
7148 * So resync activity will cause curr_events to be smaller than
7149 * when there was no such activity.
7150 * non-sync IO will cause disk_stat to increase without
7151 * increasing sync_io so curr_events will (eventually)
7152 * be larger than it was before.  Once it becomes
7153 * substantially larger, the test below will cause
7154 * the array to appear non-idle, and resync will slow
7155 * down.
7156 * If there is a lot of outstanding resync activity when
7157 * we set last_events to curr_events, then all that activity
7158 * completing might cause the array to appear non-idle
7159 * and resync will be slowed down even though there might
7160 * be no dirty IO.  There will only be one such blip per
7161 * resync/recovery cycle, so it does not distort the
7162 * long-term behaviour.
7163 *
7164 * The "64" below adds a little slack so that a small amount
7165 * of non-sync IO does not immediately mark the array busy.
7166 */
7167 if (init || curr_events - rdev->last_events > 64) {
7168 rdev->last_events = curr_events;
7169 idle = 0;
7170 }
7171 }
7172 rcu_read_unlock();
7173 return idle;
7174}
7175
7176void md_done_sync(struct mddev *mddev, int blocks, int ok)
7177{
7178 /* another 'blocks' worth of 512-byte sectors has been synced */
7179 atomic_sub(blocks, &mddev->recovery_active);
7180 wake_up(&mddev->recovery_wait);
7181 if (!ok) {
7182 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7183 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7184 md_wakeup_thread(mddev->thread);
7185 /* stop recovery, signal do_sync ... */
7186 }
7187}
7188
7189
7190/* md_write_start(mddev, bi)
7191 * If we need to update some array metadata (e.g. the 'active' flag
7192 * in the superblock) before writing, schedule a superblock update
7193 * and wait for it to complete.
7194 */
7195void md_write_start(struct mddev *mddev, struct bio *bi)
7196{
7197 int did_change = 0;
7198 if (bio_data_dir(bi) != WRITE)
7199 return;
7200
7201 BUG_ON(mddev->ro == 1);
7202 if (mddev->ro == 2) {
7203 /* need to switch to read/write */
7204 mddev->ro = 0;
7205 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7206 md_wakeup_thread(mddev->thread);
7207 md_wakeup_thread(mddev->sync_thread);
7208 did_change = 1;
7209 }
7210 atomic_inc(&mddev->writes_pending);
7211 if (mddev->safemode == 1)
7212 mddev->safemode = 0;
7213 if (mddev->in_sync) {
7214 spin_lock_irq(&mddev->write_lock);
7215 if (mddev->in_sync) {
7216 mddev->in_sync = 0;
7217 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7218 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7219 md_wakeup_thread(mddev->thread);
7220 did_change = 1;
7221 }
7222 spin_unlock_irq(&mddev->write_lock);
7223 }
7224 if (did_change)
7225 sysfs_notify_dirent_safe(mddev->sysfs_state);
7226 wait_event(mddev->sb_wait,
7227 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7228}
7229
7230void md_write_end(struct mddev *mddev)
7231{
7232 if (atomic_dec_and_test(&mddev->writes_pending)) {
7233 if (mddev->safemode == 2)
7234 md_wakeup_thread(mddev->thread);
7235 else if (mddev->safemode_delay)
7236 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7237 }
7238}
7239
7240/* md_allow_write(mddev)
7241 * Calling this ensures that the array is marked 'active' so that writes
7242 * may proceed without blocking.  It is important to call this before
7243 * attempting a GFP_KERNEL allocation while holding the mddev lock.
7244 * Must be called with mddev_lock held.
7245 *
7246 * In the ->external case MD_CHANGE_PENDING can not be cleared until
7247 * mddev->lock is dropped, so return -EAGAIN after notifying userspace.
7248 */
7249int md_allow_write(struct mddev *mddev)
7250{
7251 if (!mddev->pers)
7252 return 0;
7253 if (mddev->ro)
7254 return 0;
7255 if (!mddev->pers->sync_request)
7256 return 0;
7257
7258 spin_lock_irq(&mddev->write_lock);
7259 if (mddev->in_sync) {
7260 mddev->in_sync = 0;
7261 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7262 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7263 if (mddev->safemode_delay &&
7264 mddev->safemode == 0)
7265 mddev->safemode = 1;
7266 spin_unlock_irq(&mddev->write_lock);
7267 md_update_sb(mddev, 0);
7268 sysfs_notify_dirent_safe(mddev->sysfs_state);
7269 } else
7270 spin_unlock_irq(&mddev->write_lock);
7271
7272 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7273 return -EAGAIN;
7274 else
7275 return 0;
7276}
7277EXPORT_SYMBOL_GPL(md_allow_write);
7278
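/* md_do_sync() is the body of the resync/recovery thread.  It keeps a
 * rolling window of SYNC_MARKS progress marks, one every SYNC_MARK_STEP
 * jiffies, from which the current speed is computed, and it checkpoints
 * progress (mddev->curr_resync_completed / recovery_cp) roughly at least
 * once every UPDATE_FREQUENCY jiffies.
 */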
7279#define SYNC_MARKS 10
7280#define SYNC_MARK_STEP (3*HZ)
7281#define UPDATE_FREQUENCY (5*60*HZ)
7282void md_do_sync(struct md_thread *thread)
7283{
7284 struct mddev *mddev = thread->mddev;
7285 struct mddev *mddev2;
7286 unsigned int currspeed = 0,
7287 window;
7288 sector_t max_sectors,j, io_sectors;
7289 unsigned long mark[SYNC_MARKS];
7290 unsigned long update_time;
7291 sector_t mark_cnt[SYNC_MARKS];
7292 int last_mark,m;
7293 struct list_head *tmp;
7294 sector_t last_check;
7295 int skipped = 0;
7296 struct md_rdev *rdev;
7297 char *desc;
7298 struct blk_plug plug;
7299
7300 /* just in case the thread restarts... */
7301 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7302 return;
7303 if (mddev->ro)
7304 return;
7305
7306 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7307 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
7308 desc = "data-check";
7309 else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7310 desc = "requested-resync";
7311 else
7312 desc = "resync";
7313 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7314 desc = "reshape";
7315 else
7316 desc = "recovery";
7317
7318 /* We overload curr_resync somewhat here.
7319 * 0 == not engaged in resync at all
7320 * 2 == checking that there is no conflict with another sync
7321 * 1 == like 2, but have yielded to allow a conflicting resync to
7322 * commence
7323 * 3 == resync has started, but no blocks have completed yet
7324 * other == active in resync - this many sectors completed
7325 *
7326 * Before starting a resync we must have set curr_resync to
7327 * 2, and then checked that every "conflicting" array has curr_resync
7328 * less than ours.  When we find one that is the same or higher
7329 * we wait on resync_wait.  To avoid deadlock, we reduce curr_resync
7330 * to 1 if we choose to yield (based arbitrarily on the address of the
7331 * mddev structure).  This means we have to start checking from the
7332 * beginning again. */
7333
7334 do {
7335 mddev->curr_resync = 2;
7336
7337 try_again:
7338 if (kthread_should_stop())
7339 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7340
7341 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7342 goto skip;
7343 for_each_mddev(mddev2, tmp) {
7344 if (mddev2 == mddev)
7345 continue;
7346 if (!mddev->parallel_resync
7347 && mddev2->curr_resync
7348 && match_mddev_units(mddev, mddev2)) {
7349 DEFINE_WAIT(wq);
7350 if (mddev < mddev2 && mddev->curr_resync == 2) {
7351
7352 mddev->curr_resync = 1;
7353 wake_up(&resync_wait);
7354 }
7355 if (mddev > mddev2 && mddev->curr_resync == 1)
7356 /* no need to wait here, we can wait the next
7357 * time 'round when curr_resync == 2
7358 */
7359 continue;
7360 /* We need to wait 'interruptible' so as not to
7361 * contribute to the load average, and not to
7362 * be caught by 'softlockup'
7363 */
7364 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7365 if (!kthread_should_stop() &&
7366 mddev2->curr_resync >= mddev->curr_resync) {
7367 printk(KERN_INFO "md: delaying %s of %s"
7368 " until %s has finished (they"
7369 " share one or more physical units)\n",
7370 desc, mdname(mddev), mdname(mddev2));
7371 mddev_put(mddev2);
7372 if (signal_pending(current))
7373 flush_signals(current);
7374 schedule();
7375 finish_wait(&resync_wait, &wq);
7376 goto try_again;
7377 }
7378 finish_wait(&resync_wait, &wq);
7379 }
7380 }
7381 } while (mddev->curr_resync < 2);
7382
7383 j = 0;
7384 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7385 /* resync follows the size requested by the personality,
7386 * which defaults to physical size, but can be virtual size
7387 */
7388 max_sectors = mddev->resync_max_sectors;
7389 atomic64_set(&mddev->resync_mismatches, 0);
7390
7391 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7392 j = mddev->resync_min;
7393 else if (!mddev->bitmap)
7394 j = mddev->recovery_cp;
7395
7396 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7397 max_sectors = mddev->resync_max_sectors;
7398 else {
7399 /* recovery follows the physical size of devices */
7400 max_sectors = mddev->dev_sectors;
7401 j = MaxSector;
7402 rcu_read_lock();
7403 rdev_for_each_rcu(rdev, mddev)
7404 if (rdev->raid_disk >= 0 &&
7405 !test_bit(Faulty, &rdev->flags) &&
7406 !test_bit(In_sync, &rdev->flags) &&
7407 rdev->recovery_offset < j)
7408 j = rdev->recovery_offset;
7409 rcu_read_unlock();
7410 }
7411
7412 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7413 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7414 " %d KB/sec/disk.\n", speed_min(mddev));
7415 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7416 "(but not more than %d KB/sec) for %s.\n",
7417 speed_max(mddev), desc);
7418
7419 is_mddev_idle(mddev, 1); /* this initializes IO event counters */
7420
7421 io_sectors = 0;
7422 for (m = 0; m < SYNC_MARKS; m++) {
7423 mark[m] = jiffies;
7424 mark_cnt[m] = io_sectors;
7425 }
7426 last_mark = 0;
7427 mddev->resync_mark = mark[last_mark];
7428 mddev->resync_mark_cnt = mark_cnt[last_mark];
7429
7430 /*
7431 * Tune reconstruction:
7432 */
7433 window = 32*(PAGE_SIZE/512);
7434 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7435 window/2, (unsigned long long)max_sectors/2);
7436
7437 atomic_set(&mddev->recovery_active, 0);
7438 last_check = 0;
7439
7440 if (j>2) {
7441 printk(KERN_INFO
7442 "md: resuming %s of %s from checkpoint.\n",
7443 desc, mdname(mddev));
7444 mddev->curr_resync = j;
7445 } else
7446 mddev->curr_resync = 3;
7447 mddev->curr_resync_completed = j;
7448 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7449 md_new_event(mddev);
7450 update_time = jiffies;
7451
7452 blk_start_plug(&plug);
7453 while (j < max_sectors) {
7454 sector_t sectors;
7455
7456 skipped = 0;
7457
7458 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7459 ((mddev->curr_resync > mddev->curr_resync_completed &&
7460 (mddev->curr_resync - mddev->curr_resync_completed)
7461 > (max_sectors >> 4)) ||
7462 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7463 (j - mddev->curr_resync_completed)*2
7464 >= mddev->resync_max - mddev->curr_resync_completed
7465 )) {
7466
7467 wait_event(mddev->recovery_wait,
7468 atomic_read(&mddev->recovery_active) == 0);
7469 mddev->curr_resync_completed = j;
7470 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7471 j > mddev->recovery_cp)
7472 mddev->recovery_cp = j;
7473 update_time = jiffies;
7474 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7475 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7476 }
7477
7478 while (j >= mddev->resync_max && !kthread_should_stop()) {
7479 /* As this condition is controlled by user-space,
7480 * we can block indefinitely, so use '_interruptible'
7481 * to avoid triggering warnings.
7482 */
7483 flush_signals(current);
7484 wait_event_interruptible(mddev->recovery_wait,
7485 mddev->resync_max > j
7486 || kthread_should_stop());
7487 }
7488
7489 if (kthread_should_stop())
7490 goto interrupted;
7491
7492 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7493 currspeed < speed_min(mddev));
7494 if (sectors == 0) {
7495 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7496 goto out;
7497 }
7498
7499 if (!skipped) {
7500 io_sectors += sectors;
7501 atomic_add(sectors, &mddev->recovery_active);
7502 }
7503
7504 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7505 break;
7506
7507 j += sectors;
7508 if (j > 2)
7509 mddev->curr_resync = j;
7510 mddev->curr_mark_cnt = io_sectors;
7511 if (last_check == 0)
7512 /* this is the earliest that rebuild will be
7513 * visible in /proc/mdstat
7514 */
7515 md_new_event(mddev);
7516
7517 if (last_check + window > io_sectors || j == max_sectors)
7518 continue;
7519
7520 last_check = io_sectors;
7521 repeat:
7522 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7523
7524 int next = (last_mark+1) % SYNC_MARKS;
7525
7526 mddev->resync_mark = mark[next];
7527 mddev->resync_mark_cnt = mark_cnt[next];
7528 mark[next] = jiffies;
7529 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7530 last_mark = next;
7531 }
7532
7533
7534 if (kthread_should_stop())
7535 goto interrupted;
7536
7537 /*
7538 * Throttle resync: sleep and go back to 'repeat' until we are
7539 * either below the minimum guaranteed speed, or below the maximum
7540 * speed and the array is otherwise IO-idle.
7541 * The system might be busy CPU-wise, but we only care about not
7542 * overloading the IO subsystem (things like an e2fsck being done
7543 * on the RAID array should execute fast).
7544 */
7545
7546 cond_resched();
7547
7548 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7549 /((jiffies-mddev->resync_mark)/HZ +1) +1;
7550
7551 if (currspeed > speed_min(mddev)) {
7552 if ((currspeed > speed_max(mddev)) ||
7553 !is_mddev_idle(mddev, 0)) {
7554 msleep(500);
7555 goto repeat;
7556 }
7557 }
7558 }
7559 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
7560 /*
7561 * this also signals 'finished resyncing' to md_stop
7562 */
7563 out:
7564 blk_finish_plug(&plug);
7565 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7566
7567 /* tell personality that we are finished */
7568 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7569
7570 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7571 mddev->curr_resync > 2) {
7572 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7573 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7574 if (mddev->curr_resync >= mddev->recovery_cp) {
7575 printk(KERN_INFO
7576 "md: checkpointing %s of %s.\n",
7577 desc, mdname(mddev));
7578 if (test_bit(MD_RECOVERY_ERROR,
7579 &mddev->recovery))
7580 mddev->recovery_cp =
7581 mddev->curr_resync_completed;
7582 else
7583 mddev->recovery_cp =
7584 mddev->curr_resync;
7585 }
7586 } else
7587 mddev->recovery_cp = MaxSector;
7588 } else {
7589 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7590 mddev->curr_resync = MaxSector;
7591 rcu_read_lock();
7592 rdev_for_each_rcu(rdev, mddev)
7593 if (rdev->raid_disk >= 0 &&
7594 mddev->delta_disks >= 0 &&
7595 !test_bit(Faulty, &rdev->flags) &&
7596 !test_bit(In_sync, &rdev->flags) &&
7597 rdev->recovery_offset < mddev->curr_resync)
7598 rdev->recovery_offset = mddev->curr_resync;
7599 rcu_read_unlock();
7600 }
7601 }
7602 skip:
7603 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7604
7605 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7606 /* We completed, so min/max settings can be forgotten if used. */
7607 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7608 mddev->resync_min = 0;
7609 mddev->resync_max = MaxSector;
7610 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7611 mddev->resync_min = mddev->curr_resync_completed;
7612 mddev->curr_resync = 0;
7613 wake_up(&resync_wait);
7614 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7615 md_wakeup_thread(mddev->thread);
7616 return;
7617
7618 interrupted:
7619 /*
7620 * The thread was asked to stop: flag the interruption and exit via 'out'.
7621 */
7622 printk(KERN_INFO
7623 "md: md_do_sync() got signal ... exiting\n");
7624 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7625 goto out;
7626
7627}
7628EXPORT_SYMBOL_GPL(md_do_sync);
7629
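/* remove_and_add_spares() first detaches members that have failed (or
 * fallen out of sync) and have no pending I/O, then offers every unused,
 * non-faulty device back to the personality as a spare.  The return
 * value counts devices still in need of rebuilding, which the caller
 * uses to decide whether a recovery thread should be started.
 */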
7630static int remove_and_add_spares(struct mddev *mddev)
7631{
7632 struct md_rdev *rdev;
7633 int spares = 0;
7634 int removed = 0;
7635
7636 rdev_for_each(rdev, mddev)
7637 if (rdev->raid_disk >= 0 &&
7638 !test_bit(Blocked, &rdev->flags) &&
7639 (test_bit(Faulty, &rdev->flags) ||
7640 ! test_bit(In_sync, &rdev->flags)) &&
7641 atomic_read(&rdev->nr_pending)==0) {
7642 if (mddev->pers->hot_remove_disk(
7643 mddev, rdev) == 0) {
7644 sysfs_unlink_rdev(mddev, rdev);
7645 rdev->raid_disk = -1;
7646 removed++;
7647 }
7648 }
7649 if (removed)
7650 sysfs_notify(&mddev->kobj, NULL,
7651 "degraded");
7652
7653
7654 rdev_for_each(rdev, mddev) {
7655 if (rdev->raid_disk >= 0 &&
7656 !test_bit(In_sync, &rdev->flags) &&
7657 !test_bit(Faulty, &rdev->flags))
7658 spares++;
7659 if (rdev->raid_disk < 0
7660 && !test_bit(Faulty, &rdev->flags)) {
7661 rdev->recovery_offset = 0;
7662 if (mddev->pers->
7663 hot_add_disk(mddev, rdev) == 0) {
7664 if (sysfs_link_rdev(mddev, rdev))
7665 /* failure here is OK */;
7666 spares++;
7667 md_new_event(mddev);
7668 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7669 }
7670 }
7671 }
7672 if (removed)
7673 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7674 return spares;
7675}
7676
7677static void reap_sync_thread(struct mddev *mddev)
7678{
7679 struct md_rdev *rdev;
7680
7681 /* resync has finished, collect result */
7682 md_unregister_thread(&mddev->sync_thread);
7683 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7684 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7685 /* success... */
7686 /* activate any spares */
7687 if (mddev->pers->spare_active(mddev)) {
7688 sysfs_notify(&mddev->kobj, NULL,
7689 "degraded");
7690 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7691 }
7692 }
7693 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7694 mddev->pers->finish_reshape)
7695 mddev->pers->finish_reshape(mddev);
7696
7697 /* If the array is no longer degraded, then any saved_raid_disk
7698 * information must be scrapped.  Also if any device is now
7699 * In_sync we must clear its saved_raid_disk so that the
7700 * superblock for an incrementally recovered device gets
7701 * written out.
7702 */
7703 rdev_for_each(rdev, mddev)
7704 if (!mddev->degraded ||
7705 test_bit(In_sync, &rdev->flags))
7706 rdev->saved_raid_disk = -1;
7707
7708 md_update_sb(mddev, 1);
7709 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7710 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7711 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7712 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7713 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7714
7715 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7716 sysfs_notify_dirent_safe(mddev->sysfs_action);
7717 md_new_event(mddev);
7718 if (mddev->event_work.func)
7719 queue_work(md_misc_wq, &mddev->event_work);
7720}
7721
7722/*
7723 * This routine is regularly called by all per-raid-array threads to
7724 * deal with generic issues like resync and super-block update.
7725 * Raid personalities that don't have a thread (linear/raid0) do not
7726 * need this as they never do any recovery or update the superblock.
7727 *
7728 * It does not do any resync itself, but rather "forks" off other threads
7729 * to do that as needed.
7730 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
7731 * "->recovery" and create a thread at ->sync_thread.
7732 * When the thread finishes it sets MD_RECOVERY_DONE
7733 * and wakes up this thread, which will reap the thread and finish up.
7734 * This thread also removes any faulty devices (with nr_pending == 0).
7735 *
7736 * The overall approach is:
7737 *  1/ if the superblock needs updating, update it.
7738 *  2/ If a recovery thread is running, don't do anything else.
7739 *  3/ If recovery has finished, clean up, possibly marking spares active.
7740 *  4/ If there are any faulty devices, remove them.
7741 *  5/ If the array is degraded, try to add spares.
7742 *  6/ If the array has spares or is not in-sync, start a resync thread.
7743 */
7744void md_check_recovery(struct mddev *mddev)
7745{
7746 if (mddev->suspended)
7747 return;
7748
7749 if (mddev->bitmap)
7750 bitmap_daemon_work(mddev);
7751
7752 if (signal_pending(current)) {
7753 if (mddev->pers->sync_request && !mddev->external) {
7754 printk(KERN_INFO "md: %s in immediate safe mode\n",
7755 mdname(mddev));
7756 mddev->safemode = 2;
7757 }
7758 flush_signals(current);
7759 }
7760
7761 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7762 return;
7763 if ( ! (
7764 (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
7765 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7766 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7767 (mddev->external == 0 && mddev->safemode == 1) ||
7768 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7769 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7770 ))
7771 return;
7772
7773 if (mddev_trylock(mddev)) {
7774 int spares = 0;
7775
7776 if (mddev->ro) {
7777 /* On a read-only array the only thing we do is remove
7778 * failed devices.
7779 */
7780 struct md_rdev *rdev;
7781 rdev_for_each(rdev, mddev)
7782 if (rdev->raid_disk >= 0 &&
7783 !test_bit(Blocked, &rdev->flags) &&
7784 test_bit(Faulty, &rdev->flags) &&
7785 atomic_read(&rdev->nr_pending)==0) {
7786 if (mddev->pers->hot_remove_disk(
7787 mddev, rdev) == 0) {
7788 sysfs_unlink_rdev(mddev, rdev);
7789 rdev->raid_disk = -1;
7790 }
7791 }
7792 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7793 goto unlock;
7794 }
7795
7796 if (!mddev->external) {
7797 int did_change = 0;
7798 spin_lock_irq(&mddev->write_lock);
7799 if (mddev->safemode &&
7800 !atomic_read(&mddev->writes_pending) &&
7801 !mddev->in_sync &&
7802 mddev->recovery_cp == MaxSector) {
7803 mddev->in_sync = 1;
7804 did_change = 1;
7805 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7806 }
7807 if (mddev->safemode == 1)
7808 mddev->safemode = 0;
7809 spin_unlock_irq(&mddev->write_lock);
7810 if (did_change)
7811 sysfs_notify_dirent_safe(mddev->sysfs_state);
7812 }
7813
7814 if (mddev->flags)
7815 md_update_sb(mddev, 0);
7816
7817 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7818 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7819 /* resync/recovery still happening */
7820 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7821 goto unlock;
7822 }
7823 if (mddev->sync_thread) {
7824 reap_sync_thread(mddev);
7825 goto unlock;
7826 }
7827 /* Set RUNNING before clearing NEEDED to avoid
7828 * any transients in the value of "sync_action".
7829 */
7830 mddev->curr_resync_completed = 0;
7831 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7832 /* Clear some bits that don't mean anything, but
7833 * might be left set
7834 */
7835 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7836 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7837
7838 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7839 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7840 goto unlock;
7841 /* no recovery is running.
7842 * remove any failed drives, then
7843 * add spares if possible.
7844 * Spares are also removed and re-added, to allow
7845 * the personality to fail the re-add.
7846 */
7847
7848 if (mddev->reshape_position != MaxSector) {
7849 if (mddev->pers->check_reshape == NULL ||
7850 mddev->pers->check_reshape(mddev) != 0)
7851
7852 goto unlock;
7853 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7854 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7855 } else if ((spares = remove_and_add_spares(mddev))) {
7856 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7857 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7858 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7859 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7860 } else if (mddev->recovery_cp < MaxSector) {
7861 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7862 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7863 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7864
7865 goto unlock;
7866
7867 if (mddev->pers->sync_request) {
7868 if (spares) {
7869 /* We are adding a device or devices to an array
7870 * which has the bitmap stored on all devices.
7871 * So make sure all bitmap pages get written
7872 */
7873 bitmap_write_all(mddev->bitmap);
7874 }
7875 mddev->sync_thread = md_register_thread(md_do_sync,
7876 mddev,
7877 "resync");
7878 if (!mddev->sync_thread) {
7879 printk(KERN_ERR "%s: could not start resync"
7880 " thread...\n",
7881 mdname(mddev));
7882
7883 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7884 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7885 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7886 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7887 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7888 } else
7889 md_wakeup_thread(mddev->sync_thread);
7890 sysfs_notify_dirent_safe(mddev->sysfs_action);
7891 md_new_event(mddev);
7892 }
7893 unlock:
7894 if (!mddev->sync_thread) {
7895 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7896 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7897 &mddev->recovery))
7898 if (mddev->sysfs_action)
7899 sysfs_notify_dirent_safe(mddev->sysfs_action);
7900 }
7901 mddev_unlock(mddev);
7902 }
7903}
7904
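/* Wait (for up to five seconds) for @rdev to stop being Blocked or
 * BlockedBadBlocks, then drop the pending reference the caller holds on
 * it.
 */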
7905void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7906{
7907 sysfs_notify_dirent_safe(rdev->sysfs_state);
7908 wait_event_timeout(rdev->blocked_wait,
7909 !test_bit(Blocked, &rdev->flags) &&
7910 !test_bit(BlockedBadBlocks, &rdev->flags),
7911 msecs_to_jiffies(5000));
7912 rdev_dec_pending(rdev, mddev);
7913}
7914EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7915
7916void md_finish_reshape(struct mddev *mddev)
7917{
7918 /* called by the personality module when a reshape completes. */
7919 struct md_rdev *rdev;
7920
7921 rdev_for_each(rdev, mddev) {
7922 if (rdev->data_offset > rdev->new_data_offset)
7923 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7924 else
7925 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7926 rdev->data_offset = rdev->new_data_offset;
7927 }
7928}
7929EXPORT_SYMBOL(md_finish_reshape);
7930
7931/* Bad block management.
7932 * We can record which blocks on each device are 'bad' and so just
7933 * fail those blocks, or that stripe, rather than the whole device.
7934 * Entries in the bad-block table are 64bits wide.  This comprises:
7935 * Length of bad-range, in sectors: 0-511 for lengths 1-512
7936 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
7937 *  A 'shift' can be set so that larger blocks are tracked and
7938 *  consequently larger devices can be covered.
7939 * 'Acknowledged' flag - 1 bit. - the most significant bit.
7940 *
7941 * Locking of the bad-block table uses a seqlock so md_is_badblock
7942 * might need to retry if it is very unlucky.
7943 * We will sometimes want to check for bad blocks in a bi_end_io function,
7944 * so we use the write_seqlock_irq variant.
7945 *
7946 * When looking for a bad block we specify a range and want to
7947 * know if any block in the range is bad.  So we binary-search
7948 * to the last range that starts at-or-before the given endpoint,
7949 * (or "before the sector after the target range")
7950 * then see if it ends after the given start.
7951 * We return
7952 *  0 if there are no known bad blocks in the range
7953 *  1 if there are known bad blocks which are all acknowledged
7954 * -1 if there are bad blocks which have not yet been acknowledged
7955 *  in metadata, plus the start/length of the first bad section we overlap.
7956 */
7957int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
7958 sector_t *first_bad, int *bad_sectors)
7959{
7960 int hi;
7961 int lo;
7962 u64 *p = bb->page;
7963 int rv;
7964 sector_t target = s + sectors;
7965 unsigned seq;
7966
7967 if (bb->shift > 0) {
7968
7969 s >>= bb->shift;
7970 target += (1<<bb->shift) - 1;
7971 target >>= bb->shift;
7972 sectors = target - s;
7973 }
7974 /* 'target' is now the first block after the bad range */
7975
7976retry:
7977 seq = read_seqbegin(&bb->lock);
7978 lo = 0;
7979 rv = 0;
7980 hi = bb->count;
7981
7982 /* Binary search between lo and hi for 'target'
7983 * i.e. for the last range that starts before 'target'
7984 */
7985 /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
7986 * are known not to be the last range before target.
7987 * VARIANT: hi-lo is the number of possible
7988 * ranges, and decreases until it reaches 1
7989 */
7990 while (hi - lo > 1) {
7991 int mid = (lo + hi) / 2;
7992 sector_t a = BB_OFFSET(p[mid]);
7993 if (a < target)
7994
7995
7996 lo = mid;
7997 else
7998
7999 hi = mid;
8000 }
8001
8002 if (hi > lo) {
8003 /* need to check all ranges that end after 's' to see if
8004 * any are unacknowledged.
8005 */
8006 while (lo >= 0 &&
8007 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8008 if (BB_OFFSET(p[lo]) < target) {
8009 /* starts before the end, and finishes after
8010 * the start, so they must overlap
8011 */
8012 if (rv != -1 && BB_ACK(p[lo]))
8013 rv = 1;
8014 else
8015 rv = -1;
8016 *first_bad = BB_OFFSET(p[lo]);
8017 *bad_sectors = BB_LEN(p[lo]);
8018 }
8019 lo--;
8020 }
8021 }
8022
8023 if (read_seqretry(&bb->lock, seq))
8024 goto retry;
8025
8026 return rv;
8027}
8028EXPORT_SYMBOL_GPL(md_is_badblock);
8029
8030/*
8031 * Add a range of bad blocks to the table.
8032 * This might extend the table, or might contract it
8033 * if two adjacent ranges can be merged.
8034 * We binary-search to find the 'insertion' point, then
8035 * decide how best to handle it.
8036 */
8037static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
8038 int acknowledged)
8039{
8040 u64 *p;
8041 int lo, hi;
8042 int rv = 1;
8043
8044 if (bb->shift < 0)
8045
8046 return 0;
8047
8048 if (bb->shift) {
8049
8050 sector_t next = s + sectors;
8051 s >>= bb->shift;
8052 next += (1<<bb->shift) - 1;
8053 next >>= bb->shift;
8054 sectors = next - s;
8055 }
8056
8057 write_seqlock_irq(&bb->lock);
8058
8059 p = bb->page;
8060 lo = 0;
8061 hi = bb->count;
8062
8063 while (hi - lo > 1) {
8064 int mid = (lo + hi) / 2;
8065 sector_t a = BB_OFFSET(p[mid]);
8066 if (a <= s)
8067 lo = mid;
8068 else
8069 hi = mid;
8070 }
8071 if (hi > lo && BB_OFFSET(p[lo]) > s)
8072 hi = lo;
8073
8074 if (hi > lo) {
8075 /* we found a range that might merge with the start
8076 * of our new range
8077 */
8078 sector_t a = BB_OFFSET(p[lo]);
8079 sector_t e = a + BB_LEN(p[lo]);
8080 int ack = BB_ACK(p[lo]);
8081 if (e >= s) {
8082
8083 if (s == a && s + sectors >= e)
8084
8085 ack = acknowledged;
8086 else
8087 ack = ack && acknowledged;
8088
8089 if (e < s + sectors)
8090 e = s + sectors;
8091 if (e - a <= BB_MAX_LEN) {
8092 p[lo] = BB_MAKE(a, e-a, ack);
8093 s = e;
8094 } else {
8095 /* does not all fit in one range,
8096 * make p[lo] maximal
8097 */
8098 if (BB_LEN(p[lo]) != BB_MAX_LEN)
8099 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
8100 s = a + BB_MAX_LEN;
8101 }
8102 sectors = e - s;
8103 }
8104 }
8105 if (sectors && hi < bb->count) {
8106 /* 'hi' points to the first range that starts after 's'.
8107 * Maybe we can merge with the start of that range */
8108 sector_t a = BB_OFFSET(p[hi]);
8109 sector_t e = a + BB_LEN(p[hi]);
8110 int ack = BB_ACK(p[hi]);
8111 if (a <= s + sectors) {
8112
8113 if (e <= s + sectors) {
8114
8115 e = s + sectors;
8116 ack = acknowledged;
8117 } else
8118 ack = ack && acknowledged;
8119
8120 a = s;
8121 if (e - a <= BB_MAX_LEN) {
8122 p[hi] = BB_MAKE(a, e-a, ack);
8123 s = e;
8124 } else {
8125 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
8126 s = a + BB_MAX_LEN;
8127 }
8128 sectors = e - s;
8129 lo = hi;
8130 hi++;
8131 }
8132 }
8133 if (sectors == 0 && hi < bb->count) {
8134 /* we might be able to combine lo and hi */
8135 /* Note: 's' is at the end of 'lo' */
8136 sector_t a = BB_OFFSET(p[hi]);
8137 int lolen = BB_LEN(p[lo]);
8138 int hilen = BB_LEN(p[hi]);
8139 int newlen = lolen + hilen - (s - a);
8140 if (s >= a && newlen < BB_MAX_LEN) {
8141
8142 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
8143 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
8144 memmove(p + hi, p + hi + 1,
8145 (bb->count - hi - 1) * 8);
8146 bb->count--;
8147 }
8148 }
8149 while (sectors) {
8150 /* didn't merge (it all).
8151 * Need to add a range just before 'hi' */
8152 if (bb->count >= MD_MAX_BADBLOCKS) {
8153
8154 rv = 0;
8155 break;
8156 } else {
8157 int this_sectors = sectors;
8158 memmove(p + hi + 1, p + hi,
8159 (bb->count - hi) * 8);
8160 bb->count++;
8161
8162 if (this_sectors > BB_MAX_LEN)
8163 this_sectors = BB_MAX_LEN;
8164 p[hi] = BB_MAKE(s, this_sectors, acknowledged);
8165 sectors -= this_sectors;
8166 s += this_sectors;
8167 }
8168 }
8169
8170 bb->changed = 1;
8171 if (!acknowledged)
8172 bb->unacked_exist = 1;
8173 write_sequnlock_irq(&bb->lock);
8174
8175 return rv;
8176}
8177
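/* rdev_set_badblocks() records a bad range on @rdev; the sector is
 * relative to the data area, and 'is_new' selects the post-reshape data
 * offset.  On success it pokes sysfs and the md thread so the updated
 * bad-block list reaches the metadata promptly.
 */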
8178int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8179 int is_new)
8180{
8181 int rv;
8182 if (is_new)
8183 s += rdev->new_data_offset;
8184 else
8185 s += rdev->data_offset;
8186 rv = md_set_badblocks(&rdev->badblocks,
8187 s, sectors, 0);
8188 if (rv) {
8189 /* Make sure they get written out promptly */
8190 sysfs_notify_dirent_safe(rdev->sysfs_state);
8191 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8192 md_wakeup_thread(rdev->mddev->thread);
8193 }
8194 return rv;
8195}
8196EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8197
8198/*
8199 * Remove a range of bad blocks from the table.
8200 * This may involve extending the table if we split a region,
8201 * but it must not fail.  So if the table becomes full, we just
8202 * drop the remove request.
8203 */
8204static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8205{
8206 u64 *p;
8207 int lo, hi;
8208 sector_t target = s + sectors;
8209 int rv = 0;
8210
8211 if (bb->shift > 0) {
8212 /* When clearing we round the start up and the end down.
8213 * This should not matter as the shift should align with
8214 * the block size and no rounding should ever be needed.
8215 * However it is better to think a block is bad when it
8216 * isn't than to think a block is not bad when it is.
8217 */
8218 s += (1<<bb->shift) - 1;
8219 s >>= bb->shift;
8220 target >>= bb->shift;
8221 sectors = target - s;
8222 }
8223
8224 write_seqlock_irq(&bb->lock);
8225
8226 p = bb->page;
8227 lo = 0;
8228 hi = bb->count;
8229
8230 while (hi - lo > 1) {
8231 int mid = (lo + hi) / 2;
8232 sector_t a = BB_OFFSET(p[mid]);
8233 if (a < target)
8234 lo = mid;
8235 else
8236 hi = mid;
8237 }
8238 if (hi > lo) {
8239 /* p[lo] is the last range that could overlap the
8240 * current range.  Earlier ranges could also overlap,
8241 * but only this one can overlap the end of the range.
8242 */
8243 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8244
8245 int ack = BB_ACK(p[lo]);
8246 sector_t a = BB_OFFSET(p[lo]);
8247 sector_t end = a + BB_LEN(p[lo]);
8248
8249 if (a < s) {
8250
8251 if (bb->count >= MD_MAX_BADBLOCKS) {
8252 rv = 0;
8253 goto out;
8254 }
8255 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8256 bb->count++;
8257 p[lo] = BB_MAKE(a, s-a, ack);
8258 lo++;
8259 }
8260 p[lo] = BB_MAKE(target, end - target, ack);
8261
8262 hi = lo;
8263 lo--;
8264 }
8265 while (lo >= 0 &&
8266 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8267
8268 if (BB_OFFSET(p[lo]) < s) {
8269
8270 int ack = BB_ACK(p[lo]);
8271 sector_t start = BB_OFFSET(p[lo]);
8272 p[lo] = BB_MAKE(start, s - start, ack);
8273
8274 break;
8275 }
8276 lo--;
8277 }
8278 /* 'lo' is strictly before, 'hi' is strictly after,
8279 * anything between needs to be discarded
8280 */
8281 if (hi - lo > 1) {
8282 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8283 bb->count -= (hi - lo - 1);
8284 }
8285 }
8286
8287 bb->changed = 1;
8288out:
8289 write_sequnlock_irq(&bb->lock);
8290 return rv;
8291}
8292
8293int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8294 int is_new)
8295{
8296 if (is_new)
8297 s += rdev->new_data_offset;
8298 else
8299 s += rdev->data_offset;
8300 return md_clear_badblocks(&rdev->badblocks,
8301 s, sectors);
8302}
8303EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8304
8305/*
8306 * Acknowledge all bad blocks in a list.
8307 * This only succeeds if ->changed is clear.  It is used by
8308 * in-kernel metadata updates.
8309 */
8310void md_ack_all_badblocks(struct badblocks *bb)
8311{
8312 if (bb->page == NULL || bb->changed)
8313
8314 return;
8315 write_seqlock_irq(&bb->lock);
8316
8317 if (bb->changed == 0 && bb->unacked_exist) {
8318 u64 *p = bb->page;
8319 int i;
8320 for (i = 0; i < bb->count ; i++) {
8321 if (!BB_ACK(p[i])) {
8322 sector_t start = BB_OFFSET(p[i]);
8323 int len = BB_LEN(p[i]);
8324 p[i] = BB_MAKE(start, len, 1);
8325 }
8326 }
8327 bb->unacked_exist = 0;
8328 }
8329 write_sequnlock_irq(&bb->lock);
8330}
8331EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8332
8333
8334/* sysfs access to the bad-blocks list.
8335 * We present two files.
8336 * 'bad-blocks' lists sector numbers and lengths of ranges that
8337 *    are recorded as bad.  The list is truncated to fit within
8338 *    the one-page limit of sysfs.
8339 *    Writing "sector length" to this file adds an acknowledged
8340 *    bad block range.
8341 * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
8342 *    been acknowledged.  Writing to this file adds bad blocks
8343 *    without acknowledging them.  This is largely for testing.
8344 */
8345static ssize_t
8346badblocks_show(struct badblocks *bb, char *page, int unack)
8347{
8348 size_t len;
8349 int i;
8350 u64 *p = bb->page;
8351 unsigned seq;
8352
8353 if (bb->shift < 0)
8354 return 0;
8355
8356retry:
8357 seq = read_seqbegin(&bb->lock);
8358
8359 len = 0;
8360 i = 0;
8361
8362 while (len < PAGE_SIZE && i < bb->count) {
8363 sector_t s = BB_OFFSET(p[i]);
8364 unsigned int length = BB_LEN(p[i]);
8365 int ack = BB_ACK(p[i]);
8366 i++;
8367
8368 if (unack && ack)
8369 continue;
8370
8371 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8372 (unsigned long long)s << bb->shift,
8373 length << bb->shift);
8374 }
8375 if (unack && len == 0)
8376 bb->unacked_exist = 0;
8377
8378 if (read_seqretry(&bb->lock, seq))
8379 goto retry;
8380
8381 return len;
8382}
8383
8384#define DO_DEBUG 1
8385
8386static ssize_t
8387badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8388{
8389 unsigned long long sector;
8390 int length;
8391 char newline;
8392#ifdef DO_DEBUG
8393 /* Allow clearing via sysfs *only* for testing/debugging.
8394 * Normally only a successful write may clear a badblock.
8395 */
8396 int clear = 0;
8397 if (page[0] == '-') {
8398 clear = 1;
8399 page++;
8400 }
8401#endif
8402
8403 switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
8404 case 3:
8405 if (newline != '\n')
8406 return -EINVAL;
8407 case 2:
8408 if (length <= 0)
8409 return -EINVAL;
8410 break;
8411 default:
8412 return -EINVAL;
8413 }
8414
8415#ifdef DO_DEBUG
8416 if (clear) {
8417 md_clear_badblocks(bb, sector, length);
8418 return len;
8419 }
8420#endif
8421 if (md_set_badblocks(bb, sector, length, !unack))
8422 return len;
8423 else
8424 return -ENOSPC;
8425}
8426
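/* Reboot notifier: on shutdown or reboot, stop writes and switch every
 * array we can lock into immediate safe-mode, then delay briefly so the
 * devices can settle before the machine goes down.
 */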
8427static int md_notify_reboot(struct notifier_block *this,
8428 unsigned long code, void *x)
8429{
8430 struct list_head *tmp;
8431 struct mddev *mddev;
8432 int need_delay = 0;
8433
8434 for_each_mddev(mddev, tmp) {
8435 if (mddev_trylock(mddev)) {
8436 if (mddev->pers)
8437 __md_stop_writes(mddev);
8438 mddev->safemode = 2;
8439 mddev_unlock(mddev);
8440 }
8441 need_delay = 1;
8442 }
8443 /*
8444 * Certain more exotic SCSI devices are known to misbehave if the
8445 * system reboots too soon after the last write.  The right place
8446 * to handle this is the low-level driver, but we delay briefly
8447 * here to keep the RAID layer on the safe side.
8448 */
8449 if (need_delay)
8450 mdelay(1000*1);
8451
8452 return NOTIFY_DONE;
8453}
8454
8455static struct notifier_block md_notifier = {
8456 .notifier_call = md_notify_reboot,
8457 .next = NULL,
8458 .priority = INT_MAX,
8459};
8460
8461static void md_geninit(void)
8462{
8463 pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
8464
8465 proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
8466}
8467
8468static int __init md_init(void)
8469{
8470 int ret = -ENOMEM;
8471
8472 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8473 if (!md_wq)
8474 goto err_wq;
8475
8476 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8477 if (!md_misc_wq)
8478 goto err_misc_wq;
8479
8480 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8481 goto err_md;
8482
8483 if ((ret = register_blkdev(0, "mdp")) < 0)
8484 goto err_mdp;
8485 mdp_major = ret;
8486
8487 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8488 md_probe, NULL, NULL);
8489 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8490 md_probe, NULL, NULL);
8491
8492 register_reboot_notifier(&md_notifier);
8493 raid_table_header = register_sysctl_table(raid_root_table);
8494
8495 md_geninit();
8496 return 0;
8497
8498err_mdp:
8499 unregister_blkdev(MD_MAJOR, "md");
8500err_md:
8501 destroy_workqueue(md_misc_wq);
8502err_misc_wq:
8503 destroy_workqueue(md_wq);
8504err_wq:
8505 return ret;
8506}
8507
8508#ifndef MODULE
8509
8510/*
8511 * Searches all registered partitions for autorun RAID arrays
8512 * at boot time.
8513 */
8514
8515static LIST_HEAD(all_detected_devices);
8516struct detected_devices_node {
8517 struct list_head list;
8518 dev_t dev;
8519};
8520
8521void md_autodetect_dev(dev_t dev)
8522{
8523 struct detected_devices_node *node_detected_dev;
8524
8525 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8526 if (node_detected_dev) {
8527 node_detected_dev->dev = dev;
8528 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8529 } else {
8530 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8531 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8532 }
8533}
8534
8535
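/* autostart_arrays() runs at boot on non-modular builds: it imports each
 * device queued by md_autodetect_dev() using the 0.90 superblock format
 * and then lets autorun_devices() assemble them into arrays.
 */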
8536static void autostart_arrays(int part)
8537{
8538 struct md_rdev *rdev;
8539 struct detected_devices_node *node_detected_dev;
8540 dev_t dev;
8541 int i_scanned, i_passed;
8542
8543 i_scanned = 0;
8544 i_passed = 0;
8545
8546 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8547
8548 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8549 i_scanned++;
8550 node_detected_dev = list_entry(all_detected_devices.next,
8551 struct detected_devices_node, list);
8552 list_del(&node_detected_dev->list);
8553 dev = node_detected_dev->dev;
8554 kfree(node_detected_dev);
8555 rdev = md_import_device(dev,0, 90);
8556 if (IS_ERR(rdev))
8557 continue;
8558
8559 if (test_bit(Faulty, &rdev->flags)) {
8560 MD_BUG();
8561 continue;
8562 }
8563 set_bit(AutoDetected, &rdev->flags);
8564 list_add(&rdev->same_set, &pending_raid_disks);
8565 i_passed++;
8566 }
8567
8568 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8569 i_scanned, i_passed);
8570
8571 autorun_devices(part);
8572}
8573
8574#endif
8575
8576static __exit void md_exit(void)
8577{
8578 struct mddev *mddev;
8579 struct list_head *tmp;
8580
8581 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8582 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8583
8584 unregister_blkdev(MD_MAJOR,"md");
8585 unregister_blkdev(mdp_major, "mdp");
8586 unregister_reboot_notifier(&md_notifier);
8587 unregister_sysctl_table(raid_table_header);
8588 remove_proc_entry("mdstat", NULL);
8589 for_each_mddev(mddev, tmp) {
8590 export_array(mddev);
8591 mddev->hold_active = 0;
8592 }
8593 destroy_workqueue(md_misc_wq);
8594 destroy_workqueue(md_wq);
8595}
8596
8597subsys_initcall(md_init);
8598module_exit(md_exit)
8599
8600static int get_ro(char *buffer, struct kernel_param *kp)
8601{
8602 return sprintf(buffer, "%d", start_readonly);
8603}
8604static int set_ro(const char *val, struct kernel_param *kp)
8605{
8606 char *e;
8607 int num = simple_strtoul(val, &e, 10);
8608 if (*val && (*e == '\0' || *e == '\n')) {
8609 start_readonly = num;
8610 return 0;
8611 }
8612 return -EINVAL;
8613}
8614
8615module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
8616module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
8617
8618module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
8619
8620EXPORT_SYMBOL(register_md_personality);
8621EXPORT_SYMBOL(unregister_md_personality);
8622EXPORT_SYMBOL(md_error);
8623EXPORT_SYMBOL(md_done_sync);
8624EXPORT_SYMBOL(md_write_start);
8625EXPORT_SYMBOL(md_write_end);
8626EXPORT_SYMBOL(md_register_thread);
8627EXPORT_SYMBOL(md_unregister_thread);
8628EXPORT_SYMBOL(md_wakeup_thread);
8629EXPORT_SYMBOL(md_check_recovery);
8630MODULE_LICENSE("GPL");
8631MODULE_DESCRIPTION("MD RAID framework");
8632MODULE_ALIAS("md");
8633MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8634