1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include <linux/kthread.h>
36#include <linux/blkdev.h>
37#include <linux/sysctl.h>
38#include <linux/seq_file.h>
39#include <linux/fs.h>
40#include <linux/poll.h>
41#include <linux/ctype.h>
42#include <linux/string.h>
43#include <linux/hdreg.h>
44#include <linux/proc_fs.h>
45#include <linux/random.h>
46#include <linux/module.h>
47#include <linux/reboot.h>
48#include <linux/file.h>
49#include <linux/compat.h>
50#include <linux/delay.h>
51#include <linux/raid/md_p.h>
52#include <linux/raid/md_u.h>
53#include <linux/slab.h>
54#include "md.h"
55#include "bitmap.h"
56
57#ifndef MODULE
58static void autostart_arrays(int part);
59#endif
60
61
62
63
64
65
/* List of personalities; find_pers() walks it. */
static LIST_HEAD(pers_list);
/* Held by mddev_unlock() while it wakes the array's thread. */
static DEFINE_SPINLOCK(pers_lock);

static void md_print_devices(void);

static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
/* md_wq carries the flush work items queued in this file. */
static struct workqueue_struct *md_wq;
/* md_misc_wq carries the delayed-delete work queued by mddev_put(). */
static struct workqueue_struct *md_misc_wq;

static int remove_and_add_spares(struct mddev *mddev,
				 struct md_rdev *this);
77
/*
 * Log the source location of an internal inconsistency and then call
 * md_print_devices().  Arguments are accepted but ignored.
 * The expansion is a plain brace block (not do/while), so it is kept in
 * that form for compatibility with existing call sites.
 */
#define MD_BUG(x...)							\
	{								\
		printk("md: bug in file %s, line %d\n",			\
		       __FILE__, __LINE__);				\
		md_print_devices();					\
	}

/*
 * NOTE(review): presumably the default limit on corrected read errors
 * per device; the use site is outside this part of the file - confirm.
 */
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
86
87
88
89
90
91
92
93
94
95
96
97
98
99static int sysctl_speed_limit_min = 1000;
100static int sysctl_speed_limit_max = 200000;
101static inline int speed_min(struct mddev *mddev)
102{
103 return mddev->sync_speed_min ?
104 mddev->sync_speed_min : sysctl_speed_limit_min;
105}
106
107static inline int speed_max(struct mddev *mddev)
108{
109 return mddev->sync_speed_max ?
110 mddev->sync_speed_max : sysctl_speed_limit_max;
111}
112
113static struct ctl_table_header *raid_table_header;
114
115static ctl_table raid_table[] = {
116 {
117 .procname = "speed_limit_min",
118 .data = &sysctl_speed_limit_min,
119 .maxlen = sizeof(int),
120 .mode = S_IRUGO|S_IWUSR,
121 .proc_handler = proc_dointvec,
122 },
123 {
124 .procname = "speed_limit_max",
125 .data = &sysctl_speed_limit_max,
126 .maxlen = sizeof(int),
127 .mode = S_IRUGO|S_IWUSR,
128 .proc_handler = proc_dointvec,
129 },
130 { }
131};
132
133static ctl_table raid_dir_table[] = {
134 {
135 .procname = "raid",
136 .maxlen = 0,
137 .mode = S_IRUGO|S_IXUGO,
138 .child = raid_table,
139 },
140 { }
141};
142
143static ctl_table raid_root_table[] = {
144 {
145 .procname = "dev",
146 .maxlen = 0,
147 .mode = 0555,
148 .child = raid_dir_table,
149 },
150 { }
151};
152
153static const struct block_device_operations md_fops;
154
155static int start_readonly;
156
157
158
159
160
161struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
162 struct mddev *mddev)
163{
164 struct bio *b;
165
166 if (!mddev || !mddev->bio_set)
167 return bio_alloc(gfp_mask, nr_iovecs);
168
169 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
170 if (!b)
171 return NULL;
172 return b;
173}
174EXPORT_SYMBOL_GPL(bio_alloc_mddev);
175
176struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
177 struct mddev *mddev)
178{
179 if (!mddev || !mddev->bio_set)
180 return bio_clone(bio, gfp_mask);
181
182 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
183}
184EXPORT_SYMBOL_GPL(bio_clone_mddev);
185
186void md_trim_bio(struct bio *bio, int offset, int size)
187{
188
189
190
191
192 int i;
193 struct bio_vec *bvec;
194 int sofar = 0;
195
196 size <<= 9;
197 if (offset == 0 && size == bio->bi_size)
198 return;
199
200 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
201
202 bio_advance(bio, offset << 9);
203
204 bio->bi_size = size;
205
206
207 if (bio->bi_idx) {
208 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
209 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
210 bio->bi_vcnt -= bio->bi_idx;
211 bio->bi_idx = 0;
212 }
213
214 bio_for_each_segment(bvec, bio, i) {
215 if (sofar + bvec->bv_len > size)
216 bvec->bv_len = size - sofar;
217 if (bvec->bv_len == 0) {
218 bio->bi_vcnt = i;
219 break;
220 }
221 sofar += bvec->bv_len;
222 }
223}
224EXPORT_SYMBOL_GPL(md_trim_bio);
225
226
227
228
229
230
231
232
233
234
235
236static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
237static atomic_t md_event_count;
238void md_new_event(struct mddev *mddev)
239{
240 atomic_inc(&md_event_count);
241 wake_up(&md_event_waiters);
242}
243EXPORT_SYMBOL_GPL(md_new_event);
244
245
246
247
248static void md_new_event_inintr(struct mddev *mddev)
249{
250 atomic_inc(&md_event_count);
251 wake_up(&md_event_waiters);
252}
253
254
255
256
257
/* Every mddev is linked on all_mddevs; all_mddevs_lock guards the list. */
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

/*
 * Iterate over all mddevs.  @_mddev is the cursor, @_tmp a
 * struct list_head * scratch variable.
 *
 * The list lock is held only while stepping.  Before the lock is
 * dropped a reference is taken on the next entry (mddev_get), and the
 * reference on the previous entry is released (mddev_put) after the
 * lock has been dropped, so the loop body runs unlocked with a
 * reference held on the current entry.  When the loop runs to
 * completion the last reference is dropped by the final condition
 * check; nothing in the macro drops it if the body leaves early.
 */
#define for_each_mddev(_mddev,_tmp)					\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
		_mddev = NULL;});					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
		)
283
284
285
286
287
288
289
290
291
292static void md_make_request(struct request_queue *q, struct bio *bio)
293{
294 const int rw = bio_data_dir(bio);
295 struct mddev *mddev = q->queuedata;
296 int cpu;
297 unsigned int sectors;
298
299 if (mddev == NULL || mddev->pers == NULL
300 || !mddev->ready) {
301 bio_io_error(bio);
302 return;
303 }
304 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
305 bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
306 return;
307 }
308 smp_rmb();
309 rcu_read_lock();
310 if (mddev->suspended) {
311 DEFINE_WAIT(__wait);
312 for (;;) {
313 prepare_to_wait(&mddev->sb_wait, &__wait,
314 TASK_UNINTERRUPTIBLE);
315 if (!mddev->suspended)
316 break;
317 rcu_read_unlock();
318 schedule();
319 rcu_read_lock();
320 }
321 finish_wait(&mddev->sb_wait, &__wait);
322 }
323 atomic_inc(&mddev->active_io);
324 rcu_read_unlock();
325
326
327
328
329
330 sectors = bio_sectors(bio);
331 mddev->pers->make_request(mddev, bio);
332
333 cpu = part_stat_lock();
334 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
335 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
336 part_stat_unlock();
337
338 if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
339 wake_up(&mddev->sb_wait);
340}
341
342
343
344
345
346
347
348void mddev_suspend(struct mddev *mddev)
349{
350 BUG_ON(mddev->suspended);
351 mddev->suspended = 1;
352 synchronize_rcu();
353 wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
354 mddev->pers->quiesce(mddev, 1);
355
356 del_timer_sync(&mddev->safemode_timer);
357}
358EXPORT_SYMBOL_GPL(mddev_suspend);
359
360void mddev_resume(struct mddev *mddev)
361{
362 mddev->suspended = 0;
363 wake_up(&mddev->sb_wait);
364 mddev->pers->quiesce(mddev, 0);
365
366 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
367 md_wakeup_thread(mddev->thread);
368 md_wakeup_thread(mddev->sync_thread);
369}
370EXPORT_SYMBOL_GPL(mddev_resume);
371
372int mddev_congested(struct mddev *mddev, int bits)
373{
374 return mddev->suspended;
375}
376EXPORT_SYMBOL(mddev_congested);
377
378
379
380
381
382static void md_end_flush(struct bio *bio, int err)
383{
384 struct md_rdev *rdev = bio->bi_private;
385 struct mddev *mddev = rdev->mddev;
386
387 rdev_dec_pending(rdev, mddev);
388
389 if (atomic_dec_and_test(&mddev->flush_pending)) {
390
391 queue_work(md_wq, &mddev->flush_work);
392 }
393 bio_put(bio);
394}
395
396static void md_submit_flush_data(struct work_struct *ws);
397
398static void submit_flushes(struct work_struct *ws)
399{
400 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
401 struct md_rdev *rdev;
402
403 INIT_WORK(&mddev->flush_work, md_submit_flush_data);
404 atomic_set(&mddev->flush_pending, 1);
405 rcu_read_lock();
406 rdev_for_each_rcu(rdev, mddev)
407 if (rdev->raid_disk >= 0 &&
408 !test_bit(Faulty, &rdev->flags)) {
409
410
411
412
413 struct bio *bi;
414 atomic_inc(&rdev->nr_pending);
415 atomic_inc(&rdev->nr_pending);
416 rcu_read_unlock();
417 bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
418 bi->bi_end_io = md_end_flush;
419 bi->bi_private = rdev;
420 bi->bi_bdev = rdev->bdev;
421 atomic_inc(&mddev->flush_pending);
422 submit_bio(WRITE_FLUSH, bi);
423 rcu_read_lock();
424 rdev_dec_pending(rdev, mddev);
425 }
426 rcu_read_unlock();
427 if (atomic_dec_and_test(&mddev->flush_pending))
428 queue_work(md_wq, &mddev->flush_work);
429}
430
431static void md_submit_flush_data(struct work_struct *ws)
432{
433 struct mddev *mddev = container_of(ws, struct mddev, flush_work);
434 struct bio *bio = mddev->flush_bio;
435
436 if (bio->bi_size == 0)
437
438 bio_endio(bio, 0);
439 else {
440 bio->bi_rw &= ~REQ_FLUSH;
441 mddev->pers->make_request(mddev, bio);
442 }
443
444 mddev->flush_bio = NULL;
445 wake_up(&mddev->sb_wait);
446}
447
448void md_flush_request(struct mddev *mddev, struct bio *bio)
449{
450 spin_lock_irq(&mddev->write_lock);
451 wait_event_lock_irq(mddev->sb_wait,
452 !mddev->flush_bio,
453 mddev->write_lock);
454 mddev->flush_bio = bio;
455 spin_unlock_irq(&mddev->write_lock);
456
457 INIT_WORK(&mddev->flush_work, submit_flushes);
458 queue_work(md_wq, &mddev->flush_work);
459}
460EXPORT_SYMBOL(md_flush_request);
461
462void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
463{
464 struct mddev *mddev = cb->data;
465 md_wakeup_thread(mddev->thread);
466 kfree(cb);
467}
468EXPORT_SYMBOL(md_unplug);
469
470static inline struct mddev *mddev_get(struct mddev *mddev)
471{
472 atomic_inc(&mddev->active);
473 return mddev;
474}
475
476static void mddev_delayed_delete(struct work_struct *ws);
477
478static void mddev_put(struct mddev *mddev)
479{
480 struct bio_set *bs = NULL;
481
482 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
483 return;
484 if (!mddev->raid_disks && list_empty(&mddev->disks) &&
485 mddev->ctime == 0 && !mddev->hold_active) {
486
487
488 list_del_init(&mddev->all_mddevs);
489 bs = mddev->bio_set;
490 mddev->bio_set = NULL;
491 if (mddev->gendisk) {
492
493
494
495
496
497 INIT_WORK(&mddev->del_work, mddev_delayed_delete);
498 queue_work(md_misc_wq, &mddev->del_work);
499 } else
500 kfree(mddev);
501 }
502 spin_unlock(&all_mddevs_lock);
503 if (bs)
504 bioset_free(bs);
505}
506
507void mddev_init(struct mddev *mddev)
508{
509 mutex_init(&mddev->open_mutex);
510 mutex_init(&mddev->reconfig_mutex);
511 mutex_init(&mddev->bitmap_info.mutex);
512 INIT_LIST_HEAD(&mddev->disks);
513 INIT_LIST_HEAD(&mddev->all_mddevs);
514 init_timer(&mddev->safemode_timer);
515 atomic_set(&mddev->active, 1);
516 atomic_set(&mddev->openers, 0);
517 atomic_set(&mddev->active_io, 0);
518 spin_lock_init(&mddev->write_lock);
519 atomic_set(&mddev->flush_pending, 0);
520 init_waitqueue_head(&mddev->sb_wait);
521 init_waitqueue_head(&mddev->recovery_wait);
522 mddev->reshape_position = MaxSector;
523 mddev->reshape_backwards = 0;
524 mddev->last_sync_action = "none";
525 mddev->resync_min = 0;
526 mddev->resync_max = MaxSector;
527 mddev->level = LEVEL_NONE;
528}
529EXPORT_SYMBOL_GPL(mddev_init);
530
531static struct mddev * mddev_find(dev_t unit)
532{
533 struct mddev *mddev, *new = NULL;
534
535 if (unit && MAJOR(unit) != MD_MAJOR)
536 unit &= ~((1<<MdpMinorShift)-1);
537
538 retry:
539 spin_lock(&all_mddevs_lock);
540
541 if (unit) {
542 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
543 if (mddev->unit == unit) {
544 mddev_get(mddev);
545 spin_unlock(&all_mddevs_lock);
546 kfree(new);
547 return mddev;
548 }
549
550 if (new) {
551 list_add(&new->all_mddevs, &all_mddevs);
552 spin_unlock(&all_mddevs_lock);
553 new->hold_active = UNTIL_IOCTL;
554 return new;
555 }
556 } else if (new) {
557
558 static int next_minor = 512;
559 int start = next_minor;
560 int is_free = 0;
561 int dev = 0;
562 while (!is_free) {
563 dev = MKDEV(MD_MAJOR, next_minor);
564 next_minor++;
565 if (next_minor > MINORMASK)
566 next_minor = 0;
567 if (next_minor == start) {
568
569 spin_unlock(&all_mddevs_lock);
570 kfree(new);
571 return NULL;
572 }
573
574 is_free = 1;
575 list_for_each_entry(mddev, &all_mddevs, all_mddevs)
576 if (mddev->unit == dev) {
577 is_free = 0;
578 break;
579 }
580 }
581 new->unit = dev;
582 new->md_minor = MINOR(dev);
583 new->hold_active = UNTIL_STOP;
584 list_add(&new->all_mddevs, &all_mddevs);
585 spin_unlock(&all_mddevs_lock);
586 return new;
587 }
588 spin_unlock(&all_mddevs_lock);
589
590 new = kzalloc(sizeof(*new), GFP_KERNEL);
591 if (!new)
592 return NULL;
593
594 new->unit = unit;
595 if (MAJOR(unit) == MD_MAJOR)
596 new->md_minor = MINOR(unit);
597 else
598 new->md_minor = MINOR(unit) >> MdpMinorShift;
599
600 mddev_init(new);
601
602 goto retry;
603}
604
605static inline int mddev_lock(struct mddev * mddev)
606{
607 return mutex_lock_interruptible(&mddev->reconfig_mutex);
608}
609
610static inline int mddev_is_locked(struct mddev *mddev)
611{
612 return mutex_is_locked(&mddev->reconfig_mutex);
613}
614
615static inline int mddev_trylock(struct mddev * mddev)
616{
617 return mutex_trylock(&mddev->reconfig_mutex);
618}
619
620static struct attribute_group md_redundancy_group;
621
622static void mddev_unlock(struct mddev * mddev)
623{
624 if (mddev->to_remove) {
625
626
627
628
629
630
631
632
633
634
635
636
637 struct attribute_group *to_remove = mddev->to_remove;
638 mddev->to_remove = NULL;
639 mddev->sysfs_active = 1;
640 mutex_unlock(&mddev->reconfig_mutex);
641
642 if (mddev->kobj.sd) {
643 if (to_remove != &md_redundancy_group)
644 sysfs_remove_group(&mddev->kobj, to_remove);
645 if (mddev->pers == NULL ||
646 mddev->pers->sync_request == NULL) {
647 sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
648 if (mddev->sysfs_action)
649 sysfs_put(mddev->sysfs_action);
650 mddev->sysfs_action = NULL;
651 }
652 }
653 mddev->sysfs_active = 0;
654 } else
655 mutex_unlock(&mddev->reconfig_mutex);
656
657
658
659
660 spin_lock(&pers_lock);
661 md_wakeup_thread(mddev->thread);
662 spin_unlock(&pers_lock);
663}
664
665static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
666{
667 struct md_rdev *rdev;
668
669 rdev_for_each(rdev, mddev)
670 if (rdev->desc_nr == nr)
671 return rdev;
672
673 return NULL;
674}
675
676static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
677{
678 struct md_rdev *rdev;
679
680 rdev_for_each_rcu(rdev, mddev)
681 if (rdev->desc_nr == nr)
682 return rdev;
683
684 return NULL;
685}
686
687static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
688{
689 struct md_rdev *rdev;
690
691 rdev_for_each(rdev, mddev)
692 if (rdev->bdev->bd_dev == dev)
693 return rdev;
694
695 return NULL;
696}
697
698static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
699{
700 struct md_rdev *rdev;
701
702 rdev_for_each_rcu(rdev, mddev)
703 if (rdev->bdev->bd_dev == dev)
704 return rdev;
705
706 return NULL;
707}
708
709static struct md_personality *find_pers(int level, char *clevel)
710{
711 struct md_personality *pers;
712 list_for_each_entry(pers, &pers_list, list) {
713 if (level != LEVEL_NONE && pers->level == level)
714 return pers;
715 if (strcmp(pers->name, clevel)==0)
716 return pers;
717 }
718 return NULL;
719}
720
721
722static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
723{
724 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
725 return MD_NEW_SIZE_SECTORS(num_sectors);
726}
727
728static int alloc_disk_sb(struct md_rdev * rdev)
729{
730 if (rdev->sb_page)
731 MD_BUG();
732
733 rdev->sb_page = alloc_page(GFP_KERNEL);
734 if (!rdev->sb_page) {
735 printk(KERN_ALERT "md: out of memory.\n");
736 return -ENOMEM;
737 }
738
739 return 0;
740}
741
742void md_rdev_clear(struct md_rdev *rdev)
743{
744 if (rdev->sb_page) {
745 put_page(rdev->sb_page);
746 rdev->sb_loaded = 0;
747 rdev->sb_page = NULL;
748 rdev->sb_start = 0;
749 rdev->sectors = 0;
750 }
751 if (rdev->bb_page) {
752 put_page(rdev->bb_page);
753 rdev->bb_page = NULL;
754 }
755 kfree(rdev->badblocks.page);
756 rdev->badblocks.page = NULL;
757}
758EXPORT_SYMBOL_GPL(md_rdev_clear);
759
760static void super_written(struct bio *bio, int error)
761{
762 struct md_rdev *rdev = bio->bi_private;
763 struct mddev *mddev = rdev->mddev;
764
765 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
766 printk("md: super_written gets error=%d, uptodate=%d\n",
767 error, test_bit(BIO_UPTODATE, &bio->bi_flags));
768 WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
769 md_error(mddev, rdev);
770 }
771
772 if (atomic_dec_and_test(&mddev->pending_writes))
773 wake_up(&mddev->sb_wait);
774 bio_put(bio);
775}
776
777void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
778 sector_t sector, int size, struct page *page)
779{
780
781
782
783
784
785
786 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
787
788 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
789 bio->bi_sector = sector;
790 bio_add_page(bio, page, size, 0);
791 bio->bi_private = rdev;
792 bio->bi_end_io = super_written;
793
794 atomic_inc(&mddev->pending_writes);
795 submit_bio(WRITE_FLUSH_FUA, bio);
796}
797
798void md_super_wait(struct mddev *mddev)
799{
800
801 DEFINE_WAIT(wq);
802 for(;;) {
803 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
804 if (atomic_read(&mddev->pending_writes)==0)
805 break;
806 schedule();
807 }
808 finish_wait(&mddev->sb_wait, &wq);
809}
810
811static void bi_complete(struct bio *bio, int error)
812{
813 complete((struct completion*)bio->bi_private);
814}
815
816int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
817 struct page *page, int rw, bool metadata_op)
818{
819 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
820 struct completion event;
821 int ret;
822
823 rw |= REQ_SYNC;
824
825 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
826 rdev->meta_bdev : rdev->bdev;
827 if (metadata_op)
828 bio->bi_sector = sector + rdev->sb_start;
829 else if (rdev->mddev->reshape_position != MaxSector &&
830 (rdev->mddev->reshape_backwards ==
831 (sector >= rdev->mddev->reshape_position)))
832 bio->bi_sector = sector + rdev->new_data_offset;
833 else
834 bio->bi_sector = sector + rdev->data_offset;
835 bio_add_page(bio, page, size, 0);
836 init_completion(&event);
837 bio->bi_private = &event;
838 bio->bi_end_io = bi_complete;
839 submit_bio(rw, bio);
840 wait_for_completion(&event);
841
842 ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
843 bio_put(bio);
844 return ret;
845}
846EXPORT_SYMBOL_GPL(sync_page_io);
847
848static int read_disk_sb(struct md_rdev * rdev, int size)
849{
850 char b[BDEVNAME_SIZE];
851 if (!rdev->sb_page) {
852 MD_BUG();
853 return -EINVAL;
854 }
855 if (rdev->sb_loaded)
856 return 0;
857
858
859 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
860 goto fail;
861 rdev->sb_loaded = 1;
862 return 0;
863
864fail:
865 printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
866 bdevname(rdev->bdev,b));
867 return -EINVAL;
868}
869
870static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
871{
872 return sb1->set_uuid0 == sb2->set_uuid0 &&
873 sb1->set_uuid1 == sb2->set_uuid1 &&
874 sb1->set_uuid2 == sb2->set_uuid2 &&
875 sb1->set_uuid3 == sb2->set_uuid3;
876}
877
878static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
879{
880 int ret;
881 mdp_super_t *tmp1, *tmp2;
882
883 tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
884 tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
885
886 if (!tmp1 || !tmp2) {
887 ret = 0;
888 printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
889 goto abort;
890 }
891
892 *tmp1 = *sb1;
893 *tmp2 = *sb2;
894
895
896
897
898 tmp1->nr_disks = 0;
899 tmp2->nr_disks = 0;
900
901 ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
902abort:
903 kfree(tmp1);
904 kfree(tmp2);
905 return ret;
906}
907
908
909static u32 md_csum_fold(u32 csum)
910{
911 csum = (csum & 0xffff) + (csum >> 16);
912 return (csum & 0xffff) + (csum >> 16);
913}
914
915static unsigned int calc_sb_csum(mdp_super_t * sb)
916{
917 u64 newcsum = 0;
918 u32 *sb32 = (u32*)sb;
919 int i;
920 unsigned int disk_csum, csum;
921
922 disk_csum = sb->sb_csum;
923 sb->sb_csum = 0;
924
925 for (i = 0; i < MD_SB_BYTES/4 ; i++)
926 newcsum += sb32[i];
927 csum = (newcsum & 0xffffffff) + (newcsum>>32);
928
929
930#ifdef CONFIG_ALPHA
931
932
933
934
935
936
937
938
939 sb->sb_csum = md_csum_fold(disk_csum);
940#else
941 sb->sb_csum = disk_csum;
942#endif
943 return csum;
944}
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977struct super_type {
978 char *name;
979 struct module *owner;
980 int (*load_super)(struct md_rdev *rdev,
981 struct md_rdev *refdev,
982 int minor_version);
983 int (*validate_super)(struct mddev *mddev,
984 struct md_rdev *rdev);
985 void (*sync_super)(struct mddev *mddev,
986 struct md_rdev *rdev);
987 unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
988 sector_t num_sectors);
989 int (*allow_new_offset)(struct md_rdev *rdev,
990 unsigned long long new_offset);
991};
992
993
994
995
996
997
998
999
1000
1001int md_check_no_bitmap(struct mddev *mddev)
1002{
1003 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
1004 return 0;
1005 printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
1006 mdname(mddev), mddev->pers->name);
1007 return 1;
1008}
1009EXPORT_SYMBOL(md_check_no_bitmap);
1010
1011
1012
1013
1014static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
1015{
1016 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
1017 mdp_super_t *sb;
1018 int ret;
1019
1020
1021
1022
1023
1024
1025
1026 rdev->sb_start = calc_dev_sboffset(rdev);
1027
1028 ret = read_disk_sb(rdev, MD_SB_BYTES);
1029 if (ret) return ret;
1030
1031 ret = -EINVAL;
1032
1033 bdevname(rdev->bdev, b);
1034 sb = page_address(rdev->sb_page);
1035
1036 if (sb->md_magic != MD_SB_MAGIC) {
1037 printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
1038 b);
1039 goto abort;
1040 }
1041
1042 if (sb->major_version != 0 ||
1043 sb->minor_version < 90 ||
1044 sb->minor_version > 91) {
1045 printk(KERN_WARNING "Bad version number %d.%d on %s\n",
1046 sb->major_version, sb->minor_version,
1047 b);
1048 goto abort;
1049 }
1050
1051 if (sb->raid_disks <= 0)
1052 goto abort;
1053
1054 if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
1055 printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
1056 b);
1057 goto abort;
1058 }
1059
1060 rdev->preferred_minor = sb->md_minor;
1061 rdev->data_offset = 0;
1062 rdev->new_data_offset = 0;
1063 rdev->sb_size = MD_SB_BYTES;
1064 rdev->badblocks.shift = -1;
1065
1066 if (sb->level == LEVEL_MULTIPATH)
1067 rdev->desc_nr = -1;
1068 else
1069 rdev->desc_nr = sb->this_disk.number;
1070
1071 if (!refdev) {
1072 ret = 1;
1073 } else {
1074 __u64 ev1, ev2;
1075 mdp_super_t *refsb = page_address(refdev->sb_page);
1076 if (!uuid_equal(refsb, sb)) {
1077 printk(KERN_WARNING "md: %s has different UUID to %s\n",
1078 b, bdevname(refdev->bdev,b2));
1079 goto abort;
1080 }
1081 if (!sb_equal(refsb, sb)) {
1082 printk(KERN_WARNING "md: %s has same UUID"
1083 " but different superblock to %s\n",
1084 b, bdevname(refdev->bdev, b2));
1085 goto abort;
1086 }
1087 ev1 = md_event(sb);
1088 ev2 = md_event(refsb);
1089 if (ev1 > ev2)
1090 ret = 1;
1091 else
1092 ret = 0;
1093 }
1094 rdev->sectors = rdev->sb_start;
1095
1096
1097
1098
1099 if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
1100 rdev->sectors = (2ULL << 32) - 2;
1101
1102 if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
1103
1104 ret = -EINVAL;
1105
1106 abort:
1107 return ret;
1108}
1109
1110
1111
1112
1113static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1114{
1115 mdp_disk_t *desc;
1116 mdp_super_t *sb = page_address(rdev->sb_page);
1117 __u64 ev1 = md_event(sb);
1118
1119 rdev->raid_disk = -1;
1120 clear_bit(Faulty, &rdev->flags);
1121 clear_bit(In_sync, &rdev->flags);
1122 clear_bit(WriteMostly, &rdev->flags);
1123
1124 if (mddev->raid_disks == 0) {
1125 mddev->major_version = 0;
1126 mddev->minor_version = sb->minor_version;
1127 mddev->patch_version = sb->patch_version;
1128 mddev->external = 0;
1129 mddev->chunk_sectors = sb->chunk_size >> 9;
1130 mddev->ctime = sb->ctime;
1131 mddev->utime = sb->utime;
1132 mddev->level = sb->level;
1133 mddev->clevel[0] = 0;
1134 mddev->layout = sb->layout;
1135 mddev->raid_disks = sb->raid_disks;
1136 mddev->dev_sectors = ((sector_t)sb->size) * 2;
1137 mddev->events = ev1;
1138 mddev->bitmap_info.offset = 0;
1139 mddev->bitmap_info.space = 0;
1140
1141 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
1142 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
1143 mddev->reshape_backwards = 0;
1144
1145 if (mddev->minor_version >= 91) {
1146 mddev->reshape_position = sb->reshape_position;
1147 mddev->delta_disks = sb->delta_disks;
1148 mddev->new_level = sb->new_level;
1149 mddev->new_layout = sb->new_layout;
1150 mddev->new_chunk_sectors = sb->new_chunk >> 9;
1151 if (mddev->delta_disks < 0)
1152 mddev->reshape_backwards = 1;
1153 } else {
1154 mddev->reshape_position = MaxSector;
1155 mddev->delta_disks = 0;
1156 mddev->new_level = mddev->level;
1157 mddev->new_layout = mddev->layout;
1158 mddev->new_chunk_sectors = mddev->chunk_sectors;
1159 }
1160
1161 if (sb->state & (1<<MD_SB_CLEAN))
1162 mddev->recovery_cp = MaxSector;
1163 else {
1164 if (sb->events_hi == sb->cp_events_hi &&
1165 sb->events_lo == sb->cp_events_lo) {
1166 mddev->recovery_cp = sb->recovery_cp;
1167 } else
1168 mddev->recovery_cp = 0;
1169 }
1170
1171 memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
1172 memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
1173 memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
1174 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
1175
1176 mddev->max_disks = MD_SB_DISKS;
1177
1178 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
1179 mddev->bitmap_info.file == NULL) {
1180 mddev->bitmap_info.offset =
1181 mddev->bitmap_info.default_offset;
1182 mddev->bitmap_info.space =
1183 mddev->bitmap_info.default_space;
1184 }
1185
1186 } else if (mddev->pers == NULL) {
1187
1188
1189 ++ev1;
1190 if (sb->disks[rdev->desc_nr].state & (
1191 (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
1192 if (ev1 < mddev->events)
1193 return -EINVAL;
1194 } else if (mddev->bitmap) {
1195
1196
1197
1198 if (ev1 < mddev->bitmap->events_cleared)
1199 return 0;
1200 } else {
1201 if (ev1 < mddev->events)
1202
1203 return 0;
1204 }
1205
1206 if (mddev->level != LEVEL_MULTIPATH) {
1207 desc = sb->disks + rdev->desc_nr;
1208
1209 if (desc->state & (1<<MD_DISK_FAULTY))
1210 set_bit(Faulty, &rdev->flags);
1211 else if (desc->state & (1<<MD_DISK_SYNC)
1212) {
1213 set_bit(In_sync, &rdev->flags);
1214 rdev->raid_disk = desc->raid_disk;
1215 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1216
1217
1218
1219 if (mddev->minor_version >= 91) {
1220 rdev->recovery_offset = 0;
1221 rdev->raid_disk = desc->raid_disk;
1222 }
1223 }
1224 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
1225 set_bit(WriteMostly, &rdev->flags);
1226 } else
1227 set_bit(In_sync, &rdev->flags);
1228 return 0;
1229}
1230
1231
1232
1233
1234static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
1235{
1236 mdp_super_t *sb;
1237 struct md_rdev *rdev2;
1238 int next_spare = mddev->raid_disks;
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251 int i;
1252 int active=0, working=0,failed=0,spare=0,nr_disks=0;
1253
1254 rdev->sb_size = MD_SB_BYTES;
1255
1256 sb = page_address(rdev->sb_page);
1257
1258 memset(sb, 0, sizeof(*sb));
1259
1260 sb->md_magic = MD_SB_MAGIC;
1261 sb->major_version = mddev->major_version;
1262 sb->patch_version = mddev->patch_version;
1263 sb->gvalid_words = 0;
1264 memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
1265 memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
1266 memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
1267 memcpy(&sb->set_uuid3, mddev->uuid+12,4);
1268
1269 sb->ctime = mddev->ctime;
1270 sb->level = mddev->level;
1271 sb->size = mddev->dev_sectors / 2;
1272 sb->raid_disks = mddev->raid_disks;
1273 sb->md_minor = mddev->md_minor;
1274 sb->not_persistent = 0;
1275 sb->utime = mddev->utime;
1276 sb->state = 0;
1277 sb->events_hi = (mddev->events>>32);
1278 sb->events_lo = (u32)mddev->events;
1279
1280 if (mddev->reshape_position == MaxSector)
1281 sb->minor_version = 90;
1282 else {
1283 sb->minor_version = 91;
1284 sb->reshape_position = mddev->reshape_position;
1285 sb->new_level = mddev->new_level;
1286 sb->delta_disks = mddev->delta_disks;
1287 sb->new_layout = mddev->new_layout;
1288 sb->new_chunk = mddev->new_chunk_sectors << 9;
1289 }
1290 mddev->minor_version = sb->minor_version;
1291 if (mddev->in_sync)
1292 {
1293 sb->recovery_cp = mddev->recovery_cp;
1294 sb->cp_events_hi = (mddev->events>>32);
1295 sb->cp_events_lo = (u32)mddev->events;
1296 if (mddev->recovery_cp == MaxSector)
1297 sb->state = (1<< MD_SB_CLEAN);
1298 } else
1299 sb->recovery_cp = 0;
1300
1301 sb->layout = mddev->layout;
1302 sb->chunk_size = mddev->chunk_sectors << 9;
1303
1304 if (mddev->bitmap && mddev->bitmap_info.file == NULL)
1305 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
1306
1307 sb->disks[0].state = (1<<MD_DISK_REMOVED);
1308 rdev_for_each(rdev2, mddev) {
1309 mdp_disk_t *d;
1310 int desc_nr;
1311 int is_active = test_bit(In_sync, &rdev2->flags);
1312
1313 if (rdev2->raid_disk >= 0 &&
1314 sb->minor_version >= 91)
1315
1316
1317
1318
1319 is_active = 1;
1320 if (rdev2->raid_disk < 0 ||
1321 test_bit(Faulty, &rdev2->flags))
1322 is_active = 0;
1323 if (is_active)
1324 desc_nr = rdev2->raid_disk;
1325 else
1326 desc_nr = next_spare++;
1327 rdev2->desc_nr = desc_nr;
1328 d = &sb->disks[rdev2->desc_nr];
1329 nr_disks++;
1330 d->number = rdev2->desc_nr;
1331 d->major = MAJOR(rdev2->bdev->bd_dev);
1332 d->minor = MINOR(rdev2->bdev->bd_dev);
1333 if (is_active)
1334 d->raid_disk = rdev2->raid_disk;
1335 else
1336 d->raid_disk = rdev2->desc_nr;
1337 if (test_bit(Faulty, &rdev2->flags))
1338 d->state = (1<<MD_DISK_FAULTY);
1339 else if (is_active) {
1340 d->state = (1<<MD_DISK_ACTIVE);
1341 if (test_bit(In_sync, &rdev2->flags))
1342 d->state |= (1<<MD_DISK_SYNC);
1343 active++;
1344 working++;
1345 } else {
1346 d->state = 0;
1347 spare++;
1348 working++;
1349 }
1350 if (test_bit(WriteMostly, &rdev2->flags))
1351 d->state |= (1<<MD_DISK_WRITEMOSTLY);
1352 }
1353
1354 for (i=0 ; i < mddev->raid_disks ; i++) {
1355 mdp_disk_t *d = &sb->disks[i];
1356 if (d->state == 0 && d->number == 0) {
1357 d->number = i;
1358 d->raid_disk = i;
1359 d->state = (1<<MD_DISK_REMOVED);
1360 d->state |= (1<<MD_DISK_FAULTY);
1361 failed++;
1362 }
1363 }
1364 sb->nr_disks = nr_disks;
1365 sb->active_disks = active;
1366 sb->working_disks = working;
1367 sb->failed_disks = failed;
1368 sb->spare_disks = spare;
1369
1370 sb->this_disk = sb->disks[rdev->desc_nr];
1371 sb->sb_csum = calc_sb_csum(sb);
1372}
1373
1374
1375
1376
1377static unsigned long long
1378super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1379{
1380 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1381 return 0;
1382 if (rdev->mddev->bitmap_info.offset)
1383 return 0;
1384 rdev->sb_start = calc_dev_sboffset(rdev);
1385 if (!num_sectors || num_sectors > rdev->sb_start)
1386 num_sectors = rdev->sb_start;
1387
1388
1389
1390 if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
1391 num_sectors = (2ULL << 32) - 2;
1392 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1393 rdev->sb_page);
1394 md_super_wait(rdev->mddev);
1395 return num_sectors;
1396}
1397
/*
 * 0.90 arrays accept only a new data offset of zero.
 * Returns 1 when @new_offset is 0, otherwise 0.  @rdev is not used.
 */
static int
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
{
	if (new_offset != 0)
		return 0;

	return 1;
}
1404
1405
1406
1407
1408
1409static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
1410{
1411 __le32 disk_csum;
1412 u32 csum;
1413 unsigned long long newcsum;
1414 int size = 256 + le32_to_cpu(sb->max_dev)*2;
1415 __le32 *isuper = (__le32*)sb;
1416
1417 disk_csum = sb->sb_csum;
1418 sb->sb_csum = 0;
1419 newcsum = 0;
1420 for (; size >= 4; size -= 4)
1421 newcsum += le32_to_cpu(*isuper++);
1422
1423 if (size == 2)
1424 newcsum += le16_to_cpu(*(__le16*) isuper);
1425
1426 csum = (newcsum & 0xffffffff) + (newcsum >> 32);
1427 sb->sb_csum = disk_csum;
1428 return cpu_to_le32(csum);
1429}
1430
/* Forward declaration: super_1_load() uses this to record on-disk bad blocks. */
static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
			    int acknowledged);
/*
 * Load and sanity-check a version-1 superblock for @rdev.
 *
 * @rdev:          device to load the superblock from
 * @refdev:        optional reference device to compare against, or NULL
 * @minor_version: selects where the superblock is expected (0, 1 or 2)
 *
 * Returns a negative errno on failure (-EINVAL for any validation
 * failure, -ENOMEM / -EIO for the bad-block page).  On success returns
 * 1 when there is no @refdev or this superblock's event count is greater
 * than @refdev's, and 0 otherwise.
 */
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
	struct mdp_superblock_1 *sb;
	int ret;
	sector_t sb_start;
	sector_t sectors;
	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
	int bmask;

	/*
	 * Work out where the superblock lives for this minor version:
	 *   0: device size in sectors, minus 16, rounded down to a
	 *      multiple of 8 sectors
	 *   1: sector 0
	 *   2: sector 8
	 */
	switch(minor_version) {
	case 0:
		sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
		sb_start -= 8*2;
		sb_start &= ~(sector_t)(4*2-1);
		break;
	case 1:
		sb_start = 0;
		break;
	case 2:
		sb_start = 8;
		break;
	default:
		return -EINVAL;
	}
	rdev->sb_start = sb_start;

	/*
	 * Read 4096 bytes of superblock.
	 * NOTE(review): presumably into rdev->sb_page, which is what is
	 * inspected below - confirm against read_disk_sb().
	 */
	ret = read_disk_sb(rdev, 4096);
	if (ret) return ret;


	sb = page_address(rdev->sb_page);

	/*
	 * Reject anything that is not a version-1 superblock located at the
	 * offset it claims, that has more device slots than fit in 4096
	 * bytes (256-byte header plus 2 bytes per slot), or that uses
	 * feature bits outside MD_FEATURE_ALL.
	 */
	if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
	    sb->major_version != cpu_to_le32(1) ||
	    le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
	    le64_to_cpu(sb->super_offset) != rdev->sb_start ||
	    (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
		return -EINVAL;

	if (calc_sb_1_csum(sb) != sb->sb_csum) {
		printk("md: invalid superblock checksum on %s\n",
			bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	if (le64_to_cpu(sb->data_size) < 10) {
		printk("md: data_size too small on %s\n",
		       bdevname(rdev->bdev,b));
		return -EINVAL;
	}
	/*
	 * All padding must be zero: pad3[0] is checked directly and the
	 * memcmp of pad3 against itself shifted by one element checks that
	 * every later element equals its predecessor.
	 */
	if (sb->pad0 ||
	    sb->pad3[0] ||
	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
		/* non-zero padding is rejected */
		return -EINVAL;

	rdev->preferred_minor = 0xffff;
	rdev->data_offset = le64_to_cpu(sb->data_offset);
	rdev->new_data_offset = rdev->data_offset;
	/*
	 * new_offset is a signed 32-bit delta; it is applied only when both
	 * the RESHAPE_ACTIVE and NEW_OFFSET feature bits are set.
	 */
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET))
		rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset);
	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

	/*
	 * Superblock size: 256 bytes plus 2 bytes per device slot, rounded
	 * up to a multiple of the device's logical block size.
	 */
	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
	bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
	if (rdev->sb_size & bmask)
		rdev->sb_size = (rdev->sb_size | bmask) + 1;

	/*
	 * For minor versions 1 and 2 (superblock at sector 0 or 8) neither
	 * the current nor the new data offset may fall inside the superblock.
	 */
	if (minor_version
	    && rdev->data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;
	if (minor_version
	    && rdev->new_data_offset < sb_start + (rdev->sb_size/512))
		return -EINVAL;

	/* multipath arrays do not use the recorded device number */
	if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
		rdev->desc_nr = -1;
	else
		rdev->desc_nr = le32_to_cpu(sb->dev_number);

	/* make sure a page for the bad-block log exists */
	if (!rdev->bb_page) {
		rdev->bb_page = alloc_page(GFP_KERNEL);
		if (!rdev->bb_page)
			return -ENOMEM;
	}
	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
	    rdev->badblocks.count == 0) {
		/*
		 * The superblock advertises a bad-block log and none has
		 * been loaded yet: read it and populate rdev->badblocks.
		 */
		s32 offset;
		sector_t bb_sector;
		u64 *bbp;
		int i;
		int sectors = le16_to_cpu(sb->bblog_size);
		/* the log must fit in the single bb_page */
		if (sectors > (PAGE_SIZE / 512))
			return -EINVAL;
		offset = le32_to_cpu(sb->bblog_offset);
		if (offset == 0)
			return -EINVAL;
		bb_sector = (long long)offset;
		if (!sync_page_io(rdev, bb_sector, sectors << 9,
				  rdev->bb_page, READ, true))
			return -EIO;
		bbp = (u64 *)page_address(rdev->bb_page);
		rdev->badblocks.shift = sb->bblog_shift;
		/*
		 * Each entry is a little-endian 64-bit word: the low 10 bits
		 * are a length and the remaining bits a start, both scaled
		 * by bblog_shift.  An all-ones entry ends the list.
		 */
		for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
			u64 bb = le64_to_cpu(*bbp);
			int count = bb & (0x3ff);
			u64 sector = bb >> 10;
			sector <<= sb->bblog_shift;
			count <<= sb->bblog_shift;
			if (bb + 1 == 0)
				break;
			/* a zero return from md_set_badblocks is treated as failure */
			if (md_set_badblocks(&rdev->badblocks,
					     sector, count, 1) == 0)
				return -EINVAL;
		}
	} else if (sb->bblog_offset != 0)
		/*
		 * Nothing loaded here, but a log location is recorded.
		 * NOTE(review): presumably shift 0 marks the bad-block log
		 * as usable - confirm against the badblocks code.
		 */
		rdev->badblocks.shift = 0;

	if (!refdev) {
		ret = 1;
	} else {
		__u64 ev1, ev2;
		struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);

		/* both devices must describe the same array geometry */
		if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
		    sb->level != refsb->level ||
		    sb->layout != refsb->layout ||
		    sb->chunksize != refsb->chunksize) {
			printk(KERN_WARNING "md: %s has strangely different"
				" superblock to %s\n",
				bdevname(rdev->bdev,b),
				bdevname(refdev->bdev,b2));
			return -EINVAL;
		}
		ev1 = le64_to_cpu(sb->events);
		ev2 = le64_to_cpu(refsb->events);

		/* report 1 only when this superblock is strictly newer */
		if (ev1 > ev2)
			ret = 1;
		else
			ret = 0;
	}
	/*
	 * Space available for data: from data_offset to the end of the
	 * device for minor versions 1/2, or everything in front of the
	 * superblock for minor version 0.
	 */
	if (minor_version) {
		sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
		sectors -= rdev->data_offset;
	} else
		sectors = rdev->sb_start;
	/* the recorded data size must fit in that space */
	if (sectors < le64_to_cpu(sb->data_size))
		return -EINVAL;
	rdev->sectors = le64_to_cpu(sb->data_size);
	return ret;
}
1599
/*
 * Apply the version-1 superblock held in rdev->sb_page to @mddev/@rdev.
 *
 * If the array has no configuration yet (raid_disks == 0) the array-wide
 * fields of @mddev are initialised from this superblock.  Otherwise the
 * superblock's event count is checked against the array's.  Finally the
 * device's role and flags are derived from the superblock.
 *
 * Returns 0 on success (including the cases where the device is left
 * without a role because it is out of date), or -EINVAL when an active
 * device of a not-yet-running array is too old.
 */
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);

	/* start from "no role, no flags"; the code below sets what applies */
	rdev->raid_disk = -1;
	clear_bit(Faulty, &rdev->flags);
	clear_bit(In_sync, &rdev->flags);
	clear_bit(WriteMostly, &rdev->flags);

	if (mddev->raid_disks == 0) {
		/* first device seen: take the array configuration from it */
		mddev->major_version = 1;
		mddev->patch_version = 0;
		mddev->external = 0;
		mddev->chunk_sectors = le32_to_cpu(sb->chunksize);
		/* only the low 32 bits of ctime/utime are kept */
		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
		mddev->level = le32_to_cpu(sb->level);
		mddev->clevel[0] = 0;
		mddev->layout = le32_to_cpu(sb->layout);
		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
		mddev->dev_sectors = le64_to_cpu(sb->size);
		mddev->events = ev1;
		mddev->bitmap_info.offset = 0;
		mddev->bitmap_info.space = 0;
		/*
		 * Default bitmap placement: 2 sectors (1024 bytes) after the
		 * superblock, with 6 sectors ((4096-1024) bytes) of space.
		 */
		mddev->bitmap_info.default_offset = 1024 >> 9;
		mddev->bitmap_info.default_space = (4096-1024) >> 9;
		mddev->reshape_backwards = 0;

		mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
		memcpy(mddev->uuid, sb->set_uuid, 16);

		/* as many 2-byte role slots as fit in 4096 bytes after the 256-byte header */
		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
		    mddev->bitmap_info.file == NULL) {
			/* internal bitmap: offset is signed, relative to the superblock */
			mddev->bitmap_info.offset =
				(__s32)le32_to_cpu(sb->bitmap_offset);
			/*
			 * Space available to the bitmap: left at 0 for minor
			 * versions > 0; for minor version 0 it is either
			 * what remains of 8 sectors after a positive offset,
			 * or the distance covered by a negative offset.
			 */
			if (mddev->minor_version > 0)
				mddev->bitmap_info.space = 0;
			else if (mddev->bitmap_info.offset > 0)
				mddev->bitmap_info.space =
					8 - mddev->bitmap_info.offset;
			else
				mddev->bitmap_info.space =
					-mddev->bitmap_info.offset;
		}

		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
			/* a reshape was in progress: restore its parameters */
			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
			mddev->new_level = le32_to_cpu(sb->new_level);
			mddev->new_layout = le32_to_cpu(sb->new_layout);
			mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
			/*
			 * Shrinking always runs backwards; with an unchanged
			 * disk count the direction comes from the feature bit.
			 */
			if (mddev->delta_disks < 0 ||
			    (mddev->delta_disks == 0 &&
			     (le32_to_cpu(sb->feature_map)
			      & MD_FEATURE_RESHAPE_BACKWARDS)))
				mddev->reshape_backwards = 1;
		} else {
			/* no reshape: "new" geometry equals current geometry */
			mddev->reshape_position = MaxSector;
			mddev->delta_disks = 0;
			mddev->new_level = mddev->level;
			mddev->new_layout = mddev->layout;
			mddev->new_chunk_sectors = mddev->chunk_sectors;
		}

	} else if (mddev->pers == NULL) {
		/*
		 * Array is configured but not running.  A device whose slot
		 * holds a real role (< 0xfffe) is rejected only when it is
		 * more than one event behind the array (hence the ++ev1).
		 */
		++ev1;
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
			if (ev1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/*
		 * Running array with a bitmap: a device older than the
		 * bitmap's events_cleared is accepted but left without a
		 * role (raid_disk stays -1, In_sync stays clear).
		 */
		if (ev1 < mddev->bitmap->events_cleared)
			return 0;
	} else {
		/* running array, no bitmap: any out-of-date device gets no role */
		if (ev1 < mddev->events)
			/* just a hot-add of a new device, leave raid_disk at -1 */
			return 0;
	}
	if (mddev->level != LEVEL_MULTIPATH) {
		int role;
		/* a device number outside the role table means "no role" */
		if (rdev->desc_nr < 0 ||
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = 0xffff;
			rdev->desc_nr = -1;
		} else
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		switch(role) {
		case 0xffff: /* no role: leave raid_disk at -1 and flags clear */
			break;
		case 0xfffe: /* recorded as faulty */
			set_bit(Faulty, &rdev->flags);
			break;
		default:
			/*
			 * Any other value is the raid_disk slot.  With a
			 * recorded recovery offset the device is not marked
			 * In_sync; otherwise it is.
			 */
			if ((le32_to_cpu(sb->feature_map) &
			     MD_FEATURE_RECOVERY_OFFSET))
				rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
			else
				set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = role;
			break;
		}
		if (sb->devflags & WriteMostly1)
			set_bit(WriteMostly, &rdev->flags);
		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
			set_bit(Replacement, &rdev->flags);
	} else /* MULTIPATH devices are always marked In_sync */
		set_bit(In_sync, &rdev->flags);

	return 0;
}
1727
/*
 * Refresh the in-memory version-1 superblock of @rdev (rdev->sb_page)
 * from the current state of @mddev and @rdev, rebuild the device role
 * table, and recompute the checksum.  This function does not call
 * md_super_write() itself.
 */
static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb;
	struct md_rdev *rdev2;
	int max_dev, i;
	/* update the fields that can change; the rest of the page is kept */

	sb = page_address(rdev->sb_page);

	/* feature bits are rebuilt from scratch below */
	sb->feature_map = 0;
	sb->pad0 = 0;
	sb->recovery_offset = cpu_to_le64(0);
	memset(sb->pad3, 0, sizeof(sb->pad3));

	sb->utime = cpu_to_le64((__u64)mddev->utime);
	sb->events = cpu_to_le64(mddev->events);
	/* the resync checkpoint is recorded only while the array is in_sync */
	if (mddev->in_sync)
		sb->resync_offset = cpu_to_le64(mddev->recovery_cp);
	else
		sb->resync_offset = cpu_to_le64(0);

	sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));

	sb->raid_disks = cpu_to_le32(mddev->raid_disks);
	sb->size = cpu_to_le64(mddev->dev_sectors);
	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
	sb->level = cpu_to_le32(mddev->level);
	sb->layout = cpu_to_le32(mddev->layout);

	if (test_bit(WriteMostly, &rdev->flags))
		sb->devflags |= WriteMostly1;
	else
		sb->devflags &= ~WriteMostly1;
	sb->data_offset = cpu_to_le64(rdev->data_offset);
	sb->data_size = cpu_to_le64(rdev->sectors);

	/* an internal bitmap (no backing file) records its offset here */
	if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
	}

	/* a member that is not yet In_sync records how far recovery got */
	if (rdev->raid_disk >= 0 &&
	    !test_bit(In_sync, &rdev->flags)) {
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
		sb->recovery_offset =
			cpu_to_le64(rdev->recovery_offset);
	}
	if (test_bit(Replacement, &rdev->flags))
		sb->feature_map |=
			cpu_to_le32(MD_FEATURE_REPLACEMENT);

	if (mddev->reshape_position != MaxSector) {
		/* a reshape is in progress: record its target geometry */
		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
		sb->new_layout = cpu_to_le32(mddev->new_layout);
		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
		sb->new_level = cpu_to_le32(mddev->new_level);
		sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
		/* direction needs an explicit flag only when delta_disks is 0 */
		if (mddev->delta_disks == 0 &&
		    mddev->reshape_backwards)
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
		/* the data offset change is stored as a 32-bit delta */
		if (rdev->new_data_offset != rdev->data_offset) {
			sb->feature_map
				|= cpu_to_le32(MD_FEATURE_NEW_OFFSET);
			sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset
							     - rdev->data_offset));
		}
	}

	if (rdev->badblocks.count == 0)
		/* nothing to record */ ;
	else if (sb->bblog_offset == 0)
		/* bad blocks exist but the superblock has no log location: fail the device */
		md_error(mddev, rdev);
	else {
		struct badblocks *bb = &rdev->badblocks;
		u64 *bbp = (u64 *)page_address(rdev->bb_page);
		u64 *p = bb->page;
		sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
		if (bb->changed) {
			unsigned seq;

			/*
			 * Rebuild the on-disk image of the bad-block list in
			 * bb_page; redo it if bb->lock reports a concurrent
			 * update (read_seqretry).
			 */
retry:
			seq = read_seqbegin(&bb->lock);

			/* unused entries are all-ones */
			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = p[i];
				/* on-disk form: offset in the upper bits, length in the low 10 */
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
				goto retry;

			/* location and size used later when the log page is written */
			bb->sector = (rdev->sb_start +
				      (int)le32_to_cpu(sb->bblog_offset));
			bb->size = le16_to_cpu(sb->bblog_size);
		}
	}

	/* highest device number in use, plus one */
	max_dev = 0;
	rdev_for_each(rdev2, mddev)
		if (rdev2->desc_nr+1 > max_dev)
			max_dev = rdev2->desc_nr+1;

	if (max_dev > le32_to_cpu(sb->max_dev)) {
		/* grow the role table and recompute the block-aligned sb size */
		int bmask;
		sb->max_dev = cpu_to_le32(max_dev);
		rdev->sb_size = max_dev * 2 + 256;
		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
		if (rdev->sb_size & bmask)
			rdev->sb_size = (rdev->sb_size | bmask) + 1;
	} else
		max_dev = le32_to_cpu(sb->max_dev);

	/* default every slot to 0xfffe, then fill in the known devices */
	for (i=0; i<max_dev;i++)
		sb->dev_roles[i] = cpu_to_le16(0xfffe);
	
	rdev_for_each(rdev2, mddev) {
		i = rdev2->desc_nr;
		if (test_bit(Faulty, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(0xfffe);
		else if (test_bit(In_sync, &rdev2->flags))
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else if (rdev2->raid_disk >= 0)
			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
		else
			/* no slot assigned */
			sb->dev_roles[i] = cpu_to_le16(0xffff);
	}

	sb->sb_csum = calc_sb_1_csum(sb);
}
1865
1866static unsigned long long
1867super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
1868{
1869 struct mdp_superblock_1 *sb;
1870 sector_t max_sectors;
1871 if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
1872 return 0;
1873 if (rdev->data_offset != rdev->new_data_offset)
1874 return 0;
1875 if (rdev->sb_start < rdev->data_offset) {
1876
1877 max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
1878 max_sectors -= rdev->data_offset;
1879 if (!num_sectors || num_sectors > max_sectors)
1880 num_sectors = max_sectors;
1881 } else if (rdev->mddev->bitmap_info.offset) {
1882
1883 return 0;
1884 } else {
1885
1886 sector_t sb_start;
1887 sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
1888 sb_start &= ~(sector_t)(4*2 - 1);
1889 max_sectors = rdev->sectors + sb_start - rdev->sb_start;
1890 if (!num_sectors || num_sectors > max_sectors)
1891 num_sectors = max_sectors;
1892 rdev->sb_start = sb_start;
1893 }
1894 sb = page_address(rdev->sb_page);
1895 sb->data_size = cpu_to_le64(num_sectors);
1896 sb->super_offset = rdev->sb_start;
1897 sb->sb_csum = calc_sb_1_csum(sb);
1898 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
1899 rdev->sb_page);
1900 md_super_wait(rdev->mddev);
1901 return num_sectors;
1902
1903}
1904
1905static int
1906super_1_allow_new_offset(struct md_rdev *rdev,
1907 unsigned long long new_offset)
1908{
1909
1910 struct bitmap *bitmap;
1911 if (new_offset >= rdev->data_offset)
1912 return 1;
1913
1914
1915
1916 if (rdev->mddev->minor_version == 0)
1917 return 1;
1918
1919
1920
1921
1922
1923
1924
1925 if (rdev->sb_start + (32+4)*2 > new_offset)
1926 return 0;
1927 bitmap = rdev->mddev->bitmap;
1928 if (bitmap && !rdev->mddev->bitmap_info.file &&
1929 rdev->sb_start + rdev->mddev->bitmap_info.offset +
1930 bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
1931 return 0;
1932 if (rdev->badblocks.sector + rdev->badblocks.size > new_offset)
1933 return 0;
1934
1935 return 1;
1936}
1937
1938static struct super_type super_types[] = {
1939 [0] = {
1940 .name = "0.90.0",
1941 .owner = THIS_MODULE,
1942 .load_super = super_90_load,
1943 .validate_super = super_90_validate,
1944 .sync_super = super_90_sync,
1945 .rdev_size_change = super_90_rdev_size_change,
1946 .allow_new_offset = super_90_allow_new_offset,
1947 },
1948 [1] = {
1949 .name = "md-1",
1950 .owner = THIS_MODULE,
1951 .load_super = super_1_load,
1952 .validate_super = super_1_validate,
1953 .sync_super = super_1_sync,
1954 .rdev_size_change = super_1_rdev_size_change,
1955 .allow_new_offset = super_1_allow_new_offset,
1956 },
1957};
1958
1959static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
1960{
1961 if (mddev->sync_super) {
1962 mddev->sync_super(mddev, rdev);
1963 return;
1964 }
1965
1966 BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types));
1967
1968 super_types[mddev->major_version].sync_super(mddev, rdev);
1969}
1970
1971static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
1972{
1973 struct md_rdev *rdev, *rdev2;
1974
1975 rcu_read_lock();
1976 rdev_for_each_rcu(rdev, mddev1)
1977 rdev_for_each_rcu(rdev2, mddev2)
1978 if (rdev->bdev->bd_contains ==
1979 rdev2->bdev->bd_contains) {
1980 rcu_read_unlock();
1981 return 1;
1982 }
1983 rcu_read_unlock();
1984 return 0;
1985}
1986
/*
 * NOTE(review): file-wide list head; its users are outside this part of
 * the file.  Presumably holds devices waiting to be assembled into
 * arrays - confirm against the code that adds to it.
 */
static LIST_HEAD(pending_raid_disks);
1988
1989
1990
1991
1992
1993
1994
1995
1996int md_integrity_register(struct mddev *mddev)
1997{
1998 struct md_rdev *rdev, *reference = NULL;
1999
2000 if (list_empty(&mddev->disks))
2001 return 0;
2002 if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
2003 return 0;
2004 rdev_for_each(rdev, mddev) {
2005
2006 if (test_bit(Faulty, &rdev->flags))
2007 continue;
2008 if (rdev->raid_disk < 0)
2009 continue;
2010 if (!reference) {
2011
2012 reference = rdev;
2013 continue;
2014 }
2015
2016 if (blk_integrity_compare(reference->bdev->bd_disk,
2017 rdev->bdev->bd_disk) < 0)
2018 return -EINVAL;
2019 }
2020 if (!reference || !bdev_get_integrity(reference->bdev))
2021 return 0;
2022
2023
2024
2025
2026 if (blk_integrity_register(mddev->gendisk,
2027 bdev_get_integrity(reference->bdev)) != 0) {
2028 printk(KERN_ERR "md: failed to register integrity for %s\n",
2029 mdname(mddev));
2030 return -EINVAL;
2031 }
2032 printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
2033 if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
2034 printk(KERN_ERR "md: failed to create integrity pool for %s\n",
2035 mdname(mddev));
2036 return -EINVAL;
2037 }
2038 return 0;
2039}
2040EXPORT_SYMBOL(md_integrity_register);
2041
2042
2043void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
2044{
2045 struct blk_integrity *bi_rdev;
2046 struct blk_integrity *bi_mddev;
2047
2048 if (!mddev->gendisk)
2049 return;
2050
2051 bi_rdev = bdev_get_integrity(rdev->bdev);
2052 bi_mddev = blk_get_integrity(mddev->gendisk);
2053
2054 if (!bi_mddev)
2055 return;
2056 if (rdev->raid_disk < 0)
2057 return;
2058 if (bi_rdev && blk_integrity_compare(mddev->gendisk,
2059 rdev->bdev->bd_disk) >= 0)
2060 return;
2061 printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
2062 blk_integrity_unregister(mddev->gendisk);
2063}
2064EXPORT_SYMBOL(md_integrity_add_rdev);
2065
2066static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
2067{
2068 char b[BDEVNAME_SIZE];
2069 struct kobject *ko;
2070 char *s;
2071 int err;
2072
2073 if (rdev->mddev) {
2074 MD_BUG();
2075 return -EINVAL;
2076 }
2077
2078
2079 if (find_rdev(mddev, rdev->bdev->bd_dev))
2080 return -EEXIST;
2081
2082
2083 if (rdev->sectors && (mddev->dev_sectors == 0 ||
2084 rdev->sectors < mddev->dev_sectors)) {
2085 if (mddev->pers) {
2086
2087
2088
2089
2090 if (mddev->level > 0)
2091 return -ENOSPC;
2092 } else
2093 mddev->dev_sectors = rdev->sectors;
2094 }
2095
2096
2097
2098
2099
2100 if (rdev->desc_nr < 0) {
2101 int choice = 0;
2102 if (mddev->pers) choice = mddev->raid_disks;
2103 while (find_rdev_nr(mddev, choice))
2104 choice++;
2105 rdev->desc_nr = choice;
2106 } else {
2107 if (find_rdev_nr(mddev, rdev->desc_nr))
2108 return -EBUSY;
2109 }
2110 if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
2111 printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
2112 mdname(mddev), mddev->max_disks);
2113 return -EBUSY;
2114 }
2115 bdevname(rdev->bdev,b);
2116 while ( (s=strchr(b, '/')) != NULL)
2117 *s = '!';
2118
2119 rdev->mddev = mddev;
2120 printk(KERN_INFO "md: bind<%s>\n", b);
2121
2122 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
2123 goto fail;
2124
2125 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
2126 if (sysfs_create_link(&rdev->kobj, ko, "block"))
2127 ;
2128 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
2129
2130 list_add_rcu(&rdev->same_set, &mddev->disks);
2131 bd_link_disk_holder(rdev->bdev, mddev->gendisk);
2132
2133
2134 mddev->recovery_disabled++;
2135
2136 return 0;
2137
2138 fail:
2139 printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
2140 b, mdname(mddev));
2141 return err;
2142}
2143
2144static void md_delayed_delete(struct work_struct *ws)
2145{
2146 struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
2147 kobject_del(&rdev->kobj);
2148 kobject_put(&rdev->kobj);
2149}
2150
2151static void unbind_rdev_from_array(struct md_rdev * rdev)
2152{
2153 char b[BDEVNAME_SIZE];
2154 if (!rdev->mddev) {
2155 MD_BUG();
2156 return;
2157 }
2158 bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
2159 list_del_rcu(&rdev->same_set);
2160 printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
2161 rdev->mddev = NULL;
2162 sysfs_remove_link(&rdev->kobj, "block");
2163 sysfs_put(rdev->sysfs_state);
2164 rdev->sysfs_state = NULL;
2165 rdev->badblocks.count = 0;
2166
2167
2168
2169
2170 synchronize_rcu();
2171 INIT_WORK(&rdev->del_work, md_delayed_delete);
2172 kobject_get(&rdev->kobj);
2173 queue_work(md_misc_wq, &rdev->del_work);
2174}
2175
2176
2177
2178
2179
2180
2181static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
2182{
2183 int err = 0;
2184 struct block_device *bdev;
2185 char b[BDEVNAME_SIZE];
2186
2187 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2188 shared ? (struct md_rdev *)lock_rdev : rdev);
2189 if (IS_ERR(bdev)) {
2190 printk(KERN_ERR "md: could not open %s.\n",
2191 __bdevname(dev, b));
2192 return PTR_ERR(bdev);
2193 }
2194 rdev->bdev = bdev;
2195 return err;
2196}
2197
2198static void unlock_rdev(struct md_rdev *rdev)
2199{
2200 struct block_device *bdev = rdev->bdev;
2201 rdev->bdev = NULL;
2202 if (!bdev)
2203 MD_BUG();
2204 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2205}
2206
2207void md_autodetect_dev(dev_t dev);
2208
2209static void export_rdev(struct md_rdev * rdev)
2210{
2211 char b[BDEVNAME_SIZE];
2212 printk(KERN_INFO "md: export_rdev(%s)\n",
2213 bdevname(rdev->bdev,b));
2214 if (rdev->mddev)
2215 MD_BUG();
2216 md_rdev_clear(rdev);
2217#ifndef MODULE
2218 if (test_bit(AutoDetected, &rdev->flags))
2219 md_autodetect_dev(rdev->bdev->bd_dev);
2220#endif
2221 unlock_rdev(rdev);
2222 kobject_put(&rdev->kobj);
2223}
2224
/*
 * Remove @rdev from its array for good: first detach it from the array,
 * then release the device itself.
 */
static void kick_rdev_from_array(struct md_rdev * rdev)
{
	/* step 1: unlink from the array */
	unbind_rdev_from_array(rdev);

	/* step 2: let go of the underlying block device */
	export_rdev(rdev);
}
2230
2231static void export_array(struct mddev *mddev)
2232{
2233 struct md_rdev *rdev, *tmp;
2234
2235 rdev_for_each_safe(rdev, tmp, mddev) {
2236 if (!rdev->mddev) {
2237 MD_BUG();
2238 continue;
2239 }
2240 kick_rdev_from_array(rdev);
2241 }
2242 if (!list_empty(&mddev->disks))
2243 MD_BUG();
2244 mddev->raid_disks = 0;
2245 mddev->major_version = 0;
2246}
2247
2248static void print_desc(mdp_disk_t *desc)
2249{
2250 printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
2251 desc->major,desc->minor,desc->raid_disk,desc->state);
2252}
2253
2254static void print_sb_90(mdp_super_t *sb)
2255{
2256 int i;
2257
2258 printk(KERN_INFO
2259 "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
2260 sb->major_version, sb->minor_version, sb->patch_version,
2261 sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
2262 sb->ctime);
2263 printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
2264 sb->level, sb->size, sb->nr_disks, sb->raid_disks,
2265 sb->md_minor, sb->layout, sb->chunk_size);
2266 printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d"
2267 " FD:%d SD:%d CSUM:%08x E:%08lx\n",
2268 sb->utime, sb->state, sb->active_disks, sb->working_disks,
2269 sb->failed_disks, sb->spare_disks,
2270 sb->sb_csum, (unsigned long)sb->events_lo);
2271
2272 printk(KERN_INFO);
2273 for (i = 0; i < MD_SB_DISKS; i++) {
2274 mdp_disk_t *desc;
2275
2276 desc = sb->disks + i;
2277 if (desc->number || desc->major || desc->minor ||
2278 desc->raid_disk || (desc->state && (desc->state != 4))) {
2279 printk(" D %2d: ", i);
2280 print_desc(desc);
2281 }
2282 }
2283 printk(KERN_INFO "md: THIS: ");
2284 print_desc(&sb->this_disk);
2285}
2286
2287static void print_sb_1(struct mdp_superblock_1 *sb)
2288{
2289 __u8 *uuid;
2290
2291 uuid = sb->set_uuid;
2292 printk(KERN_INFO
2293 "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
2294 "md: Name: \"%s\" CT:%llu\n",
2295 le32_to_cpu(sb->major_version),
2296 le32_to_cpu(sb->feature_map),
2297 uuid,
2298 sb->set_name,
2299 (unsigned long long)le64_to_cpu(sb->ctime)
2300 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
2301
2302 uuid = sb->device_uuid;
2303 printk(KERN_INFO
2304 "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
2305 " RO:%llu\n"
2306 "md: Dev:%08x UUID: %pU\n"
2307 "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
2308 "md: (MaxDev:%u) \n",
2309 le32_to_cpu(sb->level),
2310 (unsigned long long)le64_to_cpu(sb->size),
2311 le32_to_cpu(sb->raid_disks),
2312 le32_to_cpu(sb->layout),
2313 le32_to_cpu(sb->chunksize),
2314 (unsigned long long)le64_to_cpu(sb->data_offset),
2315 (unsigned long long)le64_to_cpu(sb->data_size),
2316 (unsigned long long)le64_to_cpu(sb->super_offset),
2317 (unsigned long long)le64_to_cpu(sb->recovery_offset),
2318 le32_to_cpu(sb->dev_number),
2319 uuid,
2320 sb->devflags,
2321 (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
2322 (unsigned long long)le64_to_cpu(sb->events),
2323 (unsigned long long)le64_to_cpu(sb->resync_offset),
2324 le32_to_cpu(sb->sb_csum),
2325 le32_to_cpu(sb->max_dev)
2326 );
2327}
2328
2329static void print_rdev(struct md_rdev *rdev, int major_version)
2330{
2331 char b[BDEVNAME_SIZE];
2332 printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
2333 bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
2334 test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
2335 rdev->desc_nr);
2336 if (rdev->sb_loaded) {
2337 printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
2338 switch (major_version) {
2339 case 0:
2340 print_sb_90(page_address(rdev->sb_page));
2341 break;
2342 case 1:
2343 print_sb_1(page_address(rdev->sb_page));
2344 break;
2345 }
2346 } else
2347 printk(KERN_INFO "md: no rdev superblock!\n");
2348}
2349
2350static void md_print_devices(void)
2351{
2352 struct list_head *tmp;
2353 struct md_rdev *rdev;
2354 struct mddev *mddev;
2355 char b[BDEVNAME_SIZE];
2356
2357 printk("\n");
2358 printk("md: **********************************\n");
2359 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
2360 printk("md: **********************************\n");
2361 for_each_mddev(mddev, tmp) {
2362
2363 if (mddev->bitmap)
2364 bitmap_print_sb(mddev->bitmap);
2365 else
2366 printk("%s: ", mdname(mddev));
2367 rdev_for_each(rdev, mddev)
2368 printk("<%s>", bdevname(rdev->bdev,b));
2369 printk("\n");
2370
2371 rdev_for_each(rdev, mddev)
2372 print_rdev(rdev, mddev->major_version);
2373 }
2374 printk("md: **********************************\n");
2375 printk("\n");
2376}
2377
2378
2379static void sync_sbs(struct mddev * mddev, int nospares)
2380{
2381
2382
2383
2384
2385
2386
2387 struct md_rdev *rdev;
2388 rdev_for_each(rdev, mddev) {
2389 if (rdev->sb_events == mddev->events ||
2390 (nospares &&
2391 rdev->raid_disk < 0 &&
2392 rdev->sb_events+1 == mddev->events)) {
2393
2394 rdev->sb_loaded = 2;
2395 } else {
2396 sync_super(mddev, rdev);
2397 rdev->sb_loaded = 1;
2398 }
2399 }
2400}
2401
/*
 * Write updated superblocks to the member devices of @mddev.
 *
 * @force_change: non-zero when the update must not be treated as a pure
 *                clean/dirty transition (it forces nospares to 0 below).
 *
 * For a read-only array nothing is written; a forced change is only
 * remembered in MD_CHANGE_DEVS.  For an array without persistent
 * metadata the change flags are cleared and waiters are woken without
 * any I/O.  Otherwise the event count is adjusted, every superblock
 * that needs it is regenerated and written, and the whole procedure
 * repeats if the array state changed while the writes were in flight.
 */
static void md_update_sb(struct mddev * mddev, int force_change)
{
	struct md_rdev *rdev;
	int sync_req;
	int nospares = 0;
	int any_badblocks_changed = 0;

	if (mddev->ro) {
		/* read-only: just remember that a device change is pending */
		if (force_change)
			set_bit(MD_CHANGE_DEVS, &mddev->flags);
		return;
	}
repeat:
	/*
	 * First bring recovery_offset up to date on members that are still
	 * recovering (not while the number of disks is being reduced).
	 */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk >= 0 &&
		    mddev->delta_disks >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    mddev->curr_resync_completed > rdev->recovery_offset)
				rdev->recovery_offset = mddev->curr_resync_completed;

	}	
	if (!mddev->persistent) {
		/* no on-disk metadata: clear the change flags and wake waiters */
		clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
		clear_bit(MD_CHANGE_DEVS, &mddev->flags);
		if (!mddev->external) {
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
			rdev_for_each(rdev, mddev) {
				/*
				 * Changed bad blocks cannot be recorded
				 * anywhere: acknowledge them and fail the
				 * device.
				 */
				if (rdev->badblocks.changed) {
					rdev->badblocks.changed = 0;
					md_ack_all_badblocks(&rdev->badblocks);
					md_error(mddev, rdev);
				}
				clear_bit(Blocked, &rdev->flags);
				clear_bit(BlockedBadBlocks, &rdev->flags);
				wake_up(&rdev->blocked_wait);
			}
		}
		wake_up(&mddev->sb_wait);
		return;
	}

	spin_lock_irq(&mddev->write_lock);

	mddev->utime = get_seconds();

	if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
		force_change = 1;
	if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
		/*
		 * Only a clean<->dirty transition: devices without a
		 * raid_disk slot need not be updated (see sync_sbs()).
		 */
		nospares = 1;
	if (force_change)
		nospares = 0;
	if (mddev->degraded)
		/*
		 * In a degraded array every device is always updated,
		 * including those without a raid_disk slot.
		 */
		nospares = 0;

	/* remember in_sync so a change during the writes can be detected */
	sync_req = mddev->in_sync;

	/*
	 * For a pure clean/dirty change on a fully in-sync array the event
	 * count is stepped back down if the previous update allowed it;
	 * otherwise it is incremented.  Either way, devices skipped by
	 * sync_sbs() stay within one event of the array.
	 */
	if (nospares
	    && (mddev->in_sync && mddev->recovery_cp == MaxSector)
	    && mddev->can_decrease_events
	    && mddev->events != 1) {
		mddev->events--;
		mddev->can_decrease_events = 0;
	} else {
		/* otherwise the event count has to go forward */
		mddev->events ++;
		mddev->can_decrease_events = nospares;
	}

	if (!mddev->events) {
		/*
		 * The 64-bit event counter wrapped to zero: report it and
		 * step back so it is non-zero again.
		 */
		MD_BUG();
		mddev->events --;
	}

	rdev_for_each(rdev, mddev) {
		if (rdev->badblocks.changed)
			any_badblocks_changed++;
		/* this update will record the failure of Faulty devices */
		if (test_bit(Faulty, &rdev->flags))
			set_bit(FaultRecorded, &rdev->flags);
	}

	sync_sbs(mddev, nospares);
	spin_unlock_irq(&mddev->write_lock);

	pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
		 mdname(mddev), mddev->in_sync);

	bitmap_update_sb(mddev->bitmap);
	rdev_for_each(rdev, mddev) {
		char b[BDEVNAME_SIZE];

		/* sync_sbs() marks devices that need writing with sb_loaded == 1 */
		if (rdev->sb_loaded != 1)
			continue; /* no noise on spare devices */

		if (!test_bit(Faulty, &rdev->flags) &&
		    rdev->saved_raid_disk == -1) {
			md_super_write(mddev,rdev,
				       rdev->sb_start, rdev->sb_size,
				       rdev->sb_page);
			pr_debug("md: (write) %s's sb offset: %llu\n",
				 bdevname(rdev->bdev, b),
				 (unsigned long long)rdev->sb_start);
			rdev->sb_events = mddev->events;
			/*
			 * A non-zero badblocks.size means the bad-block log
			 * page must be written too; it is cleared afterwards.
			 */
			if (rdev->badblocks.size) {
				md_super_write(mddev, rdev,
					       rdev->badblocks.sector,
					       rdev->badblocks.size << 9,
					       rdev->bb_page);
				rdev->badblocks.size = 0;
			}

		} else if (test_bit(Faulty, &rdev->flags))
			pr_debug("md: %s (skipping faulty)\n",
				 bdevname(rdev->bdev, b));
		else
			pr_debug("(skipping incremental s/r ");

		if (mddev->level == LEVEL_MULTIPATH)
			/* multipath: stop after the first device considered */
			break;
	}
	md_super_wait(mddev);
	/* all submitted superblock writes have completed at this point */

	spin_lock_irq(&mddev->write_lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
		/* the array state changed while writing: do it all again */
		spin_unlock_irq(&mddev->write_lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock_irq(&mddev->write_lock);
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");

	rdev_for_each(rdev, mddev) {
		/* the failure has been recorded, so the device can be unblocked */
		if (test_and_clear_bit(FaultRecorded, &rdev->flags))
			clear_bit(Blocked, &rdev->flags);

		/* bad blocks written above can now be acknowledged */
		if (any_badblocks_changed)
			md_ack_all_badblocks(&rdev->badblocks);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		wake_up(&rdev->blocked_wait);
	}
}
2569
2570
2571
2572
/*
 * Compare a user-supplied command against an expected keyword.
 *
 * @cmd: text written by the user; may end with a single '\n'
 * @str: keyword to match
 *
 * Returns 1 when @cmd equals @str exactly, optionally followed by one
 * newline, and 0 otherwise.
 */
static int cmd_match(const char *cmd, const char *str)
{
	/* skip the common prefix */
	for (; *cmd != '\0' && *str != '\0' && *cmd == *str; cmd++, str++)
		;

	/* tolerate one trailing newline on the command */
	if (*cmd == '\n')
		cmd++;

	/* both strings must be fully consumed */
	return *str == '\0' && *cmd == '\0';
}
2589
/*
 * One sysfs attribute of an rdev: the generic attribute plus typed
 * show/store callbacks that receive the rdev directly.
 */
struct rdev_sysfs_entry {
	struct attribute attr;
	/* format the attribute value into the given buffer; returns a length (see state_show) */
	ssize_t (*show)(struct md_rdev *, char *);
	/* parse a value of the given length written by the user */
	ssize_t (*store)(struct md_rdev *, const char *, size_t);
};
2595
2596static ssize_t
2597state_show(struct md_rdev *rdev, char *page)
2598{
2599 char *sep = "";
2600 size_t len = 0;
2601
2602 if (test_bit(Faulty, &rdev->flags) ||
2603 rdev->badblocks.unacked_exist) {
2604 len+= sprintf(page+len, "%sfaulty",sep);
2605 sep = ",";
2606 }
2607 if (test_bit(In_sync, &rdev->flags)) {
2608 len += sprintf(page+len, "%sin_sync",sep);
2609 sep = ",";
2610 }
2611 if (test_bit(WriteMostly, &rdev->flags)) {
2612 len += sprintf(page+len, "%swrite_mostly",sep);
2613 sep = ",";
2614 }
2615 if (test_bit(Blocked, &rdev->flags) ||
2616 (rdev->badblocks.unacked_exist
2617 && !test_bit(Faulty, &rdev->flags))) {
2618 len += sprintf(page+len, "%sblocked", sep);
2619 sep = ",";
2620 }
2621 if (!test_bit(Faulty, &rdev->flags) &&
2622 !test_bit(In_sync, &rdev->flags)) {
2623 len += sprintf(page+len, "%sspare", sep);
2624 sep = ",";
2625 }
2626 if (test_bit(WriteErrorSeen, &rdev->flags)) {
2627 len += sprintf(page+len, "%swrite_error", sep);
2628 sep = ",";
2629 }
2630 if (test_bit(WantReplacement, &rdev->flags)) {
2631 len += sprintf(page+len, "%swant_replacement", sep);
2632 sep = ",";
2633 }
2634 if (test_bit(Replacement, &rdev->flags)) {
2635 len += sprintf(page+len, "%sreplacement", sep);
2636 sep = ",";
2637 }
2638
2639 return len+sprintf(page+len, "\n");
2640}
2641
/*
 * sysfs 'state' write: apply one command keyword to @rdev.
 *
 * Recognised keywords (matched with cmd_match(), so one trailing newline
 * is accepted):
 *   faulty             - call md_error() on the device (array must be running)
 *   remove             - kick the device out of the array (no slot assigned)
 *   writemostly / -writemostly   - set / clear the WriteMostly flag
 *   blocked / -blocked           - set / clear the Blocked flag
 *   insync             - set In_sync (only while raid_disk == -1)
 *   write_error / -write_error   - set / clear WriteErrorSeen
 *   want_replacement / -want_replacement - set / clear WantReplacement
 *   replacement / -replacement   - set / clear Replacement (array not running)
 *
 * Returns @len on success, -EINVAL for an unrecognised keyword, or -EBUSY
 * when the command is not permitted in the current state.
 */
static ssize_t
state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
	int err = -EINVAL;
	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		/* md_error() may leave the device non-faulty; report that as busy */
		if (test_bit(Faulty, &rdev->flags))
			err = 0;
		else
			err = -EBUSY;
	} else if (cmd_match(buf, "remove")) {
		/* a device that still occupies a slot cannot be removed */
		if (rdev->raid_disk >= 0)
			err = -EBUSY;
		else {
			/* remember the array before the rdev is kicked from it */
			struct mddev *mddev = rdev->mddev;
			kick_rdev_from_array(rdev);
			if (mddev->pers)
				md_update_sb(mddev, 1);
			md_new_event(mddev);
			err = 0;
		}
	} else if (cmd_match(buf, "writemostly")) {
		set_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-writemostly")) {
		clear_bit(WriteMostly, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "blocked")) {
		set_bit(Blocked, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-blocked")) {
		if (!test_bit(Faulty, &rdev->flags) &&
		    rdev->badblocks.unacked_exist) {
			/*
			 * Unblocking while bad blocks are still unacknowledged:
			 * the device is failed via md_error() instead.
			 */
			md_error(rdev->mddev, rdev);
		}
		clear_bit(Blocked, &rdev->flags);
		clear_bit(BlockedBadBlocks, &rdev->flags);
		/* release anyone sleeping on blocked_wait and kick the md thread */
		wake_up(&rdev->blocked_wait);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);

		err = 0;
	} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
		set_bit(In_sync, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "write_error")) {
		set_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "-write_error")) {
		clear_bit(WriteErrorSeen, &rdev->flags);
		err = 0;
	} else if (cmd_match(buf, "want_replacement")) {
		/*
		 * The flag is only set on a device that holds a slot and is
		 * not itself a replacement; recovery is requested and the
		 * command succeeds either way.
		 */
		if (rdev->raid_disk >= 0 &&
		    !test_bit(Replacement, &rdev->flags))
			set_bit(WantReplacement, &rdev->flags);
		set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
		md_wakeup_thread(rdev->mddev->thread);
		err = 0;
	} else if (cmd_match(buf, "-want_replacement")) {
		/* clearing the flag is always accepted */
		err = 0;
		clear_bit(WantReplacement, &rdev->flags);
	} else if (cmd_match(buf, "replacement")) {
		/* the Replacement flag may only be changed while the array is not running */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			set_bit(Replacement, &rdev->flags);
			err = 0;
		}
	} else if (cmd_match(buf, "-replacement")) {
		/* same restriction as for "replacement" */
		if (rdev->mddev->pers)
			err = -EBUSY;
		else {
			clear_bit(Replacement, &rdev->flags);
			err = 0;
		}
	}
	/* on success, notify pollers of the 'state' attribute */
	if (!err)
		sysfs_notify_dirent_safe(rdev->sysfs_state);
	return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
2750
2751static ssize_t
2752errors_show(struct md_rdev *rdev, char *page)
2753{
2754 return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
2755}
2756
2757static ssize_t
2758errors_store(struct md_rdev *rdev, const char *buf, size_t len)
2759{
2760 char *e;
2761 unsigned long n = simple_strtoul(buf, &e, 10);
2762 if (*buf && (*e == 0 || *e == '\n')) {
2763 atomic_set(&rdev->corrected_errors, n);
2764 return len;
2765 }
2766 return -EINVAL;
2767}
2768static struct rdev_sysfs_entry rdev_errors =
2769__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
2770
2771static ssize_t
2772slot_show(struct md_rdev *rdev, char *page)
2773{
2774 if (rdev->raid_disk < 0)
2775 return sprintf(page, "none\n");
2776 else
2777 return sprintf(page, "%d\n", rdev->raid_disk);
2778}
2779
2780static ssize_t
2781slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2782{
2783 char *e;
2784 int err;
2785 int slot = simple_strtoul(buf, &e, 10);
2786 if (strncmp(buf, "none", 4)==0)
2787 slot = -1;
2788 else if (e==buf || (*e && *e!= '\n'))
2789 return -EINVAL;
2790 if (rdev->mddev->pers && slot == -1) {
2791
2792
2793
2794
2795
2796
2797
2798 if (rdev->raid_disk == -1)
2799 return -EEXIST;
2800
2801 if (rdev->mddev->pers->hot_remove_disk == NULL)
2802 return -EINVAL;
2803 clear_bit(Blocked, &rdev->flags);
2804 remove_and_add_spares(rdev->mddev, rdev);
2805 if (rdev->raid_disk >= 0)
2806 return -EBUSY;
2807 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2808 md_wakeup_thread(rdev->mddev->thread);
2809 } else if (rdev->mddev->pers) {
2810
2811
2812
2813
2814 if (rdev->raid_disk != -1)
2815 return -EBUSY;
2816
2817 if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery))
2818 return -EBUSY;
2819
2820 if (rdev->mddev->pers->hot_add_disk == NULL)
2821 return -EINVAL;
2822
2823 if (slot >= rdev->mddev->raid_disks &&
2824 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2825 return -ENOSPC;
2826
2827 rdev->raid_disk = slot;
2828 if (test_bit(In_sync, &rdev->flags))
2829 rdev->saved_raid_disk = slot;
2830 else
2831 rdev->saved_raid_disk = -1;
2832 clear_bit(In_sync, &rdev->flags);
2833 err = rdev->mddev->pers->
2834 hot_add_disk(rdev->mddev, rdev);
2835 if (err) {
2836 rdev->raid_disk = -1;
2837 return err;
2838 } else
2839 sysfs_notify_dirent_safe(rdev->sysfs_state);
2840 if (sysfs_link_rdev(rdev->mddev, rdev))
2841 ;
2842
2843 } else {
2844 if (slot >= rdev->mddev->raid_disks &&
2845 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2846 return -ENOSPC;
2847 rdev->raid_disk = slot;
2848
2849 clear_bit(Faulty, &rdev->flags);
2850 clear_bit(WriteMostly, &rdev->flags);
2851 set_bit(In_sync, &rdev->flags);
2852 sysfs_notify_dirent_safe(rdev->sysfs_state);
2853 }
2854 return len;
2855}
2856
2857
2858static struct rdev_sysfs_entry rdev_slot =
2859__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
2860
2861static ssize_t
2862offset_show(struct md_rdev *rdev, char *page)
2863{
2864 return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
2865}
2866
2867static ssize_t
2868offset_store(struct md_rdev *rdev, const char *buf, size_t len)
2869{
2870 unsigned long long offset;
2871 if (kstrtoull(buf, 10, &offset) < 0)
2872 return -EINVAL;
2873 if (rdev->mddev->pers && rdev->raid_disk >= 0)
2874 return -EBUSY;
2875 if (rdev->sectors && rdev->mddev->external)
2876
2877
2878 return -EBUSY;
2879 rdev->data_offset = offset;
2880 rdev->new_data_offset = offset;
2881 return len;
2882}
2883
2884static struct rdev_sysfs_entry rdev_offset =
2885__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
2886
2887static ssize_t new_offset_show(struct md_rdev *rdev, char *page)
2888{
2889 return sprintf(page, "%llu\n",
2890 (unsigned long long)rdev->new_data_offset);
2891}
2892
2893static ssize_t new_offset_store(struct md_rdev *rdev,
2894 const char *buf, size_t len)
2895{
2896 unsigned long long new_offset;
2897 struct mddev *mddev = rdev->mddev;
2898
2899 if (kstrtoull(buf, 10, &new_offset) < 0)
2900 return -EINVAL;
2901
2902 if (mddev->sync_thread)
2903 return -EBUSY;
2904 if (new_offset == rdev->data_offset)
2905
2906 ;
2907 else if (new_offset > rdev->data_offset) {
2908
2909 if (new_offset - rdev->data_offset
2910 + mddev->dev_sectors > rdev->sectors)
2911 return -E2BIG;
2912 }
2913
2914
2915
2916
2917
2918 if (new_offset < rdev->data_offset &&
2919 mddev->reshape_backwards)
2920 return -EINVAL;
2921
2922
2923
2924
2925 if (new_offset > rdev->data_offset &&
2926 !mddev->reshape_backwards)
2927 return -EINVAL;
2928
2929 if (mddev->pers && mddev->persistent &&
2930 !super_types[mddev->major_version]
2931 .allow_new_offset(rdev, new_offset))
2932 return -E2BIG;
2933 rdev->new_data_offset = new_offset;
2934 if (new_offset > rdev->data_offset)
2935 mddev->reshape_backwards = 1;
2936 else if (new_offset < rdev->data_offset)
2937 mddev->reshape_backwards = 0;
2938
2939 return len;
2940}
2941static struct rdev_sysfs_entry rdev_new_offset =
2942__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store);
2943
2944static ssize_t
2945rdev_size_show(struct md_rdev *rdev, char *page)
2946{
2947 return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
2948}
2949
2950static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
2951{
2952
2953 if (s1+l1 <= s2)
2954 return 0;
2955 if (s2+l2 <= s1)
2956 return 0;
2957 return 1;
2958}
2959
2960static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
2961{
2962 unsigned long long blocks;
2963 sector_t new;
2964
2965 if (kstrtoull(buf, 10, &blocks) < 0)
2966 return -EINVAL;
2967
2968 if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
2969 return -EINVAL;
2970
2971 new = blocks * 2;
2972 if (new != blocks * 2)
2973 return -EINVAL;
2974
2975 *sectors = new;
2976 return 0;
2977}
2978
2979static ssize_t
2980rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
2981{
2982 struct mddev *my_mddev = rdev->mddev;
2983 sector_t oldsectors = rdev->sectors;
2984 sector_t sectors;
2985
2986 if (strict_blocks_to_sectors(buf, §ors) < 0)
2987 return -EINVAL;
2988 if (rdev->data_offset != rdev->new_data_offset)
2989 return -EINVAL;
2990 if (my_mddev->pers && rdev->raid_disk >= 0) {
2991 if (my_mddev->persistent) {
2992 sectors = super_types[my_mddev->major_version].
2993 rdev_size_change(rdev, sectors);
2994 if (!sectors)
2995 return -EBUSY;
2996 } else if (!sectors)
2997 sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
2998 rdev->data_offset;
2999 if (!my_mddev->pers->resize)
3000
3001 return -EINVAL;
3002 }
3003 if (sectors < my_mddev->dev_sectors)
3004 return -EINVAL;
3005
3006 rdev->sectors = sectors;
3007 if (sectors > oldsectors && my_mddev->external) {
3008
3009
3010
3011
3012
3013 struct mddev *mddev;
3014 int overlap = 0;
3015 struct list_head *tmp;
3016
3017 mddev_unlock(my_mddev);
3018 for_each_mddev(mddev, tmp) {
3019 struct md_rdev *rdev2;
3020
3021 mddev_lock(mddev);
3022 rdev_for_each(rdev2, mddev)
3023 if (rdev->bdev == rdev2->bdev &&
3024 rdev != rdev2 &&
3025 overlaps(rdev->data_offset, rdev->sectors,
3026 rdev2->data_offset,
3027 rdev2->sectors)) {
3028 overlap = 1;
3029 break;
3030 }
3031 mddev_unlock(mddev);
3032 if (overlap) {
3033 mddev_put(mddev);
3034 break;
3035 }
3036 }
3037 mddev_lock(my_mddev);
3038 if (overlap) {
3039
3040
3041
3042
3043
3044
3045 rdev->sectors = oldsectors;
3046 return -EBUSY;
3047 }
3048 }
3049 return len;
3050}
3051
3052static struct rdev_sysfs_entry rdev_size =
3053__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
3054
3055
3056static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
3057{
3058 unsigned long long recovery_start = rdev->recovery_offset;
3059
3060 if (test_bit(In_sync, &rdev->flags) ||
3061 recovery_start == MaxSector)
3062 return sprintf(page, "none\n");
3063
3064 return sprintf(page, "%llu\n", recovery_start);
3065}
3066
3067static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
3068{
3069 unsigned long long recovery_start;
3070
3071 if (cmd_match(buf, "none"))
3072 recovery_start = MaxSector;
3073 else if (kstrtoull(buf, 10, &recovery_start))
3074 return -EINVAL;
3075
3076 if (rdev->mddev->pers &&
3077 rdev->raid_disk >= 0)
3078 return -EBUSY;
3079
3080 rdev->recovery_offset = recovery_start;
3081 if (recovery_start == MaxSector)
3082 set_bit(In_sync, &rdev->flags);
3083 else
3084 clear_bit(In_sync, &rdev->flags);
3085 return len;
3086}
3087
3088static struct rdev_sysfs_entry rdev_recovery_start =
3089__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
3090
3091
3092static ssize_t
3093badblocks_show(struct badblocks *bb, char *page, int unack);
3094static ssize_t
3095badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
3096
3097static ssize_t bb_show(struct md_rdev *rdev, char *page)
3098{
3099 return badblocks_show(&rdev->badblocks, page, 0);
3100}
3101static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
3102{
3103 int rv = badblocks_store(&rdev->badblocks, page, len, 0);
3104
3105 if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
3106 wake_up(&rdev->blocked_wait);
3107 return rv;
3108}
3109static struct rdev_sysfs_entry rdev_bad_blocks =
3110__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
3111
3112
3113static ssize_t ubb_show(struct md_rdev *rdev, char *page)
3114{
3115 return badblocks_show(&rdev->badblocks, page, 1);
3116}
3117static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
3118{
3119 return badblocks_store(&rdev->badblocks, page, len, 1);
3120}
3121static struct rdev_sysfs_entry rdev_unack_bad_blocks =
3122__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
3123
/*
 * NULL-terminated list of the per-rdev sysfs attributes; installed as the
 * default attributes of rdev_ktype.
 */
static struct attribute *rdev_default_attrs[] = {
	&rdev_state.attr,
	&rdev_errors.attr,
	&rdev_slot.attr,
	&rdev_offset.attr,
	&rdev_new_offset.attr,
	&rdev_size.attr,
	&rdev_recovery_start.attr,
	&rdev_bad_blocks.attr,
	&rdev_unack_bad_blocks.attr,
	NULL,
};
3136static ssize_t
3137rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3138{
3139 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3140 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3141 struct mddev *mddev = rdev->mddev;
3142 ssize_t rv;
3143
3144 if (!entry->show)
3145 return -EIO;
3146
3147 rv = mddev ? mddev_lock(mddev) : -EBUSY;
3148 if (!rv) {
3149 if (rdev->mddev == NULL)
3150 rv = -EBUSY;
3151 else
3152 rv = entry->show(rdev, page);
3153 mddev_unlock(mddev);
3154 }
3155 return rv;
3156}
3157
3158static ssize_t
3159rdev_attr_store(struct kobject *kobj, struct attribute *attr,
3160 const char *page, size_t length)
3161{
3162 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
3163 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
3164 ssize_t rv;
3165 struct mddev *mddev = rdev->mddev;
3166
3167 if (!entry->store)
3168 return -EIO;
3169 if (!capable(CAP_SYS_ADMIN))
3170 return -EACCES;
3171 rv = mddev ? mddev_lock(mddev): -EBUSY;
3172 if (!rv) {
3173 if (rdev->mddev == NULL)
3174 rv = -EBUSY;
3175 else
3176 rv = entry->store(rdev, page, length);
3177 mddev_unlock(mddev);
3178 }
3179 return rv;
3180}
3181
3182static void rdev_free(struct kobject *ko)
3183{
3184 struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
3185 kfree(rdev);
3186}
/* sysfs operations for rdev kobjects: route reads/writes to the generic handlers. */
static const struct sysfs_ops rdev_sysfs_ops = {
	.show		= rdev_attr_show,
	.store		= rdev_attr_store,
};
/*
 * kobject type for md_rdev: freed through rdev_free(), accessed through
 * rdev_sysfs_ops, and created with the attributes in rdev_default_attrs.
 */
static struct kobj_type rdev_ktype = {
	.release	= rdev_free,
	.sysfs_ops	= &rdev_sysfs_ops,
	.default_attrs	= rdev_default_attrs,
};
3196
3197int md_rdev_init(struct md_rdev *rdev)
3198{
3199 rdev->desc_nr = -1;
3200 rdev->saved_raid_disk = -1;
3201 rdev->raid_disk = -1;
3202 rdev->flags = 0;
3203 rdev->data_offset = 0;
3204 rdev->new_data_offset = 0;
3205 rdev->sb_events = 0;
3206 rdev->last_read_error.tv_sec = 0;
3207 rdev->last_read_error.tv_nsec = 0;
3208 rdev->sb_loaded = 0;
3209 rdev->bb_page = NULL;
3210 atomic_set(&rdev->nr_pending, 0);
3211 atomic_set(&rdev->read_errors, 0);
3212 atomic_set(&rdev->corrected_errors, 0);
3213
3214 INIT_LIST_HEAD(&rdev->same_set);
3215 init_waitqueue_head(&rdev->blocked_wait);
3216
3217
3218
3219
3220
3221 rdev->badblocks.count = 0;
3222 rdev->badblocks.shift = -1;
3223 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3224 seqlock_init(&rdev->badblocks.lock);
3225 if (rdev->badblocks.page == NULL)
3226 return -ENOMEM;
3227
3228 return 0;
3229}
3230EXPORT_SYMBOL_GPL(md_rdev_init);
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
/*
 * Allocate and set up an md_rdev for block device @newdev.
 *
 * @super_format: index into super_types[] used to load the superblock when
 *                >= 0; a negative value skips superblock loading, and -2
 *                is additionally passed on to lock_rdev() as a flag.
 * @super_minor:  minor version handed to the load_super() handler.
 *
 * Returns the new rdev, or an ERR_PTR() carrying a negative errno.
 */
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
	char b[BDEVNAME_SIZE];
	int err;
	struct md_rdev *rdev;
	sector_t size;

	rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
	if (!rdev) {
		printk(KERN_ERR "md: could not alloc mem for new device!\n");
		return ERR_PTR(-ENOMEM);
	}

	err = md_rdev_init(rdev);
	if (err)
		goto abort_free;
	err = alloc_disk_sb(rdev);
	if (err)
		goto abort_free;

	err = lock_rdev(rdev, newdev, super_format == -2);
	if (err)
		goto abort_free;

	kobject_init(&rdev->kobj, &rdev_ktype);

	/* a device reporting size 0 is rejected */
	size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
	if (!size) {
		printk(KERN_WARNING
			"md: %s has zero or unknown size, marking faulty!\n",
			bdevname(rdev->bdev,b));
		err = -EINVAL;
		goto abort_free;
	}

	if (super_format >= 0) {
		err = super_types[super_format].
			load_super(rdev, NULL, super_minor);
		/* -EINVAL gets its own, more specific message */
		if (err == -EINVAL) {
			printk(KERN_WARNING
				"md: %s does not have a valid v%d.%d "
			       "superblock, not importing!\n",
				bdevname(rdev->bdev,b),
			       super_format, super_minor);
			goto abort_free;
		}
		if (err < 0) {
			printk(KERN_WARNING
				"md: could not read %s's sb, not importing!\n",
				bdevname(rdev->bdev,b));
			goto abort_free;
		}
	}

	return rdev;

abort_free:
	/* bdev is only set once lock_rdev() has succeeded */
	if (rdev->bdev)
		unlock_rdev(rdev);
	md_rdev_clear(rdev);
	kfree(rdev);
	return ERR_PTR(err);
}
3304
3305
3306
3307
3308
3309
/*
 * Load and validate the superblocks of all devices in @mddev.
 *
 * Pass 1 calls load_super() on every rdev, tracking the one reported as
 * freshest and kicking devices whose superblock is rejected.  The freshest
 * superblock is then validated against the array, and pass 2 validates
 * each remaining device, kicking those that exceed max_disks or fail
 * validation, and fixing up slot assignments.
 */
static void analyze_sbs(struct mddev * mddev)
{
	int i;
	struct md_rdev *rdev, *freshest, *tmp;
	char b[BDEVNAME_SIZE];

	freshest = NULL;
	/* _safe iteration: devices may be removed from the list as we go */
	rdev_for_each_safe(rdev, tmp, mddev)
		switch (super_types[mddev->major_version].
			load_super(rdev, freshest, mddev->minor_version)) {
		case 1:
			/* handler reports this device as the new freshest one */
			freshest = rdev;
			break;
		case 0:
			break;
		default:
			printk( KERN_ERR \
				"md: fatal superblock inconsistency in %s"
				" -- removing from array\n",
				bdevname(rdev->bdev,b));
			kick_rdev_from_array(rdev);
		}


	super_types[mddev->major_version].
		validate_super(mddev, freshest);

	i = 0;
	rdev_for_each_safe(rdev, tmp, mddev) {
		/* enforce the device limit when max_disks is non-zero */
		if (mddev->max_disks &&
		    (rdev->desc_nr >= mddev->max_disks ||
		     i > mddev->max_disks)) {
			printk(KERN_WARNING
			       "md: %s: %s: only %d devices permitted\n",
			       mdname(mddev), bdevname(rdev->bdev, b),
			       mddev->max_disks);
			kick_rdev_from_array(rdev);
			continue;
		}
		/* the freshest device was already validated above */
		if (rdev != freshest)
			if (super_types[mddev->major_version].
			    validate_super(mddev, rdev)) {
				printk(KERN_WARNING "md: kicking non-fresh %s"
					" from array!\n",
					bdevname(rdev->bdev,b));
				kick_rdev_from_array(rdev);
				continue;
			}
		if (mddev->level == LEVEL_MULTIPATH) {
			/* multipath: number the devices sequentially, all in sync */
			rdev->desc_nr = i++;
			rdev->raid_disk = rdev->desc_nr;
			set_bit(In_sync, &rdev->flags);
		} else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) {
			/* slot beyond the array's disk count: drop the slot assignment */
			rdev->raid_disk = -1;
			clear_bit(In_sync, &rdev->flags);
		}
	}
}
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
/*
 * Parse a decimal number with an optional fractional part and return it
 * multiplied by 10^@scale.
 *
 * At most @scale digits after the '.' contribute to the result; further
 * fractional digits are consumed and ignored.  One trailing newline is
 * accepted.  Example: "1.5" with @scale 3 yields 1500.
 *
 * Returns 0 and stores the value in *@res on success, or -EINVAL (leaving
 * *@res untouched) if any other character follows the number.
 */
int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale)
{
	unsigned long acc = 0;
	long frac_digits = -1;	/* stays -1 until a '.' has been seen */

	for (;; cp++) {
		/* only the first '.' is part of the number */
		if (*cp == '.' && frac_digits < 0) {
			frac_digits = 0;
			continue;
		}
		if (!isdigit(*cp))
			break;
		if (frac_digits < scale) {
			unsigned int digit = *cp - '0';

			acc = acc * 10 + digit;
			if (frac_digits >= 0)
				frac_digits++;
		}
	}

	if (*cp == '\n')
		cp++;
	if (*cp)
		return -EINVAL;

	if (frac_digits < 0)
		frac_digits = 0;
	/* pad with zeros up to the requested number of decimals */
	for (; frac_digits < scale; frac_digits++)
		acc *= 10;

	*res = acc;
	return 0;
}
3408
3409
3410static void md_safemode_timeout(unsigned long data);
3411
3412static ssize_t
3413safe_delay_show(struct mddev *mddev, char *page)
3414{
3415 int msec = (mddev->safemode_delay*1000)/HZ;
3416 return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
3417}
3418static ssize_t
3419safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3420{
3421 unsigned long msec;
3422
3423 if (strict_strtoul_scaled(cbuf, &msec, 3) < 0)
3424 return -EINVAL;
3425 if (msec == 0)
3426 mddev->safemode_delay = 0;
3427 else {
3428 unsigned long old_delay = mddev->safemode_delay;
3429 mddev->safemode_delay = (msec*HZ)/1000;
3430 if (mddev->safemode_delay == 0)
3431 mddev->safemode_delay = 1;
3432 if (mddev->safemode_delay < old_delay || old_delay == 0)
3433 md_safemode_timeout((unsigned long)mddev);
3434 }
3435 return len;
3436}
3437static struct md_sysfs_entry md_safe_delay =
3438__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3439
3440static ssize_t
3441level_show(struct mddev *mddev, char *page)
3442{
3443 struct md_personality *p = mddev->pers;
3444 if (p)
3445 return sprintf(page, "%s\n", p->name);
3446 else if (mddev->clevel[0])
3447 return sprintf(page, "%s\n", mddev->clevel);
3448 else if (mddev->level != LEVEL_NONE)
3449 return sprintf(page, "%d\n", mddev->level);
3450 else
3451 return 0;
3452}
3453
/*
 * sysfs 'level' write.
 *
 * Array not running: just record the requested level string in
 * mddev->clevel (one trailing newline stripped) and reset the numeric
 * level to LEVEL_NONE.
 *
 * Array running: switch to the named personality.  The new personality's
 * takeover() prepares its private data, the old personality is stopped
 * while the array is suspended, sysfs attributes and rdev slot links are
 * adjusted, and the new personality is started.
 *
 * Returns the number of bytes written on success or a negative errno.
 */
static ssize_t
level_store(struct mddev *mddev, const char *buf, size_t len)
{
	char clevel[16];
	ssize_t rv = len;
	struct md_personality *pers;
	long level;
	void *priv;
	struct md_rdev *rdev;

	if (mddev->pers == NULL) {
		/* inactive array: only remember the requested level string */
		if (len == 0)
			return 0;
		if (len >= sizeof(mddev->clevel))
			return -ENOSPC;
		strncpy(mddev->clevel, buf, len);
		if (mddev->clevel[len-1] == '\n')
			len--;
		mddev->clevel[len] = 0;
		mddev->level = LEVEL_NONE;
		return rv;
	}

	/*
	 * Active array: refuse while a sync thread exists, a reshape position
	 * is recorded, or sysfs_active is set.
	 */
	if (mddev->sync_thread ||
	    mddev->reshape_position != MaxSector ||
	    mddev->sysfs_active)
		return -EBUSY;

	/* the current personality must provide ->quiesce */
	if (!mddev->pers->quiesce) {
		printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
		       mdname(mddev), mddev->pers->name);
		return -EINVAL;
	}

	/* copy the request into a local buffer, stripping one trailing newline */
	if (len == 0 || len >= sizeof(clevel))
		return -EINVAL;
	strncpy(clevel, buf, len);
	if (clevel[len-1] == '\n')
		len--;
	clevel[len] = 0;
	/* a non-numeric request is looked up by name only */
	if (kstrtol(clevel, 10, &level))
		level = LEVEL_NONE;

	/* try both module alias forms */
	if (request_module("md-%s", clevel) != 0)
		request_module("md-level-%s", clevel);
	spin_lock(&pers_lock);
	pers = find_pers(level, clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
		return -EINVAL;
	}
	spin_unlock(&pers_lock);

	if (pers == mddev->pers) {
		/* nothing to do: the requested personality is already active */
		module_put(pers->owner);
		return rv;
	}
	if (!pers->takeover) {
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
		       mdname(mddev), clevel);
		return -EINVAL;
	}

	/* default: every device keeps its slot unless takeover() changes it */
	rdev_for_each(rdev, mddev)
		rdev->new_raid_disk = rdev->raid_disk;

	/* let the new personality prepare its private data */
	priv = pers->takeover(mddev);
	if (IS_ERR(priv)) {
		/* roll back the new_* geometry fields to the current geometry */
		mddev->new_level = mddev->level;
		mddev->new_layout = mddev->layout;
		mddev->new_chunk_sectors = mddev->chunk_sectors;
		mddev->raid_disks -= mddev->delta_disks;
		mddev->delta_disks = 0;
		mddev->reshape_backwards = 0;
		module_put(pers->owner);
		printk(KERN_WARNING "md: %s: %s would not accept array\n",
		       mdname(mddev), clevel);
		return PTR_ERR(priv);
	}

	/* takeover succeeded: suspend the array and stop the old personality */
	mddev_suspend(mddev);
	mddev->pers->stop(mddev);

	if (mddev->pers->sync_request == NULL &&
	    pers->sync_request != NULL) {
		/* new personality has sync_request: add the redundancy attribute group */
		if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			printk(KERN_WARNING
			       "md: cannot register extra attributes for %s\n",
			       mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
	}
	if (mddev->pers->sync_request != NULL &&
	    pers->sync_request == NULL) {
		/* new personality lacks sync_request: schedule removal of that group */
		if (mddev->to_remove == NULL)
			mddev->to_remove = &md_redundancy_group;
	}

	if (mddev->pers->sync_request == NULL &&
	    mddev->external) {
		/*
		 * Old personality had no sync_request and metadata is
		 * external: mark the array not in_sync and switch safe mode
		 * off.
		 */
		mddev->in_sync = 0;
		mddev->safemode_delay = 0;
		mddev->safemode = 0;
	}

	/* first pass: remove the sysfs links of all devices that change slot */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk >= mddev->raid_disks)
			rdev->new_raid_disk = -1;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		sysfs_unlink_rdev(mddev, rdev);
	}
	/* second pass: apply the new slots and recreate the links */
	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
		if (rdev->new_raid_disk == rdev->raid_disk)
			continue;
		rdev->raid_disk = rdev->new_raid_disk;
		if (rdev->raid_disk < 0)
			clear_bit(In_sync, &rdev->flags);
		else {
			if (sysfs_link_rdev(mddev, rdev))
				printk(KERN_WARNING "md: cannot register rd%d"
				       " for %s after level change\n",
				       rdev->raid_disk, mdname(mddev));
		}
	}

	/* swap personalities and commit the new geometry */
	module_put(mddev->pers->owner);
	mddev->pers = pers;
	mddev->private = priv;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
	mddev->delta_disks = 0;
	mddev->reshape_backwards = 0;
	mddev->degraded = 0;
	if (mddev->pers->sync_request == NULL) {
		/* no sync_request in the new personality: mark in_sync, stop the safemode timer */
		mddev->in_sync = 1;
		del_timer_sync(&mddev->safemode_timer);
	}
	/* NOTE(review): the return value of ->run() is not checked here */
	pers->run(mddev);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	mddev_resume(mddev);
	sysfs_notify(&mddev->kobj, NULL, "level");
	md_new_event(mddev);
	return rv;
}

static struct md_sysfs_entry md_level =
__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store);
3633
3634
3635static ssize_t
3636layout_show(struct mddev *mddev, char *page)
3637{
3638
3639 if (mddev->reshape_position != MaxSector &&
3640 mddev->layout != mddev->new_layout)
3641 return sprintf(page, "%d (%d)\n",
3642 mddev->new_layout, mddev->layout);
3643 return sprintf(page, "%d\n", mddev->layout);
3644}
3645
3646static ssize_t
3647layout_store(struct mddev *mddev, const char *buf, size_t len)
3648{
3649 char *e;
3650 unsigned long n = simple_strtoul(buf, &e, 10);
3651
3652 if (!*buf || (*e && *e != '\n'))
3653 return -EINVAL;
3654
3655 if (mddev->pers) {
3656 int err;
3657 if (mddev->pers->check_reshape == NULL)
3658 return -EBUSY;
3659 mddev->new_layout = n;
3660 err = mddev->pers->check_reshape(mddev);
3661 if (err) {
3662 mddev->new_layout = mddev->layout;
3663 return err;
3664 }
3665 } else {
3666 mddev->new_layout = n;
3667 if (mddev->reshape_position == MaxSector)
3668 mddev->layout = n;
3669 }
3670 return len;
3671}
3672static struct md_sysfs_entry md_layout =
3673__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
3674
3675
3676static ssize_t
3677raid_disks_show(struct mddev *mddev, char *page)
3678{
3679 if (mddev->raid_disks == 0)
3680 return 0;
3681 if (mddev->reshape_position != MaxSector &&
3682 mddev->delta_disks != 0)
3683 return sprintf(page, "%d (%d)\n", mddev->raid_disks,
3684 mddev->raid_disks - mddev->delta_disks);
3685 return sprintf(page, "%d\n", mddev->raid_disks);
3686}
3687
3688static int update_raid_disks(struct mddev *mddev, int raid_disks);
3689
3690static ssize_t
3691raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3692{
3693 char *e;
3694 int rv = 0;
3695 unsigned long n = simple_strtoul(buf, &e, 10);
3696
3697 if (!*buf || (*e && *e != '\n'))
3698 return -EINVAL;
3699
3700 if (mddev->pers)
3701 rv = update_raid_disks(mddev, n);
3702 else if (mddev->reshape_position != MaxSector) {
3703 struct md_rdev *rdev;
3704 int olddisks = mddev->raid_disks - mddev->delta_disks;
3705
3706 rdev_for_each(rdev, mddev) {
3707 if (olddisks < n &&
3708 rdev->data_offset < rdev->new_data_offset)
3709 return -EINVAL;
3710 if (olddisks > n &&
3711 rdev->data_offset > rdev->new_data_offset)
3712 return -EINVAL;
3713 }
3714 mddev->delta_disks = n - olddisks;
3715 mddev->raid_disks = n;
3716 mddev->reshape_backwards = (mddev->delta_disks < 0);
3717 } else
3718 mddev->raid_disks = n;
3719 return rv ? rv : len;
3720}
3721static struct md_sysfs_entry md_raid_disks =
3722__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
3723
3724static ssize_t
3725chunk_size_show(struct mddev *mddev, char *page)
3726{
3727 if (mddev->reshape_position != MaxSector &&
3728 mddev->chunk_sectors != mddev->new_chunk_sectors)
3729 return sprintf(page, "%d (%d)\n",
3730 mddev->new_chunk_sectors << 9,
3731 mddev->chunk_sectors << 9);
3732 return sprintf(page, "%d\n", mddev->chunk_sectors << 9);
3733}
3734
3735static ssize_t
3736chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3737{
3738 char *e;
3739 unsigned long n = simple_strtoul(buf, &e, 10);
3740
3741 if (!*buf || (*e && *e != '\n'))
3742 return -EINVAL;
3743
3744 if (mddev->pers) {
3745 int err;
3746 if (mddev->pers->check_reshape == NULL)
3747 return -EBUSY;
3748 mddev->new_chunk_sectors = n >> 9;
3749 err = mddev->pers->check_reshape(mddev);
3750 if (err) {
3751 mddev->new_chunk_sectors = mddev->chunk_sectors;
3752 return err;
3753 }
3754 } else {
3755 mddev->new_chunk_sectors = n >> 9;
3756 if (mddev->reshape_position == MaxSector)
3757 mddev->chunk_sectors = n >> 9;
3758 }
3759 return len;
3760}
3761static struct md_sysfs_entry md_chunk_size =
3762__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
3763
3764static ssize_t
3765resync_start_show(struct mddev *mddev, char *page)
3766{
3767 if (mddev->recovery_cp == MaxSector)
3768 return sprintf(page, "none\n");
3769 return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
3770}
3771
3772static ssize_t
3773resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3774{
3775 char *e;
3776 unsigned long long n = simple_strtoull(buf, &e, 10);
3777
3778 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3779 return -EBUSY;
3780 if (cmd_match(buf, "none"))
3781 n = MaxSector;
3782 else if (!*buf || (*e && *e != '\n'))
3783 return -EINVAL;
3784
3785 mddev->recovery_cp = n;
3786 if (mddev->pers)
3787 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3788 return len;
3789}
3790static struct md_sysfs_entry md_resync_start =
3791__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store);
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
/*
 * Array states exposed through the 'array_state' attribute.  The enum
 * values index array_states[] below, so both lists must stay in the same
 * order; bad_word lines up with the terminating NULL and is what
 * match_word() yields for an unrecognised string.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, bad_word};
/* textual names of the states above, NULL-terminated */
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
3834
/*
 * Find @word in the NULL-terminated string array @list using cmd_match().
 * Returns the index of the first match, or the index of the terminating
 * NULL entry if nothing matches.
 */
static int match_word(const char *word, char **list)
{
	int idx = 0;

	while (list[idx] && !cmd_match(word, list[idx]))
		idx++;

	return idx;
}
3843
3844static ssize_t
3845array_state_show(struct mddev *mddev, char *page)
3846{
3847 enum array_state st = inactive;
3848
3849 if (mddev->pers)
3850 switch(mddev->ro) {
3851 case 1:
3852 st = readonly;
3853 break;
3854 case 2:
3855 st = read_auto;
3856 break;
3857 case 0:
3858 if (mddev->in_sync)
3859 st = clean;
3860 else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
3861 st = write_pending;
3862 else if (mddev->safemode)
3863 st = active_idle;
3864 else
3865 st = active;
3866 }
3867 else {
3868 if (list_empty(&mddev->disks) &&
3869 mddev->raid_disks == 0 &&
3870 mddev->dev_sectors == 0)
3871 st = clear;
3872 else
3873 st = inactive;
3874 }
3875 return sprintf(page, "%s\n", array_states[st]);
3876}
3877
3878static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
3879static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
3880static int do_md_run(struct mddev * mddev);
3881static int restart_array(struct mddev *mddev);
3882
3883static ssize_t
3884array_state_store(struct mddev *mddev, const char *buf, size_t len)
3885{
3886 int err = -EINVAL;
3887 enum array_state st = match_word(buf, array_states);
3888 switch(st) {
3889 case bad_word:
3890 break;
3891 case clear:
3892
3893 err = do_md_stop(mddev, 0, NULL);
3894 break;
3895 case inactive:
3896
3897 if (mddev->pers)
3898 err = do_md_stop(mddev, 2, NULL);
3899 else
3900 err = 0;
3901 break;
3902 case suspended:
3903 break;
3904 case readonly:
3905 if (mddev->pers)
3906 err = md_set_readonly(mddev, NULL);
3907 else {
3908 mddev->ro = 1;
3909 set_disk_ro(mddev->gendisk, 1);
3910 err = do_md_run(mddev);
3911 }
3912 break;
3913 case read_auto:
3914 if (mddev->pers) {
3915 if (mddev->ro == 0)
3916 err = md_set_readonly(mddev, NULL);
3917 else if (mddev->ro == 1)
3918 err = restart_array(mddev);
3919 if (err == 0) {
3920 mddev->ro = 2;
3921 set_disk_ro(mddev->gendisk, 0);
3922 }
3923 } else {
3924 mddev->ro = 2;
3925 err = do_md_run(mddev);
3926 }
3927 break;
3928 case clean:
3929 if (mddev->pers) {
3930 restart_array(mddev);
3931 spin_lock_irq(&mddev->write_lock);
3932 if (atomic_read(&mddev->writes_pending) == 0) {
3933 if (mddev->in_sync == 0) {
3934 mddev->in_sync = 1;
3935 if (mddev->safemode == 1)
3936 mddev->safemode = 0;
3937 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3938 }
3939 err = 0;
3940 } else
3941 err = -EBUSY;
3942 spin_unlock_irq(&mddev->write_lock);
3943 } else
3944 err = -EINVAL;
3945 break;
3946 case active:
3947 if (mddev->pers) {
3948 restart_array(mddev);
3949 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3950 wake_up(&mddev->sb_wait);
3951 err = 0;
3952 } else {
3953 mddev->ro = 0;
3954 set_disk_ro(mddev->gendisk, 0);
3955 err = do_md_run(mddev);
3956 }
3957 break;
3958 case write_pending:
3959 case active_idle:
3960
3961 break;
3962 }
3963 if (err)
3964 return err;
3965 else {
3966 if (mddev->hold_active == UNTIL_IOCTL)
3967 mddev->hold_active = 0;
3968 sysfs_notify_dirent_safe(mddev->sysfs_state);
3969 return len;
3970 }
3971}
3972static struct md_sysfs_entry md_array_state =
3973__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3974
3975static ssize_t
3976max_corrected_read_errors_show(struct mddev *mddev, char *page) {
3977 return sprintf(page, "%d\n",
3978 atomic_read(&mddev->max_corr_read_errors));
3979}
3980
3981static ssize_t
3982max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
3983{
3984 char *e;
3985 unsigned long n = simple_strtoul(buf, &e, 10);
3986
3987 if (*buf && (*e == 0 || *e == '\n')) {
3988 atomic_set(&mddev->max_corr_read_errors, n);
3989 return len;
3990 }
3991 return -EINVAL;
3992}
3993
3994static struct md_sysfs_entry max_corr_read_errors =
3995__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3996 max_corrected_read_errors_store);
3997
3998static ssize_t
3999null_show(struct mddev *mddev, char *page)
4000{
4001 return -EINVAL;
4002}
4003
4004static ssize_t
4005new_dev_store(struct mddev *mddev, const char *buf, size_t len)
4006{
4007
4008
4009
4010
4011
4012
4013
4014 char *e;
4015 int major = simple_strtoul(buf, &e, 10);
4016 int minor;
4017 dev_t dev;
4018 struct md_rdev *rdev;
4019 int err;
4020
4021 if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
4022 return -EINVAL;
4023 minor = simple_strtoul(e+1, &e, 10);
4024 if (*e && *e != '\n')
4025 return -EINVAL;
4026 dev = MKDEV(major, minor);
4027 if (major != MAJOR(dev) ||
4028 minor != MINOR(dev))
4029 return -EOVERFLOW;
4030
4031
4032 if (mddev->persistent) {
4033 rdev = md_import_device(dev, mddev->major_version,
4034 mddev->minor_version);
4035 if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
4036 struct md_rdev *rdev0
4037 = list_entry(mddev->disks.next,
4038 struct md_rdev, same_set);
4039 err = super_types[mddev->major_version]
4040 .load_super(rdev, rdev0, mddev->minor_version);
4041 if (err < 0)
4042 goto out;
4043 }
4044 } else if (mddev->external)
4045 rdev = md_import_device(dev, -2, -1);
4046 else
4047 rdev = md_import_device(dev, -1, -1);
4048
4049 if (IS_ERR(rdev))
4050 return PTR_ERR(rdev);
4051 err = bind_rdev_to_array(rdev, mddev);
4052 out:
4053 if (err)
4054 export_rdev(rdev);
4055 return err ? err : len;
4056}
4057
4058static struct md_sysfs_entry md_new_device =
4059__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
4060
4061static ssize_t
4062bitmap_store(struct mddev *mddev, const char *buf, size_t len)
4063{
4064 char *end;
4065 unsigned long chunk, end_chunk;
4066
4067 if (!mddev->bitmap)
4068 goto out;
4069
4070 while (*buf) {
4071 chunk = end_chunk = simple_strtoul(buf, &end, 0);
4072 if (buf == end) break;
4073 if (*end == '-') {
4074 buf = end + 1;
4075 end_chunk = simple_strtoul(buf, &end, 0);
4076 if (buf == end) break;
4077 }
4078 if (*end && !isspace(*end)) break;
4079 bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
4080 buf = skip_spaces(end);
4081 }
4082 bitmap_unplug(mddev->bitmap);
4083out:
4084 return len;
4085}
4086
4087static struct md_sysfs_entry md_bitmap =
4088__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
4089
4090static ssize_t
4091size_show(struct mddev *mddev, char *page)
4092{
4093 return sprintf(page, "%llu\n",
4094 (unsigned long long)mddev->dev_sectors / 2);
4095}
4096
4097static int update_size(struct mddev *mddev, sector_t num_sectors);
4098
4099static ssize_t
4100size_store(struct mddev *mddev, const char *buf, size_t len)
4101{
4102
4103
4104
4105
4106 sector_t sectors;
4107 int err = strict_blocks_to_sectors(buf, §ors);
4108
4109 if (err < 0)
4110 return err;
4111 if (mddev->pers) {
4112 err = update_size(mddev, sectors);
4113 md_update_sb(mddev, 1);
4114 } else {
4115 if (mddev->dev_sectors == 0 ||
4116 mddev->dev_sectors > sectors)
4117 mddev->dev_sectors = sectors;
4118 else
4119 err = -ENOSPC;
4120 }
4121 return err ? err : len;
4122}
4123
4124static struct md_sysfs_entry md_size =
4125__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
4126
4127
4128
4129
4130
4131
4132
4133
4134static ssize_t
4135metadata_show(struct mddev *mddev, char *page)
4136{
4137 if (mddev->persistent)
4138 return sprintf(page, "%d.%d\n",
4139 mddev->major_version, mddev->minor_version);
4140 else if (mddev->external)
4141 return sprintf(page, "external:%s\n", mddev->metadata_type);
4142 else
4143 return sprintf(page, "none\n");
4144}
4145
4146static ssize_t
4147metadata_store(struct mddev *mddev, const char *buf, size_t len)
4148{
4149 int major, minor;
4150 char *e;
4151
4152
4153
4154
4155 if (mddev->external && strncmp(buf, "external:", 9) == 0)
4156 ;
4157 else if (!list_empty(&mddev->disks))
4158 return -EBUSY;
4159
4160 if (cmd_match(buf, "none")) {
4161 mddev->persistent = 0;
4162 mddev->external = 0;
4163 mddev->major_version = 0;
4164 mddev->minor_version = 90;
4165 return len;
4166 }
4167 if (strncmp(buf, "external:", 9) == 0) {
4168 size_t namelen = len-9;
4169 if (namelen >= sizeof(mddev->metadata_type))
4170 namelen = sizeof(mddev->metadata_type)-1;
4171 strncpy(mddev->metadata_type, buf+9, namelen);
4172 mddev->metadata_type[namelen] = 0;
4173 if (namelen && mddev->metadata_type[namelen-1] == '\n')
4174 mddev->metadata_type[--namelen] = 0;
4175 mddev->persistent = 0;
4176 mddev->external = 1;
4177 mddev->major_version = 0;
4178 mddev->minor_version = 90;
4179 return len;
4180 }
4181 major = simple_strtoul(buf, &e, 10);
4182 if (e==buf || *e != '.')
4183 return -EINVAL;
4184 buf = e+1;
4185 minor = simple_strtoul(buf, &e, 10);
4186 if (e==buf || (*e && *e != '\n') )
4187 return -EINVAL;
4188 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
4189 return -ENOENT;
4190 mddev->major_version = major;
4191 mddev->minor_version = minor;
4192 mddev->persistent = 1;
4193 mddev->external = 0;
4194 return len;
4195}
4196
4197static struct md_sysfs_entry md_metadata =
4198__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
4199
4200static ssize_t
4201action_show(struct mddev *mddev, char *page)
4202{
4203 char *type = "idle";
4204 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
4205 type = "frozen";
4206 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4207 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
4208 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4209 type = "reshape";
4210 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
4211 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
4212 type = "resync";
4213 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
4214 type = "check";
4215 else
4216 type = "repair";
4217 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
4218 type = "recover";
4219 }
4220 return sprintf(page, "%s\n", type);
4221}
4222
4223static ssize_t
4224action_store(struct mddev *mddev, const char *page, size_t len)
4225{
4226 if (!mddev->pers || !mddev->pers->sync_request)
4227 return -EINVAL;
4228
4229 if (cmd_match(page, "frozen"))
4230 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4231 else
4232 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4233
4234 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4235 if (mddev->sync_thread) {
4236 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4237 md_reap_sync_thread(mddev);
4238 }
4239 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4240 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
4241 return -EBUSY;
4242 else if (cmd_match(page, "resync"))
4243 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4244 else if (cmd_match(page, "recover")) {
4245 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
4246 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4247 } else if (cmd_match(page, "reshape")) {
4248 int err;
4249 if (mddev->pers->start_reshape == NULL)
4250 return -EINVAL;
4251 err = mddev->pers->start_reshape(mddev);
4252 if (err)
4253 return err;
4254 sysfs_notify(&mddev->kobj, NULL, "degraded");
4255 } else {
4256 if (cmd_match(page, "check"))
4257 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4258 else if (!cmd_match(page, "repair"))
4259 return -EINVAL;
4260 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
4261 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4262 }
4263 if (mddev->ro == 2) {
4264
4265
4266
4267 mddev->ro = 0;
4268 md_wakeup_thread(mddev->sync_thread);
4269 }
4270 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4271 md_wakeup_thread(mddev->thread);
4272 sysfs_notify_dirent_safe(mddev->sysfs_action);
4273 return len;
4274}
4275
4276static struct md_sysfs_entry md_scan_mode =
4277__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4278
4279static ssize_t
4280last_sync_action_show(struct mddev *mddev, char *page)
4281{
4282 return sprintf(page, "%s\n", mddev->last_sync_action);
4283}
4284
4285static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
4286
4287static ssize_t
4288mismatch_cnt_show(struct mddev *mddev, char *page)
4289{
4290 return sprintf(page, "%llu\n",
4291 (unsigned long long)
4292 atomic64_read(&mddev->resync_mismatches));
4293}
4294
4295static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
4296
4297static ssize_t
4298sync_min_show(struct mddev *mddev, char *page)
4299{
4300 return sprintf(page, "%d (%s)\n", speed_min(mddev),
4301 mddev->sync_speed_min ? "local": "system");
4302}
4303
4304static ssize_t
4305sync_min_store(struct mddev *mddev, const char *buf, size_t len)
4306{
4307 int min;
4308 char *e;
4309 if (strncmp(buf, "system", 6)==0) {
4310 mddev->sync_speed_min = 0;
4311 return len;
4312 }
4313 min = simple_strtoul(buf, &e, 10);
4314 if (buf == e || (*e && *e != '\n') || min <= 0)
4315 return -EINVAL;
4316 mddev->sync_speed_min = min;
4317 return len;
4318}
4319
4320static struct md_sysfs_entry md_sync_min =
4321__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
4322
4323static ssize_t
4324sync_max_show(struct mddev *mddev, char *page)
4325{
4326 return sprintf(page, "%d (%s)\n", speed_max(mddev),
4327 mddev->sync_speed_max ? "local": "system");
4328}
4329
4330static ssize_t
4331sync_max_store(struct mddev *mddev, const char *buf, size_t len)
4332{
4333 int max;
4334 char *e;
4335 if (strncmp(buf, "system", 6)==0) {
4336 mddev->sync_speed_max = 0;
4337 return len;
4338 }
4339 max = simple_strtoul(buf, &e, 10);
4340 if (buf == e || (*e && *e != '\n') || max <= 0)
4341 return -EINVAL;
4342 mddev->sync_speed_max = max;
4343 return len;
4344}
4345
4346static struct md_sysfs_entry md_sync_max =
4347__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
4348
4349static ssize_t
4350degraded_show(struct mddev *mddev, char *page)
4351{
4352 return sprintf(page, "%d\n", mddev->degraded);
4353}
4354static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
4355
4356static ssize_t
4357sync_force_parallel_show(struct mddev *mddev, char *page)
4358{
4359 return sprintf(page, "%d\n", mddev->parallel_resync);
4360}
4361
4362static ssize_t
4363sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
4364{
4365 long n;
4366
4367 if (kstrtol(buf, 10, &n))
4368 return -EINVAL;
4369
4370 if (n != 0 && n != 1)
4371 return -EINVAL;
4372
4373 mddev->parallel_resync = n;
4374
4375 if (mddev->sync_thread)
4376 wake_up(&resync_wait);
4377
4378 return len;
4379}
4380
4381
4382static struct md_sysfs_entry md_sync_force_parallel =
4383__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
4384 sync_force_parallel_show, sync_force_parallel_store);
4385
4386static ssize_t
4387sync_speed_show(struct mddev *mddev, char *page)
4388{
4389 unsigned long resync, dt, db;
4390 if (mddev->curr_resync == 0)
4391 return sprintf(page, "none\n");
4392 resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
4393 dt = (jiffies - mddev->resync_mark) / HZ;
4394 if (!dt) dt++;
4395 db = resync - mddev->resync_mark_cnt;
4396 return sprintf(page, "%lu\n", db/dt/2);
4397}
4398
4399static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
4400
4401static ssize_t
4402sync_completed_show(struct mddev *mddev, char *page)
4403{
4404 unsigned long long max_sectors, resync;
4405
4406 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4407 return sprintf(page, "none\n");
4408
4409 if (mddev->curr_resync == 1 ||
4410 mddev->curr_resync == 2)
4411 return sprintf(page, "delayed\n");
4412
4413 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
4414 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
4415 max_sectors = mddev->resync_max_sectors;
4416 else
4417 max_sectors = mddev->dev_sectors;
4418
4419 resync = mddev->curr_resync_completed;
4420 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4421}
4422
4423static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
4424
4425static ssize_t
4426min_sync_show(struct mddev *mddev, char *page)
4427{
4428 return sprintf(page, "%llu\n",
4429 (unsigned long long)mddev->resync_min);
4430}
4431static ssize_t
4432min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4433{
4434 unsigned long long min;
4435 if (kstrtoull(buf, 10, &min))
4436 return -EINVAL;
4437 if (min > mddev->resync_max)
4438 return -EINVAL;
4439 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4440 return -EBUSY;
4441
4442
4443 if (mddev->chunk_sectors) {
4444 sector_t temp = min;
4445 if (sector_div(temp, mddev->chunk_sectors))
4446 return -EINVAL;
4447 }
4448 mddev->resync_min = min;
4449
4450 return len;
4451}
4452
4453static struct md_sysfs_entry md_min_sync =
4454__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
4455
4456static ssize_t
4457max_sync_show(struct mddev *mddev, char *page)
4458{
4459 if (mddev->resync_max == MaxSector)
4460 return sprintf(page, "max\n");
4461 else
4462 return sprintf(page, "%llu\n",
4463 (unsigned long long)mddev->resync_max);
4464}
4465static ssize_t
4466max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4467{
4468 if (strncmp(buf, "max", 3) == 0)
4469 mddev->resync_max = MaxSector;
4470 else {
4471 unsigned long long max;
4472 if (kstrtoull(buf, 10, &max))
4473 return -EINVAL;
4474 if (max < mddev->resync_min)
4475 return -EINVAL;
4476 if (max < mddev->resync_max &&
4477 mddev->ro == 0 &&
4478 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4479 return -EBUSY;
4480
4481
4482 if (mddev->chunk_sectors) {
4483 sector_t temp = max;
4484 if (sector_div(temp, mddev->chunk_sectors))
4485 return -EINVAL;
4486 }
4487 mddev->resync_max = max;
4488 }
4489 wake_up(&mddev->recovery_wait);
4490 return len;
4491}
4492
4493static struct md_sysfs_entry md_max_sync =
4494__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
4495
4496static ssize_t
4497suspend_lo_show(struct mddev *mddev, char *page)
4498{
4499 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
4500}
4501
4502static ssize_t
4503suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4504{
4505 char *e;
4506 unsigned long long new = simple_strtoull(buf, &e, 10);
4507 unsigned long long old = mddev->suspend_lo;
4508
4509 if (mddev->pers == NULL ||
4510 mddev->pers->quiesce == NULL)
4511 return -EINVAL;
4512 if (buf == e || (*e && *e != '\n'))
4513 return -EINVAL;
4514
4515 mddev->suspend_lo = new;
4516 if (new >= old)
4517
4518 mddev->pers->quiesce(mddev, 2);
4519 else {
4520
4521 mddev->pers->quiesce(mddev, 1);
4522 mddev->pers->quiesce(mddev, 0);
4523 }
4524 return len;
4525}
4526static struct md_sysfs_entry md_suspend_lo =
4527__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
4528
4529
4530static ssize_t
4531suspend_hi_show(struct mddev *mddev, char *page)
4532{
4533 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
4534}
4535
4536static ssize_t
4537suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4538{
4539 char *e;
4540 unsigned long long new = simple_strtoull(buf, &e, 10);
4541 unsigned long long old = mddev->suspend_hi;
4542
4543 if (mddev->pers == NULL ||
4544 mddev->pers->quiesce == NULL)
4545 return -EINVAL;
4546 if (buf == e || (*e && *e != '\n'))
4547 return -EINVAL;
4548
4549 mddev->suspend_hi = new;
4550 if (new <= old)
4551
4552 mddev->pers->quiesce(mddev, 2);
4553 else {
4554
4555 mddev->pers->quiesce(mddev, 1);
4556 mddev->pers->quiesce(mddev, 0);
4557 }
4558 return len;
4559}
4560static struct md_sysfs_entry md_suspend_hi =
4561__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
4562
4563static ssize_t
4564reshape_position_show(struct mddev *mddev, char *page)
4565{
4566 if (mddev->reshape_position != MaxSector)
4567 return sprintf(page, "%llu\n",
4568 (unsigned long long)mddev->reshape_position);
4569 strcpy(page, "none\n");
4570 return 5;
4571}
4572
4573static ssize_t
4574reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4575{
4576 struct md_rdev *rdev;
4577 char *e;
4578 unsigned long long new = simple_strtoull(buf, &e, 10);
4579 if (mddev->pers)
4580 return -EBUSY;
4581 if (buf == e || (*e && *e != '\n'))
4582 return -EINVAL;
4583 mddev->reshape_position = new;
4584 mddev->delta_disks = 0;
4585 mddev->reshape_backwards = 0;
4586 mddev->new_level = mddev->level;
4587 mddev->new_layout = mddev->layout;
4588 mddev->new_chunk_sectors = mddev->chunk_sectors;
4589 rdev_for_each(rdev, mddev)
4590 rdev->new_data_offset = rdev->data_offset;
4591 return len;
4592}
4593
4594static struct md_sysfs_entry md_reshape_position =
4595__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
4596 reshape_position_store);
4597
4598static ssize_t
4599reshape_direction_show(struct mddev *mddev, char *page)
4600{
4601 return sprintf(page, "%s\n",
4602 mddev->reshape_backwards ? "backwards" : "forwards");
4603}
4604
4605static ssize_t
4606reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4607{
4608 int backwards = 0;
4609 if (cmd_match(buf, "forwards"))
4610 backwards = 0;
4611 else if (cmd_match(buf, "backwards"))
4612 backwards = 1;
4613 else
4614 return -EINVAL;
4615 if (mddev->reshape_backwards == backwards)
4616 return len;
4617
4618
4619 if (mddev->delta_disks)
4620 return -EBUSY;
4621
4622 if (mddev->persistent &&
4623 mddev->major_version == 0)
4624 return -EINVAL;
4625
4626 mddev->reshape_backwards = backwards;
4627 return len;
4628}
4629
4630static struct md_sysfs_entry md_reshape_direction =
4631__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
4632 reshape_direction_store);
4633
4634static ssize_t
4635array_size_show(struct mddev *mddev, char *page)
4636{
4637 if (mddev->external_size)
4638 return sprintf(page, "%llu\n",
4639 (unsigned long long)mddev->array_sectors/2);
4640 else
4641 return sprintf(page, "default\n");
4642}
4643
4644static ssize_t
4645array_size_store(struct mddev *mddev, const char *buf, size_t len)
4646{
4647 sector_t sectors;
4648
4649 if (strncmp(buf, "default", 7) == 0) {
4650 if (mddev->pers)
4651 sectors = mddev->pers->size(mddev, 0, 0);
4652 else
4653 sectors = mddev->array_sectors;
4654
4655 mddev->external_size = 0;
4656 } else {
4657 if (strict_blocks_to_sectors(buf, §ors) < 0)
4658 return -EINVAL;
4659 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4660 return -E2BIG;
4661
4662 mddev->external_size = 1;
4663 }
4664
4665 mddev->array_sectors = sectors;
4666 if (mddev->pers) {
4667 set_capacity(mddev->gendisk, mddev->array_sectors);
4668 revalidate_disk(mddev->gendisk);
4669 }
4670 return len;
4671}
4672
4673static struct md_sysfs_entry md_array_size =
4674__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
4675 array_size_store);
4676
4677static struct attribute *md_default_attrs[] = {
4678 &md_level.attr,
4679 &md_layout.attr,
4680 &md_raid_disks.attr,
4681 &md_chunk_size.attr,
4682 &md_size.attr,
4683 &md_resync_start.attr,
4684 &md_metadata.attr,
4685 &md_new_device.attr,
4686 &md_safe_delay.attr,
4687 &md_array_state.attr,
4688 &md_reshape_position.attr,
4689 &md_reshape_direction.attr,
4690 &md_array_size.attr,
4691 &max_corr_read_errors.attr,
4692 NULL,
4693};
4694
4695static struct attribute *md_redundancy_attrs[] = {
4696 &md_scan_mode.attr,
4697 &md_last_scan_mode.attr,
4698 &md_mismatches.attr,
4699 &md_sync_min.attr,
4700 &md_sync_max.attr,
4701 &md_sync_speed.attr,
4702 &md_sync_force_parallel.attr,
4703 &md_sync_completed.attr,
4704 &md_min_sync.attr,
4705 &md_max_sync.attr,
4706 &md_suspend_lo.attr,
4707 &md_suspend_hi.attr,
4708 &md_bitmap.attr,
4709 &md_degraded.attr,
4710 NULL,
4711};
4712static struct attribute_group md_redundancy_group = {
4713 .name = NULL,
4714 .attrs = md_redundancy_attrs,
4715};
4716
4717
4718static ssize_t
4719md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4720{
4721 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4722 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4723 ssize_t rv;
4724
4725 if (!entry->show)
4726 return -EIO;
4727 spin_lock(&all_mddevs_lock);
4728 if (list_empty(&mddev->all_mddevs)) {
4729 spin_unlock(&all_mddevs_lock);
4730 return -EBUSY;
4731 }
4732 mddev_get(mddev);
4733 spin_unlock(&all_mddevs_lock);
4734
4735 rv = mddev_lock(mddev);
4736 if (!rv) {
4737 rv = entry->show(mddev, page);
4738 mddev_unlock(mddev);
4739 }
4740 mddev_put(mddev);
4741 return rv;
4742}
4743
4744static ssize_t
4745md_attr_store(struct kobject *kobj, struct attribute *attr,
4746 const char *page, size_t length)
4747{
4748 struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
4749 struct mddev *mddev = container_of(kobj, struct mddev, kobj);
4750 ssize_t rv;
4751
4752 if (!entry->store)
4753 return -EIO;
4754 if (!capable(CAP_SYS_ADMIN))
4755 return -EACCES;
4756 spin_lock(&all_mddevs_lock);
4757 if (list_empty(&mddev->all_mddevs)) {
4758 spin_unlock(&all_mddevs_lock);
4759 return -EBUSY;
4760 }
4761 mddev_get(mddev);
4762 spin_unlock(&all_mddevs_lock);
4763 if (entry->store == new_dev_store)
4764 flush_workqueue(md_misc_wq);
4765 rv = mddev_lock(mddev);
4766 if (!rv) {
4767 rv = entry->store(mddev, page, length);
4768 mddev_unlock(mddev);
4769 }
4770 mddev_put(mddev);
4771 return rv;
4772}
4773
4774static void md_free(struct kobject *ko)
4775{
4776 struct mddev *mddev = container_of(ko, struct mddev, kobj);
4777
4778 if (mddev->sysfs_state)
4779 sysfs_put(mddev->sysfs_state);
4780
4781 if (mddev->gendisk) {
4782 del_gendisk(mddev->gendisk);
4783 put_disk(mddev->gendisk);
4784 }
4785 if (mddev->queue)
4786 blk_cleanup_queue(mddev->queue);
4787
4788 kfree(mddev);
4789}
4790
4791static const struct sysfs_ops md_sysfs_ops = {
4792 .show = md_attr_show,
4793 .store = md_attr_store,
4794};
4795static struct kobj_type md_ktype = {
4796 .release = md_free,
4797 .sysfs_ops = &md_sysfs_ops,
4798 .default_attrs = md_default_attrs,
4799};
4800
4801int mdp_major = 0;
4802
4803static void mddev_delayed_delete(struct work_struct *ws)
4804{
4805 struct mddev *mddev = container_of(ws, struct mddev, del_work);
4806
4807 sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
4808 kobject_del(&mddev->kobj);
4809 kobject_put(&mddev->kobj);
4810}
4811
4812static int md_alloc(dev_t dev, char *name)
4813{
4814 static DEFINE_MUTEX(disks_mutex);
4815 struct mddev *mddev = mddev_find(dev);
4816 struct gendisk *disk;
4817 int partitioned;
4818 int shift;
4819 int unit;
4820 int error;
4821
4822 if (!mddev)
4823 return -ENODEV;
4824
4825 partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
4826 shift = partitioned ? MdpMinorShift : 0;
4827 unit = MINOR(mddev->unit) >> shift;
4828
4829
4830
4831
4832 flush_workqueue(md_misc_wq);
4833
4834 mutex_lock(&disks_mutex);
4835 error = -EEXIST;
4836 if (mddev->gendisk)
4837 goto abort;
4838
4839 if (name) {
4840
4841
4842 struct mddev *mddev2;
4843 spin_lock(&all_mddevs_lock);
4844
4845 list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
4846 if (mddev2->gendisk &&
4847 strcmp(mddev2->gendisk->disk_name, name) == 0) {
4848 spin_unlock(&all_mddevs_lock);
4849 goto abort;
4850 }
4851 spin_unlock(&all_mddevs_lock);
4852 }
4853
4854 error = -ENOMEM;
4855 mddev->queue = blk_alloc_queue(GFP_KERNEL);
4856 if (!mddev->queue)
4857 goto abort;
4858 mddev->queue->queuedata = mddev;
4859
4860 blk_queue_make_request(mddev->queue, md_make_request);
4861 blk_set_stacking_limits(&mddev->queue->limits);
4862
4863 disk = alloc_disk(1 << shift);
4864 if (!disk) {
4865 blk_cleanup_queue(mddev->queue);
4866 mddev->queue = NULL;
4867 goto abort;
4868 }
4869 disk->major = MAJOR(mddev->unit);
4870 disk->first_minor = unit << shift;
4871 if (name)
4872 strcpy(disk->disk_name, name);
4873 else if (partitioned)
4874 sprintf(disk->disk_name, "md_d%d", unit);
4875 else
4876 sprintf(disk->disk_name, "md%d", unit);
4877 disk->fops = &md_fops;
4878 disk->private_data = mddev;
4879 disk->queue = mddev->queue;
4880 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4881
4882
4883
4884
4885 disk->flags |= GENHD_FL_EXT_DEVT;
4886 mddev->gendisk = disk;
4887
4888
4889
4890 mutex_lock(&mddev->open_mutex);
4891 add_disk(disk);
4892
4893 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4894 &disk_to_dev(disk)->kobj, "%s", "md");
4895 if (error) {
4896
4897
4898
4899 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
4900 disk->disk_name);
4901 error = 0;
4902 }
4903 if (mddev->kobj.sd &&
4904 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4905 printk(KERN_DEBUG "pointless warning\n");
4906 mutex_unlock(&mddev->open_mutex);
4907 abort:
4908 mutex_unlock(&disks_mutex);
4909 if (!error && mddev->kobj.sd) {
4910 kobject_uevent(&mddev->kobj, KOBJ_ADD);
4911 mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
4912 }
4913 mddev_put(mddev);
4914 return error;
4915}
4916
4917static struct kobject *md_probe(dev_t dev, int *part, void *data)
4918{
4919 md_alloc(dev, NULL);
4920 return NULL;
4921}
4922
4923static int add_named_array(const char *val, struct kernel_param *kp)
4924{
4925
4926
4927
4928
4929 int len = strlen(val);
4930 char buf[DISK_NAME_LEN];
4931
4932 while (len && val[len-1] == '\n')
4933 len--;
4934 if (len >= DISK_NAME_LEN)
4935 return -E2BIG;
4936 strlcpy(buf, val, len+1);
4937 if (strncmp(buf, "md_", 3) != 0)
4938 return -EINVAL;
4939 return md_alloc(0, buf);
4940}
4941
4942static void md_safemode_timeout(unsigned long data)
4943{
4944 struct mddev *mddev = (struct mddev *) data;
4945
4946 if (!atomic_read(&mddev->writes_pending)) {
4947 mddev->safemode = 1;
4948 if (mddev->external)
4949 sysfs_notify_dirent_safe(mddev->sysfs_state);
4950 }
4951 md_wakeup_thread(mddev->thread);
4952}
4953
4954static int start_dirty_degraded;
4955
4956int md_run(struct mddev *mddev)
4957{
4958 int err;
4959 struct md_rdev *rdev;
4960 struct md_personality *pers;
4961
4962 if (list_empty(&mddev->disks))
4963
4964 return -EINVAL;
4965
4966 if (mddev->pers)
4967 return -EBUSY;
4968
4969 if (mddev->sysfs_active)
4970 return -EBUSY;
4971
4972
4973
4974
4975 if (!mddev->raid_disks) {
4976 if (!mddev->persistent)
4977 return -EINVAL;
4978 analyze_sbs(mddev);
4979 }
4980
4981 if (mddev->level != LEVEL_NONE)
4982 request_module("md-level-%d", mddev->level);
4983 else if (mddev->clevel[0])
4984 request_module("md-%s", mddev->clevel);
4985
4986
4987
4988
4989
4990
4991 rdev_for_each(rdev, mddev) {
4992 if (test_bit(Faulty, &rdev->flags))
4993 continue;
4994 sync_blockdev(rdev->bdev);
4995 invalidate_bdev(rdev->bdev);
4996
4997
4998
4999
5000
5001 if (rdev->meta_bdev) {
5002 ;
5003 } else if (rdev->data_offset < rdev->sb_start) {
5004 if (mddev->dev_sectors &&
5005 rdev->data_offset + mddev->dev_sectors
5006 > rdev->sb_start) {
5007 printk("md: %s: data overlaps metadata\n",
5008 mdname(mddev));
5009 return -EINVAL;
5010 }
5011 } else {
5012 if (rdev->sb_start + rdev->sb_size/512
5013 > rdev->data_offset) {
5014 printk("md: %s: metadata overlaps data\n",
5015 mdname(mddev));
5016 return -EINVAL;
5017 }
5018 }
5019 sysfs_notify_dirent_safe(rdev->sysfs_state);
5020 }
5021
5022 if (mddev->bio_set == NULL)
5023 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5024
5025 spin_lock(&pers_lock);
5026 pers = find_pers(mddev->level, mddev->clevel);
5027 if (!pers || !try_module_get(pers->owner)) {
5028 spin_unlock(&pers_lock);
5029 if (mddev->level != LEVEL_NONE)
5030 printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
5031 mddev->level);
5032 else
5033 printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
5034 mddev->clevel);
5035 return -EINVAL;
5036 }
5037 mddev->pers = pers;
5038 spin_unlock(&pers_lock);
5039 if (mddev->level != pers->level) {
5040 mddev->level = pers->level;
5041 mddev->new_level = pers->level;
5042 }
5043 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
5044
5045 if (mddev->reshape_position != MaxSector &&
5046 pers->start_reshape == NULL) {
5047
5048 mddev->pers = NULL;
5049 module_put(pers->owner);
5050 return -EINVAL;
5051 }
5052
5053 if (pers->sync_request) {
5054
5055
5056
5057 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5058 struct md_rdev *rdev2;
5059 int warned = 0;
5060
5061 rdev_for_each(rdev, mddev)
5062 rdev_for_each(rdev2, mddev) {
5063 if (rdev < rdev2 &&
5064 rdev->bdev->bd_contains ==
5065 rdev2->bdev->bd_contains) {
5066 printk(KERN_WARNING
5067 "%s: WARNING: %s appears to be"
5068 " on the same physical disk as"
5069 " %s.\n",
5070 mdname(mddev),
5071 bdevname(rdev->bdev,b),
5072 bdevname(rdev2->bdev,b2));
5073 warned = 1;
5074 }
5075 }
5076
5077 if (warned)
5078 printk(KERN_WARNING
5079 "True protection against single-disk"
5080 " failure might be compromised.\n");
5081 }
5082
5083 mddev->recovery = 0;
5084
5085 mddev->resync_max_sectors = mddev->dev_sectors;
5086
5087 mddev->ok_start_degraded = start_dirty_degraded;
5088
5089 if (start_readonly && mddev->ro == 0)
5090 mddev->ro = 2;
5091
5092 err = mddev->pers->run(mddev);
5093 if (err)
5094 printk(KERN_ERR "md: pers->run() failed ...\n");
5095 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
5096 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
5097 " but 'external_size' not in effect?\n", __func__);
5098 printk(KERN_ERR
5099 "md: invalid array_size %llu > default size %llu\n",
5100 (unsigned long long)mddev->array_sectors / 2,
5101 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
5102 err = -EINVAL;
5103 mddev->pers->stop(mddev);
5104 }
5105 if (err == 0 && mddev->pers->sync_request &&
5106 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
5107 err = bitmap_create(mddev);
5108 if (err) {
5109 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
5110 mdname(mddev), err);
5111 mddev->pers->stop(mddev);
5112 }
5113 }
5114 if (err) {
5115 module_put(mddev->pers->owner);
5116 mddev->pers = NULL;
5117 bitmap_destroy(mddev);
5118 return err;
5119 }
5120 if (mddev->pers->sync_request) {
5121 if (mddev->kobj.sd &&
5122 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
5123 printk(KERN_WARNING
5124 "md: cannot register extra attributes for %s\n",
5125 mdname(mddev));
5126 mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
5127 } else if (mddev->ro == 2)
5128 mddev->ro = 0;
5129
5130 atomic_set(&mddev->writes_pending,0);
5131 atomic_set(&mddev->max_corr_read_errors,
5132 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
5133 mddev->safemode = 0;
5134 mddev->safemode_timer.function = md_safemode_timeout;
5135 mddev->safemode_timer.data = (unsigned long) mddev;
5136 mddev->safemode_delay = (200 * HZ)/1000 +1;
5137 mddev->in_sync = 1;
5138 smp_wmb();
5139 mddev->ready = 1;
5140 rdev_for_each(rdev, mddev)
5141 if (rdev->raid_disk >= 0)
5142 if (sysfs_link_rdev(mddev, rdev))
5143 ;
5144
5145 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5146
5147 if (mddev->flags & MD_UPDATE_SB_FLAGS)
5148 md_update_sb(mddev, 0);
5149
5150 md_new_event(mddev);
5151 sysfs_notify_dirent_safe(mddev->sysfs_state);
5152 sysfs_notify_dirent_safe(mddev->sysfs_action);
5153 sysfs_notify(&mddev->kobj, NULL, "degraded");
5154 return 0;
5155}
5156EXPORT_SYMBOL_GPL(md_run);
5157
5158static int do_md_run(struct mddev *mddev)
5159{
5160 int err;
5161
5162 err = md_run(mddev);
5163 if (err)
5164 goto out;
5165 err = bitmap_load(mddev);
5166 if (err) {
5167 bitmap_destroy(mddev);
5168 goto out;
5169 }
5170
5171 md_wakeup_thread(mddev->thread);
5172 md_wakeup_thread(mddev->sync_thread);
5173
5174 set_capacity(mddev->gendisk, mddev->array_sectors);
5175 revalidate_disk(mddev->gendisk);
5176 mddev->changed = 1;
5177 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5178out:
5179 return err;
5180}
5181
5182static int restart_array(struct mddev *mddev)
5183{
5184 struct gendisk *disk = mddev->gendisk;
5185
5186
5187 if (list_empty(&mddev->disks))
5188 return -ENXIO;
5189 if (!mddev->pers)
5190 return -EINVAL;
5191 if (!mddev->ro)
5192 return -EBUSY;
5193 mddev->safemode = 0;
5194 mddev->ro = 0;
5195 set_disk_ro(disk, 0);
5196 printk(KERN_INFO "md: %s switched to read-write mode.\n",
5197 mdname(mddev));
5198
5199 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5200 md_wakeup_thread(mddev->thread);
5201 md_wakeup_thread(mddev->sync_thread);
5202 sysfs_notify_dirent_safe(mddev->sysfs_state);
5203 return 0;
5204}
5205
5206
5207
5208static int deny_bitmap_write_access(struct file * file)
5209{
5210 struct inode *inode = file->f_mapping->host;
5211
5212 spin_lock(&inode->i_lock);
5213 if (atomic_read(&inode->i_writecount) > 1) {
5214 spin_unlock(&inode->i_lock);
5215 return -ETXTBSY;
5216 }
5217 atomic_set(&inode->i_writecount, -1);
5218 spin_unlock(&inode->i_lock);
5219
5220 return 0;
5221}
5222
5223void restore_bitmap_write_access(struct file *file)
5224{
5225 struct inode *inode = file->f_mapping->host;
5226
5227 spin_lock(&inode->i_lock);
5228 atomic_set(&inode->i_writecount, 1);
5229 spin_unlock(&inode->i_lock);
5230}
5231
5232static void md_clean(struct mddev *mddev)
5233{
5234 mddev->array_sectors = 0;
5235 mddev->external_size = 0;
5236 mddev->dev_sectors = 0;
5237 mddev->raid_disks = 0;
5238 mddev->recovery_cp = 0;
5239 mddev->resync_min = 0;
5240 mddev->resync_max = MaxSector;
5241 mddev->reshape_position = MaxSector;
5242 mddev->external = 0;
5243 mddev->persistent = 0;
5244 mddev->level = LEVEL_NONE;
5245 mddev->clevel[0] = 0;
5246 mddev->flags = 0;
5247 mddev->ro = 0;
5248 mddev->metadata_type[0] = 0;
5249 mddev->chunk_sectors = 0;
5250 mddev->ctime = mddev->utime = 0;
5251 mddev->layout = 0;
5252 mddev->max_disks = 0;
5253 mddev->events = 0;
5254 mddev->can_decrease_events = 0;
5255 mddev->delta_disks = 0;
5256 mddev->reshape_backwards = 0;
5257 mddev->new_level = LEVEL_NONE;
5258 mddev->new_layout = 0;
5259 mddev->new_chunk_sectors = 0;
5260 mddev->curr_resync = 0;
5261 atomic64_set(&mddev->resync_mismatches, 0);
5262 mddev->suspend_lo = mddev->suspend_hi = 0;
5263 mddev->sync_speed_min = mddev->sync_speed_max = 0;
5264 mddev->recovery = 0;
5265 mddev->in_sync = 0;
5266 mddev->changed = 0;
5267 mddev->degraded = 0;
5268 mddev->safemode = 0;
5269 mddev->merge_check_needed = 0;
5270 mddev->bitmap_info.offset = 0;
5271 mddev->bitmap_info.default_offset = 0;
5272 mddev->bitmap_info.default_space = 0;
5273 mddev->bitmap_info.chunksize = 0;
5274 mddev->bitmap_info.daemon_sleep = 0;
5275 mddev->bitmap_info.max_write_behind = 0;
5276}
5277
5278static void __md_stop_writes(struct mddev *mddev)
5279{
5280 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5281 if (mddev->sync_thread) {
5282 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5283 md_reap_sync_thread(mddev);
5284 }
5285
5286 del_timer_sync(&mddev->safemode_timer);
5287
5288 bitmap_flush(mddev);
5289 md_super_wait(mddev);
5290
5291 if (mddev->ro == 0 &&
5292 (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
5293
5294 mddev->in_sync = 1;
5295 md_update_sb(mddev, 1);
5296 }
5297}
5298
5299void md_stop_writes(struct mddev *mddev)
5300{
5301 mddev_lock(mddev);
5302 __md_stop_writes(mddev);
5303 mddev_unlock(mddev);
5304}
5305EXPORT_SYMBOL_GPL(md_stop_writes);
5306
5307static void __md_stop(struct mddev *mddev)
5308{
5309 mddev->ready = 0;
5310 mddev->pers->stop(mddev);
5311 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5312 mddev->to_remove = &md_redundancy_group;
5313 module_put(mddev->pers->owner);
5314 mddev->pers = NULL;
5315 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5316}
5317
5318void md_stop(struct mddev *mddev)
5319{
5320
5321
5322
5323 __md_stop(mddev);
5324 bitmap_destroy(mddev);
5325 if (mddev->bio_set)
5326 bioset_free(mddev->bio_set);
5327}
5328
5329EXPORT_SYMBOL_GPL(md_stop);
5330
5331static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
5332{
5333 int err = 0;
5334 mutex_lock(&mddev->open_mutex);
5335 if (atomic_read(&mddev->openers) > !!bdev) {
5336 printk("md: %s still in use.\n",mdname(mddev));
5337 err = -EBUSY;
5338 goto out;
5339 }
5340 if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
5341
5342
5343
5344
5345 mutex_unlock(&mddev->open_mutex);
5346 return -EBUSY;
5347 }
5348 if (mddev->pers) {
5349 __md_stop_writes(mddev);
5350
5351 err = -ENXIO;
5352 if (mddev->ro==1)
5353 goto out;
5354 mddev->ro = 1;
5355 set_disk_ro(mddev->gendisk, 1);
5356 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5357 sysfs_notify_dirent_safe(mddev->sysfs_state);
5358 err = 0;
5359 }
5360out:
5361 mutex_unlock(&mddev->open_mutex);
5362 return err;
5363}
5364
5365
5366
5367
5368
5369static int do_md_stop(struct mddev * mddev, int mode,
5370 struct block_device *bdev)
5371{
5372 struct gendisk *disk = mddev->gendisk;
5373 struct md_rdev *rdev;
5374
5375 mutex_lock(&mddev->open_mutex);
5376 if (atomic_read(&mddev->openers) > !!bdev ||
5377 mddev->sysfs_active) {
5378 printk("md: %s still in use.\n",mdname(mddev));
5379 mutex_unlock(&mddev->open_mutex);
5380 return -EBUSY;
5381 }
5382 if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
5383
5384
5385
5386
5387 mutex_unlock(&mddev->open_mutex);
5388 return -EBUSY;
5389 }
5390 if (mddev->pers) {
5391 if (mddev->ro)
5392 set_disk_ro(disk, 0);
5393
5394 __md_stop_writes(mddev);
5395 __md_stop(mddev);
5396 mddev->queue->merge_bvec_fn = NULL;
5397 mddev->queue->backing_dev_info.congested_fn = NULL;
5398
5399
5400 sysfs_notify_dirent_safe(mddev->sysfs_state);
5401
5402 rdev_for_each(rdev, mddev)
5403 if (rdev->raid_disk >= 0)
5404 sysfs_unlink_rdev(mddev, rdev);
5405
5406 set_capacity(disk, 0);
5407 mutex_unlock(&mddev->open_mutex);
5408 mddev->changed = 1;
5409 revalidate_disk(disk);
5410
5411 if (mddev->ro)
5412 mddev->ro = 0;
5413 } else
5414 mutex_unlock(&mddev->open_mutex);
5415
5416
5417
5418 if (mode == 0) {
5419 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
5420
5421 bitmap_destroy(mddev);
5422 if (mddev->bitmap_info.file) {
5423 restore_bitmap_write_access(mddev->bitmap_info.file);
5424 fput(mddev->bitmap_info.file);
5425 mddev->bitmap_info.file = NULL;
5426 }
5427 mddev->bitmap_info.offset = 0;
5428
5429 export_array(mddev);
5430
5431 md_clean(mddev);
5432 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
5433 if (mddev->hold_active == UNTIL_STOP)
5434 mddev->hold_active = 0;
5435 }
5436 blk_integrity_unregister(disk);
5437 md_new_event(mddev);
5438 sysfs_notify_dirent_safe(mddev->sysfs_state);
5439 return 0;
5440}
5441
5442#ifndef MODULE
5443static void autorun_array(struct mddev *mddev)
5444{
5445 struct md_rdev *rdev;
5446 int err;
5447
5448 if (list_empty(&mddev->disks))
5449 return;
5450
5451 printk(KERN_INFO "md: running: ");
5452
5453 rdev_for_each(rdev, mddev) {
5454 char b[BDEVNAME_SIZE];
5455 printk("<%s>", bdevname(rdev->bdev,b));
5456 }
5457 printk("\n");
5458
5459 err = do_md_run(mddev);
5460 if (err) {
5461 printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
5462 do_md_stop(mddev, 0, NULL);
5463 }
5464}
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478static void autorun_devices(int part)
5479{
5480 struct md_rdev *rdev0, *rdev, *tmp;
5481 struct mddev *mddev;
5482 char b[BDEVNAME_SIZE];
5483
5484 printk(KERN_INFO "md: autorun ...\n");
5485 while (!list_empty(&pending_raid_disks)) {
5486 int unit;
5487 dev_t dev;
5488 LIST_HEAD(candidates);
5489 rdev0 = list_entry(pending_raid_disks.next,
5490 struct md_rdev, same_set);
5491
5492 printk(KERN_INFO "md: considering %s ...\n",
5493 bdevname(rdev0->bdev,b));
5494 INIT_LIST_HEAD(&candidates);
5495 rdev_for_each_list(rdev, tmp, &pending_raid_disks)
5496 if (super_90_load(rdev, rdev0, 0) >= 0) {
5497 printk(KERN_INFO "md: adding %s ...\n",
5498 bdevname(rdev->bdev,b));
5499 list_move(&rdev->same_set, &candidates);
5500 }
5501
5502
5503
5504
5505
5506 if (part) {
5507 dev = MKDEV(mdp_major,
5508 rdev0->preferred_minor << MdpMinorShift);
5509 unit = MINOR(dev) >> MdpMinorShift;
5510 } else {
5511 dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
5512 unit = MINOR(dev);
5513 }
5514 if (rdev0->preferred_minor != unit) {
5515 printk(KERN_INFO "md: unit number in %s is bad: %d\n",
5516 bdevname(rdev0->bdev, b), rdev0->preferred_minor);
5517 break;
5518 }
5519
5520 md_probe(dev, NULL, NULL);
5521 mddev = mddev_find(dev);
5522 if (!mddev || !mddev->gendisk) {
5523 if (mddev)
5524 mddev_put(mddev);
5525 printk(KERN_ERR
5526 "md: cannot allocate memory for md drive.\n");
5527 break;
5528 }
5529 if (mddev_lock(mddev))
5530 printk(KERN_WARNING "md: %s locked, cannot run\n",
5531 mdname(mddev));
5532 else if (mddev->raid_disks || mddev->major_version
5533 || !list_empty(&mddev->disks)) {
5534 printk(KERN_WARNING
5535 "md: %s already running, cannot run %s\n",
5536 mdname(mddev), bdevname(rdev0->bdev,b));
5537 mddev_unlock(mddev);
5538 } else {
5539 printk(KERN_INFO "md: created %s\n", mdname(mddev));
5540 mddev->persistent = 1;
5541 rdev_for_each_list(rdev, tmp, &candidates) {
5542 list_del_init(&rdev->same_set);
5543 if (bind_rdev_to_array(rdev, mddev))
5544 export_rdev(rdev);
5545 }
5546 autorun_array(mddev);
5547 mddev_unlock(mddev);
5548 }
5549
5550
5551
5552 rdev_for_each_list(rdev, tmp, &candidates) {
5553 list_del_init(&rdev->same_set);
5554 export_rdev(rdev);
5555 }
5556 mddev_put(mddev);
5557 }
5558 printk(KERN_INFO "md: ... autorun DONE.\n");
5559}
5560#endif
5561
5562static int get_version(void __user * arg)
5563{
5564 mdu_version_t ver;
5565
5566 ver.major = MD_MAJOR_VERSION;
5567 ver.minor = MD_MINOR_VERSION;
5568 ver.patchlevel = MD_PATCHLEVEL_VERSION;
5569
5570 if (copy_to_user(arg, &ver, sizeof(ver)))
5571 return -EFAULT;
5572
5573 return 0;
5574}
5575
5576static int get_array_info(struct mddev * mddev, void __user * arg)
5577{
5578 mdu_array_info_t info;
5579 int nr,working,insync,failed,spare;
5580 struct md_rdev *rdev;
5581
5582 nr = working = insync = failed = spare = 0;
5583 rcu_read_lock();
5584 rdev_for_each_rcu(rdev, mddev) {
5585 nr++;
5586 if (test_bit(Faulty, &rdev->flags))
5587 failed++;
5588 else {
5589 working++;
5590 if (test_bit(In_sync, &rdev->flags))
5591 insync++;
5592 else
5593 spare++;
5594 }
5595 }
5596 rcu_read_unlock();
5597
5598 info.major_version = mddev->major_version;
5599 info.minor_version = mddev->minor_version;
5600 info.patch_version = MD_PATCHLEVEL_VERSION;
5601 info.ctime = mddev->ctime;
5602 info.level = mddev->level;
5603 info.size = mddev->dev_sectors / 2;
5604 if (info.size != mddev->dev_sectors / 2)
5605 info.size = -1;
5606 info.nr_disks = nr;
5607 info.raid_disks = mddev->raid_disks;
5608 info.md_minor = mddev->md_minor;
5609 info.not_persistent= !mddev->persistent;
5610
5611 info.utime = mddev->utime;
5612 info.state = 0;
5613 if (mddev->in_sync)
5614 info.state = (1<<MD_SB_CLEAN);
5615 if (mddev->bitmap && mddev->bitmap_info.offset)
5616 info.state = (1<<MD_SB_BITMAP_PRESENT);
5617 info.active_disks = insync;
5618 info.working_disks = working;
5619 info.failed_disks = failed;
5620 info.spare_disks = spare;
5621
5622 info.layout = mddev->layout;
5623 info.chunk_size = mddev->chunk_sectors << 9;
5624
5625 if (copy_to_user(arg, &info, sizeof(info)))
5626 return -EFAULT;
5627
5628 return 0;
5629}
5630
5631static int get_bitmap_file(struct mddev * mddev, void __user * arg)
5632{
5633 mdu_bitmap_file_t *file = NULL;
5634 char *ptr, *buf = NULL;
5635 int err = -ENOMEM;
5636
5637 file = kmalloc(sizeof(*file), GFP_NOIO);
5638
5639 if (!file)
5640 goto out;
5641
5642
5643 if (!mddev->bitmap || !mddev->bitmap->storage.file) {
5644 file->pathname[0] = '\0';
5645 goto copy_out;
5646 }
5647
5648 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
5649 if (!buf)
5650 goto out;
5651
5652 ptr = d_path(&mddev->bitmap->storage.file->f_path,
5653 buf, sizeof(file->pathname));
5654 if (IS_ERR(ptr))
5655 goto out;
5656
5657 strcpy(file->pathname, ptr);
5658
5659copy_out:
5660 err = 0;
5661 if (copy_to_user(arg, file, sizeof(*file)))
5662 err = -EFAULT;
5663out:
5664 kfree(buf);
5665 kfree(file);
5666 return err;
5667}
5668
5669static int get_disk_info(struct mddev * mddev, void __user * arg)
5670{
5671 mdu_disk_info_t info;
5672 struct md_rdev *rdev;
5673
5674 if (copy_from_user(&info, arg, sizeof(info)))
5675 return -EFAULT;
5676
5677 rcu_read_lock();
5678 rdev = find_rdev_nr_rcu(mddev, info.number);
5679 if (rdev) {
5680 info.major = MAJOR(rdev->bdev->bd_dev);
5681 info.minor = MINOR(rdev->bdev->bd_dev);
5682 info.raid_disk = rdev->raid_disk;
5683 info.state = 0;
5684 if (test_bit(Faulty, &rdev->flags))
5685 info.state |= (1<<MD_DISK_FAULTY);
5686 else if (test_bit(In_sync, &rdev->flags)) {
5687 info.state |= (1<<MD_DISK_ACTIVE);
5688 info.state |= (1<<MD_DISK_SYNC);
5689 }
5690 if (test_bit(WriteMostly, &rdev->flags))
5691 info.state |= (1<<MD_DISK_WRITEMOSTLY);
5692 } else {
5693 info.major = info.minor = 0;
5694 info.raid_disk = -1;
5695 info.state = (1<<MD_DISK_REMOVED);
5696 }
5697 rcu_read_unlock();
5698
5699 if (copy_to_user(arg, &info, sizeof(info)))
5700 return -EFAULT;
5701
5702 return 0;
5703}
5704
5705static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5706{
5707 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
5708 struct md_rdev *rdev;
5709 dev_t dev = MKDEV(info->major,info->minor);
5710
5711 if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
5712 return -EOVERFLOW;
5713
5714 if (!mddev->raid_disks) {
5715 int err;
5716
5717 rdev = md_import_device(dev, mddev->major_version, mddev->minor_version);
5718 if (IS_ERR(rdev)) {
5719 printk(KERN_WARNING
5720 "md: md_import_device returned %ld\n",
5721 PTR_ERR(rdev));
5722 return PTR_ERR(rdev);
5723 }
5724 if (!list_empty(&mddev->disks)) {
5725 struct md_rdev *rdev0
5726 = list_entry(mddev->disks.next,
5727 struct md_rdev, same_set);
5728 err = super_types[mddev->major_version]
5729 .load_super(rdev, rdev0, mddev->minor_version);
5730 if (err < 0) {
5731 printk(KERN_WARNING
5732 "md: %s has different UUID to %s\n",
5733 bdevname(rdev->bdev,b),
5734 bdevname(rdev0->bdev,b2));
5735 export_rdev(rdev);
5736 return -EINVAL;
5737 }
5738 }
5739 err = bind_rdev_to_array(rdev, mddev);
5740 if (err)
5741 export_rdev(rdev);
5742 return err;
5743 }
5744
5745
5746
5747
5748
5749
5750 if (mddev->pers) {
5751 int err;
5752 if (!mddev->pers->hot_add_disk) {
5753 printk(KERN_WARNING
5754 "%s: personality does not support diskops!\n",
5755 mdname(mddev));
5756 return -EINVAL;
5757 }
5758 if (mddev->persistent)
5759 rdev = md_import_device(dev, mddev->major_version,
5760 mddev->minor_version);
5761 else
5762 rdev = md_import_device(dev, -1, -1);
5763 if (IS_ERR(rdev)) {
5764 printk(KERN_WARNING
5765 "md: md_import_device returned %ld\n",
5766 PTR_ERR(rdev));
5767 return PTR_ERR(rdev);
5768 }
5769
5770 if (!mddev->persistent) {
5771 if (info->state & (1<<MD_DISK_SYNC) &&
5772 info->raid_disk < mddev->raid_disks) {
5773 rdev->raid_disk = info->raid_disk;
5774 set_bit(In_sync, &rdev->flags);
5775 } else
5776 rdev->raid_disk = -1;
5777 } else
5778 super_types[mddev->major_version].
5779 validate_super(mddev, rdev);
5780 if ((info->state & (1<<MD_DISK_SYNC)) &&
5781 rdev->raid_disk != info->raid_disk) {
5782
5783
5784
5785 export_rdev(rdev);
5786 return -EINVAL;
5787 }
5788
5789 if (test_bit(In_sync, &rdev->flags))
5790 rdev->saved_raid_disk = rdev->raid_disk;
5791 else
5792 rdev->saved_raid_disk = -1;
5793
5794 clear_bit(In_sync, &rdev->flags);
5795 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5796 set_bit(WriteMostly, &rdev->flags);
5797 else
5798 clear_bit(WriteMostly, &rdev->flags);
5799
5800 rdev->raid_disk = -1;
5801 err = bind_rdev_to_array(rdev, mddev);
5802 if (!err && !mddev->pers->hot_remove_disk) {
5803
5804
5805
5806
5807 super_types[mddev->major_version].
5808 validate_super(mddev, rdev);
5809 err = mddev->pers->hot_add_disk(mddev, rdev);
5810 if (err)
5811 unbind_rdev_from_array(rdev);
5812 }
5813 if (err)
5814 export_rdev(rdev);
5815 else
5816 sysfs_notify_dirent_safe(rdev->sysfs_state);
5817
5818 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5819 if (mddev->degraded)
5820 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5821 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5822 if (!err)
5823 md_new_event(mddev);
5824 md_wakeup_thread(mddev->thread);
5825 return err;
5826 }
5827
5828
5829
5830
5831 if (mddev->major_version != 0) {
5832 printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n",
5833 mdname(mddev));
5834 return -EINVAL;
5835 }
5836
5837 if (!(info->state & (1<<MD_DISK_FAULTY))) {
5838 int err;
5839 rdev = md_import_device(dev, -1, 0);
5840 if (IS_ERR(rdev)) {
5841 printk(KERN_WARNING
5842 "md: error, md_import_device() returned %ld\n",
5843 PTR_ERR(rdev));
5844 return PTR_ERR(rdev);
5845 }
5846 rdev->desc_nr = info->number;
5847 if (info->raid_disk < mddev->raid_disks)
5848 rdev->raid_disk = info->raid_disk;
5849 else
5850 rdev->raid_disk = -1;
5851
5852 if (rdev->raid_disk < mddev->raid_disks)
5853 if (info->state & (1<<MD_DISK_SYNC))
5854 set_bit(In_sync, &rdev->flags);
5855
5856 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5857 set_bit(WriteMostly, &rdev->flags);
5858
5859 if (!mddev->persistent) {
5860 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5861 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5862 } else
5863 rdev->sb_start = calc_dev_sboffset(rdev);
5864 rdev->sectors = rdev->sb_start;
5865
5866 err = bind_rdev_to_array(rdev, mddev);
5867 if (err) {
5868 export_rdev(rdev);
5869 return err;
5870 }
5871 }
5872
5873 return 0;
5874}
5875
5876static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5877{
5878 char b[BDEVNAME_SIZE];
5879 struct md_rdev *rdev;
5880
5881 rdev = find_rdev(mddev, dev);
5882 if (!rdev)
5883 return -ENXIO;
5884
5885 clear_bit(Blocked, &rdev->flags);
5886 remove_and_add_spares(mddev, rdev);
5887
5888 if (rdev->raid_disk >= 0)
5889 goto busy;
5890
5891 kick_rdev_from_array(rdev);
5892 md_update_sb(mddev, 1);
5893 md_new_event(mddev);
5894
5895 return 0;
5896busy:
5897 printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
5898 bdevname(rdev->bdev,b), mdname(mddev));
5899 return -EBUSY;
5900}
5901
5902static int hot_add_disk(struct mddev * mddev, dev_t dev)
5903{
5904 char b[BDEVNAME_SIZE];
5905 int err;
5906 struct md_rdev *rdev;
5907
5908 if (!mddev->pers)
5909 return -ENODEV;
5910
5911 if (mddev->major_version != 0) {
5912 printk(KERN_WARNING "%s: HOT_ADD may only be used with"
5913 " version-0 superblocks.\n",
5914 mdname(mddev));
5915 return -EINVAL;
5916 }
5917 if (!mddev->pers->hot_add_disk) {
5918 printk(KERN_WARNING
5919 "%s: personality does not support diskops!\n",
5920 mdname(mddev));
5921 return -EINVAL;
5922 }
5923
5924 rdev = md_import_device(dev, -1, 0);
5925 if (IS_ERR(rdev)) {
5926 printk(KERN_WARNING
5927 "md: error, md_import_device() returned %ld\n",
5928 PTR_ERR(rdev));
5929 return -EINVAL;
5930 }
5931
5932 if (mddev->persistent)
5933 rdev->sb_start = calc_dev_sboffset(rdev);
5934 else
5935 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5936
5937 rdev->sectors = rdev->sb_start;
5938
5939 if (test_bit(Faulty, &rdev->flags)) {
5940 printk(KERN_WARNING
5941 "md: can not hot-add faulty %s disk to %s!\n",
5942 bdevname(rdev->bdev,b), mdname(mddev));
5943 err = -EINVAL;
5944 goto abort_export;
5945 }
5946 clear_bit(In_sync, &rdev->flags);
5947 rdev->desc_nr = -1;
5948 rdev->saved_raid_disk = -1;
5949 err = bind_rdev_to_array(rdev, mddev);
5950 if (err)
5951 goto abort_export;
5952
5953
5954
5955
5956
5957
5958 rdev->raid_disk = -1;
5959
5960 md_update_sb(mddev, 1);
5961
5962
5963
5964
5965
5966 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5967 md_wakeup_thread(mddev->thread);
5968 md_new_event(mddev);
5969 return 0;
5970
5971abort_export:
5972 export_rdev(rdev);
5973 return err;
5974}
5975
5976static int set_bitmap_file(struct mddev *mddev, int fd)
5977{
5978 int err;
5979
5980 if (mddev->pers) {
5981 if (!mddev->pers->quiesce)
5982 return -EBUSY;
5983 if (mddev->recovery || mddev->sync_thread)
5984 return -EBUSY;
5985
5986 }
5987
5988
5989 if (fd >= 0) {
5990 if (mddev->bitmap)
5991 return -EEXIST;
5992 mddev->bitmap_info.file = fget(fd);
5993
5994 if (mddev->bitmap_info.file == NULL) {
5995 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5996 mdname(mddev));
5997 return -EBADF;
5998 }
5999
6000 err = deny_bitmap_write_access(mddev->bitmap_info.file);
6001 if (err) {
6002 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
6003 mdname(mddev));
6004 fput(mddev->bitmap_info.file);
6005 mddev->bitmap_info.file = NULL;
6006 return err;
6007 }
6008 mddev->bitmap_info.offset = 0;
6009 } else if (mddev->bitmap == NULL)
6010 return -ENOENT;
6011 err = 0;
6012 if (mddev->pers) {
6013 mddev->pers->quiesce(mddev, 1);
6014 if (fd >= 0) {
6015 err = bitmap_create(mddev);
6016 if (!err)
6017 err = bitmap_load(mddev);
6018 }
6019 if (fd < 0 || err) {
6020 bitmap_destroy(mddev);
6021 fd = -1;
6022 }
6023 mddev->pers->quiesce(mddev, 0);
6024 }
6025 if (fd < 0) {
6026 if (mddev->bitmap_info.file) {
6027 restore_bitmap_write_access(mddev->bitmap_info.file);
6028 fput(mddev->bitmap_info.file);
6029 }
6030 mddev->bitmap_info.file = NULL;
6031 }
6032
6033 return err;
6034}
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
6050{
6051
6052 if (info->raid_disks == 0) {
6053
6054 if (info->major_version < 0 ||
6055 info->major_version >= ARRAY_SIZE(super_types) ||
6056 super_types[info->major_version].name == NULL) {
6057
6058 printk(KERN_INFO
6059 "md: superblock version %d not known\n",
6060 info->major_version);
6061 return -EINVAL;
6062 }
6063 mddev->major_version = info->major_version;
6064 mddev->minor_version = info->minor_version;
6065 mddev->patch_version = info->patch_version;
6066 mddev->persistent = !info->not_persistent;
6067
6068
6069
6070 mddev->ctime = get_seconds();
6071 return 0;
6072 }
6073 mddev->major_version = MD_MAJOR_VERSION;
6074 mddev->minor_version = MD_MINOR_VERSION;
6075 mddev->patch_version = MD_PATCHLEVEL_VERSION;
6076 mddev->ctime = get_seconds();
6077
6078 mddev->level = info->level;
6079 mddev->clevel[0] = 0;
6080 mddev->dev_sectors = 2 * (sector_t)info->size;
6081 mddev->raid_disks = info->raid_disks;
6082
6083
6084
6085 if (info->state & (1<<MD_SB_CLEAN))
6086 mddev->recovery_cp = MaxSector;
6087 else
6088 mddev->recovery_cp = 0;
6089 mddev->persistent = ! info->not_persistent;
6090 mddev->external = 0;
6091
6092 mddev->layout = info->layout;
6093 mddev->chunk_sectors = info->chunk_size >> 9;
6094
6095 mddev->max_disks = MD_SB_DISKS;
6096
6097 if (mddev->persistent)
6098 mddev->flags = 0;
6099 set_bit(MD_CHANGE_DEVS, &mddev->flags);
6100
6101 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
6102 mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9);
6103 mddev->bitmap_info.offset = 0;
6104
6105 mddev->reshape_position = MaxSector;
6106
6107
6108
6109
6110 get_random_bytes(mddev->uuid, 16);
6111
6112 mddev->new_level = mddev->level;
6113 mddev->new_chunk_sectors = mddev->chunk_sectors;
6114 mddev->new_layout = mddev->layout;
6115 mddev->delta_disks = 0;
6116 mddev->reshape_backwards = 0;
6117
6118 return 0;
6119}
6120
6121void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
6122{
6123 WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
6124
6125 if (mddev->external_size)
6126 return;
6127
6128 mddev->array_sectors = array_sectors;
6129}
6130EXPORT_SYMBOL(md_set_array_sectors);
6131
6132static int update_size(struct mddev *mddev, sector_t num_sectors)
6133{
6134 struct md_rdev *rdev;
6135 int rv;
6136 int fit = (num_sectors == 0);
6137
6138 if (mddev->pers->resize == NULL)
6139 return -EINVAL;
6140
6141
6142
6143
6144
6145
6146
6147
6148
6149 if (mddev->sync_thread)
6150 return -EBUSY;
6151
6152 rdev_for_each(rdev, mddev) {
6153 sector_t avail = rdev->sectors;
6154
6155 if (fit && (num_sectors == 0 || num_sectors > avail))
6156 num_sectors = avail;
6157 if (avail < num_sectors)
6158 return -ENOSPC;
6159 }
6160 rv = mddev->pers->resize(mddev, num_sectors);
6161 if (!rv)
6162 revalidate_disk(mddev->gendisk);
6163 return rv;
6164}
6165
6166static int update_raid_disks(struct mddev *mddev, int raid_disks)
6167{
6168 int rv;
6169 struct md_rdev *rdev;
6170
6171 if (mddev->pers->check_reshape == NULL)
6172 return -EINVAL;
6173 if (raid_disks <= 0 ||
6174 (mddev->max_disks && raid_disks >= mddev->max_disks))
6175 return -EINVAL;
6176 if (mddev->sync_thread || mddev->reshape_position != MaxSector)
6177 return -EBUSY;
6178
6179 rdev_for_each(rdev, mddev) {
6180 if (mddev->raid_disks < raid_disks &&
6181 rdev->data_offset < rdev->new_data_offset)
6182 return -EINVAL;
6183 if (mddev->raid_disks > raid_disks &&
6184 rdev->data_offset > rdev->new_data_offset)
6185 return -EINVAL;
6186 }
6187
6188 mddev->delta_disks = raid_disks - mddev->raid_disks;
6189 if (mddev->delta_disks < 0)
6190 mddev->reshape_backwards = 1;
6191 else if (mddev->delta_disks > 0)
6192 mddev->reshape_backwards = 0;
6193
6194 rv = mddev->pers->check_reshape(mddev);
6195 if (rv < 0) {
6196 mddev->delta_disks = 0;
6197 mddev->reshape_backwards = 0;
6198 }
6199 return rv;
6200}
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
6212{
6213 int rv = 0;
6214 int cnt = 0;
6215 int state = 0;
6216
6217
6218 if (mddev->bitmap && mddev->bitmap_info.offset)
6219 state |= (1 << MD_SB_BITMAP_PRESENT);
6220
6221 if (mddev->major_version != info->major_version ||
6222 mddev->minor_version != info->minor_version ||
6223
6224 mddev->ctime != info->ctime ||
6225 mddev->level != info->level ||
6226
6227 !mddev->persistent != info->not_persistent||
6228 mddev->chunk_sectors != info->chunk_size >> 9 ||
6229
6230 ((state^info->state) & 0xfffffe00)
6231 )
6232 return -EINVAL;
6233
6234 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6235 cnt++;
6236 if (mddev->raid_disks != info->raid_disks)
6237 cnt++;
6238 if (mddev->layout != info->layout)
6239 cnt++;
6240 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
6241 cnt++;
6242 if (cnt == 0)
6243 return 0;
6244 if (cnt > 1)
6245 return -EINVAL;
6246
6247 if (mddev->layout != info->layout) {
6248
6249
6250
6251
6252 if (mddev->pers->check_reshape == NULL)
6253 return -EINVAL;
6254 else {
6255 mddev->new_layout = info->layout;
6256 rv = mddev->pers->check_reshape(mddev);
6257 if (rv)
6258 mddev->new_layout = mddev->layout;
6259 return rv;
6260 }
6261 }
6262 if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
6263 rv = update_size(mddev, (sector_t)info->size * 2);
6264
6265 if (mddev->raid_disks != info->raid_disks)
6266 rv = update_raid_disks(mddev, info->raid_disks);
6267
6268 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
6269 if (mddev->pers->quiesce == NULL)
6270 return -EINVAL;
6271 if (mddev->recovery || mddev->sync_thread)
6272 return -EBUSY;
6273 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
6274
6275 if (mddev->bitmap)
6276 return -EEXIST;
6277 if (mddev->bitmap_info.default_offset == 0)
6278 return -EINVAL;
6279 mddev->bitmap_info.offset =
6280 mddev->bitmap_info.default_offset;
6281 mddev->bitmap_info.space =
6282 mddev->bitmap_info.default_space;
6283 mddev->pers->quiesce(mddev, 1);
6284 rv = bitmap_create(mddev);
6285 if (!rv)
6286 rv = bitmap_load(mddev);
6287 if (rv)
6288 bitmap_destroy(mddev);
6289 mddev->pers->quiesce(mddev, 0);
6290 } else {
6291
6292 if (!mddev->bitmap)
6293 return -ENOENT;
6294 if (mddev->bitmap->storage.file)
6295 return -EINVAL;
6296 mddev->pers->quiesce(mddev, 1);
6297 bitmap_destroy(mddev);
6298 mddev->pers->quiesce(mddev, 0);
6299 mddev->bitmap_info.offset = 0;
6300 }
6301 }
6302 md_update_sb(mddev, 1);
6303 return rv;
6304}
6305
6306static int set_disk_faulty(struct mddev *mddev, dev_t dev)
6307{
6308 struct md_rdev *rdev;
6309 int err = 0;
6310
6311 if (mddev->pers == NULL)
6312 return -ENODEV;
6313
6314 rcu_read_lock();
6315 rdev = find_rdev_rcu(mddev, dev);
6316 if (!rdev)
6317 err = -ENODEV;
6318 else {
6319 md_error(mddev, rdev);
6320 if (!test_bit(Faulty, &rdev->flags))
6321 err = -EBUSY;
6322 }
6323 rcu_read_unlock();
6324 return err;
6325}
6326
6327
6328
6329
6330
6331
6332
6333static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6334{
6335 struct mddev *mddev = bdev->bd_disk->private_data;
6336
6337 geo->heads = 2;
6338 geo->sectors = 4;
6339 geo->cylinders = mddev->array_sectors / 8;
6340 return 0;
6341}
6342
6343static int md_ioctl(struct block_device *bdev, fmode_t mode,
6344 unsigned int cmd, unsigned long arg)
6345{
6346 int err = 0;
6347 void __user *argp = (void __user *)arg;
6348 struct mddev *mddev = NULL;
6349 int ro;
6350
6351 switch (cmd) {
6352 case RAID_VERSION:
6353 case GET_ARRAY_INFO:
6354 case GET_DISK_INFO:
6355 break;
6356 default:
6357 if (!capable(CAP_SYS_ADMIN))
6358 return -EACCES;
6359 }
6360
6361
6362
6363
6364
6365 switch (cmd) {
6366 case RAID_VERSION:
6367 err = get_version(argp);
6368 goto done;
6369
6370 case PRINT_RAID_DEBUG:
6371 err = 0;
6372 md_print_devices();
6373 goto done;
6374
6375#ifndef MODULE
6376 case RAID_AUTORUN:
6377 err = 0;
6378 autostart_arrays(arg);
6379 goto done;
6380#endif
6381 default:;
6382 }
6383
6384
6385
6386
6387
6388 mddev = bdev->bd_disk->private_data;
6389
6390 if (!mddev) {
6391 BUG();
6392 goto abort;
6393 }
6394
6395
6396 switch (cmd) {
6397 case GET_ARRAY_INFO:
6398 if (!mddev->raid_disks && !mddev->external)
6399 err = -ENODEV;
6400 else
6401 err = get_array_info(mddev, argp);
6402 goto abort;
6403
6404 case GET_DISK_INFO:
6405 if (!mddev->raid_disks && !mddev->external)
6406 err = -ENODEV;
6407 else
6408 err = get_disk_info(mddev, argp);
6409 goto abort;
6410
6411 case SET_DISK_FAULTY:
6412 err = set_disk_faulty(mddev, new_decode_dev(arg));
6413 goto abort;
6414 }
6415
6416 if (cmd == ADD_NEW_DISK)
6417
6418 flush_workqueue(md_misc_wq);
6419
6420 if (cmd == HOT_REMOVE_DISK)
6421
6422 wait_event_interruptible_timeout(mddev->sb_wait,
6423 !test_bit(MD_RECOVERY_NEEDED,
6424 &mddev->flags),
6425 msecs_to_jiffies(5000));
6426 if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
6427
6428
6429
6430 mutex_lock(&mddev->open_mutex);
6431 if (atomic_read(&mddev->openers) > 1) {
6432 mutex_unlock(&mddev->open_mutex);
6433 err = -EBUSY;
6434 goto abort;
6435 }
6436 set_bit(MD_STILL_CLOSED, &mddev->flags);
6437 mutex_unlock(&mddev->open_mutex);
6438 sync_blockdev(bdev);
6439 }
6440 err = mddev_lock(mddev);
6441 if (err) {
6442 printk(KERN_INFO
6443 "md: ioctl lock interrupted, reason %d, cmd %d\n",
6444 err, cmd);
6445 goto abort;
6446 }
6447
6448 if (cmd == SET_ARRAY_INFO) {
6449 mdu_array_info_t info;
6450 if (!arg)
6451 memset(&info, 0, sizeof(info));
6452 else if (copy_from_user(&info, argp, sizeof(info))) {
6453 err = -EFAULT;
6454 goto abort_unlock;
6455 }
6456 if (mddev->pers) {
6457 err = update_array_info(mddev, &info);
6458 if (err) {
6459 printk(KERN_WARNING "md: couldn't update"
6460 " array info. %d\n", err);
6461 goto abort_unlock;
6462 }
6463 goto done_unlock;
6464 }
6465 if (!list_empty(&mddev->disks)) {
6466 printk(KERN_WARNING
6467 "md: array %s already has disks!\n",
6468 mdname(mddev));
6469 err = -EBUSY;
6470 goto abort_unlock;
6471 }
6472 if (mddev->raid_disks) {
6473 printk(KERN_WARNING
6474 "md: array %s already initialised!\n",
6475 mdname(mddev));
6476 err = -EBUSY;
6477 goto abort_unlock;
6478 }
6479 err = set_array_info(mddev, &info);
6480 if (err) {
6481 printk(KERN_WARNING "md: couldn't set"
6482 " array info. %d\n", err);
6483 goto abort_unlock;
6484 }
6485 goto done_unlock;
6486 }
6487
6488
6489
6490
6491
6492
6493 if ((!mddev->raid_disks && !mddev->external)
6494 && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
6495 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE
6496 && cmd != GET_BITMAP_FILE) {
6497 err = -ENODEV;
6498 goto abort_unlock;
6499 }
6500
6501
6502
6503
6504 switch (cmd) {
6505 case GET_BITMAP_FILE:
6506 err = get_bitmap_file(mddev, argp);
6507 goto done_unlock;
6508
6509 case RESTART_ARRAY_RW:
6510 err = restart_array(mddev);
6511 goto done_unlock;
6512
6513 case STOP_ARRAY:
6514 err = do_md_stop(mddev, 0, bdev);
6515 goto done_unlock;
6516
6517 case STOP_ARRAY_RO:
6518 err = md_set_readonly(mddev, bdev);
6519 goto done_unlock;
6520
6521 case HOT_REMOVE_DISK:
6522 err = hot_remove_disk(mddev, new_decode_dev(arg));
6523 goto done_unlock;
6524
6525 case ADD_NEW_DISK:
6526
6527
6528
6529
6530 if (mddev->pers) {
6531 mdu_disk_info_t info;
6532 if (copy_from_user(&info, argp, sizeof(info)))
6533 err = -EFAULT;
6534 else if (!(info.state & (1<<MD_DISK_SYNC)))
6535
6536 break;
6537 else
6538 err = add_new_disk(mddev, &info);
6539 goto done_unlock;
6540 }
6541 break;
6542
6543 case BLKROSET:
6544 if (get_user(ro, (int __user *)(arg))) {
6545 err = -EFAULT;
6546 goto done_unlock;
6547 }
6548 err = -EINVAL;
6549
6550
6551
6552
6553 if (ro)
6554 goto done_unlock;
6555
6556
6557 if (mddev->ro != 1)
6558 goto done_unlock;
6559
6560
6561
6562
6563 if (mddev->pers) {
6564 err = restart_array(mddev);
6565 if (err == 0) {
6566 mddev->ro = 2;
6567 set_disk_ro(mddev->gendisk, 0);
6568 }
6569 }
6570 goto done_unlock;
6571 }
6572
6573
6574
6575
6576
6577
6578
6579
6580 if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
6581 if (mddev->ro == 2) {
6582 mddev->ro = 0;
6583 sysfs_notify_dirent_safe(mddev->sysfs_state);
6584 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6585
6586
6587
6588
6589 if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
6590 mddev_unlock(mddev);
6591 wait_event(mddev->sb_wait,
6592 !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
6593 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
6594 mddev_lock(mddev);
6595 }
6596 } else {
6597 err = -EROFS;
6598 goto abort_unlock;
6599 }
6600 }
6601
6602 switch (cmd) {
6603 case ADD_NEW_DISK:
6604 {
6605 mdu_disk_info_t info;
6606 if (copy_from_user(&info, argp, sizeof(info)))
6607 err = -EFAULT;
6608 else
6609 err = add_new_disk(mddev, &info);
6610 goto done_unlock;
6611 }
6612
6613 case HOT_ADD_DISK:
6614 err = hot_add_disk(mddev, new_decode_dev(arg));
6615 goto done_unlock;
6616
6617 case RUN_ARRAY:
6618 err = do_md_run(mddev);
6619 goto done_unlock;
6620
6621 case SET_BITMAP_FILE:
6622 err = set_bitmap_file(mddev, (int)arg);
6623 goto done_unlock;
6624
6625 default:
6626 err = -EINVAL;
6627 goto abort_unlock;
6628 }
6629
6630done_unlock:
6631abort_unlock:
6632 if (mddev->hold_active == UNTIL_IOCTL &&
6633 err != -EINVAL)
6634 mddev->hold_active = 0;
6635 mddev_unlock(mddev);
6636
6637 return err;
6638done:
6639 if (err)
6640 MD_BUG();
6641abort:
6642 return err;
6643}
6644#ifdef CONFIG_COMPAT
6645static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
6646 unsigned int cmd, unsigned long arg)
6647{
6648 switch (cmd) {
6649 case HOT_REMOVE_DISK:
6650 case HOT_ADD_DISK:
6651 case SET_DISK_FAULTY:
6652 case SET_BITMAP_FILE:
6653
6654 break;
6655 default:
6656 arg = (unsigned long)compat_ptr(arg);
6657 break;
6658 }
6659
6660 return md_ioctl(bdev, mode, cmd, arg);
6661}
6662#endif
6663
6664static int md_open(struct block_device *bdev, fmode_t mode)
6665{
6666
6667
6668
6669
6670 struct mddev *mddev = mddev_find(bdev->bd_dev);
6671 int err;
6672
6673 if (!mddev)
6674 return -ENODEV;
6675
6676 if (mddev->gendisk != bdev->bd_disk) {
6677
6678
6679
6680 mddev_put(mddev);
6681
6682 flush_workqueue(md_misc_wq);
6683
6684 return -ERESTARTSYS;
6685 }
6686 BUG_ON(mddev != bdev->bd_disk->private_data);
6687
6688 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
6689 goto out;
6690
6691 err = 0;
6692 atomic_inc(&mddev->openers);
6693 clear_bit(MD_STILL_CLOSED, &mddev->flags);
6694 mutex_unlock(&mddev->open_mutex);
6695
6696 check_disk_change(bdev);
6697 out:
6698 return err;
6699}
6700
6701static void md_release(struct gendisk *disk, fmode_t mode)
6702{
6703 struct mddev *mddev = disk->private_data;
6704
6705 BUG_ON(!mddev);
6706 atomic_dec(&mddev->openers);
6707 mddev_put(mddev);
6708}
6709
6710static int md_media_changed(struct gendisk *disk)
6711{
6712 struct mddev *mddev = disk->private_data;
6713
6714 return mddev->changed;
6715}
6716
6717static int md_revalidate(struct gendisk *disk)
6718{
6719 struct mddev *mddev = disk->private_data;
6720
6721 mddev->changed = 0;
6722 return 0;
6723}
6724static const struct block_device_operations md_fops =
6725{
6726 .owner = THIS_MODULE,
6727 .open = md_open,
6728 .release = md_release,
6729 .ioctl = md_ioctl,
6730#ifdef CONFIG_COMPAT
6731 .compat_ioctl = md_compat_ioctl,
6732#endif
6733 .getgeo = md_getgeo,
6734 .media_changed = md_media_changed,
6735 .revalidate_disk= md_revalidate,
6736};
6737
6738static int md_thread(void * arg)
6739{
6740 struct md_thread *thread = arg;
6741
6742
6743
6744
6745
6746
6747
6748
6749
6750
6751
6752
6753
6754 allow_signal(SIGKILL);
6755 while (!kthread_should_stop()) {
6756
6757
6758
6759
6760
6761
6762 if (signal_pending(current))
6763 flush_signals(current);
6764
6765 wait_event_interruptible_timeout
6766 (thread->wqueue,
6767 test_bit(THREAD_WAKEUP, &thread->flags)
6768 || kthread_should_stop(),
6769 thread->timeout);
6770
6771 clear_bit(THREAD_WAKEUP, &thread->flags);
6772 if (!kthread_should_stop())
6773 thread->run(thread);
6774 }
6775
6776 return 0;
6777}
6778
6779void md_wakeup_thread(struct md_thread *thread)
6780{
6781 if (thread) {
6782 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6783 set_bit(THREAD_WAKEUP, &thread->flags);
6784 wake_up(&thread->wqueue);
6785 }
6786}
6787
6788struct md_thread *md_register_thread(void (*run) (struct md_thread *),
6789 struct mddev *mddev, const char *name)
6790{
6791 struct md_thread *thread;
6792
6793 thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
6794 if (!thread)
6795 return NULL;
6796
6797 init_waitqueue_head(&thread->wqueue);
6798
6799 thread->run = run;
6800 thread->mddev = mddev;
6801 thread->timeout = MAX_SCHEDULE_TIMEOUT;
6802 thread->tsk = kthread_run(md_thread, thread,
6803 "%s_%s",
6804 mdname(thread->mddev),
6805 name);
6806 if (IS_ERR(thread->tsk)) {
6807 kfree(thread);
6808 return NULL;
6809 }
6810 return thread;
6811}
6812
6813void md_unregister_thread(struct md_thread **threadp)
6814{
6815 struct md_thread *thread = *threadp;
6816 if (!thread)
6817 return;
6818 pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
6819
6820
6821
6822 spin_lock(&pers_lock);
6823 *threadp = NULL;
6824 spin_unlock(&pers_lock);
6825
6826 kthread_stop(thread->tsk);
6827 kfree(thread);
6828}
6829
6830void md_error(struct mddev *mddev, struct md_rdev *rdev)
6831{
6832 if (!mddev) {
6833 MD_BUG();
6834 return;
6835 }
6836
6837 if (!rdev || test_bit(Faulty, &rdev->flags))
6838 return;
6839
6840 if (!mddev->pers || !mddev->pers->error_handler)
6841 return;
6842 mddev->pers->error_handler(mddev,rdev);
6843 if (mddev->degraded)
6844 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6845 sysfs_notify_dirent_safe(rdev->sysfs_state);
6846 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
6847 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6848 md_wakeup_thread(mddev->thread);
6849 if (mddev->event_work.func)
6850 queue_work(md_misc_wq, &mddev->event_work);
6851 md_new_event_inintr(mddev);
6852}
6853
6854
6855
6856static void status_unused(struct seq_file *seq)
6857{
6858 int i = 0;
6859 struct md_rdev *rdev;
6860
6861 seq_printf(seq, "unused devices: ");
6862
6863 list_for_each_entry(rdev, &pending_raid_disks, same_set) {
6864 char b[BDEVNAME_SIZE];
6865 i++;
6866 seq_printf(seq, "%s ",
6867 bdevname(rdev->bdev,b));
6868 }
6869 if (!i)
6870 seq_printf(seq, "<none>");
6871
6872 seq_printf(seq, "\n");
6873}
6874
6875
6876static void status_resync(struct seq_file *seq, struct mddev * mddev)
6877{
6878 sector_t max_sectors, resync, res;
6879 unsigned long dt, db;
6880 sector_t rt;
6881 int scale;
6882 unsigned int per_milli;
6883
6884 if (mddev->curr_resync <= 3)
6885 resync = 0;
6886 else
6887 resync = mddev->curr_resync
6888 - atomic_read(&mddev->recovery_active);
6889
6890 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
6891 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
6892 max_sectors = mddev->resync_max_sectors;
6893 else
6894 max_sectors = mddev->dev_sectors;
6895
6896
6897
6898
6899 if (!max_sectors) {
6900 MD_BUG();
6901 return;
6902 }
6903
6904
6905
6906
6907
6908 scale = 10;
6909 if (sizeof(sector_t) > sizeof(unsigned long)) {
6910 while ( max_sectors/2 > (1ULL<<(scale+32)))
6911 scale++;
6912 }
6913 res = (resync>>scale)*1000;
6914 sector_div(res, (u32)((max_sectors>>scale)+1));
6915
6916 per_milli = res;
6917 {
6918 int i, x = per_milli/50, y = 20-x;
6919 seq_printf(seq, "[");
6920 for (i = 0; i < x; i++)
6921 seq_printf(seq, "=");
6922 seq_printf(seq, ">");
6923 for (i = 0; i < y; i++)
6924 seq_printf(seq, ".");
6925 seq_printf(seq, "] ");
6926 }
6927 seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
6928 (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
6929 "reshape" :
6930 (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
6931 "check" :
6932 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
6933 "resync" : "recovery"))),
6934 per_milli/10, per_milli % 10,
6935 (unsigned long long) resync/2,
6936 (unsigned long long) max_sectors/2);
6937
6938
6939
6940
6941
6942
6943
6944
6945
6946
6947
6948
6949
6950
6951
6952 dt = ((jiffies - mddev->resync_mark) / HZ);
6953 if (!dt) dt++;
6954 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
6955 - mddev->resync_mark_cnt;
6956
6957 rt = max_sectors - resync;
6958 sector_div(rt, db/32+1);
6959 rt *= dt;
6960 rt >>= 5;
6961
6962 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
6963 ((unsigned long)rt % 60)/6);
6964
6965 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
6966}
6967
6968static void *md_seq_start(struct seq_file *seq, loff_t *pos)
6969{
6970 struct list_head *tmp;
6971 loff_t l = *pos;
6972 struct mddev *mddev;
6973
6974 if (l >= 0x10000)
6975 return NULL;
6976 if (!l--)
6977
6978 return (void*)1;
6979
6980 spin_lock(&all_mddevs_lock);
6981 list_for_each(tmp,&all_mddevs)
6982 if (!l--) {
6983 mddev = list_entry(tmp, struct mddev, all_mddevs);
6984 mddev_get(mddev);
6985 spin_unlock(&all_mddevs_lock);
6986 return mddev;
6987 }
6988 spin_unlock(&all_mddevs_lock);
6989 if (!l--)
6990 return (void*)2;
6991 return NULL;
6992}
6993
6994static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
6995{
6996 struct list_head *tmp;
6997 struct mddev *next_mddev, *mddev = v;
6998
6999 ++*pos;
7000 if (v == (void*)2)
7001 return NULL;
7002
7003 spin_lock(&all_mddevs_lock);
7004 if (v == (void*)1)
7005 tmp = all_mddevs.next;
7006 else
7007 tmp = mddev->all_mddevs.next;
7008 if (tmp != &all_mddevs)
7009 next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
7010 else {
7011 next_mddev = (void*)2;
7012 *pos = 0x10000;
7013 }
7014 spin_unlock(&all_mddevs_lock);
7015
7016 if (v != (void*)1)
7017 mddev_put(mddev);
7018 return next_mddev;
7019
7020}
7021
/*
 * seq_file stop handler: drop the reference held on the current array.
 * NULL and the header/trailer tokens (void *)1 and (void *)2 are not
 * arrays and are left alone.
 */
static void md_seq_stop(struct seq_file *seq, void *v)
{
	struct mddev *array = v;

	if (!array || v == (void *)1 || v == (void *)2)
		return;

	mddev_put(array);
}
7029
7030static int md_seq_show(struct seq_file *seq, void *v)
7031{
7032 struct mddev *mddev = v;
7033 sector_t sectors;
7034 struct md_rdev *rdev;
7035
7036 if (v == (void*)1) {
7037 struct md_personality *pers;
7038 seq_printf(seq, "Personalities : ");
7039 spin_lock(&pers_lock);
7040 list_for_each_entry(pers, &pers_list, list)
7041 seq_printf(seq, "[%s] ", pers->name);
7042
7043 spin_unlock(&pers_lock);
7044 seq_printf(seq, "\n");
7045 seq->poll_event = atomic_read(&md_event_count);
7046 return 0;
7047 }
7048 if (v == (void*)2) {
7049 status_unused(seq);
7050 return 0;
7051 }
7052
7053 if (mddev_lock(mddev) < 0)
7054 return -EINTR;
7055
7056 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
7057 seq_printf(seq, "%s : %sactive", mdname(mddev),
7058 mddev->pers ? "" : "in");
7059 if (mddev->pers) {
7060 if (mddev->ro==1)
7061 seq_printf(seq, " (read-only)");
7062 if (mddev->ro==2)
7063 seq_printf(seq, " (auto-read-only)");
7064 seq_printf(seq, " %s", mddev->pers->name);
7065 }
7066
7067 sectors = 0;
7068 rdev_for_each(rdev, mddev) {
7069 char b[BDEVNAME_SIZE];
7070 seq_printf(seq, " %s[%d]",
7071 bdevname(rdev->bdev,b), rdev->desc_nr);
7072 if (test_bit(WriteMostly, &rdev->flags))
7073 seq_printf(seq, "(W)");
7074 if (test_bit(Faulty, &rdev->flags)) {
7075 seq_printf(seq, "(F)");
7076 continue;
7077 }
7078 if (rdev->raid_disk < 0)
7079 seq_printf(seq, "(S)");
7080 if (test_bit(Replacement, &rdev->flags))
7081 seq_printf(seq, "(R)");
7082 sectors += rdev->sectors;
7083 }
7084
7085 if (!list_empty(&mddev->disks)) {
7086 if (mddev->pers)
7087 seq_printf(seq, "\n %llu blocks",
7088 (unsigned long long)
7089 mddev->array_sectors / 2);
7090 else
7091 seq_printf(seq, "\n %llu blocks",
7092 (unsigned long long)sectors / 2);
7093 }
7094 if (mddev->persistent) {
7095 if (mddev->major_version != 0 ||
7096 mddev->minor_version != 90) {
7097 seq_printf(seq," super %d.%d",
7098 mddev->major_version,
7099 mddev->minor_version);
7100 }
7101 } else if (mddev->external)
7102 seq_printf(seq, " super external:%s",
7103 mddev->metadata_type);
7104 else
7105 seq_printf(seq, " super non-persistent");
7106
7107 if (mddev->pers) {
7108 mddev->pers->status(seq, mddev);
7109 seq_printf(seq, "\n ");
7110 if (mddev->pers->sync_request) {
7111 if (mddev->curr_resync > 2) {
7112 status_resync(seq, mddev);
7113 seq_printf(seq, "\n ");
7114 } else if (mddev->curr_resync >= 1)
7115 seq_printf(seq, "\tresync=DELAYED\n ");
7116 else if (mddev->recovery_cp < MaxSector)
7117 seq_printf(seq, "\tresync=PENDING\n ");
7118 }
7119 } else
7120 seq_printf(seq, "\n ");
7121
7122 bitmap_status(seq, mddev->bitmap);
7123
7124 seq_printf(seq, "\n");
7125 }
7126 mddev_unlock(mddev);
7127
7128 return 0;
7129}
7130
7131static const struct seq_operations md_seq_ops = {
7132 .start = md_seq_start,
7133 .next = md_seq_next,
7134 .stop = md_seq_stop,
7135 .show = md_seq_show,
7136};
7137
7138static int md_seq_open(struct inode *inode, struct file *file)
7139{
7140 struct seq_file *seq;
7141 int error;
7142
7143 error = seq_open(file, &md_seq_ops);
7144 if (error)
7145 return error;
7146
7147 seq = file->private_data;
7148 seq->poll_event = atomic_read(&md_event_count);
7149 return error;
7150}
7151
7152static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
7153{
7154 struct seq_file *seq = filp->private_data;
7155 int mask;
7156
7157 poll_wait(filp, &md_event_waiters, wait);
7158
7159
7160 mask = POLLIN | POLLRDNORM;
7161
7162 if (seq->poll_event != atomic_read(&md_event_count))
7163 mask |= POLLERR | POLLPRI;
7164 return mask;
7165}
7166
7167static const struct file_operations md_seq_fops = {
7168 .owner = THIS_MODULE,
7169 .open = md_seq_open,
7170 .read = seq_read,
7171 .llseek = seq_lseek,
7172 .release = seq_release_private,
7173 .poll = mdstat_poll,
7174};
7175
7176int register_md_personality(struct md_personality *p)
7177{
7178 spin_lock(&pers_lock);
7179 list_add_tail(&p->list, &pers_list);
7180 printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
7181 spin_unlock(&pers_lock);
7182 return 0;
7183}
7184
7185int unregister_md_personality(struct md_personality *p)
7186{
7187 printk(KERN_INFO "md: %s personality unregistered\n", p->name);
7188 spin_lock(&pers_lock);
7189 list_del_init(&p->list);
7190 spin_unlock(&pers_lock);
7191 return 0;
7192}
7193
7194static int is_mddev_idle(struct mddev *mddev, int init)
7195{
7196 struct md_rdev * rdev;
7197 int idle;
7198 int curr_events;
7199
7200 idle = 1;
7201 rcu_read_lock();
7202 rdev_for_each_rcu(rdev, mddev) {
7203 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
7204 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
7205 (int)part_stat_read(&disk->part0, sectors[1]) -
7206 atomic_read(&disk->sync_io);
7207
7208
7209
7210
7211
7212
7213
7214
7215
7216
7217
7218
7219
7220
7221
7222
7223
7224
7225
7226
7227
7228
7229 if (init || curr_events - rdev->last_events > 64) {
7230 rdev->last_events = curr_events;
7231 idle = 0;
7232 }
7233 }
7234 rcu_read_unlock();
7235 return idle;
7236}
7237
7238void md_done_sync(struct mddev *mddev, int blocks, int ok)
7239{
7240
7241 atomic_sub(blocks, &mddev->recovery_active);
7242 wake_up(&mddev->recovery_wait);
7243 if (!ok) {
7244 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7245 set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
7246 md_wakeup_thread(mddev->thread);
7247
7248 }
7249}
7250
7251
7252
7253
7254
7255
7256
7257void md_write_start(struct mddev *mddev, struct bio *bi)
7258{
7259 int did_change = 0;
7260 if (bio_data_dir(bi) != WRITE)
7261 return;
7262
7263 BUG_ON(mddev->ro == 1);
7264 if (mddev->ro == 2) {
7265
7266 mddev->ro = 0;
7267 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7268 md_wakeup_thread(mddev->thread);
7269 md_wakeup_thread(mddev->sync_thread);
7270 did_change = 1;
7271 }
7272 atomic_inc(&mddev->writes_pending);
7273 if (mddev->safemode == 1)
7274 mddev->safemode = 0;
7275 if (mddev->in_sync) {
7276 spin_lock_irq(&mddev->write_lock);
7277 if (mddev->in_sync) {
7278 mddev->in_sync = 0;
7279 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7280 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7281 md_wakeup_thread(mddev->thread);
7282 did_change = 1;
7283 }
7284 spin_unlock_irq(&mddev->write_lock);
7285 }
7286 if (did_change)
7287 sysfs_notify_dirent_safe(mddev->sysfs_state);
7288 wait_event(mddev->sb_wait,
7289 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
7290}
7291
7292void md_write_end(struct mddev *mddev)
7293{
7294 if (atomic_dec_and_test(&mddev->writes_pending)) {
7295 if (mddev->safemode == 2)
7296 md_wakeup_thread(mddev->thread);
7297 else if (mddev->safemode_delay)
7298 mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay);
7299 }
7300}
7301
7302
7303
7304
7305
7306
7307
7308
7309
7310
7311int md_allow_write(struct mddev *mddev)
7312{
7313 if (!mddev->pers)
7314 return 0;
7315 if (mddev->ro)
7316 return 0;
7317 if (!mddev->pers->sync_request)
7318 return 0;
7319
7320 spin_lock_irq(&mddev->write_lock);
7321 if (mddev->in_sync) {
7322 mddev->in_sync = 0;
7323 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7324 set_bit(MD_CHANGE_PENDING, &mddev->flags);
7325 if (mddev->safemode_delay &&
7326 mddev->safemode == 0)
7327 mddev->safemode = 1;
7328 spin_unlock_irq(&mddev->write_lock);
7329 md_update_sb(mddev, 0);
7330 sysfs_notify_dirent_safe(mddev->sysfs_state);
7331 } else
7332 spin_unlock_irq(&mddev->write_lock);
7333
7334 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7335 return -EAGAIN;
7336 else
7337 return 0;
7338}
7339EXPORT_SYMBOL_GPL(md_allow_write);
7340
/*
 * SYNC_MARKS:       number of (time, sector count) samples kept for the
 *                   speed calculation.
 * SYNC_MARK_STEP:   minimum jiffies between two samples.
 * UPDATE_FREQUENCY: jiffies after which curr_resync_completed is
 *                   refreshed even if little progress was made.
 */
7341#define SYNC_MARKS 10
7342#define SYNC_MARK_STEP (3*HZ)
7343#define UPDATE_FREQUENCY (5*60*HZ)
/*
 * Body of the sync thread for one array (resync, check, repair,
 * reshape or recovery, depending on the MD_RECOVERY_* bits).
 *
 * It first waits until no other array sharing physical units is
 * syncing, picks the starting sector, then repeatedly calls
 * pers->sync_request() until max_sectors is reached or the run is
 * interrupted, throttling between speed_min() and speed_max().  On the
 * way out it records the checkpoint and sets MD_RECOVERY_DONE.
 */
7344void md_do_sync(struct md_thread *thread)
7345{
7346 struct mddev *mddev = thread->mddev;
7347 struct mddev *mddev2;
7348 unsigned int currspeed = 0,
7349 window;
7350 sector_t max_sectors,j, io_sectors;
7351 unsigned long mark[SYNC_MARKS];
7352 unsigned long update_time;
7353 sector_t mark_cnt[SYNC_MARKS];
7354 int last_mark,m;
7355 struct list_head *tmp;
7356 sector_t last_check;
7357 int skipped = 0;
7358 struct md_rdev *rdev;
7359 char *desc, *action = NULL;
7360 struct blk_plug plug;
7361
7362
	/* Nothing to do if a previous run already finished or the array is not read-write. */
7363 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
7364 return;
7365 if (mddev->ro)
7366 return;
7367
	/* desc is used in log messages; action, when set, overrides it for last_sync_action. */
7368 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7369 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
7370 desc = "data-check";
7371 action = "check";
7372 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7373 desc = "requested-resync";
7374 action = "repair";
7375 } else
7376 desc = "resync";
7377 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7378 desc = "reshape";
7379 else
7380 desc = "recovery";
7381
7382 mddev->last_sync_action = action ?: desc;
7383
7384
7385
7386
7387
7388
7389
7390
7391
7392
7393
7394
7395
7396
7397
7398
7399
	/*
	 * Arbitration with other arrays that share physical units
	 * (match_mddev_units()), skipped when parallel_resync is set.
	 * curr_resync doubles as a small state value here: 2 means
	 * "ready to go", 1 means "yielding".  The array with the lower
	 * address drops to 1; an array waits while the other one's
	 * curr_resync is >= its own, and the whole scan is repeated after
	 * every wait and while this array is still at 1.
	 */
7400 do {
7401 mddev->curr_resync = 2;
7402
7403 try_again:
7404 if (kthread_should_stop())
7405 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7406
7407 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7408 goto skip;
7409 for_each_mddev(mddev2, tmp) {
7410 if (mddev2 == mddev)
7411 continue;
7412 if (!mddev->parallel_resync
7413 && mddev2->curr_resync
7414 && match_mddev_units(mddev, mddev2)) {
7415 DEFINE_WAIT(wq);
7416 if (mddev < mddev2 && mddev->curr_resync == 2) {
7417
7418 mddev->curr_resync = 1;
7419 wake_up(&resync_wait);
7420 }
7421 if (mddev > mddev2 && mddev->curr_resync == 1)
7422
7423
7424
7425 continue;
7426
7427
7428
7429
7430 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7431 if (!kthread_should_stop() &&
7432 mddev2->curr_resync >= mddev->curr_resync) {
7433 printk(KERN_INFO "md: delaying %s of %s"
7434 " until %s has finished (they"
7435 " share one or more physical units)\n",
7436 desc, mdname(mddev), mdname(mddev2));
				/* mddev2 is released here because the goto below leaves the for_each_mddev() walk. */
7437 mddev_put(mddev2);
7438 if (signal_pending(current))
7439 flush_signals(current);
7440 schedule();
7441 finish_wait(&resync_wait, &wq);
7442 goto try_again;
7443 }
7444 finish_wait(&resync_wait, &wq);
7445 }
7446 }
7447 } while (mddev->curr_resync < 2);
7448
	/*
	 * Choose the range [j, max_sectors):
	 *  - sync:     from resync_min when requested, else from
	 *              recovery_cp when there is no bitmap, else from 0;
	 *  - reshape:  from 0;
	 *  - recovery: from the lowest recovery_offset of any active,
	 *              non-faulty, not-in-sync member.
	 */
7449 j = 0;
7450 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7451
7452
7453
7454 max_sectors = mddev->resync_max_sectors;
7455 atomic64_set(&mddev->resync_mismatches, 0);
7456
7457 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7458 j = mddev->resync_min;
7459 else if (!mddev->bitmap)
7460 j = mddev->recovery_cp;
7461
7462 } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
7463 max_sectors = mddev->resync_max_sectors;
7464 else {
7465
7466 max_sectors = mddev->dev_sectors;
7467 j = MaxSector;
7468 rcu_read_lock();
7469 rdev_for_each_rcu(rdev, mddev)
7470 if (rdev->raid_disk >= 0 &&
7471 !test_bit(Faulty, &rdev->flags) &&
7472 !test_bit(In_sync, &rdev->flags) &&
7473 rdev->recovery_offset < j)
7474 j = rdev->recovery_offset;
7475 rcu_read_unlock();
7476 }
7477
7478 printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
7479 printk(KERN_INFO "md: minimum _guaranteed_ speed:"
7480 " %d KB/sec/disk.\n", speed_min(mddev));
7481 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
7482 "(but not more than %d KB/sec) for %s.\n",
7483 speed_max(mddev), desc);
7484
	/* init=1: only records the current per-device event counts as the baseline. */
7485 is_mddev_idle(mddev, 1);
7486
	/* Start every speed sample at "now" with zero sectors done. */
7487 io_sectors = 0;
7488 for (m = 0; m < SYNC_MARKS; m++) {
7489 mark[m] = jiffies;
7490 mark_cnt[m] = io_sectors;
7491 }
7492 last_mark = 0;
7493 mddev->resync_mark = mark[last_mark];
7494 mddev->resync_mark_cnt = mark_cnt[last_mark];
7495
7496
7497
7498
	/* window: sectors of I/O issued between two throttle checks (32 pages' worth). */
7499 window = 32*(PAGE_SIZE/512);
7500 printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n",
7501 window/2, (unsigned long long)max_sectors/2);
7502
7503 atomic_set(&mddev->recovery_active, 0);
7504 last_check = 0;
7505
	/* curr_resync values up to 2 are state markers (see above), so real positions start at 3. */
7506 if (j>2) {
7507 printk(KERN_INFO
7508 "md: resuming %s of %s from checkpoint.\n",
7509 desc, mdname(mddev));
7510 mddev->curr_resync = j;
7511 } else
7512 mddev->curr_resync = 3;
7513 mddev->curr_resync_completed = j;
7514 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7515 md_new_event(mddev);
7516 update_time = jiffies;
7517
7518 blk_start_plug(&plug);
7519 while (j < max_sectors) {
7520 sector_t sectors;
7521
7522 skipped = 0;
7523
		/*
		 * Checkpoint (not for reshape): once progress since the last
		 * checkpoint exceeds 1/16 of the total, or UPDATE_FREQUENCY
		 * has elapsed, or j is at least half way from the last
		 * checkpoint to resync_max, wait for in-flight sync I/O to
		 * drain and publish j as curr_resync_completed.
		 */
7524 if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7525 ((mddev->curr_resync > mddev->curr_resync_completed &&
7526 (mddev->curr_resync - mddev->curr_resync_completed)
7527 > (max_sectors >> 4)) ||
7528 time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
7529 (j - mddev->curr_resync_completed)*2
7530 >= mddev->resync_max - mddev->curr_resync_completed
7531 )) {
7532
7533 wait_event(mddev->recovery_wait,
7534 atomic_read(&mddev->recovery_active) == 0);
7535 mddev->curr_resync_completed = j;
7536 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
7537 j > mddev->recovery_cp)
7538 mddev->recovery_cp = j;
7539 update_time = jiffies;
7540 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7541 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7542 }
7543
		/* Pause at resync_max until it is raised or the thread is told to stop. */
7544 while (j >= mddev->resync_max && !kthread_should_stop()) {
7545
7546
7547
7548
7549 flush_signals(current);
7550 wait_event_interruptible(mddev->recovery_wait,
7551 mddev->resync_max > j
7552 || kthread_should_stop());
7553 }
7554
7555 if (kthread_should_stop())
7556 goto interrupted;
7557
		/* The last argument is non-zero while the measured speed is below speed_min(). */
7558 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7559 currspeed < speed_min(mddev));
		/* A return of 0 sectors ends the run and marks it interrupted. */
7560 if (sectors == 0) {
7561 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7562 goto out;
7563 }
7564
7565 if (!skipped) {
7566 io_sectors += sectors;
7567 atomic_add(sectors, &mddev->recovery_active);
7568 }
7569
7570 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7571 break;
7572
7573 j += sectors;
7574 if (j > 2)
7575 mddev->curr_resync = j;
7576 mddev->curr_mark_cnt = io_sectors;
		/* last_check is still 0 until the first throttle check below has run. */
7577 if (last_check == 0)
7578
7579
7580
7581 md_new_event(mddev);
7582
		/* Throttle checks only happen once per window sectors of real I/O. */
7583 if (last_check + window > io_sectors || j == max_sectors)
7584 continue;
7585
7586 last_check = io_sectors;
7587 repeat:
		/* Rotate the sample ring; the speed is measured against the oldest sample. */
7588 if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
7589
7590 int next = (last_mark+1) % SYNC_MARKS;
7591
7592 mddev->resync_mark = mark[next];
7593 mddev->resync_mark_cnt = mark_cnt[next];
7594 mark[next] = jiffies;
7595 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
7596 last_mark = next;
7597 }
7598
7599
7600 if (kthread_should_stop())
7601 goto interrupted;
7602
7603
7604
7605
7606
7607
7608
7609
7610
7611
7612 cond_resched();
7613
		/* Sectors since the reference sample, halved, per elapsed second; the +1 terms keep divisor and result non-zero. */
7614 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
7615 /((jiffies-mddev->resync_mark)/HZ +1) +1;
7616
		/* Above the minimum: back off for 500ms while over the maximum or while the devices are not idle. */
7617 if (currspeed > speed_min(mddev)) {
7618 if ((currspeed > speed_max(mddev)) ||
7619 !is_mddev_idle(mddev, 0)) {
7620 msleep(500);
7621 goto repeat;
7622 }
7623 }
7624 }
7625 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
7626
7627
7628
7629 out:
7630 blk_finish_plug(&plug);
7631 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7632
7633
	/* Final call with sector == max_sectors after all in-flight I/O has drained. */
7634 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
7635
	/*
	 * Record how far we got (not for a check, and only if real work
	 * started).  For a sync, recovery_cp becomes MaxSector on
	 * completion, or the checkpoint position when interrupted
	 * (curr_resync_completed after an error, curr_resync otherwise).
	 * For recovery/reshape, recovery_offset of the members being
	 * rebuilt is advanced to curr_resync.
	 */
7636 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
7637 mddev->curr_resync > 2) {
7638 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
7639 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7640 if (mddev->curr_resync >= mddev->recovery_cp) {
7641 printk(KERN_INFO
7642 "md: checkpointing %s of %s.\n",
7643 desc, mdname(mddev));
7644 if (test_bit(MD_RECOVERY_ERROR,
7645 &mddev->recovery))
7646 mddev->recovery_cp =
7647 mddev->curr_resync_completed;
7648 else
7649 mddev->recovery_cp =
7650 mddev->curr_resync;
7651 }
7652 } else
7653 mddev->recovery_cp = MaxSector;
7654 } else {
7655 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7656 mddev->curr_resync = MaxSector;
7657 rcu_read_lock();
7658 rdev_for_each_rcu(rdev, mddev)
7659 if (rdev->raid_disk >= 0 &&
7660 mddev->delta_disks >= 0 &&
7661 !test_bit(Faulty, &rdev->flags) &&
7662 !test_bit(In_sync, &rdev->flags) &&
7663 rdev->recovery_offset < mddev->curr_resync)
7664 rdev->recovery_offset = mddev->curr_resync;
7665 rcu_read_unlock();
7666 }
7667 }
7668 skip:
7669 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7670
	/* A completed run resets the resync_min/resync_max limits; an interrupted requested run resumes from the last checkpoint. */
7671 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7672
7673 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7674 mddev->resync_min = 0;
7675 mddev->resync_max = MaxSector;
7676 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7677 mddev->resync_min = mddev->curr_resync_completed;
	/* curr_resync == 0 lets arrays waiting on resync_wait proceed. */
7678 mddev->curr_resync = 0;
7679 wake_up(&resync_wait);
7680 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7681 md_wakeup_thread(mddev->thread);
7682 return;
7683
7684 interrupted:
7685
7686
7687
	/* Reached when kthread_should_stop() is seen inside the main loop. */
7688 printk(KERN_INFO
7689 "md: md_do_sync() got signal ... exiting\n");
7690 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7691 goto out;
7692
7693}
7694EXPORT_SYMBOL_GPL(md_do_sync);
7695
7696static int remove_and_add_spares(struct mddev *mddev,
7697 struct md_rdev *this)
7698{
7699 struct md_rdev *rdev;
7700 int spares = 0;
7701 int removed = 0;
7702
7703 rdev_for_each(rdev, mddev)
7704 if ((this == NULL || rdev == this) &&
7705 rdev->raid_disk >= 0 &&
7706 !test_bit(Blocked, &rdev->flags) &&
7707 (test_bit(Faulty, &rdev->flags) ||
7708 ! test_bit(In_sync, &rdev->flags)) &&
7709 atomic_read(&rdev->nr_pending)==0) {
7710 if (mddev->pers->hot_remove_disk(
7711 mddev, rdev) == 0) {
7712 sysfs_unlink_rdev(mddev, rdev);
7713 rdev->raid_disk = -1;
7714 removed++;
7715 }
7716 }
7717 if (removed && mddev->kobj.sd)
7718 sysfs_notify(&mddev->kobj, NULL, "degraded");
7719
7720 if (this)
7721 goto no_add;
7722
7723 rdev_for_each(rdev, mddev) {
7724 if (rdev->raid_disk >= 0 &&
7725 !test_bit(In_sync, &rdev->flags) &&
7726 !test_bit(Faulty, &rdev->flags))
7727 spares++;
7728 if (rdev->raid_disk >= 0)
7729 continue;
7730 if (test_bit(Faulty, &rdev->flags))
7731 continue;
7732 if (mddev->ro &&
7733 rdev->saved_raid_disk < 0)
7734 continue;
7735
7736 rdev->recovery_offset = 0;
7737 if (mddev->pers->
7738 hot_add_disk(mddev, rdev) == 0) {
7739 if (sysfs_link_rdev(mddev, rdev))
7740 ;
7741 spares++;
7742 md_new_event(mddev);
7743 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7744 }
7745 }
7746no_add:
7747 if (removed)
7748 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7749 return spares;
7750}
7751
7752
7753
7754
7755
7756
7757
7758
7759
7760
7761
7762
7763
7764
7765
7766
7767
7768
7769
7770
7771
7772
7773
7774void md_check_recovery(struct mddev *mddev)
7775{
7776 if (mddev->suspended)
7777 return;
7778
7779 if (mddev->bitmap)
7780 bitmap_daemon_work(mddev);
7781
7782 if (signal_pending(current)) {
7783 if (mddev->pers->sync_request && !mddev->external) {
7784 printk(KERN_INFO "md: %s in immediate safe mode\n",
7785 mdname(mddev));
7786 mddev->safemode = 2;
7787 }
7788 flush_signals(current);
7789 }
7790
7791 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
7792 return;
7793 if ( ! (
7794 (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
7795 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7796 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
7797 (mddev->external == 0 && mddev->safemode == 1) ||
7798 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
7799 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
7800 ))
7801 return;
7802
7803 if (mddev_trylock(mddev)) {
7804 int spares = 0;
7805
7806 if (mddev->ro) {
7807
7808
7809
7810
7811
7812
7813
7814 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7815 remove_and_add_spares(mddev, NULL);
7816 mddev->pers->spare_active(mddev);
7817 goto unlock;
7818 }
7819
7820 if (!mddev->external) {
7821 int did_change = 0;
7822 spin_lock_irq(&mddev->write_lock);
7823 if (mddev->safemode &&
7824 !atomic_read(&mddev->writes_pending) &&
7825 !mddev->in_sync &&
7826 mddev->recovery_cp == MaxSector) {
7827 mddev->in_sync = 1;
7828 did_change = 1;
7829 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
7830 }
7831 if (mddev->safemode == 1)
7832 mddev->safemode = 0;
7833 spin_unlock_irq(&mddev->write_lock);
7834 if (did_change)
7835 sysfs_notify_dirent_safe(mddev->sysfs_state);
7836 }
7837
7838 if (mddev->flags & MD_UPDATE_SB_FLAGS)
7839 md_update_sb(mddev, 0);
7840
7841 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
7842 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
7843
7844 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7845 goto unlock;
7846 }
7847 if (mddev->sync_thread) {
7848 md_reap_sync_thread(mddev);
7849 goto unlock;
7850 }
7851
7852
7853
7854 mddev->curr_resync_completed = 0;
7855 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7856
7857
7858
7859 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
7860 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
7861
7862 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7863 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7864 goto unlock;
7865
7866
7867
7868
7869
7870
7871
7872 if (mddev->reshape_position != MaxSector) {
7873 if (mddev->pers->check_reshape == NULL ||
7874 mddev->pers->check_reshape(mddev) != 0)
7875
7876 goto unlock;
7877 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7878 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7879 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
7880 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7881 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7882 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7883 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7884 } else if (mddev->recovery_cp < MaxSector) {
7885 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7886 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7887 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7888
7889 goto unlock;
7890
7891 if (mddev->pers->sync_request) {
7892 if (spares) {
7893
7894
7895
7896
7897 bitmap_write_all(mddev->bitmap);
7898 }
7899 mddev->sync_thread = md_register_thread(md_do_sync,
7900 mddev,
7901 "resync");
7902 if (!mddev->sync_thread) {
7903 printk(KERN_ERR "%s: could not start resync"
7904 " thread...\n",
7905 mdname(mddev));
7906
7907 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7908 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7909 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7910 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7911 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7912 } else
7913 md_wakeup_thread(mddev->sync_thread);
7914 sysfs_notify_dirent_safe(mddev->sysfs_action);
7915 md_new_event(mddev);
7916 }
7917 unlock:
7918 wake_up(&mddev->sb_wait);
7919
7920 if (!mddev->sync_thread) {
7921 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7922 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7923 &mddev->recovery))
7924 if (mddev->sysfs_action)
7925 sysfs_notify_dirent_safe(mddev->sysfs_action);
7926 }
7927 mddev_unlock(mddev);
7928 }
7929}
7930
7931void md_reap_sync_thread(struct mddev *mddev)
7932{
7933 struct md_rdev *rdev;
7934
7935
7936 md_unregister_thread(&mddev->sync_thread);
7937 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7938 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7939
7940
7941 if (mddev->pers->spare_active(mddev)) {
7942 sysfs_notify(&mddev->kobj, NULL,
7943 "degraded");
7944 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7945 }
7946 }
7947 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7948 mddev->pers->finish_reshape)
7949 mddev->pers->finish_reshape(mddev);
7950
7951
7952
7953
7954
7955
7956
7957 rdev_for_each(rdev, mddev)
7958 if (!mddev->degraded ||
7959 test_bit(In_sync, &rdev->flags))
7960 rdev->saved_raid_disk = -1;
7961
7962 md_update_sb(mddev, 1);
7963 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7964 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7965 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7966 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7967 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7968
7969 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7970 sysfs_notify_dirent_safe(mddev->sysfs_action);
7971 md_new_event(mddev);
7972 if (mddev->event_work.func)
7973 queue_work(md_misc_wq, &mddev->event_work);
7974}
7975
7976void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7977{
7978 sysfs_notify_dirent_safe(rdev->sysfs_state);
7979 wait_event_timeout(rdev->blocked_wait,
7980 !test_bit(Blocked, &rdev->flags) &&
7981 !test_bit(BlockedBadBlocks, &rdev->flags),
7982 msecs_to_jiffies(5000));
7983 rdev_dec_pending(rdev, mddev);
7984}
7985EXPORT_SYMBOL(md_wait_for_blocked_rdev);
7986
7987void md_finish_reshape(struct mddev *mddev)
7988{
7989
7990 struct md_rdev *rdev;
7991
7992 rdev_for_each(rdev, mddev) {
7993 if (rdev->data_offset > rdev->new_data_offset)
7994 rdev->sectors += rdev->data_offset - rdev->new_data_offset;
7995 else
7996 rdev->sectors -= rdev->new_data_offset - rdev->data_offset;
7997 rdev->data_offset = rdev->new_data_offset;
7998 }
7999}
8000EXPORT_SYMBOL(md_finish_reshape);
8001
8002
8003
8004
8005
8006
8007
8008
8009
8010
8011
8012
8013
8014
8015
8016
8017
8018
8019
8020
8021
8022
8023
8024
8025
8026
8027
8028int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
8029 sector_t *first_bad, int *bad_sectors)
8030{
8031 int hi;
8032 int lo;
8033 u64 *p = bb->page;
8034 int rv;
8035 sector_t target = s + sectors;
8036 unsigned seq;
8037
8038 if (bb->shift > 0) {
8039
8040 s >>= bb->shift;
8041 target += (1<<bb->shift) - 1;
8042 target >>= bb->shift;
8043 sectors = target - s;
8044 }
8045
8046
8047retry:
8048 seq = read_seqbegin(&bb->lock);
8049 lo = 0;
8050 rv = 0;
8051 hi = bb->count;
8052
8053
8054
8055
8056
8057
8058
8059
8060
8061 while (hi - lo > 1) {
8062 int mid = (lo + hi) / 2;
8063 sector_t a = BB_OFFSET(p[mid]);
8064 if (a < target)
8065
8066
8067 lo = mid;
8068 else
8069
8070 hi = mid;
8071 }
8072
8073 if (hi > lo) {
8074
8075
8076
8077 while (lo >= 0 &&
8078 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8079 if (BB_OFFSET(p[lo]) < target) {
8080
8081
8082
8083 if (rv != -1 && BB_ACK(p[lo]))
8084 rv = 1;
8085 else
8086 rv = -1;
8087 *first_bad = BB_OFFSET(p[lo]);
8088 *bad_sectors = BB_LEN(p[lo]);
8089 }
8090 lo--;
8091 }
8092 }
8093
8094 if (read_seqretry(&bb->lock, seq))
8095 goto retry;
8096
8097 return rv;
8098}
8099EXPORT_SYMBOL_GPL(md_is_badblock);
8100
8101
8102
8103
8104
8105
8106
8107
/*
 * md_set_badblocks() - record a range of sectors as bad.
 * @bb:           bad-block table to update.
 * @s:            first sector of the range.
 * @sectors:      length of the range.
 * @acknowledged: non-zero to record the range as acknowledged.
 *
 * The new range is merged with an existing entry that starts at or before it
 * and/or with the following entry where they touch or overlap; whatever is
 * left over is inserted as new entries of at most BB_MAX_LEN each.
 *
 * Returns 1 on success.  Returns 0 when bb->shift is negative (nothing is
 * recorded) or when the table already holds MD_MAX_BADBLOCKS entries and the
 * remaining part of the range could not be inserted.  In the table-full case
 * any merging already performed stays in place.
 */
static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
			    int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 1;
	unsigned long flags;

	if (bb->shift < 0)
		/* a negative shift means the table cannot accept entries */
		return 0;

	if (bb->shift) {
		/* work in shifted units: round the start down and the end up */
		sector_t next = s + sectors;
		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* binary search for the last entry that starts at or before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);
		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	/* no entry starts at or before 's': there is no 'lo' candidate */
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* p[lo] starts at or before 's'; it may absorb the start
		 * of the new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);
		if (e >= s) {
			/* p[lo] reaches (or touches) 's': merge */
			if (s == a && s + sectors >= e)
				/* new range fully covers the old entry */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* merged range is too long for one entry:
				 * make p[lo] as long as allowed
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			/* whatever was not absorbed still has to be recorded */
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* p[hi] is the first entry starting after the original 's';
		 * the rest of the new range may merge into its front
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);
		if (a <= s + sectors) {
			/* the new range reaches p[hi]: merge */
			if (e <= s + sectors) {
				/* p[hi] is fully covered by the new range */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* everything is recorded; p[lo] and p[hi] may now be
		 * adjacent or overlapping and combinable into one entry
		 */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);
		if (s >= a && newlen < BB_MAX_LEN) {
			/* combine them and close the gap in the table */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * 8);
			bb->count--;
		}
	}
	while (sectors) {
		/* part of the range did not merge: insert new entries
		 * just before index 'hi'
		 */
		if (bb->count >= MD_MAX_BADBLOCKS) {
			/* table is full */
			rv = 0;
			break;
		} else {
			int this_sectors = sectors;
			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * 8);
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	/* marked changed even when the table-full path was taken */
	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
8249
8250int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8251 int is_new)
8252{
8253 int rv;
8254 if (is_new)
8255 s += rdev->new_data_offset;
8256 else
8257 s += rdev->data_offset;
8258 rv = md_set_badblocks(&rdev->badblocks,
8259 s, sectors, 0);
8260 if (rv) {
8261
8262 sysfs_notify_dirent_safe(rdev->sysfs_state);
8263 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
8264 md_wakeup_thread(rdev->mddev->thread);
8265 }
8266 return rv;
8267}
8268EXPORT_SYMBOL_GPL(rdev_set_badblocks);
8269
8270
8271
8272
8273
8274
8275
8276static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
8277{
8278 u64 *p;
8279 int lo, hi;
8280 sector_t target = s + sectors;
8281 int rv = 0;
8282
8283 if (bb->shift > 0) {
8284
8285
8286
8287
8288
8289
8290 s += (1<<bb->shift) - 1;
8291 s >>= bb->shift;
8292 target >>= bb->shift;
8293 sectors = target - s;
8294 }
8295
8296 write_seqlock_irq(&bb->lock);
8297
8298 p = bb->page;
8299 lo = 0;
8300 hi = bb->count;
8301
8302 while (hi - lo > 1) {
8303 int mid = (lo + hi) / 2;
8304 sector_t a = BB_OFFSET(p[mid]);
8305 if (a < target)
8306 lo = mid;
8307 else
8308 hi = mid;
8309 }
8310 if (hi > lo) {
8311
8312
8313
8314
8315 if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
8316
8317 int ack = BB_ACK(p[lo]);
8318 sector_t a = BB_OFFSET(p[lo]);
8319 sector_t end = a + BB_LEN(p[lo]);
8320
8321 if (a < s) {
8322
8323 if (bb->count >= MD_MAX_BADBLOCKS) {
8324 rv = 0;
8325 goto out;
8326 }
8327 memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
8328 bb->count++;
8329 p[lo] = BB_MAKE(a, s-a, ack);
8330 lo++;
8331 }
8332 p[lo] = BB_MAKE(target, end - target, ack);
8333
8334 hi = lo;
8335 lo--;
8336 }
8337 while (lo >= 0 &&
8338 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
8339
8340 if (BB_OFFSET(p[lo]) < s) {
8341
8342 int ack = BB_ACK(p[lo]);
8343 sector_t start = BB_OFFSET(p[lo]);
8344 p[lo] = BB_MAKE(start, s - start, ack);
8345
8346 break;
8347 }
8348 lo--;
8349 }
8350
8351
8352
8353 if (hi - lo > 1) {
8354 memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
8355 bb->count -= (hi - lo - 1);
8356 }
8357 }
8358
8359 bb->changed = 1;
8360out:
8361 write_sequnlock_irq(&bb->lock);
8362 return rv;
8363}
8364
8365int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
8366 int is_new)
8367{
8368 if (is_new)
8369 s += rdev->new_data_offset;
8370 else
8371 s += rdev->data_offset;
8372 return md_clear_badblocks(&rdev->badblocks,
8373 s, sectors);
8374}
8375EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
8376
8377
8378
8379
8380
8381
8382void md_ack_all_badblocks(struct badblocks *bb)
8383{
8384 if (bb->page == NULL || bb->changed)
8385
8386 return;
8387 write_seqlock_irq(&bb->lock);
8388
8389 if (bb->changed == 0 && bb->unacked_exist) {
8390 u64 *p = bb->page;
8391 int i;
8392 for (i = 0; i < bb->count ; i++) {
8393 if (!BB_ACK(p[i])) {
8394 sector_t start = BB_OFFSET(p[i]);
8395 int len = BB_LEN(p[i]);
8396 p[i] = BB_MAKE(start, len, 1);
8397 }
8398 }
8399 bb->unacked_exist = 0;
8400 }
8401 write_sequnlock_irq(&bb->lock);
8402}
8403EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
8404
8405
8406
8407
8408
8409
8410
8411
8412
8413
8414
8415
8416
8417static ssize_t
8418badblocks_show(struct badblocks *bb, char *page, int unack)
8419{
8420 size_t len;
8421 int i;
8422 u64 *p = bb->page;
8423 unsigned seq;
8424
8425 if (bb->shift < 0)
8426 return 0;
8427
8428retry:
8429 seq = read_seqbegin(&bb->lock);
8430
8431 len = 0;
8432 i = 0;
8433
8434 while (len < PAGE_SIZE && i < bb->count) {
8435 sector_t s = BB_OFFSET(p[i]);
8436 unsigned int length = BB_LEN(p[i]);
8437 int ack = BB_ACK(p[i]);
8438 i++;
8439
8440 if (unack && ack)
8441 continue;
8442
8443 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
8444 (unsigned long long)s << bb->shift,
8445 length << bb->shift);
8446 }
8447 if (unack && len == 0)
8448 bb->unacked_exist = 0;
8449
8450 if (read_seqretry(&bb->lock, seq))
8451 goto retry;
8452
8453 return len;
8454}
8455
8456#define DO_DEBUG 1
8457
8458static ssize_t
8459badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
8460{
8461 unsigned long long sector;
8462 int length;
8463 char newline;
8464#ifdef DO_DEBUG
8465
8466
8467
8468 int clear = 0;
8469 if (page[0] == '-') {
8470 clear = 1;
8471 page++;
8472 }
8473#endif
8474
8475 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
8476 case 3:
8477 if (newline != '\n')
8478 return -EINVAL;
8479 case 2:
8480 if (length <= 0)
8481 return -EINVAL;
8482 break;
8483 default:
8484 return -EINVAL;
8485 }
8486
8487#ifdef DO_DEBUG
8488 if (clear) {
8489 md_clear_badblocks(bb, sector, length);
8490 return len;
8491 }
8492#endif
8493 if (md_set_badblocks(bb, sector, length, !unack))
8494 return len;
8495 else
8496 return -ENOSPC;
8497}
8498
8499static int md_notify_reboot(struct notifier_block *this,
8500 unsigned long code, void *x)
8501{
8502 struct list_head *tmp;
8503 struct mddev *mddev;
8504 int need_delay = 0;
8505
8506 for_each_mddev(mddev, tmp) {
8507 if (mddev_trylock(mddev)) {
8508 if (mddev->pers)
8509 __md_stop_writes(mddev);
8510 mddev->safemode = 2;
8511 mddev_unlock(mddev);
8512 }
8513 need_delay = 1;
8514 }
8515
8516
8517
8518
8519
8520
8521 if (need_delay)
8522 mdelay(1000*1);
8523
8524 return NOTIFY_DONE;
8525}
8526
/*
 * Reboot notifier that runs md_notify_reboot().  Registered in md_init() and
 * removed in md_exit().  Priority is INT_MAX, the largest possible value.
 * NOTE(review): presumably so it runs ahead of other reboot notifiers -
 * confirm against the notifier chain ordering rules.
 */
static struct notifier_block md_notifier = {
	.notifier_call	= md_notify_reboot,
	.next		= NULL,
	.priority	= INT_MAX,
};
8532
/*
 * md_geninit() - final initialisation step called from md_init().
 *
 * Emits a debug message with sizeof(mdp_super_t) and creates the read-only
 * "mdstat" proc entry backed by md_seq_fops.  The result of proc_create()
 * is not checked.
 */
static void md_geninit(void)
{
	pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));

	proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
8539
8540static int __init md_init(void)
8541{
8542 int ret = -ENOMEM;
8543
8544 md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
8545 if (!md_wq)
8546 goto err_wq;
8547
8548 md_misc_wq = alloc_workqueue("md_misc", 0, 0);
8549 if (!md_misc_wq)
8550 goto err_misc_wq;
8551
8552 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
8553 goto err_md;
8554
8555 if ((ret = register_blkdev(0, "mdp")) < 0)
8556 goto err_mdp;
8557 mdp_major = ret;
8558
8559 blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
8560 md_probe, NULL, NULL);
8561 blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
8562 md_probe, NULL, NULL);
8563
8564 register_reboot_notifier(&md_notifier);
8565 raid_table_header = register_sysctl_table(raid_root_table);
8566
8567 md_geninit();
8568 return 0;
8569
8570err_mdp:
8571 unregister_blkdev(MD_MAJOR, "md");
8572err_md:
8573 destroy_workqueue(md_misc_wq);
8574err_misc_wq:
8575 destroy_workqueue(md_wq);
8576err_wq:
8577 return ret;
8578}
8579
8580#ifndef MODULE
8581
8582
8583
8584
8585
8586
/*
 * Device numbers queued by md_autodetect_dev() and consumed, one node at a
 * time, by autostart_arrays().  Only built when md is not a module.
 */
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;	/* link in all_detected_devices */
	dev_t dev;		/* device number to try importing */
};
8592
8593void md_autodetect_dev(dev_t dev)
8594{
8595 struct detected_devices_node *node_detected_dev;
8596
8597 node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
8598 if (node_detected_dev) {
8599 node_detected_dev->dev = dev;
8600 list_add_tail(&node_detected_dev->list, &all_detected_devices);
8601 } else {
8602 printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
8603 ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
8604 }
8605}
8606
8607
8608static void autostart_arrays(int part)
8609{
8610 struct md_rdev *rdev;
8611 struct detected_devices_node *node_detected_dev;
8612 dev_t dev;
8613 int i_scanned, i_passed;
8614
8615 i_scanned = 0;
8616 i_passed = 0;
8617
8618 printk(KERN_INFO "md: Autodetecting RAID arrays.\n");
8619
8620 while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
8621 i_scanned++;
8622 node_detected_dev = list_entry(all_detected_devices.next,
8623 struct detected_devices_node, list);
8624 list_del(&node_detected_dev->list);
8625 dev = node_detected_dev->dev;
8626 kfree(node_detected_dev);
8627 rdev = md_import_device(dev,0, 90);
8628 if (IS_ERR(rdev))
8629 continue;
8630
8631 if (test_bit(Faulty, &rdev->flags)) {
8632 MD_BUG();
8633 continue;
8634 }
8635 set_bit(AutoDetected, &rdev->flags);
8636 list_add(&rdev->same_set, &pending_raid_disks);
8637 i_passed++;
8638 }
8639
8640 printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
8641 i_scanned, i_passed);
8642
8643 autorun_devices(part);
8644}
8645
8646#endif
8647
8648static __exit void md_exit(void)
8649{
8650 struct mddev *mddev;
8651 struct list_head *tmp;
8652
8653 blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
8654 blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
8655
8656 unregister_blkdev(MD_MAJOR,"md");
8657 unregister_blkdev(mdp_major, "mdp");
8658 unregister_reboot_notifier(&md_notifier);
8659 unregister_sysctl_table(raid_table_header);
8660 remove_proc_entry("mdstat", NULL);
8661 for_each_mddev(mddev, tmp) {
8662 export_array(mddev);
8663 mddev->hold_active = 0;
8664 }
8665 destroy_workqueue(md_misc_wq);
8666 destroy_workqueue(md_wq);
8667}
8668
8669subsys_initcall(md_init);
8670module_exit(md_exit)
8671
/*
 * get_ro() - "get" handler for the start_ro module parameter.
 * @buffer: destination buffer.
 * @kp:     parameter descriptor (unused).
 *
 * Writes start_readonly as a decimal number with no trailing newline and
 * returns the number of characters written.
 */
static int get_ro(char *buffer, struct kernel_param *kp)
{
	return sprintf(buffer, "%d", start_readonly);
}
8676static int set_ro(const char *val, struct kernel_param *kp)
8677{
8678 char *e;
8679 int num = simple_strtoul(val, &e, 10);
8680 if (*val && (*e == '\0' || *e == '\n')) {
8681 start_readonly = num;
8682 return 0;
8683 }
8684 return -EINVAL;
8685}
8686
/* start_ro: owner-readable/writable, handled by set_ro()/get_ro() */
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
/* start_dirty_degraded: plain int, world-readable, owner-writable */
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);

/* new_array: write-only, handled by add_named_array() */
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);

/* symbols exported to other modules */
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end);
EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_check_recovery);
EXPORT_SYMBOL(md_reap_sync_thread);
/* module metadata */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
MODULE_ALIAS("md");
MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR);
8707