/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
   md.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
*/
#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST", because they make no promise about time lapse, only
 * about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because the errors it avoids
 * retrying should usually be retried.
 */
#define MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/*
 * MD's 'extended' device
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512 byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;
	int sb_loaded;
	__u64 sb_events;
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* only relevant while reshaping */
	sector_t sb_start;		/* offset of the super block (in 512 byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:    faulty==1 in_sync==0
	 * Fully working:  faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                 faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits. */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	time64_t last_read_error;	/* monotonic time since our
					 * last read error
					 */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and storing
					 * in superblock.
					 */

	/*
	 * The members for checking collision of write-behind IOs.
	 */
	struct list_head wb_list;
	spinlock_t wb_list_lock;
	wait_queue_head_t wb_io_wait;

	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state' sysfs entry */

	struct badblocks badblocks;

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, possibly because it is
				 * nearly too full, or because it is
				 * failing.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with the
				 * same raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
	WBCollisionCheck,	/* This device needs to check collision
				 * between write-behind bios.
				 */
};
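
/*
 * Illustrative sketch (not part of the original header): rdev->flags is an
 * unsigned long bitmap of the enum flag_bits above, manipulated with the
 * usual atomic bitops.  A hypothetical caller might do:
 *
 *	if (test_bit(Faulty, &rdev->flags))
 *		return;
 *	set_bit(WriteErrorSeen, &rdev->flags);
 *	clear_bit(Blocked, &rdev->flags);
 *	wake_up(&rdev->blocked_wait);
 */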

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					 sectors,
					 first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
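
/*
 * Illustrative sketch (not part of the original header): a read path would
 * typically consult is_badblock() before issuing I/O to an rdev; the names
 * this_sector/sectors below are hypothetical locals of the caller:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors, &first_bad, &bad_sectors))
 *		... trim the request or pick another device ...
 */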
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};

#define NR_WB_INFOS 8
/* record the current range of a write-behind IO */
struct wb_info {
	sector_t lo;
	sector_t hi;
	struct list_head list;
};

struct mddev {
	void *private;
	struct md_personality *pers;
	dev_t unit;
	int md_minor;
	struct list_head disks;
	unsigned long flags;
	unsigned long sb_flags;

	int suspended;
	atomic_t active_io;
	int ro;
	int sysfs_active;		/* set when sysfs deletes
					 * are happening, so run/
					 * takeover/stop are not safe
					 */
	struct gendisk *gendisk;

	struct kobject kobj;
	int hold_active;
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2

	/* Superblock information */
	int major_version,
	    minor_version,
	    patch_version;
	int persistent;
	int external;			/* metadata is managed externally */
	char metadata_type[17];		/* externally set */
	int chunk_sectors;
	time64_t ctime, utime;
	int level, layout;
	char clevel[16];
	int raid_disks;
	int max_disks;
	sector_t dev_sectors;		/* used size of component devices */
	sector_t array_sectors;		/* exported array size */
	int external_size;		/* size managed externally */
	__u64 events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int can_decrease_events;

	char uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t reshape_position;
	int delta_disks, new_level, new_layout;
	int new_chunk_sectors;
	int reshape_backwards;

	struct md_thread *thread;	/* management thread */
	struct md_thread *sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e. "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is begun.
	 */
	char *last_sync_action;
	sector_t curr_resync;		/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t curr_resync_completed;
	unsigned long resync_mark;	/* a recent timestamp */
	sector_t resync_mark_cnt;	/* blocks written at resync_mark */
	sector_t curr_mark_cnt;		/* blocks scheduled now */

	sector_t resync_max_sectors;	/* may be set by personality */

	atomic64_t resync_mismatches;	/* count of sectors where
					 * parity/replica mismatch found
					 */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t suspend_lo;
	sector_t suspend_hi;
	/* if zero, use the system-wide default */
	int sync_speed_min;
	int sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int parallel_resync;

	int ok_start_degraded;

	unsigned long recovery;
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int recovery_disabled;

	int in_sync;			/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 * Lock ordering is:
	 *  reconfig_mutex -> bd_mutex
	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
	 */
	struct mutex open_mutex;
	struct mutex reconfig_mutex;
	atomic_t active;		/* general refcount */
	atomic_t openers;		/* number of active opens */

	int changed;			/* True if we might need to
					 * reread partition info */
	int degraded;			/* whether md should consider
					 * adding a spare
					 */

	atomic_t recovery_active;	/* blocks scheduled, but not written */
	wait_queue_head_t recovery_wait;
	sector_t recovery_cp;
	sector_t resync_min;		/* user requested sync starts here */
	sector_t resync_max;		/* resync should pause when it gets here */

	struct kernfs_node *sysfs_state;	/* handle for 'array_state'
						 * file in sysfs.
						 */
	struct kernfs_node *sysfs_action;	/* handle for 'sync_action' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_CHANGE_PENDING
	 *   in_sync - and related safemode and MD_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t lock;
	wait_queue_head_t sb_wait;	/* for waiting on superblock updates */
	atomic_t pending_writes;	/* number of active superblock writes */

	unsigned int safemode;		/* if set, update "clean" superblock
					 * when no writes pending.
					 */
	unsigned int safemode_delay;
	struct timer_list safemode_timer;
	struct percpu_ref writes_pending;
	int sync_checkers;		/* # of threads checking writes_pending */
	struct request_queue *queue;	/* for plugging ... */

	struct bitmap *bitmap;		/* the bitmap for the device */
	struct {
		struct file *file;	/* the bitmap file */
		loff_t offset;		/* offset from superblock of
					 * start of bitmap. May be
					 * negative, but not '0'.
					 * For external metadata, offset
					 * from start of device.
					 */
		unsigned long space;	/* space available at this offset */
		loff_t default_offset;	/* this is the offset to use when
					 * hot-adding a bitmap.  It should
					 * eventually be settable by sysfs.
					 */
		unsigned long default_space;	/* space available at
						 * default offset */
		struct mutex mutex;
		unsigned long chunksize;
		unsigned long daemon_sleep;	/* how many jiffies between updates? */
		unsigned long max_write_behind;	/* write-behind mode */
		int external;
		int nodes;			/* Maximum number of nodes in the cluster */
		char cluster_name[64];		/* Name of the cluster */
	} bitmap_info;

	atomic_t max_corr_read_errors;	/* max read retries */
	struct list_head all_mddevs;

	struct attribute_group *to_remove;

	struct bio_set bio_set;
	struct bio_set sync_set;	/* for sync operations like
					 * metadata and bitmap writes
					 */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	ktime_t start_flush, last_flush; /* last_flush is when the last completed
					  * flush was started.
					  */
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	mempool_t *wb_info_pool;
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info *cluster_info;
	unsigned int good_device_nr;	/* good device num within cluster raid */

	bool has_superblocks:1;
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE is set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
	MD_RESYNCING_REMOTE,	/* remote node is running the resync thread */
};

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
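
/*
 * Illustrative sketch (not part of the original header): sysfs store
 * handlers and ioctls typically bracket reconfiguration with
 * mddev_lock()/mddev_unlock(), e.g.:
 *
 *	err = mddev_lock(mddev);
 *	if (err)
 *		return err;
 *	... reconfigure the array under reconfig_mutex ...
 *	mddev_unlock(mddev);
 */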

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bio->bi_disk->sync_io);
}

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool (*make_request)(struct mddev *mddev, struct bio *bio);
	/*
	 * start up works that do NOT require md_thread. tasks that
	 * require md_thread should go into start()
	 */
	int (*run)(struct mddev *mddev);
	/* start up works that require md threads */
	int (*start)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
	 */
	void (*quiesce) (struct mddev *mddev, int quiesce);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* congested implements bdi.congested_fn().
	 * Will not be called while array is 'suspended' */
	int (*congested)(struct mddev *mddev, int bits);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}

static inline char *mdname(struct mddev *mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)				\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)				\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)				\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
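
/*
 * Illustrative sketch (not part of the original header): lockless walkers
 * wrap rdev_for_each_rcu() in rcu_read_lock()/rcu_read_unlock(), and bump
 * nr_pending if the rdev must be used after the RCU section ends:
 *
 *	rcu_read_lock();
 *	rdev_for_each_rcu(rdev, mddev) {
 *		if (test_bit(Faulty, &rdev->flags))
 *			continue;
 *		atomic_inc(&rdev->nr_pending);
 *		... issue I/O to rdev, then rdev_dec_pending() when done ...
 *	}
 *	rcu_read_unlock();
 */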

struct md_thread {
	void (*run) (struct md_thread *thread);
	struct mddev *mddev;
	wait_queue_head_t wqueue;
	unsigned long flags;
	struct task_struct *tsk;
	unsigned long timeout;
	void *private;
};

#define THREAD_WAKEUP 0

static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);

extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
				   struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
				 bool is_suspend);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
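
/*
 * Illustrative note (not part of the original header): rdev_dec_pending()
 * pairs with an earlier atomic_inc(&rdev->nr_pending) taken while the rdev
 * was selected (usually under rcu_read_lock()); a personality's I/O
 * completion handler would typically call, for example:
 *
 *	rdev_dec_pending(rdev, mddev);
 */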

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_SAME &&
	    !bio->bi_disk->queue->limits.max_write_same_sectors)
		mddev->queue->limits.max_write_same_sectors = 0;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}
#endif /* _MD_MD_H */