/*
   md.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST" because they don't make any promise about time lapse,
 * only about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because it deals with retries
 * on the driver side only.
 */
#define MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/*
 * MD's 'extended' device
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same raid set */

	sector_t sectors;		/* Device size (in 512byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page	*sb_page, *bb_page;
	int		sb_loaded;
	__u64		sb_events;
	sector_t	data_offset;	/* start of data in array */
	sector_t	new_data_offset;/* relative address of data after a reshape */
	sector_t	sb_start;	/* offset of the super block (in 512byte sectors) */
	int		sb_size;	/* bytes in the superblock */
	int		preferred_minor;	/* autorun support */

	struct kobject	kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long	flags;		/* bit set of 'enum flag_bits' bits. */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t	nr_pending;	/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t	read_errors;	/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	time64_t	last_read_error;	/* monotonic time since our
						 * last read error
						 */
	atomic_t	corrected_errors; /* number of corrected read errors,
					   * for reporting to userspace and storing
					   * in superblock.
					   */
	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state'
					  * sysfs entry */

	struct badblocks badblocks;

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, either because it has
				 * reported some faults, or because
				 * of explicit request.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as journal for
				 * raid-5/6.
				 * Usually, this device should be faster
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't treat it as
				 * "failfast" either device.
				 */
};

/*
 * Check whether sectors [s, s+sectors) of rdev overlap any recorded bad
 * blocks.  Returns the badblocks_check() result (0 if the range is clean,
 * 1 if it hits only acknowledged bad blocks, -1 if it hits an
 * unacknowledged one), with *first_bad translated back into an offset
 * relative to the start of data.
 */
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					 sectors,
					 first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
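
/*
 * Illustrative only (not part of this header): a read path would
 * typically probe the bad-block table before issuing I/O, e.g.
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors,
 *			&first_bad, &bad_sectors))
 *		... choose another mirror or shorten the request ...
 */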
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};

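/*
 * Illustrative only: the usual pattern for requesting a superblock
 * update is to set a change bit and poke the management thread, e.g.
 *
 *	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 *	md_wakeup_thread(mddev->thread);
 */
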
struct mddev {
	void				*private;
	struct md_personality		*pers;
	dev_t				unit;
	int				md_minor;
	struct list_head		disks;
	unsigned long			flags;		/* bit set of 'enum mddev_flags' */
	unsigned long			sb_flags;	/* bit set of 'enum mddev_sb_flags' */

	int				suspended;
	atomic_t			active_io;
	int				ro;
	int				sysfs_active;	/* set when sysfs deletes
							 * are happening, so run/
							 * takeover/stop are not safe
							 */
	struct gendisk			*gendisk;

	struct kobject			kobj;
	int				hold_active;
#define UNTIL_IOCTL	1
#define UNTIL_STOP	2

	/* Superblock information */
	int				major_version,
					minor_version,
					patch_version;
	int				persistent;
	int				external;	/* metadata is
							 * managed externally */
	char				metadata_type[17]; /* externally set */
	int				chunk_sectors;
	time64_t			ctime, utime;
	int				level, layout;
	char				clevel[16];
	int				raid_disks;
	int				max_disks;
	sector_t			dev_sectors;	/* used size of
							 * component devices */
	sector_t			array_sectors;	/* exported array size */
	int				external_size;	/* size managed
							 * externally */
	__u64				events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int				can_decrease_events;

	char				uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t			reshape_position;
	int				delta_disks, new_level, new_layout;
	int				new_chunk_sectors;
	int				reshape_backwards;

	struct md_thread		*thread;	/* management thread */
	struct md_thread		*sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is begun.
	 */
	char				*last_sync_action;
	sector_t			curr_resync;	/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t			curr_resync_completed;
	unsigned long			resync_mark;	/* a recent timestamp */
	sector_t			resync_mark_cnt;/* blocks written at resync_mark */
	sector_t			curr_mark_cnt;	/* blocks scheduled now */

	sector_t			resync_max_sectors; /* may be set by personality */

	atomic64_t			resync_mismatches; /* count of sectors where
							    * parity/replica mismatch found
							    */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t			suspend_lo;
	sector_t			suspend_hi;
	/* if zero, use the system-wide default */
	int				sync_speed_min;
	int				sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int				parallel_resync;

	int				ok_start_degraded;

	unsigned long			recovery;	/* bit set of 'enum recovery_flags' */
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int				recovery_disabled;

	int				in_sync;	/* know to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 * Lock ordering is:
	 *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
	 */
	struct mutex			open_mutex;
	struct mutex			reconfig_mutex;
	atomic_t			active;		/* general refcount */
	atomic_t			openers;	/* number of active opens */

	int				changed;	/* True if we might need to
							 * reread partition info */
	int				degraded;	/* whether md should consider
							 * adding a spare
							 */

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	wait_queue_head_t		recovery_wait;
	sector_t			recovery_cp;
	sector_t			resync_min;	/* user requested sync
							 * starts here */
	sector_t			resync_max;	/* resync should pause
							 * when it gets here */

	struct kernfs_node		*sysfs_state;	/* handle for 'array_state'
							 * file in sysfs.
							 */
	struct kernfs_node		*sysfs_action;	/* handle for 'sync_action' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_CHANGE_*
	 *   in_sync - and related safemode and MD_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t			lock;
	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
	atomic_t			pending_writes;	/* number of active superblock writes */

	unsigned int			safemode;	/* if set, update "clean" superblock
							 * when no writes pending.
							 */
	unsigned int			safemode_delay;
	struct timer_list		safemode_timer;
	struct percpu_ref		writes_pending;
	int				sync_checkers;	/* # of threads checking writes_pending */
	struct request_queue		*queue;		/* for plugging ... */

	struct bitmap			*bitmap;	/* the bitmap for the device */
	struct {
		struct file		*file;		/* the bitmap file */
		loff_t			offset;		/* offset from superblock of
							 * start of bitmap. May be
							 * negative, but not '0'
							 * For external metadata, offset
							 * from start of device.
							 */
		unsigned long		space;		/* space available at this offset */
		loff_t			default_offset;	/* this is the offset to use when
							 * hot-adding a bitmap.  It should
							 * eventually be settable by sysfs.
							 */
		unsigned long		default_space;	/* space available at
							 * default offset */
		struct mutex		mutex;
		unsigned long		chunksize;
		unsigned long		daemon_sleep;	/* how many jiffies between updates? */
		unsigned long		max_write_behind; /* write-behind mode */
		int			external;
		int			nodes;		/* Maximum number of nodes in the cluster */
		char			cluster_name[64]; /* Name of the cluster */
	} bitmap_info;

	atomic_t			max_corr_read_errors; /* max read retries */
	struct list_head		all_mddevs;

	struct attribute_group		*to_remove;

	struct bio_set			*bio_set;
	struct bio_set			*sync_set;	/* for sync operations like
							 * metadata and bitmap writes
							 */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info		*cluster_info;
	unsigned int			good_device_nr;	/* good device num within cluster raid */
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
};
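
/*
 * Illustrative only: a personality that notices a failed device asks
 * for recovery rather than starting it directly:
 *
 *	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 *	md_wakeup_thread(mddev->thread);
 */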

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_is_locked(struct mddev *mddev)
{
	return mutex_is_locked(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
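
/*
 * Illustrative only: reconfiguration paths take reconfig_mutex via
 * mddev_lock() and must cope with it failing on a signal:
 *
 *	if (mddev_lock(mddev))
 *		return -EINTR;
 *	... reconfigure the array ...
 *	mddev_unlock(mddev);
 */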

/* Account sectors submitted on behalf of a resync/recovery, so the
 * speed of sync I/O can be monitored (per-disk 'sync_io' counter).
 */
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool (*make_request)(struct mddev *mddev, struct bio *bio);
	int (*run)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	/* quiesce moves between quiescence states
	 * 0 - fully active
	 * 1 - no new requests allowed
	 * others - reserved
	 */
	void (*quiesce) (struct mddev *mddev, int state);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* congested implements the congested_fn which personality can provide */
	int (*congested)(struct mddev *mddev, int bits);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};
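
/*
 * Illustrative registration sketch: only register_md_personality() and
 * unregister_md_personality() below are real; the "example_*" names and
 * the level number are hypothetical.  A personality module fills in the
 * ops it supports and registers itself at module init:
 *
 *	static struct md_personality example_personality = {
 *		.name		= "example",
 *		.level		= -100,
 *		.owner		= THIS_MODULE,
 *		.make_request	= example_make_request,
 *		.run		= example_run,
 *		.free		= example_free,
 *		.status		= example_status,
 *	};
 *	// module init:  register_md_personality(&example_personality);
 *	// module exit:  unregister_md_personality(&example_personality);
 */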

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}

static inline char *mdname(struct mddev *mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Don't touch 'tmp' though.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)					\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)				\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)					\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
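
/*
 * Illustrative only: walking the members of an array while skipping
 * faulty devices.  The plain variant needs the list held stable
 * (e.g. under reconfig_mutex); the _rcu variant must run inside
 * rcu_read_lock()/rcu_read_unlock():
 *
 *	struct md_rdev *rdev;
 *
 *	rdev_for_each(rdev, mddev)
 *		if (!test_bit(Faulty, &rdev->flags))
 *			pr_debug("%s: rd%d active\n",
 *				 mdname(mddev), rdev->raid_disk);
 */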

struct md_thread {
	void			(*run) (struct md_thread *thread);
	struct mddev		*mddev;
	wait_queue_head_t	wqueue;
	unsigned long		flags;
	struct task_struct	*tsk;
	unsigned long		timeout;
	void			*private;
};

#define THREAD_WAKEUP  0

static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);

extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
				   struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);

/* Drop a reference taken on an rdev for in-flight I/O; if the device has
 * failed and this was the last outstanding request, ask the management
 * thread to start recovery (so the failed device can be handled).
 */
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
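
/*
 * Illustrative pairing (sketch): callers elevate nr_pending around
 * submitted I/O and drop it on completion:
 *
 *	atomic_inc(&rdev->nr_pending);
 *	... submit bio to rdev->bdev ...
 *	// in the completion handler:
 *	rdev_dec_pending(rdev, mddev);
 */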

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_SAME &&
	    !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
		mddev->queue->limits.max_write_same_sectors = 0;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}
#endif /* _MD_MD_H */