/*
 * md.h : kernel internal structure of the Linux MD (software RAID) driver
 */
#ifndef _MD_MD_H
#define _MD_MD_H

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/badblocks.h>
#include <linux/kobject.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include "md-cluster.h"

#define MaxSector (~(sector_t)0)

/*
 * The MD_FAILFAST flags only limit the number of retries (to zero); they
 * make no promise about how quickly an error will be reported.
 * REQ_FAILFAST_DRIVER is deliberately left out, because the errors it
 * would avoid retrying are usually worth retrying.
 */
#define MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/*
 * MD's 'extended' device
 */
struct md_rdev {
	struct list_head same_set;	/* RAID devices within the same set */

	sector_t sectors;		/* Device size (in 512 byte sectors) */
	struct mddev *mddev;		/* RAID array if running */
	int last_events;		/* IO event timestamp */

	/*
	 * If meta_bdev is non-NULL, it means that a separate device is
	 * being used to store the metadata (superblock/bitmap) which
	 * would otherwise be contained on the same device as the data (bdev).
	 */
	struct block_device *meta_bdev;
	struct block_device *bdev;	/* block device handle */

	struct page *sb_page, *bb_page;
	int sb_loaded;
	__u64 sb_events;
	sector_t data_offset;		/* start of data in array */
	sector_t new_data_offset;	/* only relevant while reshaping */
	sector_t sb_start;		/* offset of the super block (in 512 byte sectors) */
	int sb_size;			/* bytes in the superblock */
	int preferred_minor;		/* autorun support */

	struct kobject kobj;

	/* A device can be in one of three states based on two flags:
	 * Not working:   faulty==1 in_sync==0
	 * Fully working: faulty==0 in_sync==1
	 * Working, but not
	 * in sync with array
	 *                faulty==0 in_sync==0
	 *
	 * It can never have faulty==1, in_sync==1
	 * This reduces the burden of testing multiple flags in many cases
	 */

	unsigned long flags;		/* bit set of 'enum flag_bits' bits */
	wait_queue_head_t blocked_wait;

	int desc_nr;			/* descriptor index in the superblock */
	int raid_disk;			/* role of device in array */
	int new_raid_disk;		/* role that the device will have in
					 * the array after a level-change completes.
					 */
	int saved_raid_disk;		/* role that device used to have in the
					 * array and could again if we did a partial
					 * resync from the bitmap
					 */
	union {
		sector_t recovery_offset;/* If this device has been partially
					 * recovered, this is where we were
					 * up to.
					 */
		sector_t journal_tail;	/* If this device is a journal device,
					 * this is the journal tail (journal
					 * recovery start point)
					 */
	};

	atomic_t nr_pending;		/* number of pending requests.
					 * only maintained for arrays that
					 * support hot removal
					 */
	atomic_t read_errors;		/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	time64_t last_read_error;	/* monotonic time since our
					 * last read error
					 */
	atomic_t corrected_errors;	/* number of corrected read errors,
					 * for reporting to userspace and storing
					 * in superblock.
					 */
	struct work_struct del_work;	/* used for delayed sysfs removal */

	struct kernfs_node *sysfs_state; /* handle for 'state'
					  * sysfs entry */

	struct badblocks badblocks;

	struct {
		short offset;		/* Offset from superblock to start of PPL.
					 * Not used by external metadata. */
		unsigned int size;	/* Size in sectors of the PPL space */
		sector_t sector;	/* First sector of the PPL space */
	} ppl;
};
enum flag_bits {
	Faulty,			/* device is known to have a fault */
	In_sync,		/* device is in_sync with rest of array */
	Bitmap_sync,		/* ..actually, not quite In_sync.  Need a
				 * bitmap-based recovery to get fully in sync.
				 * The bit is only meaningful before the device
				 * has been passed to pers->hot_add_disk.
				 */
	WriteMostly,		/* Avoid reading if at all possible */
	AutoDetected,		/* added by auto-detect */
	Blocked,		/* An error occurred but has not yet
				 * been acknowledged by the metadata
				 * handler, so don't allow writes
				 * until it is cleared */
	WriteErrorSeen,		/* A write error has been seen on this
				 * device
				 */
	FaultRecorded,		/* Intermediate state for clearing
				 * Blocked.  The Fault is/will-be
				 * recorded in the metadata, but that
				 * metadata hasn't been stored safely
				 * on disk yet.
				 */
	BlockedBadBlocks,	/* A writer is blocked because they
				 * found an unacknowledged bad-block.
				 * This can safely be cleared at any
				 * time, and the writer will re-check.
				 * It may be set at any time, and at
				 * worst the writer will timeout and
				 * re-check.  So setting it as
				 * accurately as possible is good, but
				 * not absolutely critical.
				 */
	WantReplacement,	/* This device is a candidate to be
				 * hot-replaced, possibly because it has
				 * reported some faults, or because
				 * of an explicit request.
				 */
	Replacement,		/* This device is a replacement for
				 * a want_replacement device with the same
				 * raid_disk number.
				 */
	Candidate,		/* For clustered environments only:
				 * This device is seen locally but not
				 * by the whole cluster
				 */
	Journal,		/* This device is used as a journal for
				 * raid-5/6; it should usually be faster
				 * than the other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,		/* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expected that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't treat it as
				 * a failure. This is only for failfast.
				 */
};

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
			      sector_t *first_bad, int *bad_sectors)
{
	if (unlikely(rdev->badblocks.count)) {
		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
					sectors,
					first_bad, bad_sectors);
		if (rv)
			*first_bad -= rdev->data_offset;
		return rv;
	}
	return 0;
}
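
/*
 * Usage sketch (illustrative only, not part of this header): a personality
 * typically calls is_badblock() before issuing a read to a member device,
 * and either avoids the device or shortens the I/O so it stops before the
 * first bad sector.  The variable names below are hypothetical.
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (is_badblock(rdev, this_sector, sectors, &first_bad, &bad_sectors)) {
 *		if (first_bad <= this_sector) {
 *			// start of the range is bad: try another device
 *		} else {
 *			// only read the good prefix
 *			sectors = first_bad - this_sector;
 *		}
 *	}
 */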
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
			      int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if a new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only: this node already
				   * took the resync lock, so it must be
				   * released */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,	/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};
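
/*
 * Usage sketch (illustrative only): code that changes array or device state
 * marks the in-memory superblock dirty via sb_flags and wakes the md thread,
 * which performs the actual metadata write:
 *
 *	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 *	set_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
 *	md_wakeup_thread(mddev->thread);
 */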

struct mddev {
	void *private;
	struct md_personality *pers;
	dev_t unit;
	int md_minor;
	struct list_head disks;
	unsigned long flags;		/* bit set of 'enum mddev_flags' bits */
	unsigned long sb_flags;		/* bit set of 'enum mddev_sb_flags' bits */

	int suspended;
	atomic_t active_io;
	int ro;
	int sysfs_active;		/* set when sysfs deletes
					 * are happening, so run/
					 * takeover/stop are not safe
					 */
	struct gendisk *gendisk;

	struct kobject kobj;
	int hold_active;
#define UNTIL_IOCTL 1
#define UNTIL_STOP 2

	/* Superblock information */
	int major_version,
	    minor_version,
	    patch_version;
	int persistent;
	int external;			/* metadata is managed externally */
	char metadata_type[17];		/* externally set */
	int chunk_sectors;
	time64_t ctime, utime;
	int level, layout;
	char clevel[16];
	int raid_disks;
	int max_disks;
	sector_t dev_sectors;		/* used size of component devices */
	sector_t array_sectors;		/* exported array size */
	int external_size;		/* size managed externally */
	__u64 events;
	/* If the last 'event' was simply a clean->dirty transition, and
	 * we didn't write it to the spares, then it is safe and simple
	 * to just decrement the event count on a dirty->clean transition.
	 * So we record that possibility here.
	 */
	int can_decrease_events;

	char uuid[16];

	/* If the array is being reshaped, we need to record the
	 * new shape and an indication of where we are up to.
	 * This is written to the superblock.
	 * If reshape_position is MaxSector, then no reshape is happening (yet).
	 */
	sector_t reshape_position;
	int delta_disks, new_level, new_layout;
	int new_chunk_sectors;
	int reshape_backwards;

	struct md_thread *thread;	/* management thread */
	struct md_thread *sync_thread;	/* doing resync or reconstruct */

	/* 'last_sync_action' is initialized to "none".  It is set when a
	 * sync operation (i.e. "data-check", "requested-resync", "resync",
	 * "recovery" or "reshape") is started.  It holds this value even
	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
	 * or finished).  It is overwritten when a new sync operation is
	 * initiated.
	 */
	char *last_sync_action;
	sector_t curr_resync;		/* last block scheduled */
	/* As resync requests can complete out of order, we cannot easily track
	 * how much resync has been completed.  So we occasionally pause until
	 * everything completes, then set curr_resync_completed to curr_resync.
	 * As such it may be well behind the real resync mark, but it is a value
	 * we are certain of.
	 */
	sector_t curr_resync_completed;
	unsigned long resync_mark;	/* a recent timestamp */
	sector_t resync_mark_cnt;	/* blocks written at resync_mark */
	sector_t curr_mark_cnt;		/* blocks scheduled now */

	sector_t resync_max_sectors;	/* may be set by personality */

	atomic64_t resync_mismatches;	/* count of sectors where
					 * parity/replica mismatch found
					 */

	/* allow user-space to request suspension of IO to regions of the array */
	sector_t suspend_lo;
	sector_t suspend_hi;
	/* if zero, use the system-wide default */
	int sync_speed_min;
	int sync_speed_max;

	/* resync even though the same disks are shared among md-devices */
	int parallel_resync;

	int ok_start_degraded;

	unsigned long recovery;		/* bit set of 'enum recovery_flags' bits */
	/* If a RAID personality determines that recovery (of a particular
	 * device) will fail due to a read error on the source device, it
	 * takes a copy of this number and does not attempt recovery again
	 * until this number changes.
	 */
	int recovery_disabled;

	int in_sync;			/* known to not need resync */
	/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
	 * that we are never stopping an array while it is open.
	 * 'reconfig_mutex' protects all other reconfiguration.
	 * These locks are separate due to conflicting interactions
	 * with bdev->bd_mutex.
	 */
	struct mutex open_mutex;
	struct mutex reconfig_mutex;
	atomic_t active;		/* general refcount */
	atomic_t openers;		/* number of active opens */

	int changed;			/* True if we might need to
					 * reread partition info */
	int degraded;			/* whether md should consider
					 * adding a spare
					 */

	atomic_t recovery_active;	/* blocks scheduled, but not written */
	wait_queue_head_t recovery_wait;
	sector_t recovery_cp;
	sector_t resync_min;		/* user requested sync
					 * starts here */
	sector_t resync_max;		/* resync should pause
					 * when it gets here */

	struct kernfs_node *sysfs_state; /* handle for 'array_state'
					  * file in sysfs.
					  */
	struct kernfs_node *sysfs_action; /* handle for 'sync_action' */

	struct work_struct del_work;	/* used for delayed sysfs removal */

	/* "lock" protects:
	 *   flush_bio transition from NULL to !NULL
	 *   rdev superblocks, events
	 *   clearing MD_SB_CHANGE_PENDING
	 *   in_sync - and related safemode and MD_SB_CHANGE changes
	 *   pers (also protected by reconfig_mutex and pending IO).
	 *   clearing ->bitmap
	 *   clearing ->bitmap_info.file
	 *   changing ->resync_{min,max}
	 *   setting MD_RECOVERY_RUNNING (which interacts with resync_{min,max})
	 */
	spinlock_t lock;
	wait_queue_head_t sb_wait;	/* for waiting on superblock updates */
	atomic_t pending_writes;	/* number of active superblock writes */

	unsigned int safemode;		/* if set, update "clean" superblock
					 * when no writes pending.
					 */
	unsigned int safemode_delay;
	struct timer_list safemode_timer;
	struct percpu_ref writes_pending;
	int sync_checkers;		/* # of threads checking writes_pending */
	struct request_queue *queue;	/* for plugging ... */

	struct bitmap *bitmap;		/* the bitmap for the device */
	struct {
		struct file *file;	/* the bitmap file */
		loff_t offset;		/* offset from superblock of
					 * start of bitmap. May be
					 * negative, but not '0'
					 * For external metadata, offset
					 * from start of device.
					 */
		unsigned long space;	/* space available at this offset */
		loff_t default_offset;	/* this is the offset to use when
					 * hot-adding a bitmap.  It should
					 * eventually be settable by sysfs.
					 */
		unsigned long default_space; /* space available at
					      * default offset */
		struct mutex mutex;
		unsigned long chunksize;
		unsigned long daemon_sleep; /* how many jiffies between updates? */
		unsigned long max_write_behind; /* write-behind mode */
		int external;
		int nodes;		/* Maximum number of nodes in the cluster */
		char cluster_name[64];	/* Name of the cluster */
	} bitmap_info;

	atomic_t max_corr_read_errors;	/* max read retries */
	struct list_head all_mddevs;

	struct attribute_group *to_remove;

	struct bio_set *bio_set;
	struct bio_set *sync_set;	/* for sync operations like
					 * metadata and bitmap writes
					 */

	/* Generic flush handling.
	 * The last to finish preflush schedules a worker to submit
	 * the rest of the request (without the REQ_PREFLUSH flag).
	 */
	struct bio *flush_bio;
	atomic_t flush_pending;
	struct work_struct flush_work;
	struct work_struct event_work;	/* used by dm to report failure event */
	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
	struct md_cluster_info *cluster_info;
	unsigned int good_device_nr;	/* good device num within cluster raid */
};

enum recovery_flags {
	/*
	 * If neither SYNC nor RESHAPE is set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* a reshape is happening */
	MD_RECOVERY_FROZEN,	/* user request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because of an IO error */
};

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
 * failure due to interrupts is not acceptable.
 */
static inline void mddev_lock_nointr(struct mddev *mddev)
{
	mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_is_locked(struct mddev *mddev)
{
	return mutex_is_locked(&mddev->reconfig_mutex);
}

static inline int mddev_trylock(struct mddev *mddev)
{
	return mutex_trylock(&mddev->reconfig_mutex);
}
extern void mddev_unlock(struct mddev *mddev);
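
/*
 * Usage sketch (illustrative only): reconfiguration paths take the
 * interruptible lock and must check for failure; mddev_unlock() also handles
 * deferred sysfs group removal and thread wakeups, so it is used instead of
 * a raw mutex_unlock():
 *
 *	int err = mddev_lock(mddev);
 *	if (err)
 *		return err;
 *	// ... reconfigure the array ...
 *	mddev_unlock(mddev);
 */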

static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}

static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
{
	atomic_add(nr_sectors, &bio->bi_disk->sync_io);
}

struct md_personality
{
	char *name;
	int level;
	struct list_head list;
	struct module *owner;
	bool (*make_request)(struct mddev *mddev, struct bio *bio);
	int (*run)(struct mddev *mddev);
	void (*free)(struct mddev *mddev, void *priv);
	void (*status)(struct seq_file *seq, struct mddev *mddev);
	/* error_handler must set ->faulty and clear ->in_sync
	 * if appropriate, and should abort recovery if needed
	 */
	void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
	int (*spare_active) (struct mddev *mddev);
	sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
	int (*resize) (struct mddev *mddev, sector_t sectors);
	sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	/* quiesce moves between quiescence states
	 * 0 - fully active
	 * 1 - no new requests allowed
	 * others - reserved
	 */
	void (*quiesce) (struct mddev *mddev, int state);
	/* takeover is used to transition an array from one
	 * personality to another.  The new personality must be able
	 * to handle the data in the current layout.
	 * e.g. 2drive raid1 -> 2drive raid5
	 *      ndrive raid5 -> degraded n+1drive raid6 with special layout
	 * If the takeover succeeds, a new 'private' structure is returned.
	 * This needs to be installed and then ->run used to activate the
	 * array.
	 */
	void *(*takeover) (struct mddev *mddev);
	/* congested implements the congested_fn interface.
	 * Will not be called while array is 'suspended' */
	int (*congested)(struct mddev *mddev, int bits);
	/* Changes the consistency policy of an active array. */
	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
};
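
/*
 * Usage sketch (illustrative only): a RAID personality fills in this
 * structure and registers it from its module init.  The names and level
 * below are hypothetical:
 *
 *	static struct md_personality example_personality = {
 *		.name		= "example",
 *		.level		= 42,
 *		.owner		= THIS_MODULE,
 *		.make_request	= example_make_request,
 *		.run		= example_run,
 *		.free		= example_free,
 *		.status		= example_status,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return register_md_personality(&example_personality);
 *	}
 */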

struct md_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct mddev *, char *);
	ssize_t (*store)(struct mddev *, const char *, size_t);
};
extern struct attribute_group md_bitmap_group;

static inline struct kernfs_node *sysfs_get_dirent_safe(struct kernfs_node *sd, char *name)
{
	if (sd)
		return sysfs_get_dirent(sd, name);
	return sd;
}
static inline void sysfs_notify_dirent_safe(struct kernfs_node *sd)
{
	if (sd)
		sysfs_notify_dirent(sd);
}
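
/*
 * Usage sketch (illustrative only): after changing device or array state,
 * the cached dirents are poked so userspace pollers see the change without
 * having to look the attributes up by name each time:
 *
 *	sysfs_notify_dirent_safe(rdev->sysfs_state);
 *	sysfs_notify_dirent_safe(mddev->sysfs_state);
 */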

static inline char * mdname (struct mddev * mddev)
{
	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
}

static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
	} else
		return 0;
}

static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{
	char nm[20];
	if (!test_bit(Replacement, &rdev->flags) &&
	    !test_bit(Journal, &rdev->flags) &&
	    mddev->kobj.sd) {
		sprintf(nm, "rd%d", rdev->raid_disk);
		sysfs_remove_link(&mddev->kobj, nm);
	}
}

/*
 * iterates through a list of rdevs (e.g. a candidate list rather than
 * mddev->disks); it is safe to remove the rdev from the list while
 * iterating.
 */
#define rdev_for_each_list(rdev, tmp, head)				\
	list_for_each_entry_safe(rdev, tmp, head, same_set)

/*
 * iterates through the 'same array disks' ringlist
 */
#define rdev_for_each(rdev, mddev)					\
	list_for_each_entry(rdev, &((mddev)->disks), same_set)

#define rdev_for_each_safe(rdev, tmp, mddev)				\
	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)

#define rdev_for_each_rcu(rdev, mddev)					\
	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
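
/*
 * Usage sketch (illustrative only): most callers hold the reconfig mutex
 * and use rdev_for_each(); I/O paths that only hold the RCU read lock use
 * rdev_for_each_rcu() instead:
 *
 *	struct md_rdev *rdev;
 *
 *	rdev_for_each(rdev, mddev)
 *		if (test_bit(In_sync, &rdev->flags)) {
 *			// ... use rdev ...
 *		}
 */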

struct md_thread {
	void (*run) (struct md_thread *thread);
	struct mddev *mddev;
	wait_queue_head_t wqueue;
	unsigned long flags;
	struct task_struct *tsk;
	unsigned long timeout;
	void *private;
};

#define THREAD_WAKEUP 0

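/*
 * Usage sketch (illustrative only): a personality registers a thread whose
 * run() callback is invoked whenever THREAD_WAKEUP is set; md_wakeup_thread()
 * is how other contexts kick it.  The names below are hypothetical:
 *
 *	mddev->thread = md_register_thread(example_daemon, mddev, "example");
 *	if (!mddev->thread)
 *		return -ENOMEM;
 *	// ...
 *	md_wakeup_thread(mddev->thread);
 *	// ...
 *	md_unregister_thread(&mddev->thread);
 */
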
static inline void safe_put_page(struct page *p)
{
	if (p) put_page(p);
}

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
		struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
extern void md_cluster_stop(struct mddev *mddev);
extern struct md_thread *md_register_thread(
	void (*run)(struct md_thread *thread),
	struct mddev *mddev,
	const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);

extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
extern void md_do_sync(struct md_thread *thread);
extern void md_new_event(struct mddev *mddev);
extern void md_allow_write(struct mddev *mddev);
extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
extern void md_rdev_clear(struct md_rdev *rdev);

extern void md_handle_request(struct mddev *mddev, struct bio *bio);
extern void mddev_suspend(struct mddev *mddev);
extern void mddev_resume(struct mddev *mddev);
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
				   struct mddev *mddev);

extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev *rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
	int faulty = test_bit(Faulty, &rdev->flags);
	if (atomic_dec_and_test(&rdev->nr_pending) && faulty) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
}
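
/*
 * Usage sketch (illustrative only): I/O paths that find an rdev under RCU
 * take a reference via nr_pending before dropping the RCU lock, and release
 * it with rdev_dec_pending() on completion so a faulty device can be reaped.
 * The 'conf->mirrors[d].rdev' layout below is hypothetical:
 *
 *	rcu_read_lock();
 *	rdev = rcu_dereference(conf->mirrors[d].rdev);
 *	if (rdev && !test_bit(Faulty, &rdev->flags))
 *		atomic_inc(&rdev->nr_pending);
 *	rcu_read_unlock();
 *	// ... submit and complete the I/O ...
 *	rdev_dec_pending(rdev, mddev);
 */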

extern struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}

static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_SAME &&
	    !bio->bi_disk->queue->limits.max_write_same_sectors)
		mddev->queue->limits.max_write_same_sectors = 0;
}

static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
{
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
	    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
		mddev->queue->limits.max_write_zeroes_sectors = 0;
}
#endif /* _MD_MD_H */