linux/include/linux/blk_types.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants.  Directly include this file only to
 * break an include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

struct block_device {
        sector_t                bd_start_sect;
        struct disk_stats __percpu *bd_stats;
        unsigned long           bd_stamp;
        bool                    bd_read_only;   /* read-only policy */
        dev_t                   bd_dev;
        int                     bd_openers;
        struct inode *          bd_inode;       /* will die */
        struct super_block *    bd_super;
        void *                  bd_claiming;
        struct device           bd_device;
        void *                  bd_holder;
        int                     bd_holders;
        bool                    bd_write_holder;
        struct kobject          *bd_holder_dir;
        u8                      bd_partno;
        spinlock_t              bd_size_lock; /* for bd_inode->i_size updates */
        struct gendisk *        bd_disk;

        /* The counter of freeze processes */
        int                     bd_fsfreeze_count;
        /* Mutex for freeze */
        struct mutex            bd_fsfreeze_mutex;
        struct super_block      *bd_fsfreeze_sb;

        struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
        bool                    bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
        ((_bdev)->bd_disk->part0)

#define dev_to_bdev(device) \
        container_of((device), struct block_device, bd_device)

#define bdev_kobj(_bdev) \
        (&((_bdev)->bd_device.kobj))

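/*
 * Illustrative sketch (not part of the original header): given the
 * struct device embedded in a block_device, the accessors above recover
 * the containing block_device and its kobject.  The helper name is
 * hypothetical and exists only to demonstrate the macros.
 */
static inline struct kobject *example_bdev_kobj_of_dev(struct device *dev)
{
        struct block_device *bdev = dev_to_bdev(dev);

        return bdev_kobj(bdev);
}
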
/*
 * Block error status values.  See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define BLK_STS_OK 0
#define BLK_STS_NOTSUPP         ((__force blk_status_t)1)
#define BLK_STS_TIMEOUT         ((__force blk_status_t)2)
#define BLK_STS_NOSPC           ((__force blk_status_t)3)
#define BLK_STS_TRANSPORT       ((__force blk_status_t)4)
#define BLK_STS_TARGET          ((__force blk_status_t)5)
#define BLK_STS_NEXUS           ((__force blk_status_t)6)
#define BLK_STS_MEDIUM          ((__force blk_status_t)7)
#define BLK_STS_PROTECTION      ((__force blk_status_t)8)
#define BLK_STS_RESOURCE        ((__force blk_status_t)9)
#define BLK_STS_IOERR           ((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)

#define BLK_STS_AGAIN           ((__force blk_status_t)12)

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE    ((__force blk_status_t)13)

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. An example is a zone that is write-locked, while a read to
 * the same zone can still be served.
 */
#define BLK_STS_ZONE_RESOURCE   ((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open. The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE      ((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active. The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE    ((__force blk_status_t)16)

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
        switch (error) {
        case BLK_STS_NOTSUPP:
        case BLK_STS_NOSPC:
        case BLK_STS_TARGET:
        case BLK_STS_NEXUS:
        case BLK_STS_MEDIUM:
        case BLK_STS_PROTECTION:
                return false;
        }

        /* Anything else could be a path failure, so should be retried */
        return true;
}

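/*
 * Illustrative sketch (not part of the original header): a failover
 * driver could use blk_path_error() in its completion path to decide
 * whether resubmitting the request on another path is worthwhile.  The
 * helper name is hypothetical.
 */
static inline bool example_retry_on_other_path(blk_status_t status)
{
        return status != BLK_STS_OK && blk_path_error(status);
}
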
/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS      1
#define BIO_ISSUE_SIZE_BITS     12
#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK     \
        (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
        u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
        return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
        return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
        return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
                                       sector_t size)
{
        size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
        issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
                        (ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
                        ((u64)size << BIO_ISSUE_SIZE_SHIFT));
}

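/*
 * Illustrative sketch (not part of the original header): bio_issue_init()
 * packs the current time (ktime_get_ns(), truncated to 51 bits) and the
 * caller-supplied size (truncated to 12 bits, typically a sector count)
 * into one u64 while preserving the reserved top bit.  The helper name is
 * hypothetical and only shows the round trip through the accessors.
 */
static inline u64 example_bio_issue_roundtrip(sector_t size)
{
        struct bio_issue issue = { .value = 0 };

        bio_issue_init(&issue, size);
        /* bio_issue_size(&issue) now returns size masked to 12 bits */
        return bio_issue_time(&issue);  /* truncated issue timestamp */
}
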
/*
 * main unit of I/O for the block layer and lower layers (i.e. drivers and
 * stacking drivers)
 */
struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct block_device     *bi_bdev;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
                                                 */
        unsigned short          bi_flags;       /* BIO_* below */
        unsigned short          bi_ioprio;
        unsigned short          bi_write_hint;
        blk_status_t            bi_status;
        atomic_t                __bi_remaining;

        struct bvec_iter        bi_iter;

        bio_end_io_t            *bi_end_io;

        void                    *bi_private;
#ifdef CONFIG_BLK_CGROUP
        /*
         * Represents the association of the css and request_queue for the bio.
         * If a bio goes directly to a device, it will not have a blkg as it
         * will not have a request_queue associated with it.  The reference is
         * put on release of the bio.
         */
        struct blkcg_gq         *bi_blkg;
        struct bio_issue        bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
        u64                     bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
        struct bio_crypt_ctx    *bi_crypt_context;
#endif

        union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
                struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
        };

        unsigned short          bi_vcnt;        /* how many bio_vec's */

        /*
         * Everything starting with bi_max_vecs will be preserved by bio_reset()
         */

        unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */

        atomic_t                __bi_cnt;       /* pin count */

        struct bio_vec          *bi_io_vec;     /* the actual vec list */

        struct bio_set          *bi_pool;

        /*
         * We can inline a number of vecs at the end of the bio, to avoid
         * double allocations for a small number of bio_vecs. This member
         * MUST obviously be kept at the very end of the bio.
         */
        struct bio_vec          bi_inline_vecs[];
};

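/*
 * Illustrative sketch (not part of the original header): a minimal
 * completion callback of type bio_end_io_t that only touches fields
 * declared above.  The callback name and the use of bi_private to hand
 * the status back to the submitter are hypothetical.
 */
static inline void example_bio_end_io(struct bio *bio)
{
        blk_status_t *result = bio->bi_private; /* set by the submitter */

        if (result)
                *result = bio->bi_status;
        /* the bio owner would normally drop its reference here, e.g. bio_put() */
}
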
#define BIO_RESET_BYTES         offsetof(struct bio, bi_max_vecs)
#define BIO_MAX_SECTORS         (UINT_MAX >> SECTOR_SHIFT)

/*
 * bio flags
 */
enum {
        BIO_NO_PAGE_REF,        /* don't put pages on release */
        BIO_CLONED,             /* doesn't own data */
        BIO_BOUNCED,            /* bio is a bounce bio */
        BIO_WORKINGSET,         /* contains userspace workingset pages */
        BIO_QUIET,              /* Make BIO Quiet */
        BIO_CHAIN,              /* chained bio, ->bi_remaining in effect */
        BIO_REFFED,             /* bio has elevated ->bi_cnt */
        BIO_THROTTLED,          /* This bio has already been subjected to
                                 * throttling rules. Don't do it again. */
        BIO_TRACE_COMPLETION,   /* bio_endio() should trace the final completion
                                 * of this bio. */
        BIO_CGROUP_ACCT,        /* has been accounted to a cgroup */
        BIO_TRACKED,            /* set if bio goes through the rq_qos path */
        BIO_REMAPPED,
        BIO_ZONE_WRITE_LOCKED,  /* Owns a zoned device zone write lock */
        BIO_PERCPU_CACHE,       /* can participate in per-cpu alloc cache */
        BIO_FLAG_LAST
};

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 *
 * (An illustrative sketch of this encoding follows the REQ_* flag
 * definitions below.)
 */
#define REQ_OP_BITS     8
#define REQ_OP_MASK     ((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS   24

enum req_opf {
        /* read sectors from the device */
        REQ_OP_READ             = 0,
        /* write sectors to the device */
        REQ_OP_WRITE            = 1,
        /* flush the volatile write cache */
        REQ_OP_FLUSH            = 2,
        /* discard sectors */
        REQ_OP_DISCARD          = 3,
        /* securely erase sectors */
        REQ_OP_SECURE_ERASE     = 5,
        /* write the same sector many times */
        REQ_OP_WRITE_SAME       = 7,
        /* write the zero-filled sector many times */
        REQ_OP_WRITE_ZEROES     = 9,
        /* Open a zone */
        REQ_OP_ZONE_OPEN        = 10,
        /* Close a zone */
        REQ_OP_ZONE_CLOSE       = 11,
        /* Transition a zone to full */
        REQ_OP_ZONE_FINISH      = 12,
        /* write data at the current zone write pointer */
        REQ_OP_ZONE_APPEND      = 13,
        /* reset a zone write pointer */
        REQ_OP_ZONE_RESET       = 15,
        /* reset all zones present on the device */
        REQ_OP_ZONE_RESET_ALL   = 17,

        /* Driver private requests */
        REQ_OP_DRV_IN           = 34,
        REQ_OP_DRV_OUT          = 35,

        REQ_OP_LAST,
};

enum req_flag_bits {
        __REQ_FAILFAST_DEV =    /* no driver retries of device errors */
                REQ_OP_BITS,
        __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
        __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
        __REQ_SYNC,             /* request is sync (sync write or read) */
        __REQ_META,             /* metadata io request */
        __REQ_PRIO,             /* boost priority in cfq */
        __REQ_NOMERGE,          /* don't touch this for merging */
        __REQ_IDLE,             /* anticipate more IO after this one */
        __REQ_INTEGRITY,        /* I/O includes block integrity payload */
        __REQ_FUA,              /* forced unit access */
        __REQ_PREFLUSH,         /* request for cache flush */
        __REQ_RAHEAD,           /* read ahead, can fail anytime */
        __REQ_BACKGROUND,       /* background IO */
        __REQ_NOWAIT,           /* Don't wait if request will block */
        /*
         * When a shared kthread needs to issue a bio for a cgroup, doing
         * so synchronously can lead to priority inversions as the kthread
         * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
         * submit_bio() punt the actual issuing to a dedicated per-blkcg
         * work item to avoid such priority inversions.
         */
        __REQ_CGROUP_PUNT,

        /* command specific flags for REQ_OP_WRITE_ZEROES: */
        __REQ_NOUNMAP,          /* do not free blocks when zeroing */

        __REQ_HIPRI,

        /* for driver use */
        __REQ_DRV,
        __REQ_SWAP,             /* swapping request. */
        __REQ_NR_BITS,          /* stops here */
};

#define REQ_FAILFAST_DEV        (1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT  (1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER     (1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC                (1ULL << __REQ_SYNC)
#define REQ_META                (1ULL << __REQ_META)
#define REQ_PRIO                (1ULL << __REQ_PRIO)
#define REQ_NOMERGE             (1ULL << __REQ_NOMERGE)
#define REQ_IDLE                (1ULL << __REQ_IDLE)
#define REQ_INTEGRITY           (1ULL << __REQ_INTEGRITY)
#define REQ_FUA                 (1ULL << __REQ_FUA)
#define REQ_PREFLUSH            (1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD              (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND          (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT              (1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT         (1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP             (1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI               (1ULL << __REQ_HIPRI)

#define REQ_DRV                 (1ULL << __REQ_DRV)
#define REQ_SWAP                (1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
        (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
        (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

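/*
 * Illustrative sketch (not part of the original header): an operation
 * and its flags live in one value; the low REQ_OP_BITS select the
 * operation and the flag bits sit above them.  For example
 * REQ_OP_WRITE | REQ_SYNC | REQ_FUA satisfies the check below.  The
 * helper name is hypothetical.
 */
static inline bool example_opf_is_write_fua(unsigned int opf)
{
        return (opf & REQ_OP_MASK) == REQ_OP_WRITE && (opf & REQ_FUA);
}
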
enum stat_group {
        STAT_READ,
        STAT_WRITE,
        STAT_DISCARD,
        STAT_FLUSH,

        NR_STAT_GROUPS
};

#define bio_op(bio) \
        ((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
        ((req)->cmd_flags & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
                unsigned op_flags)
{
        bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
        return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
        return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag.  Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
        return (op & REQ_OP_MASK) == REQ_OP_READ ||
                (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

static inline bool op_is_discard(unsigned int op)
{
        return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
        switch (op & REQ_OP_MASK) {
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_OPEN:
        case REQ_OP_ZONE_CLOSE:
        case REQ_OP_ZONE_FINISH:
                return true;
        default:
                return false;
        }
}

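/*
 * Illustrative note (not part of the original header): op_stat_group()
 * relies on op_is_write() returning 0 or 1 and on STAT_READ/STAT_WRITE
 * being the first two entries of enum stat_group, e.g.:
 *
 *      op_stat_group(REQ_OP_READ)    == STAT_READ
 *      op_stat_group(REQ_OP_WRITE)   == STAT_WRITE
 *      op_stat_group(REQ_OP_DISCARD) == STAT_DISCARD
 */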
static inline int op_stat_group(unsigned int op)
{
        if (op_is_discard(op))
                return STAT_DISCARD;
        return op_is_write(op);
}

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE           -1U
#define BLK_QC_T_SHIFT          16
#define BLK_QC_T_INTERNAL       (1U << 31)

static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
        return cookie != BLK_QC_T_NONE;
}

static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
        return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
        return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
        return (cookie & BLK_QC_T_INTERNAL) != 0;
}

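/*
 * Illustrative sketch (not part of the original header): a polling
 * cookie carries the hardware queue number above BLK_QC_T_SHIFT and the
 * tag below it; the hypothetical helper below is simply the inverse of
 * blk_qc_t_to_queue_num() and blk_qc_t_to_tag().
 */
static inline blk_qc_t example_blk_qc_t_pack(unsigned int queue_num,
                                             unsigned int tag)
{
        return (queue_num << BLK_QC_T_SHIFT) | tag;
}
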
struct blk_rq_stat {
        u64 mean;
        u64 min;
        u64 max;
        u32 nr_samples;
        u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */