linux/block/blk-lib.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic helpers
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"
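
/*
 * Helper for building a chain of bios: allocate a new bio with room for
 * @nr_pages vecs and, if the caller already holds a bio, chain that bio to
 * the new one and submit it.  The caller only ever keeps the newest bio of
 * the chain in hand; completion of the whole chain is observed through the
 * final bio.
 */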
static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
                gfp_t gfp)
{
        struct bio *new = bio_alloc(gfp, nr_pages);

        if (bio) {
                bio_chain(bio, new);
                submit_bio(bio);
        }

        return new;
}
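
/**
 * __blkdev_issue_discard - queue a discard, without waiting
 * @bdev:       blockdev to issue discard for
 * @sector:     start sector
 * @nr_sects:   number of sectors to discard
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @flags:      BLKDEV_DISCARD_* flags to control behaviour
 * @biop:       pointer to anchor bio
 *
 * Description:
 *    Generate and chain discard bios for the sectors in question.  All but
 *    the last bio are submitted; the last one is returned through @biop so
 *    the caller can submit it or chain further requests onto it.
 */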
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, int flags,
                struct bio **biop)
{
        struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
        unsigned int granularity;
        unsigned int op;
        int alignment;
        sector_t bs_mask;

        if (!q)
                return -ENXIO;

        if (bdev_read_only(bdev))
                return -EPERM;

        if (flags & BLKDEV_DISCARD_SECURE) {
                if (!blk_queue_secure_erase(q))
                        return -EOPNOTSUPP;
                op = REQ_OP_SECURE_ERASE;
        } else {
                if (!blk_queue_discard(q))
                        return -EOPNOTSUPP;
                op = REQ_OP_DISCARD;
        }

        bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
        if ((sector | nr_sects) & bs_mask)
                return -EINVAL;

        /* Zero-sector (unknown) and one-sector granularities are the same. */
        granularity = max(q->limits.discard_granularity >> 9, 1U);
        alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

        while (nr_sects) {
                unsigned int req_sects;
                sector_t end_sect, tmp;

                /* Make sure bi_size doesn't overflow */
                req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);

                /*
                 * If splitting a request, and the next starting sector would be
                 * misaligned, stop the discard at the previous aligned sector.
                 */
                end_sect = sector + req_sects;
                tmp = end_sect;
                if (req_sects < nr_sects &&
                    sector_div(tmp, granularity) != alignment) {
                        end_sect = end_sect - alignment;
                        sector_div(end_sect, granularity);
                        end_sect = end_sect * granularity + alignment;
                        req_sects = end_sect - sector;
                }

                bio = next_bio(bio, 0, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio_set_op_attrs(bio, op, 0);

                bio->bi_iter.bi_size = req_sects << 9;
                nr_sects -= req_sects;
                sector = end_sect;

                /*
                 * We can loop for a long time in here, if someone does
                 * full device discards (like mkfs). Be nice and allow
                 * us to schedule out to avoid softlocking if preempt
                 * is disabled.
                 */
                cond_resched();
        }

        *biop = bio;
        return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
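
/*
 * Example usage (illustrative sketch, not part of the original file): chain
 * discards for several extents under one anchor bio and wait for the whole
 * chain once.  The extents[] array and nr_extents are hypothetical caller
 * state.
 *
 *	struct bio *bio = NULL;
 *	int i, ret = 0;
 *
 *	for (i = 0; i < nr_extents; i++) {
 *		ret = __blkdev_issue_discard(bdev, extents[i].sector,
 *				extents[i].nr_sects, GFP_KERNEL, 0, &bio);
 *		if (ret)
 *			break;
 *	}
 *	if (bio) {
 *		int err = submit_bio_wait(bio);
 *
 *		if (!ret)
 *			ret = err;
 *		bio_put(bio);
 *	}
 */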

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:       blockdev to issue discard for
 * @sector:     start sector
 * @nr_sects:   number of sectors to discard
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @flags:      BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;

        blk_start_plug(&plug);
        ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
                        &bio);
        if (!ret && bio) {
                ret = submit_bio_wait(bio);
                if (ret == -EOPNOTSUPP)
                        ret = 0;
                bio_put(bio);
        }
        blk_finish_plug(&plug);

        return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
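
/*
 * Example usage (illustrative sketch, not part of the original file): a
 * whole-device discard, roughly what a BLKDISCARD-style caller would do.
 * Reading the device size through bdev->bd_inode is an assumption about
 * the caller's context.
 *
 *	sector_t nr_sects = i_size_read(bdev->bd_inode) >> 9;
 *	int ret = blkdev_issue_discard(bdev, 0, nr_sects, GFP_KERNEL, 0);
 */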

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:       target blockdev
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @page:       page containing data to write
 * @biop:       pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_SAME) with the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, struct page *page,
                struct bio **biop)
{
        struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_write_same_sectors;
        struct bio *bio = *biop;
        sector_t bs_mask;

        if (!q)
                return -ENXIO;

        if (bdev_read_only(bdev))
                return -EPERM;

        bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
        if ((sector | nr_sects) & bs_mask)
                return -EINVAL;

        if (!bdev_write_same(bdev))
                return -EOPNOTSUPP;

        /* Ensure that max_write_same_sectors doesn't overflow bi_size */
        max_write_same_sectors = UINT_MAX >> 9;

        while (nr_sects) {
                bio = next_bio(bio, 1, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio->bi_vcnt = 1;
                bio->bi_io_vec->bv_page = page;
                bio->bi_io_vec->bv_offset = 0;
                bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
                bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

                if (nr_sects > max_write_same_sectors) {
                        bio->bi_iter.bi_size = max_write_same_sectors << 9;
                        nr_sects -= max_write_same_sectors;
                        sector += max_write_same_sectors;
                } else {
                        bio->bi_iter.bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                cond_resched();
        }

        *biop = bio;
        return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:       target blockdev
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @page:       page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
                                sector_t nr_sects, gfp_t gfp_mask,
                                struct page *page)
{
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;

        blk_start_plug(&plug);
        ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
                        &bio);
        if (ret == 0 && bio) {
                ret = submit_bio_wait(bio);
                bio_put(bio);
        }
        blk_finish_plug(&plug);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
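
/*
 * Example usage (illustrative sketch, not part of the original file):
 * replicate a single logical block of pattern data across a range.  The
 * hypothetical pattern_page must hold at least one logical block of data
 * at offset 0; -EOPNOTSUPP means the device lacks WRITE SAME and the
 * caller should fall back to plain writes.
 *
 *	int ret = blkdev_issue_write_same(bdev, sector, nr_sects,
 *					  GFP_KERNEL, pattern_page);
 */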
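
/**
 * __blkdev_issue_write_zeroes - generate bios for a zeroing offload
 * @bdev:       target blockdev
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @biop:       pointer to anchor bio
 * @flags:      BLKDEV_ZERO_* flags to control behaviour
 *
 * Description:
 *  Generate and chain REQ_OP_WRITE_ZEROES bios for the range, honouring the
 *  device's write_zeroes_sectors limit.  Returns -EOPNOTSUPP if the device
 *  does not advertise the offload.
 */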
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
                sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
                struct bio **biop, unsigned flags)
{
        struct bio *bio = *biop;
        unsigned int max_write_zeroes_sectors;
        struct request_queue *q = bdev_get_queue(bdev);

        if (!q)
                return -ENXIO;

        if (bdev_read_only(bdev))
                return -EPERM;

        /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
        max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

        if (max_write_zeroes_sectors == 0)
                return -EOPNOTSUPP;

        while (nr_sects) {
                bio = next_bio(bio, 0, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio->bi_opf = REQ_OP_WRITE_ZEROES;
                if (flags & BLKDEV_ZERO_NOUNMAP)
                        bio->bi_opf |= REQ_NOUNMAP;

                if (nr_sects > max_write_zeroes_sectors) {
                        bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
                        nr_sects -= max_write_zeroes_sectors;
                        sector += max_write_zeroes_sectors;
                } else {
                        bio->bi_iter.bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                cond_resched();
        }

        *biop = bio;
        return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
        sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

        return min(pages, (sector_t)BIO_MAX_PAGES);
}
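
/*
 * Worked example (illustrative, assuming 4K pages, i.e. 8 sectors/page):
 * nr_sects = 7       -> DIV_ROUND_UP(7, 8)  = 1 page
 * nr_sects = 9       -> DIV_ROUND_UP(9, 8)  = 2 pages
 * nr_sects = 1 << 20 -> 131072 pages, clamped to BIO_MAX_PAGES (typically 256)
 */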
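
/**
 * __blkdev_issue_zero_pages - generate bios that write the zero page
 * @bdev:       target blockdev
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @biop:       pointer to anchor bio
 *
 * Description:
 *  Fallback for devices without a zeroing offload: generate and chain plain
 *  REQ_OP_WRITE bios whose payload is ZERO_PAGE(0) repeated over the range.
 */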
static int __blkdev_issue_zero_pages(struct block_device *bdev,
                sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
                struct bio **biop)
{
        struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
        int bi_size = 0;
        unsigned int sz;

        if (!q)
                return -ENXIO;

        if (bdev_read_only(bdev))
                return -EPERM;

        while (nr_sects != 0) {
                bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
                               gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

                while (nr_sects != 0) {
                        sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
                        bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
                        nr_sects -= bi_size >> 9;
                        sector += bi_size >> 9;
                        if (bi_size < sz)
                                break;
                }
                cond_resched();
        }

        *biop = bio;
        return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:       blockdev to write
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @biop:       pointer to anchor bio
 * @flags:      controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
                unsigned flags)
{
        int ret;
        sector_t bs_mask;

        bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
        if ((sector | nr_sects) & bs_mask)
                return -EINVAL;

        ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
                        biop, flags);
        if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
                return ret;

        return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
                                         biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:       blockdev to write
 * @sector:     start sector
 * @nr_sects:   number of sectors to write
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @flags:      controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
        int ret = 0;
        sector_t bs_mask;
        struct bio *bio;
        struct blk_plug plug;
        bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

        bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
        if ((sector | nr_sects) & bs_mask)
                return -EINVAL;

retry:
        bio = NULL;
        blk_start_plug(&plug);
        if (try_write_zeroes) {
                ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
                                                  gfp_mask, &bio, flags);
        } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
                ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
                                                gfp_mask, &bio);
        } else {
                /* No zeroing offload support */
                ret = -EOPNOTSUPP;
        }
        if (ret == 0 && bio) {
                ret = submit_bio_wait(bio);
                bio_put(bio);
        }
        blk_finish_plug(&plug);
        if (ret && try_write_zeroes) {
                if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
                        try_write_zeroes = false;
                        goto retry;
                }
                if (!bdev_write_zeroes_sectors(bdev)) {
                        /*
                         * Zeroing offload support was indicated, but the
                         * device reported ILLEGAL REQUEST (for some devices
                         * there is no non-destructive way to verify whether
                         * WRITE ZEROES is actually supported).
                         */
                        ret = -EOPNOTSUPP;
                }
        }

        return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
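
/*
 * Example usage (illustrative sketch, not part of the original file):
 * require the hardware offload and handle failure in the caller rather
 * than letting the kernel write zero pages, e.g. when zeroing must not
 * generate payload traffic.  my_manual_zero_fallback() is hypothetical.
 *
 *	int ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
 *				       BLKDEV_ZERO_NOFALLBACK);
 *	if (ret == -EOPNOTSUPP)
 *		ret = my_manual_zero_fallback(bdev, sector, nr_sects);
 */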