linux/fs/nilfs2/mdt.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * mdt.c - meta data file for NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * Written by Ryusuke Konishi.
 */

#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/mm.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
#include "page.h"
#include "mdt.h"
#include "alloc.h"              /* nilfs_palloc_destroy_cache() */

#include <trace/events/nilfs2.h>

#define NILFS_MDT_MAX_RA_BLOCKS         (16 - 1)


static int
nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
                           struct buffer_head *bh,
                           void (*init_block)(struct inode *,
                                              struct buffer_head *, void *))
{
        struct nilfs_inode_info *ii = NILFS_I(inode);
        void *kaddr;
        int ret;

        /* The caller excludes read accesses using the page lock. */

        /* set_buffer_new(bh); */
        bh->b_blocknr = 0;

        ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
        if (unlikely(ret))
                return ret;

        set_buffer_mapped(bh);

        kaddr = kmap_atomic(bh->b_page);
        memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
        if (init_block)
                init_block(inode, bh, kaddr);
        flush_dcache_page(bh->b_page);
        kunmap_atomic(kaddr);

        set_buffer_uptodate(bh);
        mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(inode);

        trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block);

        return 0;
}
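
/*
 * Example: the init_block callback above runs with the new block already
 * zero-filled and its page kmapped, so it may format the block in place
 * through @kaddr.  A minimal sketch of such an initializer (the names
 * below are hypothetical, not part of NILFS):
 *
 *      static void example_init_block(struct inode *inode,
 *                                     struct buffer_head *bh, void *kaddr)
 *      {
 *              struct example_header *hdr = kaddr + bh_offset(bh);
 *
 *              hdr->eh_magic = cpu_to_le32(EXAMPLE_MAGIC);
 *      }
 */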

static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
                                  struct buffer_head **out_bh,
                                  void (*init_block)(struct inode *,
                                                     struct buffer_head *,
                                                     void *))
{
        struct super_block *sb = inode->i_sb;
        struct nilfs_transaction_info ti;
        struct buffer_head *bh;
        int err;

        nilfs_transaction_begin(sb, &ti, 0);

        err = -ENOMEM;
        bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
        if (unlikely(!bh))
                goto failed_unlock;

        err = -EEXIST;
        if (buffer_uptodate(bh))
                goto failed_bh;

        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                goto failed_bh;

        bh->b_bdev = sb->s_bdev;
        err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
        if (likely(!err)) {
                get_bh(bh);
                *out_bh = bh;
        }

 failed_bh:
        unlock_page(bh->b_page);
        put_page(bh->b_page);
        brelse(bh);

 failed_unlock:
        if (likely(!err))
                err = nilfs_transaction_commit(sb);
        else
                nilfs_transaction_abort(sb);

        return err;
}

static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
                       int mode, int mode_flags, struct buffer_head **out_bh)
{
        struct buffer_head *bh;
        __u64 blknum = 0;
        int ret = -ENOMEM;

        bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
        if (unlikely(!bh))
                goto failed;

        ret = -EEXIST; /* internal code */
        if (buffer_uptodate(bh))
                goto out;

        if (mode_flags & REQ_RAHEAD) {
                if (!trylock_buffer(bh)) {
                        ret = -EBUSY;
                        goto failed_bh;
                }
        } else /* mode == READ */
                lock_buffer(bh);

        if (buffer_uptodate(bh)) {
                unlock_buffer(bh);
                goto out;
        }

        ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
        if (unlikely(ret)) {
                unlock_buffer(bh);
                goto failed_bh;
        }
        map_bh(bh, inode->i_sb, (sector_t)blknum);

        bh->b_end_io = end_buffer_read_sync;
        get_bh(bh);
        submit_bh(mode, mode_flags, bh);
        ret = 0;

        trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode);
 out:
        get_bh(bh);
        *out_bh = bh;

 failed_bh:
        unlock_page(bh->b_page);
        put_page(bh->b_page);
        brelse(bh);
 failed:
        return ret;
}

static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
                                int readahead, struct buffer_head **out_bh)
{
        struct buffer_head *first_bh, *bh;
        unsigned long blkoff;
        int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
        int err;

        err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh);
        if (err == -EEXIST) /* internal code */
                goto out;

        if (unlikely(err))
                goto failed;

        if (readahead) {
                blkoff = block + 1;
                for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
                        err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ,
                                                     REQ_RAHEAD, &bh);
                        if (likely(!err || err == -EEXIST))
                                brelse(bh);
                        else if (err != -EBUSY)
                                /* abort readahead if bmap lookup failed */
                                break;
                        if (!buffer_locked(first_bh))
                                goto out_no_wait;
                }
        }

        wait_on_buffer(first_bh);

 out_no_wait:
        err = -EIO;
        if (!buffer_uptodate(first_bh)) {
                nilfs_err(inode->i_sb,
                          "I/O error reading meta-data file (ino=%lu, block-offset=%lu)",
                          inode->i_ino, block);
                goto failed_bh;
        }
 out:
        *out_bh = first_bh;
        return 0;

 failed_bh:
        brelse(first_bh);
 failed:
        return err;
}

/**
 * nilfs_mdt_get_block - read or create a buffer on meta data file.
 * @inode: inode of the meta data file
 * @blkoff: block offset
 * @create: create flag
 * @init_block: initializer used for newly allocated block
 * @out_bh: output of a pointer to the buffer_head
 *
 * nilfs_mdt_get_block() looks up the specified buffer and tries to create
 * a new buffer if @create is not zero.  On success, the returned buffer is
 * assured to be either existing or formatted, with concurrent access
 * excluded by a buffer lock.  @out_bh is substituted only when zero is
 * returned.
 *
 * Return Value: On success, it returns 0. On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EROFS - Read only filesystem (for create mode)
 */
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
                        void (*init_block)(struct inode *,
                                           struct buffer_head *, void *),
                        struct buffer_head **out_bh)
{
        int ret;

        /* Should be rewritten by merging with nilfs_mdt_read_block() */
 retry:
        ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
        if (!create || ret != -ENOENT)
                return ret;

        ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block);
        if (unlikely(ret == -EEXIST)) {
                /* create = 0; */  /* limit read-create loop retries */
                goto retry;
        }
        return ret;
}
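
/*
 * Usage sketch (hypothetical caller, for illustration only): read block
 * @blkoff of a metadata file, creating and formatting it on demand, and
 * drop the reference taken on the returned buffer head when done:
 *
 *      struct buffer_head *bh;
 *      int err;
 *
 *      err = nilfs_mdt_get_block(inode, blkoff, 1, example_init_block, &bh);
 *      if (err)
 *              return err;
 *      ... access bh->b_data ...
 *      brelse(bh);
 */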

/**
 * nilfs_mdt_find_block - find and get a buffer on meta data file.
 * @inode: inode of the meta data file
 * @start: start block offset (inclusive)
 * @end: end block offset (inclusive)
 * @blkoff: place to store the block offset
 * @out_bh: place to store a pointer to buffer_head struct
 *
 * nilfs_mdt_find_block() looks up an existing block in the range
 * [@start, @end] and stores a pointer to the buffer head of the block
 * in @out_bh and its block offset in @blkoff.  @out_bh and @blkoff are
 * substituted only when zero is returned.
 *
 * Return Value: On success, it returns 0. On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - no block was found in the range
 */
int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
                         unsigned long end, unsigned long *blkoff,
                         struct buffer_head **out_bh)
{
        __u64 next;
        int ret;

        if (unlikely(start > end))
                return -ENOENT;

        ret = nilfs_mdt_read_block(inode, start, true, out_bh);
        if (!ret) {
                *blkoff = start;
                goto out;
        }
        if (unlikely(ret != -ENOENT || start == ULONG_MAX))
                goto out;

        ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
        if (!ret) {
                if (next <= end) {
                        ret = nilfs_mdt_read_block(inode, next, true, out_bh);
                        if (!ret)
                                *blkoff = next;
                } else {
                        ret = -ENOENT;
                }
        }
out:
        return ret;
}
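
/*
 * Usage sketch (hypothetical, for illustration only): visit every
 * existing block in [first, last], letting the bmap seek above skip
 * over holes:
 *
 *      unsigned long blkoff = first;
 *      struct buffer_head *bh;
 *
 *      while (!nilfs_mdt_find_block(inode, blkoff, last, &blkoff, &bh)) {
 *              ... process bh ...
 *              brelse(bh);
 *              if (blkoff == ULONG_MAX)
 *                      break;
 *              blkoff++;
 *      }
 */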

/**
 * nilfs_mdt_delete_block - make a hole on the meta data file.
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * Return Value: On success, zero is returned.
 * On error, one of the following negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 */
int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
{
        struct nilfs_inode_info *ii = NILFS_I(inode);
        int err;

        err = nilfs_bmap_delete(ii->i_bmap, block);
        if (!err || err == -ENOENT) {
                nilfs_mdt_mark_dirty(inode);
                nilfs_mdt_forget_block(inode, block);
        }
        return err;
}

/**
 * nilfs_mdt_forget_block - discard dirty state and try to remove the page
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * nilfs_mdt_forget_block() clears the dirty flag of the specified buffer
 * and tries to release the page containing the buffer from the page cache.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EBUSY - page has an active buffer.
 *
 * %-ENOENT - page cache has no page addressed by the offset.
 */
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
        pgoff_t index = (pgoff_t)block >>
                (PAGE_SHIFT - inode->i_blkbits);
        struct page *page;
        unsigned long first_block;
        int ret = 0;
        int still_dirty;

        page = find_lock_page(inode->i_mapping, index);
        if (!page)
                return -ENOENT;

        wait_on_page_writeback(page);

        first_block = (unsigned long)index <<
                (PAGE_SHIFT - inode->i_blkbits);
        if (page_has_buffers(page)) {
                struct buffer_head *bh;

                bh = nilfs_page_get_nth_block(page, block - first_block);
                nilfs_forget_buffer(bh);
        }
        still_dirty = PageDirty(page);
        unlock_page(page);
        put_page(page);

        if (still_dirty ||
            invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
                ret = -EBUSY;
        return ret;
}
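
/*
 * Worked example of the index arithmetic above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12) and a 1 KiB block size (i_blkbits == 10): each page
 * holds four blocks, so block 10 lives in page index 10 >> 2 == 2, that
 * page covers blocks starting at first_block == 2 << 2 == 8, and block 10
 * is therefore the buffer at position 10 - 8 == 2 within the page.
 */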

int nilfs_mdt_fetch_dirty(struct inode *inode)
{
        struct nilfs_inode_info *ii = NILFS_I(inode);

        if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) {
                set_bit(NILFS_I_DIRTY, &ii->i_state);
                return 1;
        }
        return test_bit(NILFS_I_DIRTY, &ii->i_state);
}

static int
nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct super_block *sb;
        int err = 0;

        if (inode && sb_rdonly(inode->i_sb)) {
                /*
                 * The filesystem was remounted read-only because of an
                 * error or metadata corruption, but dirty pages are
                 * still flushed in the background; simply discard this
                 * dirty page here.
                 */
                nilfs_clear_dirty_page(page, false);
                unlock_page(page);
                return -EROFS;
        }

        redirty_page_for_writepage(wbc, page);
        unlock_page(page);

        if (!inode)
                return 0;

        sb = inode->i_sb;

        if (wbc->sync_mode == WB_SYNC_ALL)
                err = nilfs_construct_segment(sb);
        else if (wbc->for_reclaim)
                nilfs_flush_segment(sb, inode->i_ino);

        return err;
}


static const struct address_space_operations def_mdt_aops = {
        .set_page_dirty         = __set_page_dirty_buffers,
        .writepage              = nilfs_mdt_write_page,
};

static const struct inode_operations def_mdt_iops;
static const struct file_operations def_mdt_fops;

int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
{
        struct nilfs_mdt_info *mi;

        mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
        if (!mi)
                return -ENOMEM;

        init_rwsem(&mi->mi_sem);
        inode->i_private = mi;

        inode->i_mode = S_IFREG;
        mapping_set_gfp_mask(inode->i_mapping, gfp_mask);

        inode->i_op = &def_mdt_iops;
        inode->i_fop = &def_mdt_fops;
        inode->i_mapping->a_ops = &def_mdt_aops;

        return 0;
}
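
/*
 * Setup sketch (hypothetical, for illustration only): a metadata file is
 * typically initialized right after its inode is obtained, sizing the
 * private object to cover any per-file state (struct example_mdt_info is
 * a placeholder here):
 *
 *      err = nilfs_mdt_init(inode, NILFS_MDT_GFP,
 *                           sizeof(struct example_mdt_info));
 *      if (err)
 *              return err;
 *      nilfs_mdt_set_entry_size(inode, entry_size, header_size);
 */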

/**
 * nilfs_mdt_clear - do cleanup for the metadata file
 * @inode: inode of the metadata file
 */
void nilfs_mdt_clear(struct inode *inode)
{
        struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

        if (mdi->mi_palloc_cache)
                nilfs_palloc_destroy_cache(inode);
}

/**
 * nilfs_mdt_destroy - release resources used by the metadata file
 * @inode: inode of the metadata file
 */
void nilfs_mdt_destroy(struct inode *inode)
{
        struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

        kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
        kfree(mdi);
}

void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
                              unsigned int header_size)
{
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);

        mi->mi_entry_size = entry_size;
        mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
        mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
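
/*
 * Worked example of the sizing above: with a 4096-byte block,
 * entry_size == 32 and header_size == 16, mi_entries_per_block becomes
 * 4096 / 32 == 128 and mi_first_entry_offset becomes
 * DIV_ROUND_UP(16, 32) == 1, i.e. the first entry slot is reserved for
 * the header.
 */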

/**
 * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file
 * @inode: inode of the metadata file
 * @shadow: shadow mapping
 */
int nilfs_mdt_setup_shadow_map(struct inode *inode,
                               struct nilfs_shadow_map *shadow)
{
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);

        INIT_LIST_HEAD(&shadow->frozen_buffers);
        address_space_init_once(&shadow->frozen_data);
        nilfs_mapping_init(&shadow->frozen_data, inode);
        address_space_init_once(&shadow->frozen_btnodes);
        nilfs_mapping_init(&shadow->frozen_btnodes, inode);
        mi->mi_shadow = shadow;
        return 0;
}

/**
 * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map
 * @inode: inode of the metadata file
 */
int nilfs_mdt_save_to_shadow_map(struct inode *inode)
{
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);
        struct nilfs_inode_info *ii = NILFS_I(inode);
        struct nilfs_shadow_map *shadow = mi->mi_shadow;
        int ret;

        ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping);
        if (ret)
                goto out;

        ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes,
                                     &ii->i_btnode_cache);
        if (ret)
                goto out;

        nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store);
 out:
        return ret;
}

int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
{
        struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
        struct buffer_head *bh_frozen;
        struct page *page;
        int blkbits = inode->i_blkbits;

        page = grab_cache_page(&shadow->frozen_data, bh->b_page->index);
        if (!page)
                return -ENOMEM;

        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << blkbits, 0);

        bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits);

        if (!buffer_uptodate(bh_frozen))
                nilfs_copy_buffer(bh_frozen, bh);
        if (list_empty(&bh_frozen->b_assoc_buffers)) {
                list_add_tail(&bh_frozen->b_assoc_buffers,
                              &shadow->frozen_buffers);
                set_buffer_nilfs_redirected(bh);
        } else {
                brelse(bh_frozen); /* already frozen */
        }

        unlock_page(page);
        put_page(page);
        return 0;
}

struct buffer_head *
nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
{
        struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow;
        struct buffer_head *bh_frozen = NULL;
        struct page *page;
        int n;

        page = find_lock_page(&shadow->frozen_data, bh->b_page->index);
        if (page) {
                if (page_has_buffers(page)) {
                        n = bh_offset(bh) >> inode->i_blkbits;
                        bh_frozen = nilfs_page_get_nth_block(page, n);
                }
                unlock_page(page);
                put_page(page);
        }
        return bh_frozen;
}

static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow)
{
        struct list_head *head = &shadow->frozen_buffers;
        struct buffer_head *bh;

        while (!list_empty(head)) {
                bh = list_first_entry(head, struct buffer_head,
                                      b_assoc_buffers);
                list_del_init(&bh->b_assoc_buffers);
                brelse(bh); /* drop ref-count to make it releasable */
        }
}

/**
 * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state
 * @inode: inode of the metadata file
 */
void nilfs_mdt_restore_from_shadow_map(struct inode *inode)
{
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);
        struct nilfs_inode_info *ii = NILFS_I(inode);
        struct nilfs_shadow_map *shadow = mi->mi_shadow;

        down_write(&mi->mi_sem);

        if (mi->mi_palloc_cache)
                nilfs_palloc_clear_cache(inode);

        nilfs_clear_dirty_pages(inode->i_mapping, true);
        nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);

        nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
        nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);

        nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);

        up_write(&mi->mi_sem);
}

/**
 * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches
 * @inode: inode of the metadata file
 */
void nilfs_mdt_clear_shadow_map(struct inode *inode)
{
        struct nilfs_mdt_info *mi = NILFS_MDT(inode);
        struct nilfs_shadow_map *shadow = mi->mi_shadow;

        down_write(&mi->mi_sem);
        nilfs_release_frozen_buffers(shadow);
        truncate_inode_pages(&shadow->frozen_data, 0);
        truncate_inode_pages(&shadow->frozen_btnodes, 0);
        up_write(&mi->mi_sem);
}
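/*
 * Lifecycle sketch (hypothetical, for illustration only): a shadow map
 * is bound once and then used to checkpoint the metadata file around an
 * operation that may need to be rolled back:
 *
 *      struct nilfs_shadow_map shadow;
 *
 *      nilfs_mdt_setup_shadow_map(inode, &shadow);
 *      ...
 *      nilfs_mdt_save_to_shadow_map(inode);
 *      ... modify the metadata file ...
 *      if (failed)
 *              nilfs_mdt_restore_from_shadow_map(inode);
 *      nilfs_mdt_clear_shadow_map(inode);
 */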