linux/fs/nilfs2/segment.c
<<
>>
Prefs
   1/*
   2 * segment.c - NILFS segment constructor.
   3 *
   4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  19 *
  20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
  21 *
  22 */
  23
  24#include <linux/pagemap.h>
  25#include <linux/buffer_head.h>
  26#include <linux/writeback.h>
  27#include <linux/bitops.h>
  28#include <linux/bio.h>
  29#include <linux/completion.h>
  30#include <linux/blkdev.h>
  31#include <linux/backing-dev.h>
  32#include <linux/freezer.h>
  33#include <linux/kthread.h>
  34#include <linux/crc32.h>
  35#include <linux/pagevec.h>
  36#include <linux/slab.h>
  37#include "nilfs.h"
  38#include "btnode.h"
  39#include "page.h"
  40#include "segment.h"
  41#include "sufile.h"
  42#include "cpfile.h"
  43#include "ifile.h"
  44#include "segbuf.h"
  45
  46
  47/*
  48 * Segment constructor
  49 */
  50#define SC_N_INODEVEC   16   /* Size of locally allocated inode vector */
  51
  52#define SC_MAX_SEGDELTA 64   /* Upper limit of the number of segments
  53                                appended in collection retry loop */
  54
  55/* Construction mode */
  56enum {
  57        SC_LSEG_SR = 1, /* Make a logical segment having a super root */
  58        SC_LSEG_DSYNC,  /* Flush data blocks of a given file and make
  59                           a logical segment without a super root */
  60        SC_FLUSH_FILE,  /* Flush data files, leads to segment writes without
  61                           creating a checkpoint */
  62        SC_FLUSH_DAT,   /* Flush DAT file. This also creates segments without
  63                           a checkpoint */
  64};
  65
  66/* Stage numbers of dirty block collection */
  67enum {
  68        NILFS_ST_INIT = 0,
  69        NILFS_ST_GC,            /* Collecting dirty blocks for GC */
  70        NILFS_ST_FILE,
  71        NILFS_ST_IFILE,
  72        NILFS_ST_CPFILE,
  73        NILFS_ST_SUFILE,
  74        NILFS_ST_DAT,
  75        NILFS_ST_SR,            /* Super root */
  76        NILFS_ST_DSYNC,         /* Data sync blocks */
  77        NILFS_ST_DONE,
  78};
  79
  80/* State flags of collection */
  81#define NILFS_CF_NODE           0x0001  /* Collecting node blocks */
  82#define NILFS_CF_IFILE_STARTED  0x0002  /* IFILE stage has started */
  83#define NILFS_CF_SUFREED        0x0004  /* segment usages has been freed */
  84#define NILFS_CF_HISTORY_MASK   (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)
  85
  86/* Operations depending on the construction mode and file type */
  87struct nilfs_sc_operations {
  88        int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
  89                            struct inode *);
  90        int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
  91                            struct inode *);
  92        int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
  93                            struct inode *);
  94        void (*write_data_binfo)(struct nilfs_sc_info *,
  95                                 struct nilfs_segsum_pointer *,
  96                                 union nilfs_binfo *);
  97        void (*write_node_binfo)(struct nilfs_sc_info *,
  98                                 struct nilfs_segsum_pointer *,
  99                                 union nilfs_binfo *);
 100};
 101
 102/*
 103 * Other definitions
 104 */
 105static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
 106static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
 107static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
 108static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
 109
 110#define nilfs_cnt32_gt(a, b)   \
 111        (typecheck(__u32, a) && typecheck(__u32, b) && \
 112         ((__s32)(b) - (__s32)(a) < 0))
 113#define nilfs_cnt32_ge(a, b)   \
 114        (typecheck(__u32, a) && typecheck(__u32, b) && \
 115         ((__s32)(a) - (__s32)(b) >= 0))
 116#define nilfs_cnt32_lt(a, b)  nilfs_cnt32_gt(b, a)
 117#define nilfs_cnt32_le(a, b)  nilfs_cnt32_ge(b, a)
 118
 119static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
 120{
 121        struct nilfs_transaction_info *cur_ti = current->journal_info;
 122        void *save = NULL;
 123
 124        if (cur_ti) {
 125                if (cur_ti->ti_magic == NILFS_TI_MAGIC)
 126                        return ++cur_ti->ti_count;
 127                else {
 128                        /*
 129                         * If journal_info field is occupied by other FS,
 130                         * it is saved and will be restored on
 131                         * nilfs_transaction_commit().
 132                         */
 133                        printk(KERN_WARNING
 134                               "NILFS warning: journal info from a different "
 135                               "FS\n");
 136                        save = current->journal_info;
 137                }
 138        }
 139        if (!ti) {
 140                ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
 141                if (!ti)
 142                        return -ENOMEM;
 143                ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
 144        } else {
 145                ti->ti_flags = 0;
 146        }
 147        ti->ti_count = 0;
 148        ti->ti_save = save;
 149        ti->ti_magic = NILFS_TI_MAGIC;
 150        current->journal_info = ti;
 151        return 0;
 152}
 153
 154/**
 155 * nilfs_transaction_begin - start indivisible file operations.
 156 * @sb: super block
 157 * @ti: nilfs_transaction_info
 158 * @vacancy_check: flags for vacancy rate checks
 159 *
 160 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 161 * the segment semaphore, to make a segment construction and write tasks
 162 * exclusive.  The function is used with nilfs_transaction_commit() in pairs.
 163 * The region enclosed by these two functions can be nested.  To avoid a
 164 * deadlock, the semaphore is only acquired or released in the outermost call.
 165 *
 166 * This function allocates a nilfs_transaction_info struct to keep context
 167 * information on it.  It is initialized and hooked onto the current task in
 168 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
 169 * instead; otherwise a new struct is assigned from a slab.
 170 *
 171 * When @vacancy_check flag is set, this function will check the amount of
 172 * free space, and will wait for the GC to reclaim disk space if low capacity.
 173 *
 174 * Return Value: On success, 0 is returned. On error, one of the following
 175 * negative error code is returned.
 176 *
 177 * %-ENOMEM - Insufficient memory available.
 178 *
 179 * %-ENOSPC - No space left on device
 180 */
 181int nilfs_transaction_begin(struct super_block *sb,
 182                            struct nilfs_transaction_info *ti,
 183                            int vacancy_check)
 184{
 185        struct the_nilfs *nilfs;
 186        int ret = nilfs_prepare_segment_lock(ti);
 187
 188        if (unlikely(ret < 0))
 189                return ret;
 190        if (ret > 0)
 191                return 0;
 192
 193        sb_start_intwrite(sb);
 194
 195        nilfs = sb->s_fs_info;
 196        down_read(&nilfs->ns_segctor_sem);
 197        if (vacancy_check && nilfs_near_disk_full(nilfs)) {
 198                up_read(&nilfs->ns_segctor_sem);
 199                ret = -ENOSPC;
 200                goto failed;
 201        }
 202        return 0;
 203
 204 failed:
 205        ti = current->journal_info;
 206        current->journal_info = ti->ti_save;
 207        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
 208                kmem_cache_free(nilfs_transaction_cachep, ti);
 209        sb_end_intwrite(sb);
 210        return ret;
 211}
 212
 213/**
 214 * nilfs_transaction_commit - commit indivisible file operations.
 215 * @sb: super block
 216 *
 217 * nilfs_transaction_commit() releases the read semaphore which is
 218 * acquired by nilfs_transaction_begin(). This is only performed
 219 * in outermost call of this function.  If a commit flag is set,
 220 * nilfs_transaction_commit() sets a timer to start the segment
 221 * constructor.  If a sync flag is set, it starts construction
 222 * directly.
 223 */
 224int nilfs_transaction_commit(struct super_block *sb)
 225{
 226        struct nilfs_transaction_info *ti = current->journal_info;
 227        struct the_nilfs *nilfs = sb->s_fs_info;
 228        int err = 0;
 229
 230        BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
 231        ti->ti_flags |= NILFS_TI_COMMIT;
 232        if (ti->ti_count > 0) {
 233                ti->ti_count--;
 234                return 0;
 235        }
 236        if (nilfs->ns_writer) {
 237                struct nilfs_sc_info *sci = nilfs->ns_writer;
 238
 239                if (ti->ti_flags & NILFS_TI_COMMIT)
 240                        nilfs_segctor_start_timer(sci);
 241                if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
 242                        nilfs_segctor_do_flush(sci, 0);
 243        }
 244        up_read(&nilfs->ns_segctor_sem);
 245        current->journal_info = ti->ti_save;
 246
 247        if (ti->ti_flags & NILFS_TI_SYNC)
 248                err = nilfs_construct_segment(sb);
 249        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
 250                kmem_cache_free(nilfs_transaction_cachep, ti);
 251        sb_end_intwrite(sb);
 252        return err;
 253}
 254
 255void nilfs_transaction_abort(struct super_block *sb)
 256{
 257        struct nilfs_transaction_info *ti = current->journal_info;
 258        struct the_nilfs *nilfs = sb->s_fs_info;
 259
 260        BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
 261        if (ti->ti_count > 0) {
 262                ti->ti_count--;
 263                return;
 264        }
 265        up_read(&nilfs->ns_segctor_sem);
 266
 267        current->journal_info = ti->ti_save;
 268        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
 269                kmem_cache_free(nilfs_transaction_cachep, ti);
 270        sb_end_intwrite(sb);
 271}
 272
 273void nilfs_relax_pressure_in_lock(struct super_block *sb)
 274{
 275        struct the_nilfs *nilfs = sb->s_fs_info;
 276        struct nilfs_sc_info *sci = nilfs->ns_writer;
 277
 278        if (!sci || !sci->sc_flush_request)
 279                return;
 280
 281        set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
 282        up_read(&nilfs->ns_segctor_sem);
 283
 284        down_write(&nilfs->ns_segctor_sem);
 285        if (sci->sc_flush_request &&
 286            test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
 287                struct nilfs_transaction_info *ti = current->journal_info;
 288
 289                ti->ti_flags |= NILFS_TI_WRITER;
 290                nilfs_segctor_do_immediate_flush(sci);
 291                ti->ti_flags &= ~NILFS_TI_WRITER;
 292        }
 293        downgrade_write(&nilfs->ns_segctor_sem);
 294}
 295
 296static void nilfs_transaction_lock(struct super_block *sb,
 297                                   struct nilfs_transaction_info *ti,
 298                                   int gcflag)
 299{
 300        struct nilfs_transaction_info *cur_ti = current->journal_info;
 301        struct the_nilfs *nilfs = sb->s_fs_info;
 302        struct nilfs_sc_info *sci = nilfs->ns_writer;
 303
 304        WARN_ON(cur_ti);
 305        ti->ti_flags = NILFS_TI_WRITER;
 306        ti->ti_count = 0;
 307        ti->ti_save = cur_ti;
 308        ti->ti_magic = NILFS_TI_MAGIC;
 309        current->journal_info = ti;
 310
 311        for (;;) {
 312                down_write(&nilfs->ns_segctor_sem);
 313                if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
 314                        break;
 315
 316                nilfs_segctor_do_immediate_flush(sci);
 317
 318                up_write(&nilfs->ns_segctor_sem);
 319                yield();
 320        }
 321        if (gcflag)
 322                ti->ti_flags |= NILFS_TI_GC;
 323}
 324
 325static void nilfs_transaction_unlock(struct super_block *sb)
 326{
 327        struct nilfs_transaction_info *ti = current->journal_info;
 328        struct the_nilfs *nilfs = sb->s_fs_info;
 329
 330        BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
 331        BUG_ON(ti->ti_count > 0);
 332
 333        up_write(&nilfs->ns_segctor_sem);
 334        current->journal_info = ti->ti_save;
 335}
 336
 337static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
 338                                            struct nilfs_segsum_pointer *ssp,
 339                                            unsigned bytes)
 340{
 341        struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
 342        unsigned blocksize = sci->sc_super->s_blocksize;
 343        void *p;
 344
 345        if (unlikely(ssp->offset + bytes > blocksize)) {
 346                ssp->offset = 0;
 347                BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
 348                                               &segbuf->sb_segsum_buffers));
 349                ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
 350        }
 351        p = ssp->bh->b_data + ssp->offset;
 352        ssp->offset += bytes;
 353        return p;
 354}
 355
 356/**
 357 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 358 * @sci: nilfs_sc_info
 359 */
 360static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
 361{
 362        struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
 363        struct buffer_head *sumbh;
 364        unsigned sumbytes;
 365        unsigned flags = 0;
 366        int err;
 367
 368        if (nilfs_doing_gc())
 369                flags = NILFS_SS_GC;
 370        err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
 371        if (unlikely(err))
 372                return err;
 373
 374        sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
 375        sumbytes = segbuf->sb_sum.sumbytes;
 376        sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
 377        sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
 378        sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
 379        return 0;
 380}
 381
 382static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
 383{
 384        sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
 385        if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
 386                return -E2BIG; /* The current segment is filled up
 387                                  (internal code) */
 388        sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
 389        return nilfs_segctor_reset_segment_buffer(sci);
 390}
 391
 392static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
 393{
 394        struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
 395        int err;
 396
 397        if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
 398                err = nilfs_segctor_feed_segment(sci);
 399                if (err)
 400                        return err;
 401                segbuf = sci->sc_curseg;
 402        }
 403        err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
 404        if (likely(!err))
 405                segbuf->sb_sum.flags |= NILFS_SS_SR;
 406        return err;
 407}
 408
 409/*
 410 * Functions for making segment summary and payloads
 411 */
 412static int nilfs_segctor_segsum_block_required(
 413        struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
 414        unsigned binfo_size)
 415{
 416        unsigned blocksize = sci->sc_super->s_blocksize;
 417        /* Size of finfo and binfo is enough small against blocksize */
 418
 419        return ssp->offset + binfo_size +
 420                (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
 421                blocksize;
 422}
 423
 424static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
 425                                      struct inode *inode)
 426{
 427        sci->sc_curseg->sb_sum.nfinfo++;
 428        sci->sc_binfo_ptr = sci->sc_finfo_ptr;
 429        nilfs_segctor_map_segsum_entry(
 430                sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
 431
 432        if (NILFS_I(inode)->i_root &&
 433            !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
 434                set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
 435        /* skip finfo */
 436}
 437
 438static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
 439                                    struct inode *inode)
 440{
 441        struct nilfs_finfo *finfo;
 442        struct nilfs_inode_info *ii;
 443        struct nilfs_segment_buffer *segbuf;
 444        __u64 cno;
 445
 446        if (sci->sc_blk_cnt == 0)
 447                return;
 448
 449        ii = NILFS_I(inode);
 450
 451        if (test_bit(NILFS_I_GCINODE, &ii->i_state))
 452                cno = ii->i_cno;
 453        else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
 454                cno = 0;
 455        else
 456                cno = sci->sc_cno;
 457
 458        finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
 459                                                 sizeof(*finfo));
 460        finfo->fi_ino = cpu_to_le64(inode->i_ino);
 461        finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
 462        finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
 463        finfo->fi_cno = cpu_to_le64(cno);
 464
 465        segbuf = sci->sc_curseg;
 466        segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
 467                sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
 468        sci->sc_finfo_ptr = sci->sc_binfo_ptr;
 469        sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
 470}
 471
 472static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
 473                                        struct buffer_head *bh,
 474                                        struct inode *inode,
 475                                        unsigned binfo_size)
 476{
 477        struct nilfs_segment_buffer *segbuf;
 478        int required, err = 0;
 479
 480 retry:
 481        segbuf = sci->sc_curseg;
 482        required = nilfs_segctor_segsum_block_required(
 483                sci, &sci->sc_binfo_ptr, binfo_size);
 484        if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
 485                nilfs_segctor_end_finfo(sci, inode);
 486                err = nilfs_segctor_feed_segment(sci);
 487                if (err)
 488                        return err;
 489                goto retry;
 490        }
 491        if (unlikely(required)) {
 492                err = nilfs_segbuf_extend_segsum(segbuf);
 493                if (unlikely(err))
 494                        goto failed;
 495        }
 496        if (sci->sc_blk_cnt == 0)
 497                nilfs_segctor_begin_finfo(sci, inode);
 498
 499        nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
 500        /* Substitution to vblocknr is delayed until update_blocknr() */
 501        nilfs_segbuf_add_file_buffer(segbuf, bh);
 502        sci->sc_blk_cnt++;
 503 failed:
 504        return err;
 505}
 506
 507/*
 508 * Callback functions that enumerate, mark, and collect dirty blocks
 509 */
 510static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
 511                                   struct buffer_head *bh, struct inode *inode)
 512{
 513        int err;
 514
 515        err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
 516        if (err < 0)
 517                return err;
 518
 519        err = nilfs_segctor_add_file_block(sci, bh, inode,
 520                                           sizeof(struct nilfs_binfo_v));
 521        if (!err)
 522                sci->sc_datablk_cnt++;
 523        return err;
 524}
 525
 526static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
 527                                   struct buffer_head *bh,
 528                                   struct inode *inode)
 529{
 530        return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
 531}
 532
 533static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
 534                                   struct buffer_head *bh,
 535                                   struct inode *inode)
 536{
 537        WARN_ON(!buffer_dirty(bh));
 538        return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
 539}
 540
 541static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
 542                                        struct nilfs_segsum_pointer *ssp,
 543                                        union nilfs_binfo *binfo)
 544{
 545        struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
 546                sci, ssp, sizeof(*binfo_v));
 547        *binfo_v = binfo->bi_v;
 548}
 549
 550static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
 551                                        struct nilfs_segsum_pointer *ssp,
 552                                        union nilfs_binfo *binfo)
 553{
 554        __le64 *vblocknr = nilfs_segctor_map_segsum_entry(
 555                sci, ssp, sizeof(*vblocknr));
 556        *vblocknr = binfo->bi_v.bi_vblocknr;
 557}
 558
 559static struct nilfs_sc_operations nilfs_sc_file_ops = {
 560        .collect_data = nilfs_collect_file_data,
 561        .collect_node = nilfs_collect_file_node,
 562        .collect_bmap = nilfs_collect_file_bmap,
 563        .write_data_binfo = nilfs_write_file_data_binfo,
 564        .write_node_binfo = nilfs_write_file_node_binfo,
 565};
 566
 567static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
 568                                  struct buffer_head *bh, struct inode *inode)
 569{
 570        int err;
 571
 572        err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
 573        if (err < 0)
 574                return err;
 575
 576        err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
 577        if (!err)
 578                sci->sc_datablk_cnt++;
 579        return err;
 580}
 581
 582static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
 583                                  struct buffer_head *bh, struct inode *inode)
 584{
 585        WARN_ON(!buffer_dirty(bh));
 586        return nilfs_segctor_add_file_block(sci, bh, inode,
 587                                            sizeof(struct nilfs_binfo_dat));
 588}
 589
 590static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
 591                                       struct nilfs_segsum_pointer *ssp,
 592                                       union nilfs_binfo *binfo)
 593{
 594        __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
 595                                                          sizeof(*blkoff));
 596        *blkoff = binfo->bi_dat.bi_blkoff;
 597}
 598
 599static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
 600                                       struct nilfs_segsum_pointer *ssp,
 601                                       union nilfs_binfo *binfo)
 602{
 603        struct nilfs_binfo_dat *binfo_dat =
 604                nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
 605        *binfo_dat = binfo->bi_dat;
 606}
 607
 608static struct nilfs_sc_operations nilfs_sc_dat_ops = {
 609        .collect_data = nilfs_collect_dat_data,
 610        .collect_node = nilfs_collect_file_node,
 611        .collect_bmap = nilfs_collect_dat_bmap,
 612        .write_data_binfo = nilfs_write_dat_data_binfo,
 613        .write_node_binfo = nilfs_write_dat_node_binfo,
 614};
 615
 616static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
 617        .collect_data = nilfs_collect_file_data,
 618        .collect_node = NULL,
 619        .collect_bmap = NULL,
 620        .write_data_binfo = nilfs_write_file_data_binfo,
 621        .write_node_binfo = NULL,
 622};
 623
 624static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
 625                                              struct list_head *listp,
 626                                              size_t nlimit,
 627                                              loff_t start, loff_t end)
 628{
 629        struct address_space *mapping = inode->i_mapping;
 630        struct pagevec pvec;
 631        pgoff_t index = 0, last = ULONG_MAX;
 632        size_t ndirties = 0;
 633        int i;
 634
 635        if (unlikely(start != 0 || end != LLONG_MAX)) {
 636                /*
 637                 * A valid range is given for sync-ing data pages. The
 638                 * range is rounded to per-page; extra dirty buffers
 639                 * may be included if blocksize < pagesize.
 640                 */
 641                index = start >> PAGE_SHIFT;
 642                last = end >> PAGE_SHIFT;
 643        }
 644        pagevec_init(&pvec, 0);
 645 repeat:
 646        if (unlikely(index > last) ||
 647            !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
 648                                min_t(pgoff_t, last - index,
 649                                      PAGEVEC_SIZE - 1) + 1))
 650                return ndirties;
 651
 652        for (i = 0; i < pagevec_count(&pvec); i++) {
 653                struct buffer_head *bh, *head;
 654                struct page *page = pvec.pages[i];
 655
 656                if (unlikely(page->index > last))
 657                        break;
 658
 659                lock_page(page);
 660                if (!page_has_buffers(page))
 661                        create_empty_buffers(page, 1 << inode->i_blkbits, 0);
 662                unlock_page(page);
 663
 664                bh = head = page_buffers(page);
 665                do {
 666                        if (!buffer_dirty(bh) || buffer_async_write(bh))
 667                                continue;
 668                        get_bh(bh);
 669                        list_add_tail(&bh->b_assoc_buffers, listp);
 670                        ndirties++;
 671                        if (unlikely(ndirties >= nlimit)) {
 672                                pagevec_release(&pvec);
 673                                cond_resched();
 674                                return ndirties;
 675                        }
 676                } while (bh = bh->b_this_page, bh != head);
 677        }
 678        pagevec_release(&pvec);
 679        cond_resched();
 680        goto repeat;
 681}
 682
 683static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
 684                                            struct list_head *listp)
 685{
 686        struct nilfs_inode_info *ii = NILFS_I(inode);
 687        struct address_space *mapping = &ii->i_btnode_cache;
 688        struct pagevec pvec;
 689        struct buffer_head *bh, *head;
 690        unsigned int i;
 691        pgoff_t index = 0;
 692
 693        pagevec_init(&pvec, 0);
 694
 695        while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
 696                                  PAGEVEC_SIZE)) {
 697                for (i = 0; i < pagevec_count(&pvec); i++) {
 698                        bh = head = page_buffers(pvec.pages[i]);
 699                        do {
 700                                if (buffer_dirty(bh) &&
 701                                                !buffer_async_write(bh)) {
 702                                        get_bh(bh);
 703                                        list_add_tail(&bh->b_assoc_buffers,
 704                                                      listp);
 705                                }
 706                                bh = bh->b_this_page;
 707                        } while (bh != head);
 708                }
 709                pagevec_release(&pvec);
 710                cond_resched();
 711        }
 712}
 713
 714static void nilfs_dispose_list(struct the_nilfs *nilfs,
 715                               struct list_head *head, int force)
 716{
 717        struct nilfs_inode_info *ii, *n;
 718        struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
 719        unsigned nv = 0;
 720
 721        while (!list_empty(head)) {
 722                spin_lock(&nilfs->ns_inode_lock);
 723                list_for_each_entry_safe(ii, n, head, i_dirty) {
 724                        list_del_init(&ii->i_dirty);
 725                        if (force) {
 726                                if (unlikely(ii->i_bh)) {
 727                                        brelse(ii->i_bh);
 728                                        ii->i_bh = NULL;
 729                                }
 730                        } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
 731                                set_bit(NILFS_I_QUEUED, &ii->i_state);
 732                                list_add_tail(&ii->i_dirty,
 733                                              &nilfs->ns_dirty_files);
 734                                continue;
 735                        }
 736                        ivec[nv++] = ii;
 737                        if (nv == SC_N_INODEVEC)
 738                                break;
 739                }
 740                spin_unlock(&nilfs->ns_inode_lock);
 741
 742                for (pii = ivec; nv > 0; pii++, nv--)
 743                        iput(&(*pii)->vfs_inode);
 744        }
 745}
 746
 747static void nilfs_iput_work_func(struct work_struct *work)
 748{
 749        struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
 750                                                 sc_iput_work);
 751        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 752
 753        nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
 754}
 755
 756static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
 757                                     struct nilfs_root *root)
 758{
 759        int ret = 0;
 760
 761        if (nilfs_mdt_fetch_dirty(root->ifile))
 762                ret++;
 763        if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
 764                ret++;
 765        if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
 766                ret++;
 767        if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
 768                ret++;
 769        return ret;
 770}
 771
 772static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
 773{
 774        return list_empty(&sci->sc_dirty_files) &&
 775                !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
 776                sci->sc_nfreesegs == 0 &&
 777                (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
 778}
 779
 780static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
 781{
 782        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 783        int ret = 0;
 784
 785        if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
 786                set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
 787
 788        spin_lock(&nilfs->ns_inode_lock);
 789        if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
 790                ret++;
 791
 792        spin_unlock(&nilfs->ns_inode_lock);
 793        return ret;
 794}
 795
 796static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
 797{
 798        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 799
 800        nilfs_mdt_clear_dirty(sci->sc_root->ifile);
 801        nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
 802        nilfs_mdt_clear_dirty(nilfs->ns_sufile);
 803        nilfs_mdt_clear_dirty(nilfs->ns_dat);
 804}
 805
 806static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
 807{
 808        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 809        struct buffer_head *bh_cp;
 810        struct nilfs_checkpoint *raw_cp;
 811        int err;
 812
 813        /* XXX: this interface will be changed */
 814        err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
 815                                          &raw_cp, &bh_cp);
 816        if (likely(!err)) {
 817                /* The following code is duplicated with cpfile.  But, it is
 818                   needed to collect the checkpoint even if it was not newly
 819                   created */
 820                mark_buffer_dirty(bh_cp);
 821                nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
 822                nilfs_cpfile_put_checkpoint(
 823                        nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
 824        } else
 825                WARN_ON(err == -EINVAL || err == -ENOENT);
 826
 827        return err;
 828}
 829
 830static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
 831{
 832        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
 833        struct buffer_head *bh_cp;
 834        struct nilfs_checkpoint *raw_cp;
 835        int err;
 836
 837        err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
 838                                          &raw_cp, &bh_cp);
 839        if (unlikely(err)) {
 840                WARN_ON(err == -EINVAL || err == -ENOENT);
 841                goto failed_ibh;
 842        }
 843        raw_cp->cp_snapshot_list.ssl_next = 0;
 844        raw_cp->cp_snapshot_list.ssl_prev = 0;
 845        raw_cp->cp_inodes_count =
 846                cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
 847        raw_cp->cp_blocks_count =
 848                cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
 849        raw_cp->cp_nblk_inc =
 850                cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
 851        raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
 852        raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
 853
 854        if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
 855                nilfs_checkpoint_clear_minor(raw_cp);
 856        else
 857                nilfs_checkpoint_set_minor(raw_cp);
 858
 859        nilfs_write_inode_common(sci->sc_root->ifile,
 860                                 &raw_cp->cp_ifile_inode, 1);
 861        nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
 862        return 0;
 863
 864 failed_ibh:
 865        return err;
 866}
 867
 868static void nilfs_fill_in_file_bmap(struct inode *ifile,
 869                                    struct nilfs_inode_info *ii)
 870
 871{
 872        struct buffer_head *ibh;
 873        struct nilfs_inode *raw_inode;
 874
 875        if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
 876                ibh = ii->i_bh;
 877                BUG_ON(!ibh);
 878                raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
 879                                                  ibh);
 880                nilfs_bmap_write(ii->i_bmap, raw_inode);
 881                nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
 882        }
 883}
 884
 885static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
 886{
 887        struct nilfs_inode_info *ii;
 888
 889        list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
 890                nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
 891                set_bit(NILFS_I_COLLECTED, &ii->i_state);
 892        }
 893}
 894
 895static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
 896                                             struct the_nilfs *nilfs)
 897{
 898        struct buffer_head *bh_sr;
 899        struct nilfs_super_root *raw_sr;
 900        unsigned isz, srsz;
 901
 902        bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
 903        raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
 904        isz = nilfs->ns_inode_size;
 905        srsz = NILFS_SR_BYTES(isz);
 906
 907        raw_sr->sr_bytes = cpu_to_le16(srsz);
 908        raw_sr->sr_nongc_ctime
 909                = cpu_to_le64(nilfs_doing_gc() ?
 910                              nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
 911        raw_sr->sr_flags = 0;
 912
 913        nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
 914                                 NILFS_SR_DAT_OFFSET(isz), 1);
 915        nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
 916                                 NILFS_SR_CPFILE_OFFSET(isz), 1);
 917        nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
 918                                 NILFS_SR_SUFILE_OFFSET(isz), 1);
 919        memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
 920}
 921
 922static void nilfs_redirty_inodes(struct list_head *head)
 923{
 924        struct nilfs_inode_info *ii;
 925
 926        list_for_each_entry(ii, head, i_dirty) {
 927                if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
 928                        clear_bit(NILFS_I_COLLECTED, &ii->i_state);
 929        }
 930}
 931
 932static void nilfs_drop_collected_inodes(struct list_head *head)
 933{
 934        struct nilfs_inode_info *ii;
 935
 936        list_for_each_entry(ii, head, i_dirty) {
 937                if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
 938                        continue;
 939
 940                clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
 941                set_bit(NILFS_I_UPDATED, &ii->i_state);
 942        }
 943}
 944
 945static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
 946                                       struct inode *inode,
 947                                       struct list_head *listp,
 948                                       int (*collect)(struct nilfs_sc_info *,
 949                                                      struct buffer_head *,
 950                                                      struct inode *))
 951{
 952        struct buffer_head *bh, *n;
 953        int err = 0;
 954
 955        if (collect) {
 956                list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
 957                        list_del_init(&bh->b_assoc_buffers);
 958                        err = collect(sci, bh, inode);
 959                        brelse(bh);
 960                        if (unlikely(err))
 961                                goto dispose_buffers;
 962                }
 963                return 0;
 964        }
 965
 966 dispose_buffers:
 967        while (!list_empty(listp)) {
 968                bh = list_first_entry(listp, struct buffer_head,
 969                                      b_assoc_buffers);
 970                list_del_init(&bh->b_assoc_buffers);
 971                brelse(bh);
 972        }
 973        return err;
 974}
 975
 976static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
 977{
 978        /* Remaining number of blocks within segment buffer */
 979        return sci->sc_segbuf_nblocks -
 980                (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
 981}
 982
 983static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
 984                                   struct inode *inode,
 985                                   struct nilfs_sc_operations *sc_ops)
 986{
 987        LIST_HEAD(data_buffers);
 988        LIST_HEAD(node_buffers);
 989        int err;
 990
 991        if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
 992                size_t n, rest = nilfs_segctor_buffer_rest(sci);
 993
 994                n = nilfs_lookup_dirty_data_buffers(
 995                        inode, &data_buffers, rest + 1, 0, LLONG_MAX);
 996                if (n > rest) {
 997                        err = nilfs_segctor_apply_buffers(
 998                                sci, inode, &data_buffers,
 999                                sc_ops->collect_data);
1000                        BUG_ON(!err); /* always receive -E2BIG or true error */
1001                        goto break_or_fail;
1002                }
1003        }
1004        nilfs_lookup_dirty_node_buffers(inode, &node_buffers);
1005
1006        if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1007                err = nilfs_segctor_apply_buffers(
1008                        sci, inode, &data_buffers, sc_ops->collect_data);
1009                if (unlikely(err)) {
1010                        /* dispose node list */
1011                        nilfs_segctor_apply_buffers(
1012                                sci, inode, &node_buffers, NULL);
1013                        goto break_or_fail;
1014                }
1015                sci->sc_stage.flags |= NILFS_CF_NODE;
1016        }
1017        /* Collect node */
1018        err = nilfs_segctor_apply_buffers(
1019                sci, inode, &node_buffers, sc_ops->collect_node);
1020        if (unlikely(err))
1021                goto break_or_fail;
1022
1023        nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
1024        err = nilfs_segctor_apply_buffers(
1025                sci, inode, &node_buffers, sc_ops->collect_bmap);
1026        if (unlikely(err))
1027                goto break_or_fail;
1028
1029        nilfs_segctor_end_finfo(sci, inode);
1030        sci->sc_stage.flags &= ~NILFS_CF_NODE;
1031
1032 break_or_fail:
1033        return err;
1034}
1035
1036static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1037                                         struct inode *inode)
1038{
1039        LIST_HEAD(data_buffers);
1040        size_t n, rest = nilfs_segctor_buffer_rest(sci);
1041        int err;
1042
1043        n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
1044                                            sci->sc_dsync_start,
1045                                            sci->sc_dsync_end);
1046
1047        err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
1048                                          nilfs_collect_file_data);
1049        if (!err) {
1050                nilfs_segctor_end_finfo(sci, inode);
1051                BUG_ON(n > rest);
1052                /* always receive -E2BIG or true error if n > rest */
1053        }
1054        return err;
1055}
1056
1057static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1058{
1059        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1060        struct list_head *head;
1061        struct nilfs_inode_info *ii;
1062        size_t ndone;
1063        int err = 0;
1064
1065        switch (sci->sc_stage.scnt) {
1066        case NILFS_ST_INIT:
1067                /* Pre-processes */
1068                sci->sc_stage.flags = 0;
1069
1070                if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
1071                        sci->sc_nblk_inc = 0;
1072                        sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1073                        if (mode == SC_LSEG_DSYNC) {
1074                                sci->sc_stage.scnt = NILFS_ST_DSYNC;
1075                                goto dsync_mode;
1076                        }
1077                }
1078
1079                sci->sc_stage.dirty_file_ptr = NULL;
1080                sci->sc_stage.gc_inode_ptr = NULL;
1081                if (mode == SC_FLUSH_DAT) {
1082                        sci->sc_stage.scnt = NILFS_ST_DAT;
1083                        goto dat_stage;
1084                }
1085                sci->sc_stage.scnt++;  /* Fall through */
1086        case NILFS_ST_GC:
1087                if (nilfs_doing_gc()) {
1088                        head = &sci->sc_gc_inodes;
1089                        ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
1090                                                head, i_dirty);
1091                        list_for_each_entry_continue(ii, head, i_dirty) {
1092                                err = nilfs_segctor_scan_file(
1093                                        sci, &ii->vfs_inode,
1094                                        &nilfs_sc_file_ops);
1095                                if (unlikely(err)) {
1096                                        sci->sc_stage.gc_inode_ptr = list_entry(
1097                                                ii->i_dirty.prev,
1098                                                struct nilfs_inode_info,
1099                                                i_dirty);
1100                                        goto break_or_fail;
1101                                }
1102                                set_bit(NILFS_I_COLLECTED, &ii->i_state);
1103                        }
1104                        sci->sc_stage.gc_inode_ptr = NULL;
1105                }
1106                sci->sc_stage.scnt++;  /* Fall through */
1107        case NILFS_ST_FILE:
1108                head = &sci->sc_dirty_files;
1109                ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
1110                                        i_dirty);
1111                list_for_each_entry_continue(ii, head, i_dirty) {
1112                        clear_bit(NILFS_I_DIRTY, &ii->i_state);
1113
1114                        err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
1115                                                      &nilfs_sc_file_ops);
1116                        if (unlikely(err)) {
1117                                sci->sc_stage.dirty_file_ptr =
1118                                        list_entry(ii->i_dirty.prev,
1119                                                   struct nilfs_inode_info,
1120                                                   i_dirty);
1121                                goto break_or_fail;
1122                        }
1123                        /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
1124                        /* XXX: required ? */
1125                }
1126                sci->sc_stage.dirty_file_ptr = NULL;
1127                if (mode == SC_FLUSH_FILE) {
1128                        sci->sc_stage.scnt = NILFS_ST_DONE;
1129                        return 0;
1130                }
1131                sci->sc_stage.scnt++;
1132                sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1133                /* Fall through */
1134        case NILFS_ST_IFILE:
1135                err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
1136                                              &nilfs_sc_file_ops);
1137                if (unlikely(err))
1138                        break;
1139                sci->sc_stage.scnt++;
1140                /* Creating a checkpoint */
1141                err = nilfs_segctor_create_checkpoint(sci);
1142                if (unlikely(err))
1143                        break;
1144                /* Fall through */
1145        case NILFS_ST_CPFILE:
1146                err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
1147                                              &nilfs_sc_file_ops);
1148                if (unlikely(err))
1149                        break;
1150                sci->sc_stage.scnt++;  /* Fall through */
1151        case NILFS_ST_SUFILE:
1152                err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
1153                                         sci->sc_nfreesegs, &ndone);
1154                if (unlikely(err)) {
1155                        nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1156                                                  sci->sc_freesegs, ndone,
1157                                                  NULL);
1158                        break;
1159                }
1160                sci->sc_stage.flags |= NILFS_CF_SUFREED;
1161
1162                err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
1163                                              &nilfs_sc_file_ops);
1164                if (unlikely(err))
1165                        break;
1166                sci->sc_stage.scnt++;  /* Fall through */
1167        case NILFS_ST_DAT:
1168 dat_stage:
1169                err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
1170                                              &nilfs_sc_dat_ops);
1171                if (unlikely(err))
1172                        break;
1173                if (mode == SC_FLUSH_DAT) {
1174                        sci->sc_stage.scnt = NILFS_ST_DONE;
1175                        return 0;
1176                }
1177                sci->sc_stage.scnt++;  /* Fall through */
1178        case NILFS_ST_SR:
1179                if (mode == SC_LSEG_SR) {
1180                        /* Appending a super root */
1181                        err = nilfs_segctor_add_super_root(sci);
1182                        if (unlikely(err))
1183                                break;
1184                }
1185                /* End of a logical segment */
1186                sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1187                sci->sc_stage.scnt = NILFS_ST_DONE;
1188                return 0;
1189        case NILFS_ST_DSYNC:
1190 dsync_mode:
1191                sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
1192                ii = sci->sc_dsync_inode;
1193                if (!test_bit(NILFS_I_BUSY, &ii->i_state))
1194                        break;
1195
1196                err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
1197                if (unlikely(err))
1198                        break;
1199                sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1200                sci->sc_stage.scnt = NILFS_ST_DONE;
1201                return 0;
1202        case NILFS_ST_DONE:
1203                return 0;
1204        default:
1205                BUG();
1206        }
1207
1208 break_or_fail:
1209        return err;
1210}
1211
1212/**
1213 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
1214 * @sci: nilfs_sc_info
1215 * @nilfs: nilfs object
1216 */
1217static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1218                                            struct the_nilfs *nilfs)
1219{
1220        struct nilfs_segment_buffer *segbuf, *prev;
1221        __u64 nextnum;
1222        int err, alloc = 0;
1223
1224        segbuf = nilfs_segbuf_new(sci->sc_super);
1225        if (unlikely(!segbuf))
1226                return -ENOMEM;
1227
1228        if (list_empty(&sci->sc_write_logs)) {
1229                nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
1230                                 nilfs->ns_pseg_offset, nilfs);
1231                if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1232                        nilfs_shift_to_next_segment(nilfs);
1233                        nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1234                }
1235
1236                segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1237                nextnum = nilfs->ns_nextnum;
1238
1239                if (nilfs->ns_segnum == nilfs->ns_nextnum)
1240                        /* Start from the head of a new full segment */
1241                        alloc++;
1242        } else {
1243                /* Continue logs */
1244                prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1245                nilfs_segbuf_map_cont(segbuf, prev);
1246                segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
1247                nextnum = prev->sb_nextnum;
1248
1249                if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1250                        nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1251                        segbuf->sb_sum.seg_seq++;
1252                        alloc++;
1253                }
1254        }
1255
1256        err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
1257        if (err)
1258                goto failed;
1259
1260        if (alloc) {
1261                err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
1262                if (err)
1263                        goto failed;
1264        }
1265        nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1266
1267        BUG_ON(!list_empty(&sci->sc_segbufs));
1268        list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
1269        sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1270        return 0;
1271
1272 failed:
1273        nilfs_segbuf_free(segbuf);
1274        return err;
1275}
1276
1277static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1278                                         struct the_nilfs *nilfs, int nadd)
1279{
1280        struct nilfs_segment_buffer *segbuf, *prev;
1281        struct inode *sufile = nilfs->ns_sufile;
1282        __u64 nextnextnum;
1283        LIST_HEAD(list);
1284        int err, ret, i;
1285
1286        prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
1287        /*
1288         * Since the segment specified with nextnum might be allocated during
1289         * the previous construction, the buffer including its segusage may
1290         * not be dirty.  The following call ensures that the buffer is dirty
1291         * and will pin the buffer on memory until the sufile is written.
1292         */
1293        err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
1294        if (unlikely(err))
1295                return err;
1296
1297        for (i = 0; i < nadd; i++) {
1298                /* extend segment info */
1299                err = -ENOMEM;
1300                segbuf = nilfs_segbuf_new(sci->sc_super);
1301                if (unlikely(!segbuf))
1302                        goto failed;
1303
1304                /* map this buffer to region of segment on-disk */
1305                nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1306                sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;
1307
1308                /* allocate the next next full segment */
1309                err = nilfs_sufile_alloc(sufile, &nextnextnum);
1310                if (unlikely(err))
1311                        goto failed_segbuf;
1312
1313                segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
1314                nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);
1315
1316                list_add_tail(&segbuf->sb_list, &list);
1317                prev = segbuf;
1318        }
1319        list_splice_tail(&list, &sci->sc_segbufs);
1320        return 0;
1321
1322 failed_segbuf:
1323        nilfs_segbuf_free(segbuf);
1324 failed:
1325        list_for_each_entry(segbuf, &list, sb_list) {
1326                ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1327                WARN_ON(ret); /* never fails */
1328        }
1329        nilfs_destroy_logs(&list);
1330        return err;
1331}
1332
1333static void nilfs_free_incomplete_logs(struct list_head *logs,
1334                                       struct the_nilfs *nilfs)
1335{
1336        struct nilfs_segment_buffer *segbuf, *prev;
1337        struct inode *sufile = nilfs->ns_sufile;
1338        int ret;
1339
1340        segbuf = NILFS_FIRST_SEGBUF(logs);
1341        if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1342                ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1343                WARN_ON(ret); /* never fails */
1344        }
1345        if (atomic_read(&segbuf->sb_err)) {
1346                /* Case 1: The first segment failed */
1347                if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1348                        /* Case 1a:  Partial segment appended into an existing
1349                           segment */
1350                        nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
1351                                                segbuf->sb_fseg_end);
1352                else /* Case 1b:  New full segment */
1353                        set_nilfs_discontinued(nilfs);
1354        }
1355
1356        prev = segbuf;
1357        list_for_each_entry_continue(segbuf, logs, sb_list) {
1358                if (prev->sb_nextnum != segbuf->sb_nextnum) {
1359                        ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1360                        WARN_ON(ret); /* never fails */
1361                }
1362                if (atomic_read(&segbuf->sb_err) &&
1363                    segbuf->sb_segnum != nilfs->ns_nextnum)
1364                        /* Case 2: extended segment (!= next) failed */
1365                        nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
1366                prev = segbuf;
1367        }
1368}
1369
1370static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1371                                          struct inode *sufile)
1372{
1373        struct nilfs_segment_buffer *segbuf;
1374        unsigned long live_blocks;
1375        int ret;
1376
1377        list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1378                live_blocks = segbuf->sb_sum.nblocks +
1379                        (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1380                ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1381                                                     live_blocks,
1382                                                     sci->sc_seg_ctime);
1383                WARN_ON(ret); /* always succeed because the segusage is dirty */
1384        }
1385}
1386
1387static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
1388{
1389        struct nilfs_segment_buffer *segbuf;
1390        int ret;
1391
1392        segbuf = NILFS_FIRST_SEGBUF(logs);
1393        ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1394                                             segbuf->sb_pseg_start -
1395                                             segbuf->sb_fseg_start, 0);
1396        WARN_ON(ret); /* always succeed because the segusage is dirty */
1397
1398        list_for_each_entry_continue(segbuf, logs, sb_list) {
1399                ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1400                                                     0, 0);
1401                WARN_ON(ret); /* always succeed */
1402        }
1403}
1404
1405static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1406                                            struct nilfs_segment_buffer *last,
1407                                            struct inode *sufile)
1408{
1409        struct nilfs_segment_buffer *segbuf = last;
1410        int ret;
1411
1412        list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1413                sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1414                ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1415                WARN_ON(ret);
1416        }
1417        nilfs_truncate_logs(&sci->sc_segbufs, last);
1418}
1419
1420
1421static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1422                                 struct the_nilfs *nilfs, int mode)
1423{
1424        struct nilfs_cstage prev_stage = sci->sc_stage;
1425        int err, nadd = 1;
1426
1427        /* Collection retry loop */
1428        for (;;) {
1429                sci->sc_nblk_this_inc = 0;
1430                sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1431
1432                err = nilfs_segctor_reset_segment_buffer(sci);
1433                if (unlikely(err))
1434                        goto failed;
1435
1436                err = nilfs_segctor_collect_blocks(sci, mode);
1437                sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
1438                if (!err)
1439                        break;
1440
1441                if (unlikely(err != -E2BIG))
1442                        goto failed;
1443
1444                /* The current segment is filled up */
1445                if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
1446                        break;
1447
1448                nilfs_clear_logs(&sci->sc_segbufs);
1449
1450                if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1451                        err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1452                                                        sci->sc_freesegs,
1453                                                        sci->sc_nfreesegs,
1454                                                        NULL);
1455                        WARN_ON(err); /* do not happen */
1456                        sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
1457                }
1458
1459                err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1460                if (unlikely(err))
1461                        return err;
1462
1463                nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
1464                sci->sc_stage = prev_stage;
1465        }
1466        nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
1467        return 0;
1468
1469 failed:
1470        return err;
1471}
1472
1473static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
1474                                      struct buffer_head *new_bh)
1475{
1476        BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
1477
1478        list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
1479        /* The caller must release old_bh */
1480}
1481
1482static int
1483nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1484                                     struct nilfs_segment_buffer *segbuf,
1485                                     int mode)
1486{
1487        struct inode *inode = NULL;
1488        sector_t blocknr;
1489        unsigned long nfinfo = segbuf->sb_sum.nfinfo;
1490        unsigned long nblocks = 0, ndatablk = 0;
1491        struct nilfs_sc_operations *sc_op = NULL;
1492        struct nilfs_segsum_pointer ssp;
1493        struct nilfs_finfo *finfo = NULL;
1494        union nilfs_binfo binfo;
1495        struct buffer_head *bh, *bh_org;
1496        ino_t ino = 0;
1497        int err = 0;
1498
1499        if (!nfinfo)
1500                goto out;
1501
1502        blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
1503        ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
1504        ssp.offset = sizeof(struct nilfs_segment_summary);
1505
1506        list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1507                if (bh == segbuf->sb_super_root)
1508                        break;
1509                if (!finfo) {
1510                        finfo = nilfs_segctor_map_segsum_entry(
1511                                sci, &ssp, sizeof(*finfo));
1512                        ino = le64_to_cpu(finfo->fi_ino);
1513                        nblocks = le32_to_cpu(finfo->fi_nblocks);
1514                        ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1515
1516                        inode = bh->b_page->mapping->host;
1517
1518                        if (mode == SC_LSEG_DSYNC)
1519                                sc_op = &nilfs_sc_dsync_ops;
1520                        else if (ino == NILFS_DAT_INO)
1521                                sc_op = &nilfs_sc_dat_ops;
1522                        else /* file blocks */
1523                                sc_op = &nilfs_sc_file_ops;
1524                }
1525                bh_org = bh;
1526                get_bh(bh_org);
1527                err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
1528                                        &binfo);
1529                if (bh != bh_org)
1530                        nilfs_list_replace_buffer(bh_org, bh);
1531                brelse(bh_org);
1532                if (unlikely(err))
1533                        goto failed_bmap;
1534
1535                if (ndatablk > 0)
1536                        sc_op->write_data_binfo(sci, &ssp, &binfo);
1537                else
1538                        sc_op->write_node_binfo(sci, &ssp, &binfo);
1539
1540                blocknr++;
1541                if (--nblocks == 0) {
1542                        finfo = NULL;
1543                        if (--nfinfo == 0)
1544                                break;
1545                } else if (ndatablk > 0)
1546                        ndatablk--;
1547        }
1548 out:
1549        return 0;
1550
1551 failed_bmap:
1552        return err;
1553}
1554
1555static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556{
1557        struct nilfs_segment_buffer *segbuf;
1558        int err;
1559
1560        list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1561                err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
1562                if (unlikely(err))
1563                        return err;
1564                nilfs_segbuf_fill_in_segsum(segbuf);
1565        }
1566        return 0;
1567}
1568
1569static void nilfs_begin_page_io(struct page *page)
1570{
1571        if (!page || PageWriteback(page))
1572                /* For split b-tree node pages, this function may be called
1573                   twice.  We ignore the 2nd or later calls by this check. */
1574                return;
1575
1576        lock_page(page);
1577        clear_page_dirty_for_io(page);
1578        set_page_writeback(page);
1579        unlock_page(page);
1580}
1581
1582static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1583{
1584        struct nilfs_segment_buffer *segbuf;
1585        struct page *bd_page = NULL, *fs_page = NULL;
1586
1587        list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1588                struct buffer_head *bh;
1589
1590                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1591                                    b_assoc_buffers) {
1592                        if (bh->b_page != bd_page) {
1593                                if (bd_page) {
1594                                        lock_page(bd_page);
1595                                        clear_page_dirty_for_io(bd_page);
1596                                        set_page_writeback(bd_page);
1597                                        unlock_page(bd_page);
1598                                }
1599                                bd_page = bh->b_page;
1600                        }
1601                }
1602
1603                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1604                                    b_assoc_buffers) {
1605                        set_buffer_async_write(bh);
1606                        if (bh == segbuf->sb_super_root) {
1607                                if (bh->b_page != bd_page) {
1608                                        lock_page(bd_page);
1609                                        clear_page_dirty_for_io(bd_page);
1610                                        set_page_writeback(bd_page);
1611                                        unlock_page(bd_page);
1612                                        bd_page = bh->b_page;
1613                                }
1614                                break;
1615                        }
1616                        if (bh->b_page != fs_page) {
1617                                nilfs_begin_page_io(fs_page);
1618                                fs_page = bh->b_page;
1619                        }
1620                }
1621        }
1622        if (bd_page) {
1623                lock_page(bd_page);
1624                clear_page_dirty_for_io(bd_page);
1625                set_page_writeback(bd_page);
1626                unlock_page(bd_page);
1627        }
1628        nilfs_begin_page_io(fs_page);
1629}
1630
1631static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1632                               struct the_nilfs *nilfs)
1633{
1634        int ret;
1635
1636        ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
1637        list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
1638        return ret;
1639}
1640
1641static void nilfs_end_page_io(struct page *page, int err)
1642{
1643        if (!page)
1644                return;
1645
1646        if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
1647                /*
1648                 * For b-tree node pages, this function may be called twice
1649                 * or more because they might be split in a segment.
1650                 */
1651                if (PageDirty(page)) {
1652                        /*
1653                         * For pages holding split b-tree node buffers, dirty
1654                         * flag on the buffers may be cleared discretely.
1655                         * In that case, the page is once redirtied for
1656                         * remaining buffers, and it must be cancelled if
1657                         * all the buffers get cleaned later.
1658                         */
1659                        lock_page(page);
1660                        if (nilfs_page_buffers_clean(page))
1661                                __nilfs_clear_page_dirty(page);
1662                        unlock_page(page);
1663                }
1664                return;
1665        }
1666
1667        if (!err) {
1668                if (!nilfs_page_buffers_clean(page))
1669                        __set_page_dirty_nobuffers(page);
1670                ClearPageError(page);
1671        } else {
1672                __set_page_dirty_nobuffers(page);
1673                SetPageError(page);
1674        }
1675
1676        end_page_writeback(page);
1677}
1678
1679static void nilfs_abort_logs(struct list_head *logs, int err)
1680{
1681        struct nilfs_segment_buffer *segbuf;
1682        struct page *bd_page = NULL, *fs_page = NULL;
1683        struct buffer_head *bh;
1684
1685        if (list_empty(logs))
1686                return;
1687
1688        list_for_each_entry(segbuf, logs, sb_list) {
1689                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1690                                    b_assoc_buffers) {
1691                        if (bh->b_page != bd_page) {
1692                                if (bd_page)
1693                                        end_page_writeback(bd_page);
1694                                bd_page = bh->b_page;
1695                        }
1696                }
1697
1698                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1699                                    b_assoc_buffers) {
1700                        clear_buffer_async_write(bh);
1701                        if (bh == segbuf->sb_super_root) {
1702                                if (bh->b_page != bd_page) {
1703                                        end_page_writeback(bd_page);
1704                                        bd_page = bh->b_page;
1705                                }
1706                                break;
1707                        }
1708                        if (bh->b_page != fs_page) {
1709                                nilfs_end_page_io(fs_page, err);
1710                                fs_page = bh->b_page;
1711                        }
1712                }
1713        }
1714        if (bd_page)
1715                end_page_writeback(bd_page);
1716
1717        nilfs_end_page_io(fs_page, err);
1718}
1719
1720static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1721                                             struct the_nilfs *nilfs, int err)
1722{
1723        LIST_HEAD(logs);
1724        int ret;
1725
1726        list_splice_tail_init(&sci->sc_write_logs, &logs);
1727        ret = nilfs_wait_on_logs(&logs);
1728        nilfs_abort_logs(&logs, ret ? : err);
1729
1730        list_splice_tail_init(&sci->sc_segbufs, &logs);
1731        nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1732        nilfs_free_incomplete_logs(&logs, nilfs);
1733
1734        if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1735                ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1736                                                sci->sc_freesegs,
1737                                                sci->sc_nfreesegs,
1738                                                NULL);
1739                WARN_ON(ret); /* do not happen */
1740        }
1741
1742        nilfs_destroy_logs(&logs);
1743}
1744
1745static void nilfs_set_next_segment(struct the_nilfs *nilfs,
1746                                   struct nilfs_segment_buffer *segbuf)
1747{
1748        nilfs->ns_segnum = segbuf->sb_segnum;
1749        nilfs->ns_nextnum = segbuf->sb_nextnum;
1750        nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
1751                + segbuf->sb_sum.nblocks;
1752        nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
1753        nilfs->ns_ctime = segbuf->sb_sum.ctime;
1754}
1755
1756static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1757{
1758        struct nilfs_segment_buffer *segbuf;
1759        struct page *bd_page = NULL, *fs_page = NULL;
1760        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1761        int update_sr = false;
1762
1763        list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1764                struct buffer_head *bh;
1765
1766                list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1767                                    b_assoc_buffers) {
1768                        set_buffer_uptodate(bh);
1769                        clear_buffer_dirty(bh);
1770                        if (bh->b_page != bd_page) {
1771                                if (bd_page)
1772                                        end_page_writeback(bd_page);
1773                                bd_page = bh->b_page;
1774                        }
1775                }
1776                /*
1777                 * We assume that the buffers which belong to the same page
1778                 * continue over the buffer list.
1779                 * Under this assumption, the last BHs of pages is
1780                 * identifiable by the discontinuity of bh->b_page
1781                 * (page != fs_page).
1782                 *
1783                 * For B-tree node blocks, however, this assumption is not
1784                 * guaranteed.  The cleanup code of B-tree node pages needs
1785                 * special care.
1786                 */
1787                list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1788                                    b_assoc_buffers) {
1789                        const unsigned long set_bits = (1 << BH_Uptodate);
1790                        const unsigned long clear_bits =
1791                                (1 << BH_Dirty | 1 << BH_Async_Write |
1792                                 1 << BH_Delay | 1 << BH_NILFS_Volatile |
1793                                 1 << BH_NILFS_Redirected);
1794
1795                        set_mask_bits(&bh->b_state, clear_bits, set_bits);
1796                        if (bh == segbuf->sb_super_root) {
1797                                if (bh->b_page != bd_page) {
1798                                        end_page_writeback(bd_page);
1799                                        bd_page = bh->b_page;
1800                                }
1801                                update_sr = true;
1802                                break;
1803                        }
1804                        if (bh->b_page != fs_page) {
1805                                nilfs_end_page_io(fs_page, 0);
1806                                fs_page = bh->b_page;
1807                        }
1808                }
1809
1810                if (!nilfs_segbuf_simplex(segbuf)) {
1811                        if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
1812                                set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1813                                sci->sc_lseg_stime = jiffies;
1814                        }
1815                        if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
1816                                clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
1817                }
1818        }
1819        /*
1820         * Since pages may continue over multiple segment buffers,
1821         * end of the last page must be checked outside of the loop.
1822         */
1823        if (bd_page)
1824                end_page_writeback(bd_page);
1825
1826        nilfs_end_page_io(fs_page, 0);
1827
1828        nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1829
1830        if (nilfs_doing_gc())
1831                nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
1832        else
1833                nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
1834
1835        sci->sc_nblk_inc += sci->sc_nblk_this_inc;
1836
1837        segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1838        nilfs_set_next_segment(nilfs, segbuf);
1839
1840        if (update_sr) {
1841                nilfs->ns_flushed_device = 0;
1842                nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
1843                                       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
1844
1845                clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
1846                clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
1847                set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
1848                nilfs_segctor_clear_metadata_dirty(sci);
1849        } else
1850                clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
1851}
1852
1853static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
1854{
1855        int ret;
1856
1857        ret = nilfs_wait_on_logs(&sci->sc_write_logs);
1858        if (!ret) {
1859                nilfs_segctor_complete_write(sci);
1860                nilfs_destroy_logs(&sci->sc_write_logs);
1861        }
1862        return ret;
1863}
1864
1865static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1866                                             struct the_nilfs *nilfs)
1867{
1868        struct nilfs_inode_info *ii, *n;
1869        struct inode *ifile = sci->sc_root->ifile;
1870
1871        spin_lock(&nilfs->ns_inode_lock);
1872 retry:
1873        list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
1874                if (!ii->i_bh) {
1875                        struct buffer_head *ibh;
1876                        int err;
1877
1878                        spin_unlock(&nilfs->ns_inode_lock);
1879                        err = nilfs_ifile_get_inode_block(
1880                                ifile, ii->vfs_inode.i_ino, &ibh);
1881                        if (unlikely(err)) {
1882                                nilfs_warning(sci->sc_super, __func__,
1883                                              "failed to get inode block.\n");
1884                                return err;
1885                        }
1886                        mark_buffer_dirty(ibh);
1887                        nilfs_mdt_mark_dirty(ifile);
1888                        spin_lock(&nilfs->ns_inode_lock);
1889                        if (likely(!ii->i_bh))
1890                                ii->i_bh = ibh;
1891                        else
1892                                brelse(ibh);
1893                        goto retry;
1894                }
1895
1896                clear_bit(NILFS_I_QUEUED, &ii->i_state);
1897                set_bit(NILFS_I_BUSY, &ii->i_state);
1898                list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1899        }
1900        spin_unlock(&nilfs->ns_inode_lock);
1901
1902        return 0;
1903}
1904
1905static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1906                                             struct the_nilfs *nilfs)
1907{
1908        struct nilfs_inode_info *ii, *n;
1909        int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
1910        int defer_iput = false;
1911
1912        spin_lock(&nilfs->ns_inode_lock);
1913        list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
1914                if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
1915                    test_bit(NILFS_I_DIRTY, &ii->i_state))
1916                        continue;
1917
1918                clear_bit(NILFS_I_BUSY, &ii->i_state);
1919                brelse(ii->i_bh);
1920                ii->i_bh = NULL;
1921                list_del_init(&ii->i_dirty);
1922                if (!ii->vfs_inode.i_nlink || during_mount) {
1923                        /*
1924                         * Defer calling iput() to avoid deadlocks if
1925                         * i_nlink == 0 or mount is not yet finished.
1926                         */
1927                        list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928                        defer_iput = true;
1929                } else {
1930                        spin_unlock(&nilfs->ns_inode_lock);
1931                        iput(&ii->vfs_inode);
1932                        spin_lock(&nilfs->ns_inode_lock);
1933                }
1934        }
1935        spin_unlock(&nilfs->ns_inode_lock);
1936
1937        if (defer_iput)
1938                schedule_work(&sci->sc_iput_work);
1939}
1940
1941/*
1942 * Main procedure of segment constructor
1943 */
1944static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
1945{
1946        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1947        int err;
1948
1949        sci->sc_stage.scnt = NILFS_ST_INIT;
1950        sci->sc_cno = nilfs->ns_cno;
1951
1952        err = nilfs_segctor_collect_dirty_files(sci, nilfs);
1953        if (unlikely(err))
1954                goto out;
1955
1956        if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
1957                set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
1958
1959        if (nilfs_segctor_clean(sci))
1960                goto out;
1961
1962        do {
1963                sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
1964
1965                err = nilfs_segctor_begin_construction(sci, nilfs);
1966                if (unlikely(err))
1967                        goto out;
1968
1969                /* Update time stamp */
1970                sci->sc_seg_ctime = get_seconds();
1971
1972                err = nilfs_segctor_collect(sci, nilfs, mode);
1973                if (unlikely(err))
1974                        goto failed;
1975
1976                /* Avoid empty segment */
1977                if (sci->sc_stage.scnt == NILFS_ST_DONE &&
1978                    nilfs_segbuf_empty(sci->sc_curseg)) {
1979                        nilfs_segctor_abort_construction(sci, nilfs, 1);
1980                        goto out;
1981                }
1982
1983                err = nilfs_segctor_assign(sci, mode);
1984                if (unlikely(err))
1985                        goto failed;
1986
1987                if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
1988                        nilfs_segctor_fill_in_file_bmap(sci);
1989
1990                if (mode == SC_LSEG_SR &&
1991                    sci->sc_stage.scnt >= NILFS_ST_CPFILE) {
1992                        err = nilfs_segctor_fill_in_checkpoint(sci);
1993                        if (unlikely(err))
1994                                goto failed_to_write;
1995
1996                        nilfs_segctor_fill_in_super_root(sci, nilfs);
1997                }
1998                nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
1999
2000                /* Write partial segments */
2001                nilfs_segctor_prepare_write(sci);
2002
2003                nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2004                                            nilfs->ns_crc_seed);
2005
2006                err = nilfs_segctor_write(sci, nilfs);
2007                if (unlikely(err))
2008                        goto failed_to_write;
2009
2010                if (sci->sc_stage.scnt == NILFS_ST_DONE ||
2011                    nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
2012                        /*
2013                         * At this point, we avoid double buffering
2014                         * for blocksize < pagesize because page dirty
2015                         * flag is turned off during write and dirty
2016                         * buffers are not properly collected for
2017                         * pages crossing over segments.
2018                         */
2019                        err = nilfs_segctor_wait(sci);
2020                        if (err)
2021                                goto failed_to_write;
2022                }
2023        } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2024
2025 out:
2026        nilfs_segctor_drop_written_files(sci, nilfs);
2027        return err;
2028
2029 failed_to_write:
2030        if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2031                nilfs_redirty_inodes(&sci->sc_dirty_files);
2032
2033 failed:
2034        if (nilfs_doing_gc())
2035                nilfs_redirty_inodes(&sci->sc_gc_inodes);
2036        nilfs_segctor_abort_construction(sci, nilfs, err);
2037        goto out;
2038}
2039
2040/**
2041 * nilfs_segctor_start_timer - set timer of background write
2042 * @sci: nilfs_sc_info
2043 *
2044 * If the timer has already been set, it ignores the new request.
2045 * This function MUST be called within a section locking the segment
2046 * semaphore.
2047 */
2048static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2049{
2050        spin_lock(&sci->sc_state_lock);
2051        if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2052                sci->sc_timer.expires = jiffies + sci->sc_interval;
2053                add_timer(&sci->sc_timer);
2054                sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2055        }
2056        spin_unlock(&sci->sc_state_lock);
2057}
2058
2059static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2060{
2061        spin_lock(&sci->sc_state_lock);
2062        if (!(sci->sc_flush_request & (1 << bn))) {
2063                unsigned long prev_req = sci->sc_flush_request;
2064
2065                sci->sc_flush_request |= (1 << bn);
2066                if (!prev_req)
2067                        wake_up(&sci->sc_wait_daemon);
2068        }
2069        spin_unlock(&sci->sc_state_lock);
2070}
2071
2072/**
2073 * nilfs_flush_segment - trigger a segment construction for resource control
2074 * @sb: super block
2075 * @ino: inode number of the file to be flushed out.
2076 */
2077void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2078{
2079        struct the_nilfs *nilfs = sb->s_fs_info;
2080        struct nilfs_sc_info *sci = nilfs->ns_writer;
2081
2082        if (!sci || nilfs_doing_construction())
2083                return;
2084        nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2085                                        /* assign bit 0 to data files */
2086}
2087
2088struct nilfs_segctor_wait_request {
2089        wait_queue_t    wq;
2090        __u32           seq;
2091        int             err;
2092        atomic_t        done;
2093};
2094
2095static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2096{
2097        struct nilfs_segctor_wait_request wait_req;
2098        int err = 0;
2099
2100        spin_lock(&sci->sc_state_lock);
2101        init_wait(&wait_req.wq);
2102        wait_req.err = 0;
2103        atomic_set(&wait_req.done, 0);
2104        wait_req.seq = ++sci->sc_seq_request;
2105        spin_unlock(&sci->sc_state_lock);
2106
2107        init_waitqueue_entry(&wait_req.wq, current);
2108        add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2109        set_current_state(TASK_INTERRUPTIBLE);
2110        wake_up(&sci->sc_wait_daemon);
2111
2112        for (;;) {
2113                if (atomic_read(&wait_req.done)) {
2114                        err = wait_req.err;
2115                        break;
2116                }
2117                if (!signal_pending(current)) {
2118                        schedule();
2119                        continue;
2120                }
2121                err = -ERESTARTSYS;
2122                break;
2123        }
2124        finish_wait(&sci->sc_wait_request, &wait_req.wq);
2125        return err;
2126}
2127
2128static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2129{
2130        struct nilfs_segctor_wait_request *wrq, *n;
2131        unsigned long flags;
2132
2133        spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2134        list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
2135                                 wq.task_list) {
2136                if (!atomic_read(&wrq->done) &&
2137                    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2138                        wrq->err = err;
2139                        atomic_set(&wrq->done, 1);
2140                }
2141                if (atomic_read(&wrq->done)) {
2142                        wrq->wq.func(&wrq->wq,
2143                                     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2144                                     0, NULL);
2145                }
2146        }
2147        spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2148}
2149
2150/**
2151 * nilfs_construct_segment - construct a logical segment
2152 * @sb: super block
2153 *
2154 * Return Value: On success, 0 is retured. On errors, one of the following
2155 * negative error code is returned.
2156 *
2157 * %-EROFS - Read only filesystem.
2158 *
2159 * %-EIO - I/O error
2160 *
2161 * %-ENOSPC - No space left on device (only in a panic state).
2162 *
2163 * %-ERESTARTSYS - Interrupted.
2164 *
2165 * %-ENOMEM - Insufficient memory available.
2166 */
2167int nilfs_construct_segment(struct super_block *sb)
2168{
2169        struct the_nilfs *nilfs = sb->s_fs_info;
2170        struct nilfs_sc_info *sci = nilfs->ns_writer;
2171        struct nilfs_transaction_info *ti;
2172        int err;
2173
2174        if (!sci)
2175                return -EROFS;
2176
2177        /* A call inside transactions causes a deadlock. */
2178        BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2179
2180        err = nilfs_segctor_sync(sci);
2181        return err;
2182}
2183
2184/**
2185 * nilfs_construct_dsync_segment - construct a data-only logical segment
2186 * @sb: super block
2187 * @inode: inode whose data blocks should be written out
2188 * @start: start byte offset
2189 * @end: end byte offset (inclusive)
2190 *
2191 * Return Value: On success, 0 is retured. On errors, one of the following
2192 * negative error code is returned.
2193 *
2194 * %-EROFS - Read only filesystem.
2195 *
2196 * %-EIO - I/O error
2197 *
2198 * %-ENOSPC - No space left on device (only in a panic state).
2199 *
2200 * %-ERESTARTSYS - Interrupted.
2201 *
2202 * %-ENOMEM - Insufficient memory available.
2203 */
2204int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2205                                  loff_t start, loff_t end)
2206{
2207        struct the_nilfs *nilfs = sb->s_fs_info;
2208        struct nilfs_sc_info *sci = nilfs->ns_writer;
2209        struct nilfs_inode_info *ii;
2210        struct nilfs_transaction_info ti;
2211        int err = 0;
2212
2213        if (!sci)
2214                return -EROFS;
2215
2216        nilfs_transaction_lock(sb, &ti, 0);
2217
2218        ii = NILFS_I(inode);
2219        if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
2220            nilfs_test_opt(nilfs, STRICT_ORDER) ||
2221            test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2222            nilfs_discontinued(nilfs)) {
2223                nilfs_transaction_unlock(sb);
2224                err = nilfs_segctor_sync(sci);
2225                return err;
2226        }
2227
2228        spin_lock(&nilfs->ns_inode_lock);
2229        if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2230            !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2231                spin_unlock(&nilfs->ns_inode_lock);
2232                nilfs_transaction_unlock(sb);
2233                return 0;
2234        }
2235        spin_unlock(&nilfs->ns_inode_lock);
2236        sci->sc_dsync_inode = ii;
2237        sci->sc_dsync_start = start;
2238        sci->sc_dsync_end = end;
2239
2240        err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2241        if (!err)
2242                nilfs->ns_flushed_device = 0;
2243
2244        nilfs_transaction_unlock(sb);
2245        return err;
2246}
2247
2248#define FLUSH_FILE_BIT  (0x1) /* data file only */
2249#define FLUSH_DAT_BIT   (1 << NILFS_DAT_INO) /* DAT only */
2250
2251/**
2252 * nilfs_segctor_accept - record accepted sequence count of log-write requests
2253 * @sci: segment constructor object
2254 */
2255static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2256{
2257        spin_lock(&sci->sc_state_lock);
2258        sci->sc_seq_accepted = sci->sc_seq_request;
2259        spin_unlock(&sci->sc_state_lock);
2260        del_timer_sync(&sci->sc_timer);
2261}
2262
2263/**
2264 * nilfs_segctor_notify - notify the result of request to caller threads
2265 * @sci: segment constructor object
2266 * @mode: mode of log forming
2267 * @err: error code to be notified
2268 */
2269static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2270{
2271        /* Clear requests (even when the construction failed) */
2272        spin_lock(&sci->sc_state_lock);
2273
2274        if (mode == SC_LSEG_SR) {
2275                sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2276                sci->sc_seq_done = sci->sc_seq_accepted;
2277                nilfs_segctor_wakeup(sci, err);
2278                sci->sc_flush_request = 0;
2279        } else {
2280                if (mode == SC_FLUSH_FILE)
2281                        sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2282                else if (mode == SC_FLUSH_DAT)
2283                        sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2284
2285                /* re-enable timer if checkpoint creation was not done */
2286                if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2287                    time_before(jiffies, sci->sc_timer.expires))
2288                        add_timer(&sci->sc_timer);
2289        }
2290        spin_unlock(&sci->sc_state_lock);
2291}
2292
2293/**
2294 * nilfs_segctor_construct - form logs and write them to disk
2295 * @sci: segment constructor object
2296 * @mode: mode of log forming
2297 */
2298static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2299{
2300        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2301        struct nilfs_super_block **sbp;
2302        int err = 0;
2303
2304        nilfs_segctor_accept(sci);
2305
2306        if (nilfs_discontinued(nilfs))
2307                mode = SC_LSEG_SR;
2308        if (!nilfs_segctor_confirm(sci))
2309                err = nilfs_segctor_do_construct(sci, mode);
2310
2311        if (likely(!err)) {
2312                if (mode != SC_FLUSH_DAT)
2313                        atomic_set(&nilfs->ns_ndirtyblks, 0);
2314                if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2315                    nilfs_discontinued(nilfs)) {
2316                        down_write(&nilfs->ns_sem);
2317                        err = -EIO;
2318                        sbp = nilfs_prepare_super(sci->sc_super,
2319                                                  nilfs_sb_will_flip(nilfs));
2320                        if (likely(sbp)) {
2321                                nilfs_set_log_cursor(sbp[0], nilfs);
2322                                err = nilfs_commit_super(sci->sc_super,
2323                                                         NILFS_SB_COMMIT);
2324                        }
2325                        up_write(&nilfs->ns_sem);
2326                }
2327        }
2328
2329        nilfs_segctor_notify(sci, mode, err);
2330        return err;
2331}
2332
2333static void nilfs_construction_timeout(unsigned long data)
2334{
2335        struct task_struct *p = (struct task_struct *)data;
2336        wake_up_process(p);
2337}
2338
2339static void
2340nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2341{
2342        struct nilfs_inode_info *ii, *n;
2343
2344        list_for_each_entry_safe(ii, n, head, i_dirty) {
2345                if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2346                        continue;
2347                list_del_init(&ii->i_dirty);
2348                truncate_inode_pages(&ii->vfs_inode.i_data, 0);
2349                nilfs_btnode_cache_clear(&ii->i_btnode_cache);
2350                iput(&ii->vfs_inode);
2351        }
2352}
2353
2354int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2355                         void **kbufs)
2356{
2357        struct the_nilfs *nilfs = sb->s_fs_info;
2358        struct nilfs_sc_info *sci = nilfs->ns_writer;
2359        struct nilfs_transaction_info ti;
2360        int err;
2361
2362        if (unlikely(!sci))
2363                return -EROFS;
2364
2365        nilfs_transaction_lock(sb, &ti, 1);
2366
2367        err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2368        if (unlikely(err))
2369                goto out_unlock;
2370
2371        err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
2372        if (unlikely(err)) {
2373                nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
2374                goto out_unlock;
2375        }
2376
2377        sci->sc_freesegs = kbufs[4];
2378        sci->sc_nfreesegs = argv[4].v_nmembs;
2379        list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2380
2381        for (;;) {
2382                err = nilfs_segctor_construct(sci, SC_LSEG_SR);
2383                nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2384
2385                if (likely(!err))
2386                        break;
2387
2388                nilfs_warning(sb, __func__,
2389                              "segment construction failed. (err=%d)", err);
2390                set_current_state(TASK_INTERRUPTIBLE);
2391                schedule_timeout(sci->sc_interval);
2392        }
2393        if (nilfs_test_opt(nilfs, DISCARD)) {
2394                int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2395                                                 sci->sc_nfreesegs);
2396                if (ret) {
2397                        printk(KERN_WARNING
2398                               "NILFS warning: error %d on discard request, "
2399                               "turning discards off for the device\n", ret);
2400                        nilfs_clear_opt(nilfs, DISCARD);
2401                }
2402        }
2403
2404 out_unlock:
2405        sci->sc_freesegs = NULL;
2406        sci->sc_nfreesegs = 0;
2407        nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2408        nilfs_transaction_unlock(sb);
2409        return err;
2410}
2411
2412static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2413{
2414        struct nilfs_transaction_info ti;
2415
2416        nilfs_transaction_lock(sci->sc_super, &ti, 0);
2417        nilfs_segctor_construct(sci, mode);
2418
2419        /*
2420         * Unclosed segment should be retried.  We do this using sc_timer.
2421         * Timeout of sc_timer will invoke complete construction which leads
2422         * to close the current logical segment.
2423         */
2424        if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2425                nilfs_segctor_start_timer(sci);
2426
2427        nilfs_transaction_unlock(sci->sc_super);
2428}
2429
2430static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2431{
2432        int mode = 0;
2433        int err;
2434
2435        spin_lock(&sci->sc_state_lock);
2436        mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2437                SC_FLUSH_DAT : SC_FLUSH_FILE;
2438        spin_unlock(&sci->sc_state_lock);
2439
2440        if (mode) {
2441                err = nilfs_segctor_do_construct(sci, mode);
2442
2443                spin_lock(&sci->sc_state_lock);
2444                sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2445                        ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2446                spin_unlock(&sci->sc_state_lock);
2447        }
2448        clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2449}
2450
2451static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2452{
2453        if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2454            time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2455                if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2456                        return SC_FLUSH_FILE;
2457                else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2458                        return SC_FLUSH_DAT;
2459        }
2460        return SC_LSEG_SR;
2461}
2462
2463/**
2464 * nilfs_segctor_thread - main loop of the segment constructor thread.
2465 * @arg: pointer to a struct nilfs_sc_info.
2466 *
2467 * nilfs_segctor_thread() initializes a timer and serves as a daemon
2468 * to execute segment constructions.
2469 */
2470static int nilfs_segctor_thread(void *arg)
2471{
2472        struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2473        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2474        int timeout = 0;
2475
2476        sci->sc_timer.data = (unsigned long)current;
2477        sci->sc_timer.function = nilfs_construction_timeout;
2478
2479        /* start sync. */
2480        sci->sc_task = current;
2481        wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2482        printk(KERN_INFO
2483               "segctord starting. Construction interval = %lu seconds, "
2484               "CP frequency < %lu seconds\n",
2485               sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2486
2487        spin_lock(&sci->sc_state_lock);
2488 loop:
2489        for (;;) {
2490                int mode;
2491
2492                if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2493                        goto end_thread;
2494
2495                if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2496                        mode = SC_LSEG_SR;
2497                else if (!sci->sc_flush_request)
2498                        break;
2499                else
2500                        mode = nilfs_segctor_flush_mode(sci);
2501
2502                spin_unlock(&sci->sc_state_lock);
2503                nilfs_segctor_thread_construct(sci, mode);
2504                spin_lock(&sci->sc_state_lock);
2505                timeout = 0;
2506        }
2507
2508
2509        if (freezing(current)) {
2510                spin_unlock(&sci->sc_state_lock);
2511                try_to_freeze();
2512                spin_lock(&sci->sc_state_lock);
2513        } else {
2514                DEFINE_WAIT(wait);
2515                int should_sleep = 1;
2516
2517                prepare_to_wait(&sci->sc_wait_daemon, &wait,
2518                                TASK_INTERRUPTIBLE);
2519
2520                if (sci->sc_seq_request != sci->sc_seq_done)
2521                        should_sleep = 0;
2522                else if (sci->sc_flush_request)
2523                        should_sleep = 0;
2524                else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2525                        should_sleep = time_before(jiffies,
2526                                        sci->sc_timer.expires);
2527
2528                if (should_sleep) {
2529                        spin_unlock(&sci->sc_state_lock);
2530                        schedule();
2531                        spin_lock(&sci->sc_state_lock);
2532                }
2533                finish_wait(&sci->sc_wait_daemon, &wait);
2534                timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2535                           time_after_eq(jiffies, sci->sc_timer.expires));
2536
2537                if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2538                        set_nilfs_discontinued(nilfs);
2539        }
2540        goto loop;
2541
2542 end_thread:
2543        spin_unlock(&sci->sc_state_lock);
2544
2545        /* end sync. */
2546        sci->sc_task = NULL;
2547        wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2548        return 0;
2549}
2550
2551static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2552{
2553        struct task_struct *t;
2554
2555        t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2556        if (IS_ERR(t)) {
2557                int err = PTR_ERR(t);
2558
2559                printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
2560                       err);
2561                return err;
2562        }
2563        wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2564        return 0;
2565}
2566
2567static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2568        __acquires(&sci->sc_state_lock)
2569        __releases(&sci->sc_state_lock)
2570{
2571        sci->sc_state |= NILFS_SEGCTOR_QUIT;
2572
2573        while (sci->sc_task) {
2574                wake_up(&sci->sc_wait_daemon);
2575                spin_unlock(&sci->sc_state_lock);
2576                wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2577                spin_lock(&sci->sc_state_lock);
2578        }
2579}
2580
2581/*
2582 * Setup & clean-up functions
2583 */
2584static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2585                                               struct nilfs_root *root)
2586{
2587        struct the_nilfs *nilfs = sb->s_fs_info;
2588        struct nilfs_sc_info *sci;
2589
2590        sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2591        if (!sci)
2592                return NULL;
2593
2594        sci->sc_super = sb;
2595
2596        nilfs_get_root(root);
2597        sci->sc_root = root;
2598
2599        init_waitqueue_head(&sci->sc_wait_request);
2600        init_waitqueue_head(&sci->sc_wait_daemon);
2601        init_waitqueue_head(&sci->sc_wait_task);
2602        spin_lock_init(&sci->sc_state_lock);
2603        INIT_LIST_HEAD(&sci->sc_dirty_files);
2604        INIT_LIST_HEAD(&sci->sc_segbufs);
2605        INIT_LIST_HEAD(&sci->sc_write_logs);
2606        INIT_LIST_HEAD(&sci->sc_gc_inodes);
2607        INIT_LIST_HEAD(&sci->sc_iput_queue);
2608        INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
2609        init_timer(&sci->sc_timer);
2610
2611        sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2612        sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2613        sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2614
2615        if (nilfs->ns_interval)
2616                sci->sc_interval = HZ * nilfs->ns_interval;
2617        if (nilfs->ns_watermark)
2618                sci->sc_watermark = nilfs->ns_watermark;
2619        return sci;
2620}
2621
2622static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2623{
2624        int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
2625
2626        /* The segctord thread was stopped and its timer was removed.
2627           But some tasks remain. */
2628        do {
2629                struct nilfs_transaction_info ti;
2630
2631                nilfs_transaction_lock(sci->sc_super, &ti, 0);
2632                ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2633                nilfs_transaction_unlock(sci->sc_super);
2634
2635                flush_work(&sci->sc_iput_work);
2636
2637        } while (ret && retrycount-- > 0);
2638}
2639
2640/**
2641 * nilfs_segctor_destroy - destroy the segment constructor.
2642 * @sci: nilfs_sc_info
2643 *
2644 * nilfs_segctor_destroy() kills the segctord thread and frees
2645 * the nilfs_sc_info struct.
2646 * Caller must hold the segment semaphore.
2647 */
2648static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2649{
2650        struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2651        int flag;
2652
2653        up_write(&nilfs->ns_segctor_sem);
2654
2655        spin_lock(&sci->sc_state_lock);
2656        nilfs_segctor_kill_thread(sci);
2657        flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
2658                || sci->sc_seq_request != sci->sc_seq_done);
2659        spin_unlock(&sci->sc_state_lock);
2660
2661        if (flush_work(&sci->sc_iput_work))
2662                flag = true;
2663
2664        if (flag || !nilfs_segctor_confirm(sci))
2665                nilfs_segctor_write_out(sci);
2666
2667        if (!list_empty(&sci->sc_dirty_files)) {
2668                nilfs_warning(sci->sc_super, __func__,
2669                              "dirty file(s) after the final construction\n");
2670                nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2671        }
2672
2673        if (!list_empty(&sci->sc_iput_queue)) {
2674                nilfs_warning(sci->sc_super, __func__,
2675                              "iput queue is not empty\n");
2676                nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
2677        }
2678
2679        WARN_ON(!list_empty(&sci->sc_segbufs));
2680        WARN_ON(!list_empty(&sci->sc_write_logs));
2681
2682        nilfs_put_root(sci->sc_root);
2683
2684        down_write(&nilfs->ns_segctor_sem);
2685
2686        del_timer_sync(&sci->sc_timer);
2687        kfree(sci);
2688}
2689
2690/**
2691 * nilfs_attach_log_writer - attach log writer
2692 * @sb: super block instance
2693 * @root: root object of the current filesystem tree
2694 *
2695 * This allocates a log writer object, initializes it, and starts the
2696 * log writer.
2697 *
2698 * Return Value: On success, 0 is returned. On error, one of the following
2699 * negative error code is returned.
2700 *
2701 * %-ENOMEM - Insufficient memory available.
2702 */
2703int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2704{
2705        struct the_nilfs *nilfs = sb->s_fs_info;
2706        int err;
2707
2708        if (nilfs->ns_writer) {
2709                /*
2710                 * This happens if the filesystem was remounted
2711                 * read/write after nilfs_error degenerated it into a
2712                 * read-only mount.
2713                 */
2714                nilfs_detach_log_writer(sb);
2715        }
2716
2717        nilfs->ns_writer = nilfs_segctor_new(sb, root);
2718        if (!nilfs->ns_writer)
2719                return -ENOMEM;
2720
2721        err = nilfs_segctor_start_thread(nilfs->ns_writer);
2722        if (err) {
2723                kfree(nilfs->ns_writer);
2724                nilfs->ns_writer = NULL;
2725        }
2726        return err;
2727}
2728
2729/**
2730 * nilfs_detach_log_writer - destroy log writer
2731 * @sb: super block instance
2732 *
2733 * This kills log writer daemon, frees the log writer object, and
2734 * destroys list of dirty files.
2735 */
2736void nilfs_detach_log_writer(struct super_block *sb)
2737{
2738        struct the_nilfs *nilfs = sb->s_fs_info;
2739        LIST_HEAD(garbage_list);
2740
2741        down_write(&nilfs->ns_segctor_sem);
2742        if (nilfs->ns_writer) {
2743                nilfs_segctor_destroy(nilfs->ns_writer);
2744                nilfs->ns_writer = NULL;
2745        }
2746
2747        /* Force to free the list of dirty files */
2748        spin_lock(&nilfs->ns_inode_lock);
2749        if (!list_empty(&nilfs->ns_dirty_files)) {
2750                list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2751                nilfs_warning(sb, __func__,
2752                              "Hit dirty file after stopped log writer\n");
2753        }
2754        spin_unlock(&nilfs->ns_inode_lock);
2755        up_write(&nilfs->ns_segctor_sem);
2756
2757        nilfs_dispose_list(nilfs, &garbage_list, 1);
2758}
2759