linux/fs/xfs/xfs_inode_item.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_format.h"
   9#include "xfs_log_format.h"
  10#include "xfs_trans_resv.h"
  11#include "xfs_mount.h"
  12#include "xfs_inode.h"
  13#include "xfs_trans.h"
  14#include "xfs_inode_item.h"
  15#include "xfs_error.h"
  16#include "xfs_trace.h"
  17#include "xfs_trans_priv.h"
  18#include "xfs_buf_item.h"
  19#include "xfs_log.h"
  20
  21#include <linux/iversion.h>
  22
/*
 * Zone that inode log items are allocated from in xfs_inode_item_init() and
 * returned to in xfs_inode_item_destroy().
 */
kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */
  24
  25static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
  26{
  27        return container_of(lip, struct xfs_inode_log_item, ili_item);
  28}
  29
  30STATIC void
  31xfs_inode_item_data_fork_size(
  32        struct xfs_inode_log_item *iip,
  33        int                     *nvecs,
  34        int                     *nbytes)
  35{
  36        struct xfs_inode        *ip = iip->ili_inode;
  37
  38        switch (ip->i_d.di_format) {
  39        case XFS_DINODE_FMT_EXTENTS:
  40                if ((iip->ili_fields & XFS_ILOG_DEXT) &&
  41                    ip->i_d.di_nextents > 0 &&
  42                    ip->i_df.if_bytes > 0) {
  43                        /* worst case, doesn't subtract delalloc extents */
  44                        *nbytes += XFS_IFORK_DSIZE(ip);
  45                        *nvecs += 1;
  46                }
  47                break;
  48        case XFS_DINODE_FMT_BTREE:
  49                if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
  50                    ip->i_df.if_broot_bytes > 0) {
  51                        *nbytes += ip->i_df.if_broot_bytes;
  52                        *nvecs += 1;
  53                }
  54                break;
  55        case XFS_DINODE_FMT_LOCAL:
  56                if ((iip->ili_fields & XFS_ILOG_DDATA) &&
  57                    ip->i_df.if_bytes > 0) {
  58                        *nbytes += roundup(ip->i_df.if_bytes, 4);
  59                        *nvecs += 1;
  60                }
  61                break;
  62
  63        case XFS_DINODE_FMT_DEV:
  64                break;
  65        default:
  66                ASSERT(0);
  67                break;
  68        }
  69}
  70
  71STATIC void
  72xfs_inode_item_attr_fork_size(
  73        struct xfs_inode_log_item *iip,
  74        int                     *nvecs,
  75        int                     *nbytes)
  76{
  77        struct xfs_inode        *ip = iip->ili_inode;
  78
  79        switch (ip->i_d.di_aformat) {
  80        case XFS_DINODE_FMT_EXTENTS:
  81                if ((iip->ili_fields & XFS_ILOG_AEXT) &&
  82                    ip->i_d.di_anextents > 0 &&
  83                    ip->i_afp->if_bytes > 0) {
  84                        /* worst case, doesn't subtract unused space */
  85                        *nbytes += XFS_IFORK_ASIZE(ip);
  86                        *nvecs += 1;
  87                }
  88                break;
  89        case XFS_DINODE_FMT_BTREE:
  90                if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
  91                    ip->i_afp->if_broot_bytes > 0) {
  92                        *nbytes += ip->i_afp->if_broot_bytes;
  93                        *nvecs += 1;
  94                }
  95                break;
  96        case XFS_DINODE_FMT_LOCAL:
  97                if ((iip->ili_fields & XFS_ILOG_ADATA) &&
  98                    ip->i_afp->if_bytes > 0) {
  99                        *nbytes += roundup(ip->i_afp->if_bytes, 4);
 100                        *nvecs += 1;
 101                }
 102                break;
 103        default:
 104                ASSERT(0);
 105                break;
 106        }
 107}
 108
 109/*
 110 * This returns the number of iovecs needed to log the given inode item.
 111 *
 112 * We need one iovec for the inode log format structure, one for the
 113 * inode core, and possibly one for the inode data/extents/b-tree root
 114 * and one for the inode attribute data/extents/b-tree root.
 115 */
 116STATIC void
 117xfs_inode_item_size(
 118        struct xfs_log_item     *lip,
 119        int                     *nvecs,
 120        int                     *nbytes)
 121{
 122        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 123        struct xfs_inode        *ip = iip->ili_inode;
 124
 125        *nvecs += 2;
 126        *nbytes += sizeof(struct xfs_inode_log_format) +
 127                   xfs_log_dinode_size(ip->i_d.di_version);
 128
 129        xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
 130        if (XFS_IFORK_Q(ip))
 131                xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
 132}
 133
 134STATIC void
 135xfs_inode_item_format_data_fork(
 136        struct xfs_inode_log_item *iip,
 137        struct xfs_inode_log_format *ilf,
 138        struct xfs_log_vec      *lv,
 139        struct xfs_log_iovec    **vecp)
 140{
 141        struct xfs_inode        *ip = iip->ili_inode;
 142        size_t                  data_bytes;
 143
 144        switch (ip->i_d.di_format) {
 145        case XFS_DINODE_FMT_EXTENTS:
 146                iip->ili_fields &=
 147                        ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
 148
 149                if ((iip->ili_fields & XFS_ILOG_DEXT) &&
 150                    ip->i_d.di_nextents > 0 &&
 151                    ip->i_df.if_bytes > 0) {
 152                        struct xfs_bmbt_rec *p;
 153
 154                        ASSERT(xfs_iext_count(&ip->i_df) > 0);
 155
 156                        p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
 157                        data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
 158                        xlog_finish_iovec(lv, *vecp, data_bytes);
 159
 160                        ASSERT(data_bytes <= ip->i_df.if_bytes);
 161
 162                        ilf->ilf_dsize = data_bytes;
 163                        ilf->ilf_size++;
 164                } else {
 165                        iip->ili_fields &= ~XFS_ILOG_DEXT;
 166                }
 167                break;
 168        case XFS_DINODE_FMT_BTREE:
 169                iip->ili_fields &=
 170                        ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
 171
 172                if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
 173                    ip->i_df.if_broot_bytes > 0) {
 174                        ASSERT(ip->i_df.if_broot != NULL);
 175                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
 176                                        ip->i_df.if_broot,
 177                                        ip->i_df.if_broot_bytes);
 178                        ilf->ilf_dsize = ip->i_df.if_broot_bytes;
 179                        ilf->ilf_size++;
 180                } else {
 181                        ASSERT(!(iip->ili_fields &
 182                                 XFS_ILOG_DBROOT));
 183                        iip->ili_fields &= ~XFS_ILOG_DBROOT;
 184                }
 185                break;
 186        case XFS_DINODE_FMT_LOCAL:
 187                iip->ili_fields &=
 188                        ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
 189                if ((iip->ili_fields & XFS_ILOG_DDATA) &&
 190                    ip->i_df.if_bytes > 0) {
 191                        /*
 192                         * Round i_bytes up to a word boundary.
 193                         * The underlying memory is guaranteed to
 194                         * to be there by xfs_idata_realloc().
 195                         */
 196                        data_bytes = roundup(ip->i_df.if_bytes, 4);
 197                        ASSERT(ip->i_df.if_real_bytes == 0 ||
 198                               ip->i_df.if_real_bytes >= data_bytes);
 199                        ASSERT(ip->i_df.if_u1.if_data != NULL);
 200                        ASSERT(ip->i_d.di_size > 0);
 201                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
 202                                        ip->i_df.if_u1.if_data, data_bytes);
 203                        ilf->ilf_dsize = (unsigned)data_bytes;
 204                        ilf->ilf_size++;
 205                } else {
 206                        iip->ili_fields &= ~XFS_ILOG_DDATA;
 207                }
 208                break;
 209        case XFS_DINODE_FMT_DEV:
 210                iip->ili_fields &=
 211                        ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
 212                if (iip->ili_fields & XFS_ILOG_DEV)
 213                        ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
 214                break;
 215        default:
 216                ASSERT(0);
 217                break;
 218        }
 219}
 220
 221STATIC void
 222xfs_inode_item_format_attr_fork(
 223        struct xfs_inode_log_item *iip,
 224        struct xfs_inode_log_format *ilf,
 225        struct xfs_log_vec      *lv,
 226        struct xfs_log_iovec    **vecp)
 227{
 228        struct xfs_inode        *ip = iip->ili_inode;
 229        size_t                  data_bytes;
 230
 231        switch (ip->i_d.di_aformat) {
 232        case XFS_DINODE_FMT_EXTENTS:
 233                iip->ili_fields &=
 234                        ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);
 235
 236                if ((iip->ili_fields & XFS_ILOG_AEXT) &&
 237                    ip->i_d.di_anextents > 0 &&
 238                    ip->i_afp->if_bytes > 0) {
 239                        struct xfs_bmbt_rec *p;
 240
 241                        ASSERT(xfs_iext_count(ip->i_afp) ==
 242                                ip->i_d.di_anextents);
 243
 244                        p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
 245                        data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
 246                        xlog_finish_iovec(lv, *vecp, data_bytes);
 247
 248                        ilf->ilf_asize = data_bytes;
 249                        ilf->ilf_size++;
 250                } else {
 251                        iip->ili_fields &= ~XFS_ILOG_AEXT;
 252                }
 253                break;
 254        case XFS_DINODE_FMT_BTREE:
 255                iip->ili_fields &=
 256                        ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);
 257
 258                if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
 259                    ip->i_afp->if_broot_bytes > 0) {
 260                        ASSERT(ip->i_afp->if_broot != NULL);
 261
 262                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
 263                                        ip->i_afp->if_broot,
 264                                        ip->i_afp->if_broot_bytes);
 265                        ilf->ilf_asize = ip->i_afp->if_broot_bytes;
 266                        ilf->ilf_size++;
 267                } else {
 268                        iip->ili_fields &= ~XFS_ILOG_ABROOT;
 269                }
 270                break;
 271        case XFS_DINODE_FMT_LOCAL:
 272                iip->ili_fields &=
 273                        ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);
 274
 275                if ((iip->ili_fields & XFS_ILOG_ADATA) &&
 276                    ip->i_afp->if_bytes > 0) {
 277                        /*
 278                         * Round i_bytes up to a word boundary.
 279                         * The underlying memory is guaranteed to
 280                         * to be there by xfs_idata_realloc().
 281                         */
 282                        data_bytes = roundup(ip->i_afp->if_bytes, 4);
 283                        ASSERT(ip->i_afp->if_real_bytes == 0 ||
 284                               ip->i_afp->if_real_bytes >= data_bytes);
 285                        ASSERT(ip->i_afp->if_u1.if_data != NULL);
 286                        xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
 287                                        ip->i_afp->if_u1.if_data,
 288                                        data_bytes);
 289                        ilf->ilf_asize = (unsigned)data_bytes;
 290                        ilf->ilf_size++;
 291                } else {
 292                        iip->ili_fields &= ~XFS_ILOG_ADATA;
 293                }
 294                break;
 295        default:
 296                ASSERT(0);
 297                break;
 298        }
 299}
 300
 301static void
 302xfs_inode_to_log_dinode(
 303        struct xfs_inode        *ip,
 304        struct xfs_log_dinode   *to,
 305        xfs_lsn_t               lsn)
 306{
 307        struct xfs_icdinode     *from = &ip->i_d;
 308        struct inode            *inode = VFS_I(ip);
 309
 310        to->di_magic = XFS_DINODE_MAGIC;
 311
 312        to->di_version = from->di_version;
 313        to->di_format = from->di_format;
 314        to->di_uid = from->di_uid;
 315        to->di_gid = from->di_gid;
 316        to->di_projid_lo = from->di_projid_lo;
 317        to->di_projid_hi = from->di_projid_hi;
 318
 319        memset(to->di_pad, 0, sizeof(to->di_pad));
 320        memset(to->di_pad3, 0, sizeof(to->di_pad3));
 321        to->di_atime.t_sec = inode->i_atime.tv_sec;
 322        to->di_atime.t_nsec = inode->i_atime.tv_nsec;
 323        to->di_mtime.t_sec = inode->i_mtime.tv_sec;
 324        to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
 325        to->di_ctime.t_sec = inode->i_ctime.tv_sec;
 326        to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
 327        to->di_nlink = inode->i_nlink;
 328        to->di_gen = inode->i_generation;
 329        to->di_mode = inode->i_mode;
 330
 331        to->di_size = from->di_size;
 332        to->di_nblocks = from->di_nblocks;
 333        to->di_extsize = from->di_extsize;
 334        to->di_nextents = from->di_nextents;
 335        to->di_anextents = from->di_anextents;
 336        to->di_forkoff = from->di_forkoff;
 337        to->di_aformat = from->di_aformat;
 338        to->di_dmevmask = from->di_dmevmask;
 339        to->di_dmstate = from->di_dmstate;
 340        to->di_flags = from->di_flags;
 341
 342        /* log a dummy value to ensure log structure is fully initialised */
 343        to->di_next_unlinked = NULLAGINO;
 344
 345        if (from->di_version == 3) {
 346                to->di_changecount = inode_peek_iversion(inode);
 347                to->di_crtime.t_sec = from->di_crtime.t_sec;
 348                to->di_crtime.t_nsec = from->di_crtime.t_nsec;
 349                to->di_flags2 = from->di_flags2;
 350                to->di_cowextsize = from->di_cowextsize;
 351                to->di_ino = ip->i_ino;
 352                to->di_lsn = lsn;
 353                memset(to->di_pad2, 0, sizeof(to->di_pad2));
 354                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 355                to->di_flushiter = 0;
 356        } else {
 357                to->di_flushiter = from->di_flushiter;
 358        }
 359}
 360
 361/*
 362 * Format the inode core. Current timestamp data is only in the VFS inode
 363 * fields, so we need to grab them from there. Hence rather than just copying
 364 * the XFS inode core structure, format the fields directly into the iovec.
 365 */
 366static void
 367xfs_inode_item_format_core(
 368        struct xfs_inode        *ip,
 369        struct xfs_log_vec      *lv,
 370        struct xfs_log_iovec    **vecp)
 371{
 372        struct xfs_log_dinode   *dic;
 373
 374        dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
 375        xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
 376        xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
 377}
 378
 379/*
 380 * This is called to fill in the vector of log iovecs for the given inode
 381 * log item.  It fills the first item with an inode log format structure,
 382 * the second with the on-disk inode structure, and a possible third and/or
 383 * fourth with the inode data/extents/b-tree root and inode attributes
 384 * data/extents/b-tree root.
 385 *
 386 * Note: Always use the 64 bit inode log format structure so we don't
 387 * leave an uninitialised hole in the format item on 64 bit systems. Log
 388 * recovery on 32 bit systems handles this just fine, so there's no reason
 389 * for not using an initialising the properly padded structure all the time.
 390 */
 391STATIC void
 392xfs_inode_item_format(
 393        struct xfs_log_item     *lip,
 394        struct xfs_log_vec      *lv)
 395{
 396        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 397        struct xfs_inode        *ip = iip->ili_inode;
 398        struct xfs_log_iovec    *vecp = NULL;
 399        struct xfs_inode_log_format *ilf;
 400
 401        ASSERT(ip->i_d.di_version > 1);
 402
 403        ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
 404        ilf->ilf_type = XFS_LI_INODE;
 405        ilf->ilf_ino = ip->i_ino;
 406        ilf->ilf_blkno = ip->i_imap.im_blkno;
 407        ilf->ilf_len = ip->i_imap.im_len;
 408        ilf->ilf_boffset = ip->i_imap.im_boffset;
 409        ilf->ilf_fields = XFS_ILOG_CORE;
 410        ilf->ilf_size = 2; /* format + core */
 411
 412        /*
 413         * make sure we don't leak uninitialised data into the log in the case
 414         * when we don't log every field in the inode.
 415         */
 416        ilf->ilf_dsize = 0;
 417        ilf->ilf_asize = 0;
 418        ilf->ilf_pad = 0;
 419        memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));
 420
 421        xlog_finish_iovec(lv, vecp, sizeof(*ilf));
 422
 423        xfs_inode_item_format_core(ip, lv, &vecp);
 424        xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
 425        if (XFS_IFORK_Q(ip)) {
 426                xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
 427        } else {
 428                iip->ili_fields &=
 429                        ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
 430        }
 431
 432        /* update the format with the exact fields we actually logged */
 433        ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
 434}
 435
 436/*
 437 * This is called to pin the inode associated with the inode log
 438 * item in memory so it cannot be written out.
 439 */
 440STATIC void
 441xfs_inode_item_pin(
 442        struct xfs_log_item     *lip)
 443{
 444        struct xfs_inode        *ip = INODE_ITEM(lip)->ili_inode;
 445
 446        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 447
 448        trace_xfs_inode_pin(ip, _RET_IP_);
 449        atomic_inc(&ip->i_pincount);
 450}
 451
 452
 453/*
 454 * This is called to unpin the inode associated with the inode log
 455 * item which was previously pinned with a call to xfs_inode_item_pin().
 456 *
 457 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
 458 */
 459STATIC void
 460xfs_inode_item_unpin(
 461        struct xfs_log_item     *lip,
 462        int                     remove)
 463{
 464        struct xfs_inode        *ip = INODE_ITEM(lip)->ili_inode;
 465
 466        trace_xfs_inode_unpin(ip, _RET_IP_);
 467        ASSERT(atomic_read(&ip->i_pincount) > 0);
 468        if (atomic_dec_and_test(&ip->i_pincount))
 469                wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
 470}
 471
 472/*
 473 * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
 474 * have been failed during writeback
 475 *
 476 * This informs the AIL that the inode is already flush locked on the next push,
 477 * and acquires a hold on the buffer to ensure that it isn't reclaimed before
 478 * dirty data makes it to disk.
 479 */
 480STATIC void
 481xfs_inode_item_error(
 482        struct xfs_log_item     *lip,
 483        struct xfs_buf          *bp)
 484{
 485        ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
 486        xfs_set_li_failed(lip, bp);
 487}
 488
/*
 * Try to push the inode behind this log item towards disk.
 *
 * Called with the AIL lock held; the lock is dropped around the call to
 * xfs_iflush() and retaken before returning.  Nothing in here blocks on the
 * buffer lock, the inode ILOCK or the flush lock: each is only tried, and
 * the push backs off with a status code when it cannot be taken.
 *
 * Returns:
 *   XFS_ITEM_PINNED   - the inode is pinned, or it is stale
 *   XFS_ITEM_LOCKED   - the failed buffer or the inode ILOCK could not be
 *                       locked without blocking
 *   XFS_ITEM_FLUSHING - the inode is already being flushed, or the buffer
 *                       could not be (re)queued on @buffer_list
 *   XFS_ITEM_SUCCESS  - the buffer was queued on @buffer_list; this is also
 *                       the value returned when xfs_iflush() fails
 */
STATIC uint
xfs_inode_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
		__releases(&lip->li_ailp->ail_lock)
		__acquires(&lip->li_ailp->ail_lock)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_buf		*bp = lip->li_buf;
	uint			rval = XFS_ITEM_SUCCESS;
	int			error;

	/* cheap unlocked check; repeated below once the ILOCK is held */
	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	/*
	 * The buffer containing this item failed to be written back
	 * previously. Resubmit the buffer for IO.
	 */
	if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
		if (!xfs_buf_trylock(bp))
			return XFS_ITEM_LOCKED;

		if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;

		xfs_buf_unlock(bp);
		return rval;
	}

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	/*
	 * Re-check the pincount now that we stabilized the value by
	 * taking the ilock.
	 */
	if (xfs_ipincount(ip) > 0) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Stale inode items should force out the iclog.
	 */
	if (ip->i_flags & XFS_ISTALE) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Someone else is already flushing the inode.  Nothing we can do
	 * here but wait for the flush to finish and remove the item from
	 * the AIL.
	 */
	if (!xfs_iflock_nowait(ip)) {
		rval = XFS_ITEM_FLUSHING;
		goto out_unlock;
	}

	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));

	/* the AIL lock is not held across the flush itself */
	spin_unlock(&lip->li_ailp->ail_lock);

	/* on success bp is replaced with the buffer returned by the flush */
	error = xfs_iflush(ip, &bp);
	if (!error) {
		if (!xfs_buf_delwri_queue(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;
		xfs_buf_relse(bp);
	}

	/* retake the AIL lock before returning, as the annotations promise */
	spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return rval;
}
 567
 568/*
 569 * Unlock the inode associated with the inode log item.
 570 */
 571STATIC void
 572xfs_inode_item_unlock(
 573        struct xfs_log_item     *lip)
 574{
 575        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 576        struct xfs_inode        *ip = iip->ili_inode;
 577        unsigned short          lock_flags;
 578
 579        ASSERT(ip->i_itemp != NULL);
 580        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 581
 582        lock_flags = iip->ili_lock_flags;
 583        iip->ili_lock_flags = 0;
 584        if (lock_flags)
 585                xfs_iunlock(ip, lock_flags);
 586}
 587
 588/*
 589 * This is called to find out where the oldest active copy of the inode log
 590 * item in the on disk log resides now that the last log write of it completed
 591 * at the given lsn.  Since we always re-log all dirty data in an inode, the
 592 * latest copy in the on disk log is the only one that matters.  Therefore,
 593 * simply return the given lsn.
 594 *
 595 * If the inode has been marked stale because the cluster is being freed, we
 596 * don't want to (re-)insert this inode into the AIL. There is a race condition
 597 * where the cluster buffer may be unpinned before the inode is inserted into
 598 * the AIL during transaction committed processing. If the buffer is unpinned
 599 * before the inode item has been committed and inserted, then it is possible
 600 * for the buffer to be written and IO completes before the inode is inserted
 601 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
 602 * AIL which will never get removed. It will, however, get reclaimed which
 603 * triggers an assert in xfs_inode_free() complaining about freein an inode
 604 * still in the AIL.
 605 *
 606 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
 607 * transaction committed code knows that it does not need to do any further
 608 * processing on the item.
 609 */
 610STATIC xfs_lsn_t
 611xfs_inode_item_committed(
 612        struct xfs_log_item     *lip,
 613        xfs_lsn_t               lsn)
 614{
 615        struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 616        struct xfs_inode        *ip = iip->ili_inode;
 617
 618        if (xfs_iflags_test(ip, XFS_ISTALE)) {
 619                xfs_inode_item_unpin(lip, 0);
 620                return -1;
 621        }
 622        return lsn;
 623}
 624
 625STATIC void
 626xfs_inode_item_committing(
 627        struct xfs_log_item     *lip,
 628        xfs_lsn_t               lsn)
 629{
 630        INODE_ITEM(lip)->ili_last_lsn = lsn;
 631}
 632
 633/*
 634 * This is the ops vector shared by all buf log items.
 635 */
 636static const struct xfs_item_ops xfs_inode_item_ops = {
 637        .iop_size       = xfs_inode_item_size,
 638        .iop_format     = xfs_inode_item_format,
 639        .iop_pin        = xfs_inode_item_pin,
 640        .iop_unpin      = xfs_inode_item_unpin,
 641        .iop_unlock     = xfs_inode_item_unlock,
 642        .iop_committed  = xfs_inode_item_committed,
 643        .iop_push       = xfs_inode_item_push,
 644        .iop_committing = xfs_inode_item_committing,
 645        .iop_error      = xfs_inode_item_error
 646};
 647
 648
 649/*
 650 * Initialize the inode log item for a newly allocated (in-core) inode.
 651 */
 652void
 653xfs_inode_item_init(
 654        struct xfs_inode        *ip,
 655        struct xfs_mount        *mp)
 656{
 657        struct xfs_inode_log_item *iip;
 658
 659        ASSERT(ip->i_itemp == NULL);
 660        iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
 661
 662        iip->ili_inode = ip;
 663        xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
 664                                                &xfs_inode_item_ops);
 665}
 666
 667/*
 668 * Free the inode log item and any memory hanging off of it.
 669 */
 670void
 671xfs_inode_item_destroy(
 672        xfs_inode_t     *ip)
 673{
 674        kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
 675        kmem_zone_free(xfs_ili_zone, ip->i_itemp);
 676}
 677
 678
 679/*
 680 * This is the inode flushing I/O completion routine.  It is called
 681 * from interrupt level when the buffer containing the inode is
 682 * flushed to disk.  It is responsible for removing the inode item
 683 * from the AIL if it has not been re-logged, and unlocking the inode's
 684 * flush lock.
 685 *
 686 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
 687 * list for other inodes that will run this function. We remove them from the
 688 * buffer list so we can process all the inode IO completions in one AIL lock
 689 * traversal.
 690 */
 691void
 692xfs_iflush_done(
 693        struct xfs_buf          *bp,
 694        struct xfs_log_item     *lip)
 695{
 696        struct xfs_inode_log_item *iip;
 697        struct xfs_log_item     *blip, *n;
 698        struct xfs_ail          *ailp = lip->li_ailp;
 699        int                     need_ail = 0;
 700        LIST_HEAD(tmp);
 701
 702        /*
 703         * Scan the buffer IO completions for other inodes being completed and
 704         * attach them to the current inode log item.
 705         */
 706
 707        list_add_tail(&lip->li_bio_list, &tmp);
 708
 709        list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
 710                if (lip->li_cb != xfs_iflush_done)
 711                        continue;
 712
 713                list_move_tail(&blip->li_bio_list, &tmp);
 714                /*
 715                 * while we have the item, do the unlocked check for needing
 716                 * the AIL lock.
 717                 */
 718                iip = INODE_ITEM(blip);
 719                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
 720                    test_bit(XFS_LI_FAILED, &blip->li_flags))
 721                        need_ail++;
 722        }
 723
 724        /* make sure we capture the state of the initial inode. */
 725        iip = INODE_ITEM(lip);
 726        if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
 727            test_bit(XFS_LI_FAILED, &lip->li_flags))
 728                need_ail++;
 729
 730        /*
 731         * We only want to pull the item from the AIL if it is
 732         * actually there and its location in the log has not
 733         * changed since we started the flush.  Thus, we only bother
 734         * if the ili_logged flag is set and the inode's lsn has not
 735         * changed.  First we check the lsn outside
 736         * the lock since it's cheaper, and then we recheck while
 737         * holding the lock before removing the inode from the AIL.
 738         */
 739        if (need_ail) {
 740                bool                    mlip_changed = false;
 741
 742                /* this is an opencoded batch version of xfs_trans_ail_delete */
 743                spin_lock(&ailp->ail_lock);
 744                list_for_each_entry(blip, &tmp, li_bio_list) {
 745                        if (INODE_ITEM(blip)->ili_logged &&
 746                            blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
 747                                mlip_changed |= xfs_ail_delete_one(ailp, blip);
 748                        else {
 749                                xfs_clear_li_failed(blip);
 750                        }
 751                }
 752
 753                if (mlip_changed) {
 754                        if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
 755                                xlog_assign_tail_lsn_locked(ailp->ail_mount);
 756                        if (list_empty(&ailp->ail_head))
 757                                wake_up_all(&ailp->ail_empty);
 758                }
 759                spin_unlock(&ailp->ail_lock);
 760
 761                if (mlip_changed)
 762                        xfs_log_space_wake(ailp->ail_mount);
 763        }
 764
 765        /*
 766         * clean up and unlock the flush lock now we are done. We can clear the
 767         * ili_last_fields bits now that we know that the data corresponding to
 768         * them is safely on disk.
 769         */
 770        list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
 771                list_del_init(&blip->li_bio_list);
 772                iip = INODE_ITEM(blip);
 773                iip->ili_logged = 0;
 774                iip->ili_last_fields = 0;
 775                xfs_ifunlock(iip->ili_inode);
 776        }
 777        list_del(&tmp);
 778}
 779
 780/*
 781 * This is the inode flushing abort routine.  It is called from xfs_iflush when
 782 * the filesystem is shutting down to clean up the inode state.  It is
 783 * responsible for removing the inode item from the AIL if it has not been
 784 * re-logged, and unlocking the inode's flush lock.
 785 */
 786void
 787xfs_iflush_abort(
 788        xfs_inode_t             *ip,
 789        bool                    stale)
 790{
 791        xfs_inode_log_item_t    *iip = ip->i_itemp;
 792
 793        if (iip) {
 794                if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
 795                        xfs_trans_ail_remove(&iip->ili_item,
 796                                             stale ? SHUTDOWN_LOG_IO_ERROR :
 797                                                     SHUTDOWN_CORRUPT_INCORE);
 798                }
 799                iip->ili_logged = 0;
 800                /*
 801                 * Clear the ili_last_fields bits now that we know that the
 802                 * data corresponding to them is safely on disk.
 803                 */
 804                iip->ili_last_fields = 0;
 805                /*
 806                 * Clear the inode logging fields so no more flushes are
 807                 * attempted.
 808                 */
 809                iip->ili_fields = 0;
 810                iip->ili_fsync_fields = 0;
 811        }
 812        /*
 813         * Release the inode's flush lock since we're done with it.
 814         */
 815        xfs_ifunlock(ip);
 816}
 817
 818void
 819xfs_istale_done(
 820        struct xfs_buf          *bp,
 821        struct xfs_log_item     *lip)
 822{
 823        xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
 824}
 825
 826/*
 827 * convert an xfs_inode_log_format struct from the old 32 bit version
 828 * (which can have different field alignments) to the native 64 bit version
 829 */
 830int
 831xfs_inode_item_format_convert(
 832        struct xfs_log_iovec            *buf,
 833        struct xfs_inode_log_format     *in_f)
 834{
 835        struct xfs_inode_log_format_32  *in_f32 = buf->i_addr;
 836
 837        if (buf->i_len != sizeof(*in_f32))
 838                return -EFSCORRUPTED;
 839
 840        in_f->ilf_type = in_f32->ilf_type;
 841        in_f->ilf_size = in_f32->ilf_size;
 842        in_f->ilf_fields = in_f32->ilf_fields;
 843        in_f->ilf_asize = in_f32->ilf_asize;
 844        in_f->ilf_dsize = in_f32->ilf_dsize;
 845        in_f->ilf_ino = in_f32->ilf_ino;
 846        memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
 847        in_f->ilf_blkno = in_f32->ilf_blkno;
 848        in_f->ilf_len = in_f32->ilf_len;
 849        in_f->ilf_boffset = in_f32->ilf_boffset;
 850        return 0;
 851}
 852