linux/fs/gfs2/super.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/time.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/kernel.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
#include "sys.h"
#include "xattr.h"
#include "lops.h"

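/*
 * The possible fates of a dinode at eviction time, as decided by
 * evict_should_delete() below.
 */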
enum dinode_demise {
        SHOULD_DELETE_DINODE,
        SHOULD_NOT_DELETE_DINODE,
        SHOULD_DEFER_EVICTION,
};

/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 */

void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
        struct list_head list;
        struct gfs2_jdesc *jd;

        spin_lock(&sdp->sd_jindex_spin);
        list_add(&list, &sdp->sd_jindex_list);
        list_del_init(&sdp->sd_jindex_list);
        sdp->sd_journals = 0;
        spin_unlock(&sdp->sd_jindex_spin);

        sdp->sd_jdesc = NULL;
        while (!list_empty(&list)) {
                jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
                gfs2_free_journal_extents(jd);
                list_del(&jd->jd_list);
                iput(jd->jd_inode);
                jd->jd_inode = NULL;
                kfree(jd);
        }
}

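/* Find the descriptor for journal @jid; called with sd_jindex_spin held. */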
static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
        struct gfs2_jdesc *jd;

        list_for_each_entry(jd, head, jd_list) {
                if (jd->jd_jid == jid)
                        return jd;
        }
        return NULL;
}

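/**
 * gfs2_jdesc_find - Look up the descriptor for a given journal id
 * @sdp: The GFS2 superblock
 * @jid: The journal id
 *
 * Returns: the journal descriptor, or NULL if no such journal exists
 */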
struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
{
        struct gfs2_jdesc *jd;

        spin_lock(&sdp->sd_jindex_spin);
        jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
        spin_unlock(&sdp->sd_jindex_spin);

        return jd;
}

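/**
 * gfs2_jdesc_check - Sanity-check a journal and cache its size in blocks
 * @jd: The journal descriptor
 *
 * The journal must be between 8 MiB and 1 GiB in size and must be fully
 * allocated on disk.
 *
 * Returns: 0 on success, -EIO if the journal is inconsistent
 */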
int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
        struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
        u64 size = i_size_read(jd->jd_inode);

        if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
                return -EIO;

        jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;

        if (gfs2_write_alloc_required(ip, 0, size)) {
                gfs2_consist_inode(ip);
                return -EIO;
        }

        return 0;
}

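/* Start the logd and quotad kernel threads for this filesystem. */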
static int init_threads(struct gfs2_sbd *sdp)
{
        struct task_struct *p;
        int error = 0;

        p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
                fs_err(sdp, "can't start logd thread: %d\n", error);
                return error;
        }
        sdp->sd_logd_process = p;

        p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
                fs_err(sdp, "can't start quotad thread: %d\n", error);
                goto fail;
        }
        sdp->sd_quotad_process = p;
        return 0;

fail:
        kthread_stop(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;
        return error;
}

/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
        struct gfs2_glock *j_gl = ip->i_gl;
        struct gfs2_holder freeze_gh;
        struct gfs2_log_header_host head;
        int error;

        error = init_threads(sdp);
        if (error)
                return error;

        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
                                   LM_FLAG_NOEXP | GL_EXACT,
                                   &freeze_gh);
        if (error)
                goto fail_threads;

        j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
        if (gfs2_withdrawn(sdp)) {
                error = -EIO;
                goto fail;
        }

        error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
        if (error || gfs2_withdrawn(sdp))
                goto fail;

        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                gfs2_consist(sdp);
                error = -EIO;
                goto fail;
        }

        /*  Initialize the log sequence and pointers from the log head  */
        sdp->sd_log_sequence = head.lh_sequence + 1;
        gfs2_log_pointers_init(sdp, head.lh_blkno);

        error = gfs2_quota_init(sdp);
        if (error || gfs2_withdrawn(sdp))
                goto fail;

        set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        gfs2_glock_dq_uninit(&freeze_gh);

        return 0;

fail:
        gfs2_glock_dq_uninit(&freeze_gh);
fail_threads:
        if (sdp->sd_quotad_process)
                kthread_stop(sdp->sd_quotad_process);
        sdp->sd_quotad_process = NULL;
        if (sdp->sd_logd_process)
                kthread_stop(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;
        return error;
}

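/*
 * Endianness converters between the on-disk (big-endian) and in-core
 * forms of a statfs change block.
 */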
void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
        const struct gfs2_statfs_change *str = buf;

        sc->sc_total = be64_to_cpu(str->sc_total);
        sc->sc_free = be64_to_cpu(str->sc_free);
        sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
}

void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
        struct gfs2_statfs_change *str = buf;

        str->sc_total = cpu_to_be64(sc->sc_total);
        str->sc_free = cpu_to_be64(sc->sc_free);
        str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
}

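/**
 * gfs2_statfs_init - Read in the master and local statfs change blocks
 * @sdp: the filesystem
 *
 * Spectator mounts have no local statfs file, so only the master copy
 * is read in that case.
 *
 * Returns: errno
 */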
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct buffer_head *m_bh, *l_bh;
        struct gfs2_holder gh;
        int error;

        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
                return error;

        error = gfs2_meta_inode_buffer(m_ip, &m_bh);
        if (error)
                goto out;

        if (sdp->sd_args.ar_spectator) {
                spin_lock(&sdp->sd_statfs_spin);
                gfs2_statfs_change_in(m_sc, m_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                spin_unlock(&sdp->sd_statfs_spin);
        } else {
                error = gfs2_meta_inode_buffer(l_ip, &l_bh);
                if (error)
                        goto out_m_bh;

                spin_lock(&sdp->sd_statfs_spin);
                gfs2_statfs_change_in(m_sc, m_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                gfs2_statfs_change_in(l_sc, l_bh->b_data +
                                      sizeof(struct gfs2_dinode));
                spin_unlock(&sdp->sd_statfs_spin);

                brelse(l_bh);
        }

out_m_bh:
        brelse(m_bh);
out:
        gfs2_glock_dq_uninit(&gh);
        return 0;
}

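/**
 * gfs2_statfs_change - Record an allocation change in the local statfs file
 * @sdp: the filesystem
 * @total: the change in the total block count
 * @free: the change in the free block count
 * @dinodes: the change in the dinode count
 *
 * Changes accumulate in this node's local statfs file and are only folded
 * back into the master statfs file by gfs2_statfs_sync().  With the
 * statfs_percent mount option set, quotad is woken to sync early once the
 * accumulated change in free space exceeds that percentage of the master
 * free count.  For example, allocating one block as a dinode amounts
 * (roughly) to gfs2_statfs_change(sdp, 0, -1, +1).
 */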
void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
                        s64 dinodes)
{
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct buffer_head *l_bh;
        s64 x, y;
        int need_sync = 0;
        int error;

        error = gfs2_meta_inode_buffer(l_ip, &l_bh);
        if (error)
                return;

        gfs2_trans_add_meta(l_ip->i_gl, l_bh);

        spin_lock(&sdp->sd_statfs_spin);
        l_sc->sc_total += total;
        l_sc->sc_free += free;
        l_sc->sc_dinodes += dinodes;
        gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
        if (sdp->sd_args.ar_statfs_percent) {
                x = 100 * l_sc->sc_free;
                y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
                if (x >= y || x <= -y)
                        need_sync = 1;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        brelse(l_bh);
        if (need_sync)
                gfs2_wake_up_statfs(sdp);
}

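/**
 * update_statfs - Fold the local statfs changes into the master statfs file
 * @sdp: the filesystem
 * @m_bh: the master statfs buffer
 * @l_bh: the local statfs buffer
 *
 * Adds both buffers to the current transaction, adds the local changes to
 * the master copy, and zeroes the local changes again.
 */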
void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
                   struct buffer_head *l_bh)
{
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

        gfs2_trans_add_meta(l_ip->i_gl, l_bh);
        gfs2_trans_add_meta(m_ip->i_gl, m_bh);

        spin_lock(&sdp->sd_statfs_spin);
        m_sc->sc_total += l_sc->sc_total;
        m_sc->sc_free += l_sc->sc_free;
        m_sc->sc_dinodes += l_sc->sc_dinodes;
        memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
        memset(l_bh->b_data + sizeof(struct gfs2_dinode),
               0, sizeof(struct gfs2_statfs_change));
        gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
        spin_unlock(&sdp->sd_statfs_spin);
}

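/**
 * gfs2_statfs_sync - Sync this node's statfs changes into the master file
 * @sb: the superblock
 * @type: unused
 *
 * Called, among others, from gfs2_make_fs_ro() below.  Does nothing if
 * there are no accumulated local changes.
 *
 * Returns: errno
 */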
int gfs2_statfs_sync(struct super_block *sb, int type)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_holder gh;
        struct buffer_head *m_bh, *l_bh;
        int error;

        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
                goto out;

        error = gfs2_meta_inode_buffer(m_ip, &m_bh);
        if (error)
                goto out_unlock;

        spin_lock(&sdp->sd_statfs_spin);
        gfs2_statfs_change_in(m_sc, m_bh->b_data +
                              sizeof(struct gfs2_dinode));
        if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
                spin_unlock(&sdp->sd_statfs_spin);
                goto out_bh;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        error = gfs2_meta_inode_buffer(l_ip, &l_bh);
        if (error)
                goto out_bh;

        error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
        if (error)
                goto out_bh2;

        update_statfs(sdp, m_bh, l_bh);
        sdp->sd_statfs_force_sync = 0;

        gfs2_trans_end(sdp);

out_bh2:
        brelse(l_bh);
out_bh:
        brelse(m_bh);
out_unlock:
        gfs2_glock_dq_uninit(&gh);
out:
        return error;
}

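/* One entry per journal: the shared glock we hold on that journal's inode. */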
struct lfcc {
        struct list_head list;
        struct gfs2_holder gh;
};

/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 *
 * Returns: errno
 */

static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip;
        struct gfs2_jdesc *jd;
        struct lfcc *lfcc;
        LIST_HEAD(list);
        struct gfs2_log_header_host lh;
        int error;

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
                if (!lfcc) {
                        error = -ENOMEM;
                        goto out;
                }
                ip = GFS2_I(jd->jd_inode);
                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
                if (error) {
                        kfree(lfcc);
                        goto out;
                }
                list_add(&lfcc->list, &list);
        }

        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
                                   LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
        if (error)
                goto out;

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                error = gfs2_jdesc_check(jd);
                if (error)
                        break;
                error = gfs2_find_jhead(jd, &lh, false);
                if (error)
                        break;
                if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                        error = -EBUSY;
                        break;
                }
        }

        if (error)
                gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);

out:
        while (!list_empty(&list)) {
                lfcc = list_first_entry(&list, struct lfcc, list);
                list_del(&lfcc->list);
                gfs2_glock_dq_uninit(&lfcc->gh);
                kfree(lfcc);
        }
        return error;
}

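/**
 * gfs2_dinode_out - Write an inode's fields into its on-disk dinode
 * @ip: the in-core inode
 * @buf: the on-disk dinode buffer to fill in
 */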
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
        struct gfs2_dinode *str = buf;

        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
        str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
        str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
        str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
        str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
        str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);

        str->di_goal_meta = cpu_to_be64(ip->i_goal);
        str->di_goal_data = cpu_to_be64(ip->i_goal);
        str->di_generation = cpu_to_be64(ip->i_generation);

        str->di_flags = cpu_to_be32(ip->i_diskflags);
        str->di_height = cpu_to_be16(ip->i_height);
        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
                                             !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
                                             GFS2_FORMAT_DE : 0);
        str->di_depth = cpu_to_be16(ip->i_depth);
        str->di_entries = cpu_to_be32(ip->i_entries);

        str->di_eattr = cpu_to_be64(ip->i_eattr);
        str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
        str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
        str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
}

/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @wbc: The writeback control structure
 *
 * Returns: errno
 */

static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
        struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
        int ret = 0;
        bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));

        if (flush_all)
                gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
                               GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_WRITE_INODE);
        if (bdi->wb.dirty_exceeded)
                gfs2_ail1_flush(sdp, wbc);
        else
                filemap_fdatawrite(metamapping);
        if (flush_all)
                ret = filemap_fdatawait(metamapping);
        if (ret)
                mark_inode_dirty_sync(inode);
        else {
                spin_lock(&inode->i_lock);
                if (!(inode->i_state & I_DIRTY))
                        gfs2_ordered_del_inode(ip);
                spin_unlock(&inode->i_lock);
        }
        return ret;
}

/**
 * gfs2_dirty_inode - check for atime updates
 * @inode: The inode in question
 * @flags: The type of dirty
 *
 * Unfortunately it can be called under any combination of inode
 * glock and transaction lock, so we have to check carefully.
 *
 * At the moment this deals only with atime - it should be possible
 * to expand that role in future, once a review of the locking has
 * been carried out.
 */

static void gfs2_dirty_inode(struct inode *inode, int flags)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *bh;
        struct gfs2_holder gh;
        int need_unlock = 0;
        int need_endtrans = 0;
        int ret;

        if (!(flags & I_DIRTY_INODE))
                return;
        if (unlikely(gfs2_withdrawn(sdp)))
                return;
        if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
                if (ret) {
                        fs_err(sdp, "dirty_inode: glock %d\n", ret);
                        gfs2_dump_glock(NULL, ip->i_gl, true);
                        return;
                }
                need_unlock = 1;
        } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
                return;

        if (current->journal_info == NULL) {
                ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
                if (ret) {
                        fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
                        goto out;
                }
                need_endtrans = 1;
        }

        ret = gfs2_meta_inode_buffer(ip, &bh);
        if (ret == 0) {
                gfs2_trans_add_meta(ip->i_gl, bh);
                gfs2_dinode_out(ip, bh->b_data);
                brelse(bh);
        }

        if (need_endtrans)
                gfs2_trans_end(sdp);
out:
        if (need_unlock)
                gfs2_glock_dq_uninit(&gh);
}

/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Returns: errno
 */

int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
        struct gfs2_holder freeze_gh;
        int error = 0;
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        gfs2_holder_mark_uninitialized(&freeze_gh);
        if (sdp->sd_freeze_gl &&
            !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
                if (!log_write_allowed) {
                        error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
                                                   LM_ST_SHARED, LM_FLAG_TRY |
                                                   LM_FLAG_NOEXP | GL_EXACT,
                                                   &freeze_gh);
                        if (error == GLR_TRYFAILED)
                                error = 0;
                } else {
                        error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
                                                   LM_ST_SHARED,
                                                   LM_FLAG_NOEXP | GL_EXACT,
                                                   &freeze_gh);
                        if (error && !gfs2_withdrawn(sdp))
                                return error;
                }
        }

        gfs2_flush_delete_work(sdp);
        if (!log_write_allowed && current == sdp->sd_quotad_process)
                fs_warn(sdp, "The quotad daemon is withdrawing.\n");
        else if (sdp->sd_quotad_process)
                kthread_stop(sdp->sd_quotad_process);
        sdp->sd_quotad_process = NULL;

        if (!log_write_allowed && current == sdp->sd_logd_process)
                fs_warn(sdp, "The logd daemon is withdrawing.\n");
        else if (sdp->sd_logd_process)
                kthread_stop(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;

        if (log_write_allowed) {
                gfs2_quota_sync(sdp->sd_vfs, 0);
                gfs2_statfs_sync(sdp->sd_vfs, 0);

                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                               GFS2_LFC_MAKE_FS_RO);
                wait_event(sdp->sd_reserving_log_wait,
                           atomic_read(&sdp->sd_reserving_log) == 0);
                gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
                                 sdp->sd_jdesc->jd_blocks);
        } else {
                wait_event_timeout(sdp->sd_reserving_log_wait,
                                   atomic_read(&sdp->sd_reserving_log) == 0,
                                   HZ * 5);
        }
        if (gfs2_holder_initialized(&freeze_gh))
                gfs2_glock_dq_uninit(&freeze_gh);

        gfs2_quota_cleanup(sdp);

        if (!log_write_allowed)
                sdp->sd_vfs->s_flags |= SB_RDONLY;

        return error;
}

/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */

static void gfs2_put_super(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;
        struct gfs2_jdesc *jd;

        /* No more recovery requests */
        set_bit(SDF_NORECOVERY, &sdp->sd_flags);
        smp_mb();

        /* Wait on outstanding recovery */
restart:
        spin_lock(&sdp->sd_jindex_spin);
        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
                        continue;
                spin_unlock(&sdp->sd_jindex_spin);
                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
                            TASK_UNINTERRUPTIBLE);
                goto restart;
        }
        spin_unlock(&sdp->sd_jindex_spin);

        if (!sb_rdonly(sb)) {
                error = gfs2_make_fs_ro(sdp);
                if (error)
                        gfs2_io_error(sdp);
        }
        WARN_ON(gfs2_withdrawing(sdp));

        /*  At this point, we're through modifying the disk  */

        /*  Release stuff  */

        iput(sdp->sd_jindex);
        iput(sdp->sd_statfs_inode);
        iput(sdp->sd_rindex);
        iput(sdp->sd_quota_inode);

        gfs2_glock_put(sdp->sd_rename_gl);
        gfs2_glock_put(sdp->sd_freeze_gl);

        if (!sdp->sd_args.ar_spectator) {
                if (gfs2_holder_initialized(&sdp->sd_journal_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
                if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
                gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
                gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
                free_local_statfs_inodes(sdp);
                iput(sdp->sd_qc_inode);
        }

        gfs2_glock_dq_uninit(&sdp->sd_live_gh);
        gfs2_clear_rgrpd(sdp);
        gfs2_jindex_free(sdp);
        /*  Take apart glock structures and buffer lists  */
        gfs2_gl_hash_clear(sdp);
        truncate_inode_pages_final(&sdp->sd_aspace);
        gfs2_delete_debugfs_file(sdp);
        /*  Unmount the locking protocol  */
        gfs2_lm_unmount(sdp);

        /*  At this point, we're through participating in the lockspace  */
        gfs2_sys_fs_del(sdp);
        free_sbd(sdp);
}

/**
 * gfs2_sync_fs - sync the filesystem
 * @sb: the superblock
 * @wait: true to wait for completion
 *
 * Flushes the log to disk.
 */

static int gfs2_sync_fs(struct super_block *sb, int wait)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;

        gfs2_quota_sync(sb, -1);
        if (wait)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_SYNC_FS);
        return sdp->sd_log_error;
}

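/**
 * gfs2_freeze_func - Reacquire the freeze glock and thaw the filesystem
 * @work: the sd_freeze_work work item
 */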
void gfs2_freeze_func(struct work_struct *work)
{
        int error;
        struct gfs2_holder freeze_gh;
        struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
        struct super_block *sb = sdp->sd_vfs;

        atomic_inc(&sb->s_active);
        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
                                   LM_FLAG_NOEXP | GL_EXACT, &freeze_gh);
        if (error) {
                fs_info(sdp, "GFS2: couldn't get freeze lock: %d\n", error);
                gfs2_assert_withdraw(sdp, 0);
        } else {
                atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
                error = thaw_super(sb);
                if (error) {
                        fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n",
                                error);
                        gfs2_assert_withdraw(sdp, 0);
                }
                gfs2_glock_dq_uninit(&freeze_gh);
        }
        deactivate_super(sb);
        clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
        wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN);
}

/**
 * gfs2_freeze - prevent further writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_freeze(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error = 0;

        mutex_lock(&sdp->sd_freeze_mutex);
        if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
                goto out;

        for (;;) {
                if (gfs2_withdrawn(sdp)) {
                        error = -EINVAL;
                        goto out;
                }

                error = gfs2_lock_fs_check_clean(sdp);
                if (!error)
                        break;

                if (error == -EBUSY)
                        fs_err(sdp, "waiting for recovery before freeze\n");
                else if (error == -EIO) {
                        fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
                               "to recovery error.\n");
                        goto out;
                } else {
                        fs_err(sdp, "error freezing FS: %d\n", error);
                }
                fs_err(sdp, "retrying...\n");
                msleep(1000);
        }
        set_bit(SDF_FS_FROZEN, &sdp->sd_flags);
out:
        mutex_unlock(&sdp->sd_freeze_mutex);
        return error;
}

/**
 * gfs2_unfreeze - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 */

static int gfs2_unfreeze(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;

        mutex_lock(&sdp->sd_freeze_mutex);
        if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
            !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
                mutex_unlock(&sdp->sd_freeze_mutex);
                return 0;
        }

        gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
        mutex_unlock(&sdp->sd_freeze_mutex);
        return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
}

/**
 * statfs_slow_fill - fill in the sc structure for a given rgrp
 * @rgd: the resource group
 * @sc: the sc structure
 *
 * Returns: 0
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
{
        gfs2_rgrp_verify(rgd);
        sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_free;
        sc->sc_dinodes += rgd->rd_dinodes;
        return 0;
}

/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Walks all resource groups, taking their glocks in shared mode in
 * batches of up to 64 asynchronous requests.  A pending signal aborts
 * the scan with -ERESTARTSYS.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */

static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
        struct gfs2_rgrpd *rgd_next;
        struct gfs2_holder *gha, *gh;
        unsigned int slots = 64;
        unsigned int x;
        int done;
        int error = 0, err;

        memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
        gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
        if (!gha)
                return -ENOMEM;
        for (x = 0; x < slots; x++)
                gfs2_holder_mark_uninitialized(gha + x);

        rgd_next = gfs2_rgrpd_get_first(sdp);

        for (;;) {
                done = 1;

                for (x = 0; x < slots; x++) {
                        gh = gha + x;

                        if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
                                err = gfs2_glock_wait(gh);
                                if (err) {
                                        gfs2_holder_uninit(gh);
                                        error = err;
                                } else {
                                        if (!error) {
                                                struct gfs2_rgrpd *rgd =
                                                        gfs2_glock2rgrp(gh->gh_gl);

                                                error = statfs_slow_fill(rgd, sc);
                                        }
                                        gfs2_glock_dq_uninit(gh);
                                }
                        }

                        if (gfs2_holder_initialized(gh))
                                done = 0;
                        else if (rgd_next && !error) {
                                error = gfs2_glock_nq_init(rgd_next->rd_gl,
                                                           LM_ST_SHARED,
                                                           GL_ASYNC,
                                                           gh);
                                rgd_next = gfs2_rgrpd_get_next(rgd_next);
                                done = 0;
                        }

                        if (signal_pending(current))
                                error = -ERESTARTSYS;
                }

                if (done)
                        break;

                yield();
        }

        kfree(gha);
        return error;
}

/**
 * gfs2_statfs_i - Do a statfs
 * @sdp: the filesystem
 * @sc: the sc structure
 *
 * Returns: errno
 */

static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;

        spin_lock(&sdp->sd_statfs_spin);

        *sc = *m_sc;
        sc->sc_total += l_sc->sc_total;
        sc->sc_free += l_sc->sc_free;
        sc->sc_dinodes += l_sc->sc_dinodes;

        spin_unlock(&sdp->sd_statfs_spin);

        if (sc->sc_free < 0)
                sc->sc_free = 0;
        if (sc->sc_free > sc->sc_total)
                sc->sc_free = sc->sc_total;
        if (sc->sc_dinodes < 0)
                sc->sc_dinodes = 0;

        return 0;
}

/**
 * gfs2_statfs - Gather and return stats about the filesystem
 * @dentry: The dentry being statted
 * @buf: The kstatfs buffer to fill in
 *
 * Returns: 0 on success or error code
 */

static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_statfs_change_host sc;
        int error;

        error = gfs2_rindex_update(sdp);
        if (error)
                return error;

        if (gfs2_tune_get(sdp, gt_statfs_slow))
                error = gfs2_statfs_slow(sdp, &sc);
        else
                error = gfs2_statfs_i(sdp, &sc);

        if (error)
                return error;

        buf->f_type = GFS2_MAGIC;
        buf->f_bsize = sdp->sd_sb.sb_bsize;
        buf->f_blocks = sc.sc_total;
        buf->f_bfree = sc.sc_free;
        buf->f_bavail = sc.sc_free;
        buf->f_files = sc.sc_dinodes + sc.sc_free;
        buf->f_ffree = sc.sc_free;
        buf->f_namelen = GFS2_FNAMESIZE;

        return 0;
}

/**
 * gfs2_drop_inode - Drop an inode (test for remote unlink)
 * @inode: The inode to drop
 *
 * If we've received a callback on an iopen lock then it's because a
 * remote node tried to deallocate the inode but failed due to this node
 * still having the inode open. Here we mark the link count zero
 * since we know that it must have reached zero if the GLF_DEMOTE flag
 * is set on the iopen glock. If we didn't do a disk read since the
 * remote node removed the final link then we might otherwise miss
 * this event. This check ensures that this node will deallocate the
 * inode's blocks, or alternatively pass the baton on to another
 * node for later deallocation.
 */

static int gfs2_drop_inode(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
            inode->i_nlink &&
            gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
                if (test_bit(GLF_DEMOTE, &gl->gl_flags))
                        clear_nlink(inode);
        }

        /*
         * When an inode's link count has dropped to zero under memory
         * pressure, defer deleting the inode to the delete workqueue.  This
         * avoids calling into DLM under memory pressure, which can deadlock.
         */
        if (!inode->i_nlink &&
            unlikely(current->flags & PF_MEMALLOC) &&
            gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

                gfs2_glock_hold(gl);
                if (!gfs2_queue_delete_work(gl, 0))
                        gfs2_glock_queue_put(gl);
                return false;
        }

        return generic_drop_inode(inode);
}

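/* Return 1 if @d2 is @d1 itself or an ancestor of @d1, otherwise 0. */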
static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
{
        do {
                if (d1 == d2)
                        return 1;
                d1 = d1->d_parent;
        } while (!IS_ROOT(d1));
        return 0;
}

/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @root: root of this (sub)tree
 *
 * Returns: 0 on success or error code
 */

static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
        struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
        struct gfs2_args *args = &sdp->sd_args;
        int val;

        if (is_ancestor(root, sdp->sd_master_dir))
                seq_puts(s, ",meta");
        if (args->ar_lockproto[0])
                seq_show_option(s, "lockproto", args->ar_lockproto);
        if (args->ar_locktable[0])
                seq_show_option(s, "locktable", args->ar_locktable);
        if (args->ar_hostdata[0])
                seq_show_option(s, "hostdata", args->ar_hostdata);
        if (args->ar_spectator)
                seq_puts(s, ",spectator");
        if (args->ar_localflocks)
                seq_puts(s, ",localflocks");
        if (args->ar_debug)
                seq_puts(s, ",debug");
        if (args->ar_posix_acl)
                seq_puts(s, ",acl");
        if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
                const char *state;

                switch (args->ar_quota) {
                case GFS2_QUOTA_OFF:
                        state = "off";
                        break;
                case GFS2_QUOTA_ACCOUNT:
                        state = "account";
                        break;
                case GFS2_QUOTA_ON:
                        state = "on";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",quota=%s", state);
        }
        if (args->ar_suiddir)
                seq_puts(s, ",suiddir");
        if (args->ar_data != GFS2_DATA_DEFAULT) {
                const char *state;

                switch (args->ar_data) {
                case GFS2_DATA_WRITEBACK:
                        state = "writeback";
                        break;
                case GFS2_DATA_ORDERED:
                        state = "ordered";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",data=%s", state);
        }
        if (args->ar_discard)
                seq_puts(s, ",discard");
        val = sdp->sd_tune.gt_logd_secs;
        if (val != 30)
                seq_printf(s, ",commit=%d", val);
        val = sdp->sd_tune.gt_statfs_quantum;
        if (val != 30)
                seq_printf(s, ",statfs_quantum=%d", val);
        else if (sdp->sd_tune.gt_statfs_slow)
                seq_puts(s, ",statfs_quantum=0");
        val = sdp->sd_tune.gt_quota_quantum;
        if (val != 60)
                seq_printf(s, ",quota_quantum=%d", val);
        if (args->ar_statfs_percent)
                seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
        if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
                const char *state;

                switch (args->ar_errors) {
                case GFS2_ERRORS_WITHDRAW:
                        state = "withdraw";
                        break;
                case GFS2_ERRORS_PANIC:
                        state = "panic";
                        break;
                default:
                        state = "unknown";
                        break;
                }
                seq_printf(s, ",errors=%s", state);
        }
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
                seq_puts(s, ",nobarrier");
        if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
                seq_puts(s, ",demote_interface_used");
        if (args->ar_rgrplvb)
                seq_puts(s, ",rgrplvb");
        if (args->ar_loccookie)
                seq_puts(s, ",loccookie");
        return 0;
}

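/* Drop any remaining cached data and metadata pages of a deallocated inode. */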
static void gfs2_final_release_pages(struct gfs2_inode *ip)
{
        struct inode *inode = &ip->i_inode;
        struct gfs2_glock *gl = ip->i_gl;

        truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
        truncate_inode_pages(&inode->i_data, 0);

        if (atomic_read(&gl->gl_revokes) == 0) {
                clear_bit(GLF_LFLUSH, &gl->gl_flags);
                clear_bit(GLF_DIRTY, &gl->gl_flags);
        }
}

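/**
 * gfs2_dinode_dealloc - Free the dinode block itself
 * @ip: the inode
 *
 * All of the inode's data and metadata blocks must already have been
 * freed, so the dinode is expected to be the last block still charged
 * to the inode.
 *
 * Returns: errno
 */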
static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd;
        struct gfs2_holder gh;
        int error;

        if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
                gfs2_consist_inode(ip);
                return -EIO;
        }

        error = gfs2_rindex_update(sdp);
        if (error)
                return error;

        error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
        if (error)
                return error;

        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
        if (!rgd) {
                gfs2_consist_inode(ip);
                error = -EIO;
                goto out_qs;
        }

        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
        if (error)
                goto out_qs;

        error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
                                 sdp->sd_jdesc->jd_blocks);
        if (error)
                goto out_rg_gunlock;

        gfs2_free_di(rgd, ip);

        gfs2_final_release_pages(ip);

        gfs2_trans_end(sdp);

out_rg_gunlock:
        gfs2_glock_dq_uninit(&gh);
out_qs:
        gfs2_quota_unhold(ip);
        return error;
}

/**
 * gfs2_glock_put_eventually - Put a glock, deferring if under memory pressure
 * @gl: The glock to put
 *
 * When under memory pressure, trigger a deferred glock put to make sure we
 * won't call into DLM and deadlock.  Otherwise, put the glock directly.
 */

static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
{
        if (current->flags & PF_MEMALLOC)
                gfs2_glock_queue_put(gl);
        else
                gfs2_glock_put(gl);
}

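/**
 * gfs2_upgrade_iopen_glock - Try to upgrade the iopen glock to exclusive
 * @inode: the inode whose iopen glock is currently held in shared mode
 *
 * Returns: true if the exclusive lock was obtained, false otherwise
 */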
static bool gfs2_upgrade_iopen_glock(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder *gh = &ip->i_iopen_gh;
        long timeout = 5 * HZ;
        int error;

        gh->gh_flags |= GL_NOCACHE;
        gfs2_glock_dq_wait(gh);

        /*
         * If there are no other lock holders, we'll get the lock immediately.
         * Otherwise, the other nodes holding the lock will be notified about
         * our locking request.  If they don't have the inode open, they'll
         * evict the cached inode and release the lock.  Otherwise, if they
         * poke the inode glock, we'll take this as an indication that they
         * still need the iopen glock and that they'll take care of deleting
         * the inode when they're done.  As a last resort, if another node
         * keeps holding the iopen glock without showing any activity on the
         * inode glock, we'll eventually time out.
         *
         * Note that we're passing the LM_FLAG_TRY_1CB flag to the first
         * locking request as an optimization to notify lock holders as soon as
         * possible.  Without that flag, they'd be notified implicitly by the
         * second locking request.
         */

        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh);
        error = gfs2_glock_nq(gh);
        if (error != GLR_TRYFAILED)
                return !error;

        gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
        error = gfs2_glock_nq(gh);
        if (error)
                return false;

        timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
                !test_bit(HIF_WAIT, &gh->gh_iflags) ||
                test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags),
                timeout);
        if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
                gfs2_glock_dq(gh);
                return false;
        }
        return true;
}

/**
 * evict_should_delete - determine whether the inode is eligible for deletion
 * @inode: The inode to evict
 * @gh: The glock holder used to lock the inode glock
 *
 * This function determines whether the evicted inode is eligible to be deleted
 * and locks the inode glock.
 *
 * Returns: the fate of the dinode
 */
static enum dinode_demise evict_should_delete(struct inode *inode,
                                              struct gfs2_holder *gh)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int ret;

        if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
                BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
                goto should_delete;
        }

        if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
                return SHOULD_DEFER_EVICTION;

        /* Deletes should never happen under memory pressure anymore.  */
        if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return SHOULD_DEFER_EVICTION;

        /* Must not read inode block until block type has been verified */
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
        if (unlikely(ret)) {
                glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
                ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                return SHOULD_DEFER_EVICTION;
        }

        if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
                return SHOULD_NOT_DELETE_DINODE;
        ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
        if (ret)
                return SHOULD_NOT_DELETE_DINODE;

        if (test_bit(GIF_INVALID, &ip->i_flags)) {
                ret = gfs2_inode_refresh(ip);
                if (ret)
                        return SHOULD_NOT_DELETE_DINODE;
        }

        /*
         * The inode may have been recreated in the meantime.
         */
        if (inode->i_nlink)
                return SHOULD_NOT_DELETE_DINODE;

should_delete:
        if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
            test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
                if (!gfs2_upgrade_iopen_glock(inode)) {
                        gfs2_holder_uninit(&ip->i_iopen_gh);
                        return SHOULD_NOT_DELETE_DINODE;
                }
        }
        return SHOULD_DELETE_DINODE;
}

/**
 * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
 * @inode: The inode to evict
 */
static int evict_unlinked_inode(struct inode *inode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        int ret;

        if (S_ISDIR(inode->i_mode) &&
            (ip->i_diskflags & GFS2_DIF_EXHASH)) {
                ret = gfs2_dir_exhash_dealloc(ip);
                if (ret)
                        goto out;
        }

        if (ip->i_eattr) {
                ret = gfs2_ea_dealloc(ip);
                if (ret)
                        goto out;
        }

        if (!gfs2_is_stuffed(ip)) {
                ret = gfs2_file_dealloc(ip);
                if (ret)
                        goto out;
        }

        /* We're about to clear the bitmap for the dinode, but as soon as we
           do, gfs2_create_inode can create another inode at the same block
           location and try to set gl_object again. We clear gl_object here so
           that subsequent inode creates don't see an old gl_object. */
        glock_clear_object(ip->i_gl, ip);
        ret = gfs2_dinode_dealloc(ip);
        gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
out:
        return ret;
}

/**
 * evict_linked_inode - evict an inode whose dinode has not been unlinked
 * @inode: The inode to evict
 */
static int evict_linked_inode(struct inode *inode)
{
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct address_space *metamapping;
        int ret;

        gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_EVICT_INODE);
        metamapping = gfs2_glock2aspace(ip->i_gl);
        if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
                filemap_fdatawrite(metamapping);
                filemap_fdatawait(metamapping);
        }
        write_inode_now(inode, 1);
        gfs2_ail_flush(ip->i_gl, 0);

        ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
        if (ret)
                return ret;

        /* Needs to be done before glock release & also in a transaction */
        truncate_inode_pages(&inode->i_data, 0);
        truncate_inode_pages(metamapping, 0);
        gfs2_trans_end(sdp);
        return 0;
}

/**
 * gfs2_evict_inode - Remove an inode from cache
 * @inode: The inode to evict
 *
 * There are three cases to consider:
 * 1. i_nlink == 0, we are the final opener (and must deallocate)
 * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
 * 3. i_nlink > 0
 *
 * If the fs is read only, then we have to treat all cases as per #3
 * since we are unable to do any deallocation. The inode will be
 * deallocated by the next read/write node to attempt an allocation
 * in the same resource group.
 *
 * We have to (at the moment) hold the inode's main lock to cover
 * the gap between unlocking the shared lock on the iopen lock and
 * taking the exclusive lock. I'd rather do a shared -> exclusive
 * conversion on the iopen lock, but we can change that later. This
 * is safe, just less efficient.
 */

static void gfs2_evict_inode(struct inode *inode)
{
        struct super_block *sb = inode->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;

        if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
                clear_inode(inode);
                return;
        }

        if (inode->i_nlink || sb_rdonly(sb))
                goto out;

        gfs2_holder_mark_uninitialized(&gh);
        ret = evict_should_delete(inode, &gh);
        if (ret == SHOULD_DEFER_EVICTION)
                goto out;
        if (ret == SHOULD_DELETE_DINODE)
                ret = evict_unlinked_inode(inode);
        else
                ret = evict_linked_inode(inode);

        if (gfs2_rs_active(&ip->i_res))
                gfs2_rs_deltree(&ip->i_res);

        if (gfs2_holder_initialized(&gh)) {
                glock_clear_object(ip->i_gl, ip);
                gfs2_glock_dq_uninit(&gh);
        }
        if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
                fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
out:
        truncate_inode_pages_final(&inode->i_data);
        if (ip->i_qadata)
                gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
        gfs2_rs_delete(ip, NULL);
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
        if (ip->i_gl) {
                glock_clear_object(ip->i_gl, ip);
                wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
                gfs2_glock_add_to_lru(ip->i_gl);
                gfs2_glock_put_eventually(ip->i_gl);
                ip->i_gl = NULL;
        }
        if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;

                glock_clear_object(gl, ip);
                if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
                        ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                        gfs2_glock_dq(&ip->i_iopen_gh);
                }
                gfs2_glock_hold(gl);
                gfs2_holder_uninit(&ip->i_iopen_gh);
                gfs2_glock_put_eventually(gl);
        }
}

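/* Allocate a gfs2_inode from the inode slab and reset its GFS2-private fields. */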
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
        struct gfs2_inode *ip;

        ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
        if (!ip)
                return NULL;
        ip->i_flags = 0;
        ip->i_gl = NULL;
        gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
        memset(&ip->i_res, 0, sizeof(ip->i_res));
        RB_CLEAR_NODE(&ip->i_res.rs_node);
        ip->i_rahead = 0;
        return &ip->i_inode;
}

static void gfs2_free_inode(struct inode *inode)
{
        kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}

void free_local_statfs_inodes(struct gfs2_sbd *sdp)
{
        struct local_statfs_inode *lsi, *safe;

        /* Run through the statfs inodes list to iput and free memory */
        list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
                if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
                        sdp->sd_sc_inode = NULL; /* belongs to this node */
                if (lsi->si_sc_inode)
                        iput(lsi->si_sc_inode);
                list_del(&lsi->si_list);
                kfree(lsi);
        }
}

struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
                                      unsigned int index)
{
        struct local_statfs_inode *lsi;

        /* Return the local (per node) statfs inode in the
         * sdp->sd_sc_inodes_list corresponding to the 'index'. */
        list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
                if (lsi->si_jid == index)
                        return lsi->si_sc_inode;
        }
        return NULL;
}

const struct super_operations gfs2_super_ops = {
        .alloc_inode            = gfs2_alloc_inode,
        .free_inode             = gfs2_free_inode,
        .write_inode            = gfs2_write_inode,
        .dirty_inode            = gfs2_dirty_inode,
        .evict_inode            = gfs2_evict_inode,
        .put_super              = gfs2_put_super,
        .sync_fs                = gfs2_sync_fs,
        .freeze_super           = gfs2_freeze,
        .thaw_super             = gfs2_unfreeze,
        .statfs                 = gfs2_statfs,
        .drop_inode             = gfs2_drop_inode,
        .show_options           = gfs2_show_options,
};