linux/fs/ocfs2/mmap.c
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * mmap.c
 *
 * Code to deal with the mess that is clustered mmap.
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/signal.h>
#include <linux/rbtree.h>

#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "aops.h"
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "mmap.h"

static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
{
        /* The best way to deal with signals in the vm path is
         * to block them upfront, rather than allowing the
         * locking paths to return -ERESTARTSYS. */
        sigfillset(blocked);

        /* We should technically never get a bad return value
         * from sigprocmask */
        return sigprocmask(SIG_BLOCK, blocked, oldset);
}

static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
{
        return sigprocmask(SIG_SETMASK, oldset, NULL);
}

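/*
 * ->fault handler for mmap'd ocfs2 files. filemap_fault() ends up in
 * the ocfs2 readpage path, which takes cluster locks, so signals are
 * blocked for the duration to keep -ERESTARTSYS out of the fault path.
 */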
static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
{
        sigset_t blocked, oldset;
        int error, ret;

        mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);

        error = ocfs2_vm_op_block_sigs(&blocked, &oldset);
        if (error < 0) {
                mlog_errno(error);
                ret = VM_FAULT_SIGBUS;
                goto out;
        }

        ret = filemap_fault(area, vmf);

        error = ocfs2_vm_op_unblock_sigs(&oldset);
        if (error < 0)
                mlog_errno(error);
out:
        mlog_exit_ptr(vmf->page);
        return ret;
}

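/*
 * Do the real work of making a page writable. The caller holds the
 * inode's cluster lock (di_bh is the locked dinode buffer) and
 * ip_alloc_sem, so i_size and the allocation tree are stable here.
 */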
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
                                struct page *page)
{
        int ret;
        struct address_space *mapping = inode->i_mapping;
        loff_t pos = page_offset(page);
        unsigned int len = PAGE_CACHE_SIZE;
        pgoff_t last_index;
        struct page *locked_page = NULL;
        void *fsdata;
        loff_t size = i_size_read(inode);

        /*
         * Another node might have truncated while we were waiting on
         * cluster locks.
         */
        last_index = size >> PAGE_CACHE_SHIFT;
        if (page->index > last_index) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * The i_size check above doesn't catch the case where nodes
         * truncated and then re-extended the file. We'll re-check the
         * page mapping after taking the page lock inside of
         * ocfs2_write_begin_nolock().
         */
        if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
                /*
                 * The page has been unmapped in
                 * ocfs2_data_downconvert_worker, so return 0 here and
                 * let the VFS retry.
                 */
                ret = 0;
                goto out;
        }

        /*
         * Call ocfs2_write_begin() and ocfs2_write_end() to take
         * advantage of the allocation code there. We pass a write
         * length of the whole page (chopped to i_size) to make sure
         * the whole thing is allocated.
         *
         * Since we know the page is up to date, we don't have to
         * worry about ocfs2_write_begin() skipping some buffer reads
         * because the "write" would invalidate their data.
         */
        if (page->index == last_index)
                len = size & ~PAGE_CACHE_MASK;

        ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
                                       &fsdata, di_bh, page);
        if (ret) {
                if (ret != -ENOSPC)
                        mlog_errno(ret);
                goto out;
        }

        ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
                                     fsdata);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }
        BUG_ON(ret != len);
        ret = 0;
out:
        return ret;
}

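/*
 * ->page_mkwrite handler: a shared writable page is about to be
 * dirtied. Take the cluster lock and ip_alloc_sem so that no other
 * node can truncate underneath us, then let __ocfs2_page_mkwrite()
 * allocate space for the page.
 */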
static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        struct buffer_head *di_bh = NULL;
        sigset_t blocked, oldset;
        int ret, ret2;

        ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
        if (ret < 0) {
                mlog_errno(ret);
                return ret;
        }

        /*
         * The cluster locks taken will block a truncate from another
         * node. Taking the data lock will also ensure that we don't
         * attempt page truncation as part of a downconvert.
         */
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * The alloc sem should be enough to serialize with
         * ocfs2_truncate_file() changing i_size as well as any thread
         * modifying the inode btree.
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);

        ret = __ocfs2_page_mkwrite(inode, di_bh, page);

        up_write(&OCFS2_I(inode)->ip_alloc_sem);

        brelse(di_bh);
        ocfs2_inode_unlock(inode, 1);

out:
        ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
        if (ret2 < 0)
                mlog_errno(ret2);
        if (ret)
                ret = VM_FAULT_SIGBUS;
        return ret;
}

static const struct vm_operations_struct ocfs2_file_vm_ops = {
        .fault          = ocfs2_fault,
        .page_mkwrite   = ocfs2_page_mkwrite,
};

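/*
 * ocfs2_mmap() takes and drops a cluster lock only so that
 * ocfs2_inode_lock_atime() can push out a fresh atime, then installs
 * our vm_ops. The mmap succeeds even if the lock attempt fails;
 * later faults will take whatever locks they need themselves.
 */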
int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
        int ret = 0, lock_level = 0;

        ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode,
                                     file->f_vfsmnt, &lock_level);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }
        ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level);
out:
        vma->vm_ops = &ocfs2_file_vm_ops;
        vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
}