linux/fs/ext4/file.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/file.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/file.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  ext4 fs regular file handling primitives
  16 *
  17 *  64-bit file support on 64-bit platforms by Jakub Jelinek
  18 *      (jj@sunsite.ms.mff.cuni.cz)
  19 */
  20
  21#include <linux/time.h>
  22#include <linux/fs.h>
  23#include <linux/jbd2.h>
  24#include <linux/mount.h>
  25#include <linux/path.h>
  26#include <linux/quotaops.h>
  27#include "ext4.h"
  28#include "ext4_jbd2.h"
  29#include "xattr.h"
  30#include "acl.h"
  31
  32/*
  33 * Called when an inode is released. Note that this is different
  34 * from ext4_file_open: open gets called at every open, but release
  35 * gets called only when /all/ the files are closed.
  36 */
  37static int ext4_release_file(struct inode *inode, struct file *filp)
  38{
  39        if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
  40                ext4_alloc_da_blocks(inode);
  41                ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
  42        }
  43        /* if we are the last writer on the inode, drop the block reservation */
  44        if ((filp->f_mode & FMODE_WRITE) &&
  45                        (atomic_read(&inode->i_writecount) == 1) &&
  46                        !EXT4_I(inode)->i_reserved_data_blocks)
  47        {
  48                down_write(&EXT4_I(inode)->i_data_sem);
  49                ext4_discard_preallocations(inode);
  50                up_write(&EXT4_I(inode)->i_data_sem);
  51        }
  52        if (is_dx(inode) && filp->private_data)
  53                ext4_htree_free_dir_info(filp->private_data);
  54
  55        return 0;
  56}
  57
  58static void ext4_aiodio_wait(struct inode *inode)
  59{
  60        wait_queue_head_t *wq = ext4_ioend_wq(inode);
  61
  62        wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
  63}
  64
  65/*
  66 * This tests whether the IO in question is block-aligned or not.
  67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
  68 * are converted to written only after the IO is complete.  Until they are
  69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
  70 * it needs to zero out portions of the start and/or end block.  If 2 AIO
  71 * threads are at work on the same unwritten block, they must be synchronized
  72 * or one thread will zero the other's data, causing corruption.
  73 */
  74static int
  75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
  76                   unsigned long nr_segs, loff_t pos)
  77{
  78        struct super_block *sb = inode->i_sb;
  79        int blockmask = sb->s_blocksize - 1;
  80        size_t count = iov_length(iov, nr_segs);
  81        loff_t final_size = pos + count;
  82
  83        if (pos >= inode->i_size)
  84                return 0;
  85
  86        if ((pos & blockmask) || (final_size & blockmask))
  87                return 1;
  88
  89        return 0;
  90}
  91
  92static ssize_t
  93ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
  94                    unsigned long nr_segs, loff_t pos)
  95{
  96        struct file *file = iocb->ki_filp;
  97        struct inode *inode = file->f_mapping->host;
  98        struct blk_plug plug;
  99        int unaligned_aio = 0;
 100        ssize_t ret;
 101        int overwrite = 0;
 102        size_t length = iov_length(iov, nr_segs);
 103
 104        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
 105            !is_sync_kiocb(iocb))
 106                unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
 107
 108        /* Unaligned direct AIO must be serialized; see comment above */
 109        if (unaligned_aio) {
 110                static unsigned long unaligned_warn_time;
 111
 112                /* Warn about this once per day */
 113                if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
 114                        ext4_msg(inode->i_sb, KERN_WARNING,
 115                                 "Unaligned AIO/DIO on inode %ld by %s; "
 116                                 "performance will be poor.",
 117                                 inode->i_ino, current->comm);
 118                mutex_lock(ext4_aio_mutex(inode));
 119                ext4_aiodio_wait(inode);
 120        }
 121
 122        BUG_ON(iocb->ki_pos != pos);
 123
 124        mutex_lock(&inode->i_mutex);
 125        blk_start_plug(&plug);
 126
 127        iocb->private = &overwrite;
 128
 129        /* check whether we do a DIO overwrite or not */
 130        if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
 131            !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
 132                struct ext4_map_blocks map;
 133                unsigned int blkbits = inode->i_blkbits;
 134                int err, len;
 135
 136                map.m_lblk = pos >> blkbits;
 137                map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
 138                        - map.m_lblk;
 139                len = map.m_len;
 140
 141                err = ext4_map_blocks(NULL, inode, &map, 0);
 142                /*
 143                 * 'err==len' means that all of blocks has been preallocated no
 144                 * matter they are initialized or not.  For excluding
 145                 * uninitialized extents, we need to check m_flags.  There are
 146                 * two conditions that indicate for initialized extents.
 147                 * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned;
 148                 * 2) If we do a real lookup, non-flags are returned.
 149                 * So we should check these two conditions.
 150                 */
 151                if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
 152                        overwrite = 1;
 153        }
 154
 155        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 156        mutex_unlock(&inode->i_mutex);
 157
 158        if (ret > 0 || ret == -EIOCBQUEUED) {
 159                ssize_t err;
 160
 161                err = generic_write_sync(file, pos, ret);
 162                if (err < 0 && ret > 0)
 163                        ret = err;
 164        }
 165        blk_finish_plug(&plug);
 166
 167        if (unaligned_aio)
 168                mutex_unlock(ext4_aio_mutex(inode));
 169
 170        return ret;
 171}
 172
 173static ssize_t
 174ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 175                unsigned long nr_segs, loff_t pos)
 176{
 177        struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 178        ssize_t ret;
 179
 180        /*
 181         * If we have encountered a bitmap-format file, the size limit
 182         * is smaller than s_maxbytes, which is for extent-mapped files.
 183         */
 184
 185        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
 186                struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 187                size_t length = iov_length(iov, nr_segs);
 188
 189                if ((pos > sbi->s_bitmap_maxbytes ||
 190                    (pos == sbi->s_bitmap_maxbytes && length > 0)))
 191                        return -EFBIG;
 192
 193                if (pos + length > sbi->s_bitmap_maxbytes) {
 194                        nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
 195                                              sbi->s_bitmap_maxbytes - pos);
 196                }
 197        }
 198
 199        if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
 200                ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
 201        else
 202                ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
 203
 204        return ret;
 205}
 206
 207static const struct vm_operations_struct ext4_file_vm_ops = {
 208        .fault          = filemap_fault,
 209        .page_mkwrite   = ext4_page_mkwrite,
 210};
 211
 212static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 213{
 214        struct address_space *mapping = file->f_mapping;
 215
 216        if (!mapping->a_ops->readpage)
 217                return -ENOEXEC;
 218        file_accessed(file);
 219        vma->vm_ops = &ext4_file_vm_ops;
 220        vma->vm_flags |= VM_CAN_NONLINEAR;
 221        return 0;
 222}
 223
 224static int ext4_file_open(struct inode * inode, struct file * filp)
 225{
 226        struct super_block *sb = inode->i_sb;
 227        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 228        struct ext4_inode_info *ei = EXT4_I(inode);
 229        struct vfsmount *mnt = filp->f_path.mnt;
 230        struct path path;
 231        char buf[64], *cp;
 232
 233        if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
 234                     !(sb->s_flags & MS_RDONLY))) {
 235                sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
 236                /*
 237                 * Sample where the filesystem has been mounted and
 238                 * store it in the superblock for sysadmin convenience
 239                 * when trying to sort through large numbers of block
 240                 * devices or filesystem images.
 241                 */
 242                memset(buf, 0, sizeof(buf));
 243                path.mnt = mnt;
 244                path.dentry = mnt->mnt_root;
 245                cp = d_path(&path, buf, sizeof(buf));
 246                if (!IS_ERR(cp)) {
 247                        handle_t *handle;
 248                        int err;
 249
 250                        handle = ext4_journal_start_sb(sb, 1);
 251                        if (IS_ERR(handle))
 252                                return PTR_ERR(handle);
 253                        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 254                        if (err) {
 255                                ext4_journal_stop(handle);
 256                                return err;
 257                        }
 258                        strlcpy(sbi->s_es->s_last_mounted, cp,
 259                                sizeof(sbi->s_es->s_last_mounted));
 260                        ext4_handle_dirty_super(handle, sb);
 261                        ext4_journal_stop(handle);
 262                }
 263        }
 264        /*
 265         * Set up the jbd2_inode if we are opening the inode for
 266         * writing and the journal is present
 267         */
 268        if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
 269                struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
 270
 271                spin_lock(&inode->i_lock);
 272                if (!ei->jinode) {
 273                        if (!jinode) {
 274                                spin_unlock(&inode->i_lock);
 275                                return -ENOMEM;
 276                        }
 277                        ei->jinode = jinode;
 278                        jbd2_journal_init_jbd_inode(ei->jinode, inode);
 279                        jinode = NULL;
 280                }
 281                spin_unlock(&inode->i_lock);
 282                if (unlikely(jinode != NULL))
 283                        jbd2_free_inode(jinode);
 284        }
 285        return dquot_file_open(inode, filp);
 286}
 287
 288/*
 289 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 290 * by calling generic_file_llseek_size() with the appropriate maxbytes
 291 * value for each.
 292 */
 293loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 294{
 295        struct inode *inode = file->f_mapping->host;
 296        loff_t maxbytes;
 297
 298        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 299                maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
 300        else
 301                maxbytes = inode->i_sb->s_maxbytes;
 302
 303        return generic_file_llseek_size(file, offset, origin,
 304                                        maxbytes, i_size_read(inode));
 305}
 306
 307const struct file_operations ext4_file_operations = {
 308        .llseek         = ext4_llseek,
 309        .read           = do_sync_read,
 310        .write          = do_sync_write,
 311        .aio_read       = generic_file_aio_read,
 312        .aio_write      = ext4_file_write,
 313        .unlocked_ioctl = ext4_ioctl,
 314#ifdef CONFIG_COMPAT
 315        .compat_ioctl   = ext4_compat_ioctl,
 316#endif
 317        .mmap           = ext4_file_mmap,
 318        .open           = ext4_file_open,
 319        .release        = ext4_release_file,
 320        .fsync          = ext4_sync_file,
 321        .splice_read    = generic_file_splice_read,
 322        .splice_write   = generic_file_splice_write,
 323        .fallocate      = ext4_fallocate,
 324};
 325
 326const struct inode_operations ext4_file_inode_operations = {
 327        .setattr        = ext4_setattr,
 328        .getattr        = ext4_getattr,
 329#ifdef CONFIG_EXT4_FS_XATTR
 330        .setxattr       = generic_setxattr,
 331        .getxattr       = generic_getxattr,
 332        .listxattr      = ext4_listxattr,
 333        .removexattr    = generic_removexattr,
 334#endif
 335        .get_acl        = ext4_get_acl,
 336        .fiemap         = ext4_fiemap,
 337};
 338
 339