linux/fs/cifs/file.c
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"

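/*
 * Map the POSIX open flags (the O_ACCMODE bits) to the NT generic access
 * rights requested on the wire.  Any other O_ACCMODE encoding falls
 * through to the explicit set of read/write/attribute rights below.
 */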
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

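/*
 * Map open flags to an SMB create disposition.  The more specific flag
 * combinations (O_CREAT | O_EXCL, O_CREAT | O_TRUNC) must be checked
 * before the individual flags they contain.
 */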
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

int cifs_posix_open(const char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_revalidate_mapping(*pinode);
                rc = cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than replacing it as FILE_SUPERSEDE does
 *      (FILE_SUPERSEDE uses the attributes / metadata passed in
 *      on the open call).
 *
 *      O_SYNC is a reasonable match for the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also includes the O_DSYNC bit, so this check catches either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        /* TODO: add support for calling POSIX query info here, but passing in the fid */
        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

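/*
 * Return true if any open fid on this inode holds cached byte-range
 * locks.  Used when deciding whether a read oplock granted by the
 * server has to be reset to None.
 */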
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

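/*
 * Take a write lock by polling with trylock rather than blocking in
 * down_write(), so the caller is never queued on the rwsem's wait
 * list itself (presumably to sidestep a lock-ordering problem around
 * lock_sem).
 */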
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->deferred_close_scheduled = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* If this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        cifs_fscache_release_inode_cookie(inode);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:    if true, offload the final release to a workqueue;
 *              not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

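/*
 * ->open() for regular files.  Reuses a deferred-close cached handle
 * when possible, tries a POSIX open on unix-extension mounts, and
 * otherwise falls back to a regular NT create/open.
 */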
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        void *page;
        const char *full_path;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        if (unlikely(cifs_forced_shutdown(cifs_sb))) {
                free_xid(xid);
                return -EIO;
        }

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        page = alloc_dentry_path();
        full_path = build_path_from_dentry(file_dentry(file), page);
        if (IS_ERR(full_path)) {
                rc = PTR_ERR(full_path);
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        /* Get the cached handle as SMB2 close is deferred */
        rc = cifs_get_readable_path(tcon, full_path, &cfile);
        if (rc == 0) {
                if (file->f_flags == cfile->f_flags) {
                        file->private_data = cfile;
                        spin_lock(&CIFS_I(inode)->deferred_lock);
                        cifs_del_deferred_close(cfile);
                        spin_unlock(&CIFS_I(inode)->deferred_lock);
                        goto out;
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                }
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        free_dentry_path(page);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

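/*
 * Reopen a file handle that was invalidated, e.g. after reconnecting
 * to the server.  If @can_flush is set, write out dirty pages and
 * refresh the inode info before the handle is reused.
 */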
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        void *page;
        const char *full_path;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return 0;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here, because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called; if the server was down, that means we end up here, and we
         * can never tell whether the caller already holds the rename_sem.
         */
        page = alloc_dentry_path();
        full_path = build_path_from_dentry(cfile->dentry, page);
        if (IS_ERR(full_path)) {
                mutex_unlock(&cfile->fh_mutex);
                free_dentry_path(page);
                free_xid(xid);
                return PTR_ERR(full_path);
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through and retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also includes the O_DSYNC bit, so this check catches either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->posix_extensions)
                        rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
                else if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we cannot go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        free_dentry_path(page);
        free_xid(xid);
        return rc;
}

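/*
 * Workqueue callback for deferred closes: remove the entry from the
 * deferred-close list and drop the reference that cifs_close() kept
 * while the close was deferred.
 */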
void smb2_deferred_work_close(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work,
                        struct cifsFileInfo, deferred.work);

        spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        cifs_del_deferred_close(cfile);
        cfile->deferred_close_scheduled = false;
        spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
        _cifsFileInfo_put(cfile, true, false);
}

int cifs_close(struct inode *inode, struct file *file)
{
        struct cifsFileInfo *cfile;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_deferred_close *dclose;

        if (file->private_data != NULL) {
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
                if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
                    cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
                                inode->i_ctime = inode->i_mtime = current_time(inode);
                                cifs_fscache_update_inode_cookie(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
                        if (cfile->deferred_close_scheduled &&
                            delayed_work_pending(&cfile->deferred)) {
                                /*
                                 * If there is no pending work, mod_delayed_work
                                 * queues new work, so take an extra reference
                                 * to avoid a use-after-free.
                                 */
                                if (!mod_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax))
                                        cifsFileInfo_get(cfile);
                        } else {
                                /* Deferred close for files */
                                queue_delayed_work(deferredclose_wq,
                                                &cfile->deferred, cifs_sb->ctx->acregmax);
                                cfile->deferred_close_scheduled = true;
                                spin_unlock(&cinode->deferred_lock);
                                return 0;
                        }
                        spin_unlock(&cinode->deferred_lock);
                        _cifsFileInfo_put(cfile, true, false);
                } else {
                        _cifsFileInfo_put(cfile, true, false);
                        kfree(dclose);
                }
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

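/*
 * After a reconnect, walk the tcon's open file list and reopen every
 * invalidated handle.  Candidates are collected under open_file_lock
 * and reopened afterwards, since cifs_reopen_file() can block.
 */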
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

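/* Allocate and initialize a byte-range lock record for the current task */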
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send the request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

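/* Append a lock to this fid's cached lock list under lock_sem */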
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to send the request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to send a request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to send a request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = FILE_LOCK_DEFERRED + 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        return rc;
}

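/*
 * Send all cached byte-range locks for this fid to the server, batching
 * as many LOCKING_ANDX ranges per request as maxBuf allows.
 */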
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

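/*
 * Derive a stable, obfuscated identifier from the lock owner pointer;
 * used in place of a real pid when pushing POSIX locks to the server.
 */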
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

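/*
 * Snapshot of a POSIX lock, copied under flc_lock so that the actual
 * network requests can be sent after the spinlock is dropped.
 */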
1332struct lock_to_push {
1333        struct list_head llist;
1334        __u64 offset;
1335        __u64 length;
1336        __u32 pid;
1337        __u16 netfid;
1338        __u8 type;
1339};
1340
1341static int
1342cifs_push_posix_locks(struct cifsFileInfo *cfile)
1343{
1344        struct inode *inode = d_inode(cfile->dentry);
1345        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1346        struct file_lock *flock;
1347        struct file_lock_context *flctx = inode->i_flctx;
1348        unsigned int count = 0, i;
1349        int rc = 0, xid, type;
1350        struct list_head locks_to_send, *el;
1351        struct lock_to_push *lck, *tmp;
1352        __u64 length;
1353
1354        xid = get_xid();
1355
1356        if (!flctx)
1357                goto out;
1358
1359        spin_lock(&flctx->flc_lock);
1360        list_for_each(el, &flctx->flc_posix) {
1361                count++;
1362        }
1363        spin_unlock(&flctx->flc_lock);
1364
1365        INIT_LIST_HEAD(&locks_to_send);
1366
1367        /*
1368         * Allocating count locks is enough because no FL_POSIX locks can be
1369         * added to the list while we are holding cinode->lock_sem that
1370         * protects locking operations of this inode.
1371         */
1372        for (i = 0; i < count; i++) {
1373                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1374                if (!lck) {
1375                        rc = -ENOMEM;
1376                        goto err_out;
1377                }
1378                list_add_tail(&lck->llist, &locks_to_send);
1379        }
1380
1381        el = locks_to_send.next;
1382        spin_lock(&flctx->flc_lock);
1383        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1384                if (el == &locks_to_send) {
1385                        /*
1386                         * The list ended. We don't have enough allocated
1387                         * structures - something is really wrong.
1388                         */
1389                        cifs_dbg(VFS, "Can't push all brlocks!\n");
1390                        break;
1391                }
1392                length = 1 + flock->fl_end - flock->fl_start;
1393                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1394                        type = CIFS_RDLCK;
1395                else
1396                        type = CIFS_WRLCK;
1397                lck = list_entry(el, struct lock_to_push, llist);
1398                lck->pid = hash_lockowner(flock->fl_owner);
1399                lck->netfid = cfile->fid.netfid;
1400                lck->length = length;
1401                lck->type = type;
1402                lck->offset = flock->fl_start;
1403        }
1404        spin_unlock(&flctx->flc_lock);
1405
1406        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1407                int stored_rc;
1408
1409                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1410                                             lck->offset, lck->length, NULL,
1411                                             lck->type, 0);
1412                if (stored_rc)
1413                        rc = stored_rc;
1414                list_del(&lck->llist);
1415                kfree(lck);
1416        }
1417
1418out:
1419        free_xid(xid);
1420        return rc;
1421err_out:
1422        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1423                list_del(&lck->llist);
1424                kfree(lck);
1425        }
1426        goto out;
1427}
1428
1429static int
1430cifs_push_locks(struct cifsFileInfo *cfile)
1431{
1432        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1433        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1434        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1435        int rc = 0;
1436
1437        /* we are going to update can_cache_brlcks here - need write access */
1438        cifs_down_write(&cinode->lock_sem);
1439        if (!cinode->can_cache_brlcks) {
1440                up_write(&cinode->lock_sem);
1441                return rc;
1442        }
1443
1444        if (cap_unix(tcon->ses) &&
1445            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1446            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1447                rc = cifs_push_posix_locks(cfile);
1448        else
1449                rc = tcon->ses->server->ops->push_mand_locks(cfile);
1450
1451        cinode->can_cache_brlcks = false;
1452        up_write(&cinode->lock_sem);
1453        return rc;
1454}
1455
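    /*
     * Decode a VFS file_lock into what the protocol layer needs: the lock
     * type bits for this server's dialect plus whether the request is a
     * lock, an unlock, and/or a blocking (wait) operation.
     */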
1456static void
1457cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1458                bool *wait_flag, struct TCP_Server_Info *server)
1459{
1460        if (flock->fl_flags & FL_POSIX)
1461                cifs_dbg(FYI, "Posix\n");
1462        if (flock->fl_flags & FL_FLOCK)
1463                cifs_dbg(FYI, "Flock\n");
1464        if (flock->fl_flags & FL_SLEEP) {
1465                cifs_dbg(FYI, "Blocking lock\n");
1466                *wait_flag = true;
1467        }
1468        if (flock->fl_flags & FL_ACCESS)
1469                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1470        if (flock->fl_flags & FL_LEASE)
1471                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1472        if (flock->fl_flags &
1473            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1474               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1475                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1476
1477        *type = server->vals->large_lock_type;
1478        if (flock->fl_type == F_WRLCK) {
1479                cifs_dbg(FYI, "F_WRLCK\n");
1480                *type |= server->vals->exclusive_lock_type;
1481                *lock = 1;
1482        } else if (flock->fl_type == F_UNLCK) {
1483                cifs_dbg(FYI, "F_UNLCK\n");
1484                *type |= server->vals->unlock_lock_type;
1485                *unlock = 1;
1486                /* Check if unlock includes more than one lock range */
1487        } else if (flock->fl_type == F_RDLCK) {
1488                cifs_dbg(FYI, "F_RDLCK\n");
1489                *type |= server->vals->shared_lock_type;
1490                *lock = 1;
1491        } else if (flock->fl_type == F_EXLCK) {
1492                cifs_dbg(FYI, "F_EXLCK\n");
1493                *type |= server->vals->exclusive_lock_type;
1494                *lock = 1;
1495        } else if (flock->fl_type == F_SHLCK) {
1496                cifs_dbg(FYI, "F_SHLCK\n");
1497                *type |= server->vals->shared_lock_type;
1498                *lock = 1;
1499        } else
1500                cifs_dbg(FYI, "Unknown type of lock\n");
1501}
1502
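    /*
     * Handle F_GETLK.  On POSIX-capable mounts the server is queried
     * directly.  Otherwise we probe for conflicts with mandatory locks by
     * trying to take (and immediately release) the lock on the server,
     * downgrading to a shared lock to distinguish a conflicting reader
     * from a conflicting writer.
     */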
1503static int
1504cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1505           bool wait_flag, bool posix_lck, unsigned int xid)
1506{
1507        int rc = 0;
1508        __u64 length = 1 + flock->fl_end - flock->fl_start;
1509        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1510        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1511        struct TCP_Server_Info *server = tcon->ses->server;
1512        __u16 netfid = cfile->fid.netfid;
1513
1514        if (posix_lck) {
1515                int posix_lock_type;
1516
1517                rc = cifs_posix_lock_test(file, flock);
1518                if (!rc)
1519                        return rc;
1520
1521                if (type & server->vals->shared_lock_type)
1522                        posix_lock_type = CIFS_RDLCK;
1523                else
1524                        posix_lock_type = CIFS_WRLCK;
1525                rc = CIFSSMBPosixLock(xid, tcon, netfid,
1526                                      hash_lockowner(flock->fl_owner),
1527                                      flock->fl_start, length, flock,
1528                                      posix_lock_type, wait_flag);
1529                return rc;
1530        }
1531
1532        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1533        if (!rc)
1534                return rc;
1535
1536        /* BB we could chain these into one lock request BB */
1537        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1538                                    1, 0, false);
1539        if (rc == 0) {
1540                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1541                                            type, 0, 1, false);
1542                flock->fl_type = F_UNLCK;
1543                if (rc != 0)
1544                        cifs_dbg(VFS, "Error %d unlocking previously locked range during test of lock\n",
1545                                 rc);
1546                return 0;
1547        }
1548
1549        if (type & server->vals->shared_lock_type) {
1550                flock->fl_type = F_WRLCK;
1551                return 0;
1552        }
1553
1554        type &= ~server->vals->exclusive_lock_type;
1555
1556        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1557                                    type | server->vals->shared_lock_type,
1558                                    1, 0, false);
1559        if (rc == 0) {
1560                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1561                        type | server->vals->shared_lock_type, 0, 1, false);
1562                flock->fl_type = F_RDLCK;
1563                if (rc != 0)
1564                        cifs_dbg(VFS, "Error %d unlocking previously locked range during test of lock\n",
1565                                 rc);
1566        } else
1567                flock->fl_type = F_WRLCK;
1568
1569        return 0;
1570}
1571
1572void
1573cifs_move_llist(struct list_head *source, struct list_head *dest)
1574{
1575        struct list_head *li, *tmp;
1576        list_for_each_safe(li, tmp, source)
1577                list_move(li, dest);
1578}
1579
1580void
1581cifs_free_llist(struct list_head *llist)
1582{
1583        struct cifsLockInfo *li, *tmp;
1584        list_for_each_entry_safe(li, tmp, llist, llist) {
1585                cifs_del_lock_waiters(li);
1586                list_del(&li->llist);
1587                kfree(li);
1588        }
1589}
1590
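    /*
     * Unlock a byte range using SMB1 mandatory locking.  Matching locks
     * are either dropped locally (while brlocks can still be cached) or
     * batched into as many LOCKING_ANDX_RANGE entries as fit in a single
     * request, with one pass per lock type (exclusive, then shared).
     * Unlocked entries are parked on tmp_llist so they can be restored to
     * the file's list if the server rejects the unlock.
     */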
1591int
1592cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1593                  unsigned int xid)
1594{
1595        int rc = 0, stored_rc;
1596        static const int types[] = {
1597                LOCKING_ANDX_LARGE_FILES,
1598                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1599        };
1600        unsigned int i;
1601        unsigned int max_num, num, max_buf;
1602        LOCKING_ANDX_RANGE *buf, *cur;
1603        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1604        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1605        struct cifsLockInfo *li, *tmp;
1606        __u64 length = 1 + flock->fl_end - flock->fl_start;
1607        struct list_head tmp_llist;
1608
1609        INIT_LIST_HEAD(&tmp_llist);
1610
1611        /*
1612         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1613         * and check it before using.
1614         */
1615        max_buf = tcon->ses->server->maxBuf;
1616        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1617                return -EINVAL;
1618
1619        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1620                     PAGE_SIZE);
1621        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1622                        PAGE_SIZE);
1623        max_num = (max_buf - sizeof(struct smb_hdr)) /
1624                                                sizeof(LOCKING_ANDX_RANGE);
1625        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1626        if (!buf)
1627                return -ENOMEM;
1628
1629        cifs_down_write(&cinode->lock_sem);
1630        for (i = 0; i < 2; i++) {
1631                cur = buf;
1632                num = 0;
1633                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1634                        if (flock->fl_start > li->offset ||
1635                            (flock->fl_start + length) <
1636                            (li->offset + li->length))
1637                                continue;
1638                        if (current->tgid != li->pid)
1639                                continue;
1640                        if (types[i] != li->type)
1641                                continue;
1642                        if (cinode->can_cache_brlcks) {
1643                                /*
1644                                 * We can cache brlock requests - simply remove
1645                                 * a lock from the file's list.
1646                                 */
1647                                list_del(&li->llist);
1648                                cifs_del_lock_waiters(li);
1649                                kfree(li);
1650                                continue;
1651                        }
1652                        cur->Pid = cpu_to_le16(li->pid);
1653                        cur->LengthLow = cpu_to_le32((u32)li->length);
1654                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1655                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1656                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1657                        /*
1658                         * We need to save a lock here to let us add it again to
1659                         * the file's list if the unlock range request fails on
1660                         * the server.
1661                         */
1662                        list_move(&li->llist, &tmp_llist);
1663                        if (++num == max_num) {
1664                                stored_rc = cifs_lockv(xid, tcon,
1665                                                       cfile->fid.netfid,
1666                                                       li->type, num, 0, buf);
1667                                if (stored_rc) {
1668                                        /*
1669                                         * We failed on the unlock range
1670                                         * request - add all locks from the tmp
1671                                         * list to the head of the file's list.
1672                                         */
1673                                        cifs_move_llist(&tmp_llist,
1674                                                        &cfile->llist->locks);
1675                                        rc = stored_rc;
1676                                } else
1677                                        /*
1678                                         * The unlock range request succeeded -
1679                                         * free the tmp list.
1680                                         */
1681                                        cifs_free_llist(&tmp_llist);
1682                                cur = buf;
1683                                num = 0;
1684                        } else
1685                                cur++;
1686                }
1687                if (num) {
1688                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1689                                               types[i], num, 0, buf);
1690                        if (stored_rc) {
1691                                cifs_move_llist(&tmp_llist,
1692                                                &cfile->llist->locks);
1693                                rc = stored_rc;
1694                        } else
1695                                cifs_free_llist(&tmp_llist);
1696                }
1697        }
1698
1699        up_write(&cinode->lock_sem);
1700        kfree(buf);
1701        return rc;
1702}
1703
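    /*
     * Handle F_SETLK/F_SETLKW and flock requests.  POSIX lock calls are
     * used on Unix-extension mounts; otherwise the lock is added locally
     * while brlocks can still be cached, or sent to the server as a
     * mandatory lock/unlock.  On success the result is also recorded with
     * the local VFS via locks_lock_file_wait().
     */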
1704static int
1705cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1706           bool wait_flag, bool posix_lck, int lock, int unlock,
1707           unsigned int xid)
1708{
1709        int rc = 0;
1710        __u64 length = 1 + flock->fl_end - flock->fl_start;
1711        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1712        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1713        struct TCP_Server_Info *server = tcon->ses->server;
1714        struct inode *inode = d_inode(cfile->dentry);
1715
1716        if (posix_lck) {
1717                int posix_lock_type;
1718
1719                rc = cifs_posix_lock_set(file, flock);
1720                if (rc <= FILE_LOCK_DEFERRED)
1721                        return rc;
1722
1723                if (type & server->vals->shared_lock_type)
1724                        posix_lock_type = CIFS_RDLCK;
1725                else
1726                        posix_lock_type = CIFS_WRLCK;
1727
1728                if (unlock == 1)
1729                        posix_lock_type = CIFS_UNLCK;
1730
1731                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1732                                      hash_lockowner(flock->fl_owner),
1733                                      flock->fl_start, length,
1734                                      NULL, posix_lock_type, wait_flag);
1735                goto out;
1736        }
1737
1738        if (lock) {
1739                struct cifsLockInfo *lock;
1740
1741                lock = cifs_lock_init(flock->fl_start, length, type,
1742                                      flock->fl_flags);
1743                if (!lock)
1744                        return -ENOMEM;
1745
1746                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1747                if (rc < 0) {
1748                        kfree(lock);
1749                        return rc;
1750                }
1751                if (!rc)
1752                        goto out;
1753
1754                /*
1755                 * Windows 7 server can delay breaking lease from read to None
1756                 * if we set a byte-range lock on a file - break it explicitly
1757                 * before sending the lock to the server to be sure the next
1758                 * read won't conflict with non-overlapping locks due to
1759                 * page reading.
1760                 */
1761                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1762                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1763                        cifs_zap_mapping(inode);
1764                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1765                                 inode);
1766                        CIFS_I(inode)->oplock = 0;
1767                }
1768
1769                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1770                                            type, 1, 0, wait_flag);
1771                if (rc) {
1772                        kfree(lock);
1773                        return rc;
1774                }
1775
1776                cifs_lock_add(cfile, lock);
1777        } else if (unlock)
1778                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1779
1780out:
1781        if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1782                /*
1783                 * If this is a request to remove all locks because we
1784                 * are closing the file, it doesn't matter if the
1785                 * unlocking failed as both cifs.ko and the SMB server
1786                 * remove the lock on file close
1787                 */
1788                if (rc) {
1789                        cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1790                        if (!(flock->fl_flags & FL_CLOSE))
1791                                return rc;
1792                }
1793                rc = locks_lock_file_wait(file, flock);
1794        }
1795        return rc;
1796}
1797
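    /* Entry point for flock(2) requests; these must have FL_FLOCK set. */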
1798int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1799{
1800        int rc, xid;
1801        int lock = 0, unlock = 0;
1802        bool wait_flag = false;
1803        bool posix_lck = false;
1804        struct cifs_sb_info *cifs_sb;
1805        struct cifs_tcon *tcon;
1806        struct cifsFileInfo *cfile;
1807        __u32 type;
1808
1809        rc = -EACCES;
1810        xid = get_xid();
1811
1812        if (!(fl->fl_flags & FL_FLOCK)) {
1813                free_xid(xid);
                    return -ENOLCK;
            }
1814
1815        cfile = (struct cifsFileInfo *)file->private_data;
1816        tcon = tlink_tcon(cfile->tlink);
1817
1818        cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1819                        tcon->ses->server);
1820        cifs_sb = CIFS_FILE_SB(file);
1821
1822        if (cap_unix(tcon->ses) &&
1823            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1824            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1825                posix_lck = true;
1826
1827        if (!lock && !unlock) {
1828                /*
1829                 * if this is neither a lock nor an unlock request, there is
1830                 * nothing to do since we do not know what it is
1831                 */
1832                free_xid(xid);
1833                return -EOPNOTSUPP;
1834        }
1835
1836        rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1837                        xid);
1838        free_xid(xid);
1839        return rc;
1842}
1843
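    /* Entry point for fcntl(2) byte-range lock requests (F_GETLK/F_SETLK/F_SETLKW). */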
1844int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1845{
1846        int rc, xid;
1847        int lock = 0, unlock = 0;
1848        bool wait_flag = false;
1849        bool posix_lck = false;
1850        struct cifs_sb_info *cifs_sb;
1851        struct cifs_tcon *tcon;
1852        struct cifsFileInfo *cfile;
1853        __u32 type;
1854
1855        rc = -EACCES;
1856        xid = get_xid();
1857
1858        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1859                 cmd, flock->fl_flags, flock->fl_type,
1860                 flock->fl_start, flock->fl_end);
1861
1862        cfile = (struct cifsFileInfo *)file->private_data;
1863        tcon = tlink_tcon(cfile->tlink);
1864
1865        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1866                        tcon->ses->server);
1867        cifs_sb = CIFS_FILE_SB(file);
1868        set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1869
1870        if (cap_unix(tcon->ses) &&
1871            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1872            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1873                posix_lck = true;
1874        /*
1875         * BB add code here to normalize offset and length to account for
1876         * negative length which we cannot accept over the wire.
1877         */
1878        if (IS_GETLK(cmd)) {
1879                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1880                free_xid(xid);
1881                return rc;
1882        }
1883
1884        if (!lock && !unlock) {
1885                /*
1886                 * if no lock or unlock then nothing to do since we do not
1887                 * know what it is
1888                 */
1889                free_xid(xid);
1890                return -EOPNOTSUPP;
1891        }
1892
1893        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1894                        xid);
1895        free_xid(xid);
1896        return rc;
1897}
1898
1899/*
1900 * Update the file size (if needed) after a write. Should be called with
1901 * the inode->i_lock held
1902 */
1903void
1904cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1905                      unsigned int bytes_written)
1906{
1907        loff_t end_of_write = offset + bytes_written;
1908
1909        if (end_of_write > cifsi->server_eof)
1910                cifsi->server_eof = end_of_write;
1911}
1912
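    /*
     * Synchronously write a buffer through an open handle, looping until
     * everything is sent: each iteration writes at most wp_retry_size()
     * bytes, reopens the handle if it has become invalid, and retries on
     * -EAGAIN.  On success the cached server EOF and the inode size are
     * updated under i_lock.
     */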
1913static ssize_t
1914cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1915           size_t write_size, loff_t *offset)
1916{
1917        int rc = 0;
1918        unsigned int bytes_written = 0;
1919        unsigned int total_written;
1920        struct cifs_tcon *tcon;
1921        struct TCP_Server_Info *server;
1922        unsigned int xid;
1923        struct dentry *dentry = open_file->dentry;
1924        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1925        struct cifs_io_parms io_parms = {0};
1926
1927        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1928                 write_size, *offset, dentry);
1929
1930        tcon = tlink_tcon(open_file->tlink);
1931        server = tcon->ses->server;
1932
1933        if (!server->ops->sync_write)
1934                return -ENOSYS;
1935
1936        xid = get_xid();
1937
1938        for (total_written = 0; write_size > total_written;
1939             total_written += bytes_written) {
1940                rc = -EAGAIN;
1941                while (rc == -EAGAIN) {
1942                        struct kvec iov[2];
1943                        unsigned int len;
1944
1945                        if (open_file->invalidHandle) {
1946                                /* we could deadlock if we called
1947                                   filemap_fdatawait from here so tell
1948                                   reopen_file not to flush data to
1949                                   server now */
1950                                rc = cifs_reopen_file(open_file, false);
1951                                if (rc != 0)
1952                                        break;
1953                        }
1954
1955                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1956                                  (unsigned int)write_size - total_written);
1957                        /* iov[0] is reserved for smb header */
1958                        iov[1].iov_base = (char *)write_data + total_written;
1959                        iov[1].iov_len = len;
1960                        io_parms.pid = pid;
1961                        io_parms.tcon = tcon;
1962                        io_parms.offset = *offset;
1963                        io_parms.length = len;
1964                        rc = server->ops->sync_write(xid, &open_file->fid,
1965                                        &io_parms, &bytes_written, iov, 1);
1966                }
1967                if (rc || (bytes_written == 0)) {
1968                        if (total_written)
1969                                break;
1970                        else {
1971                                free_xid(xid);
1972                                return rc;
1973                        }
1974                } else {
1975                        spin_lock(&d_inode(dentry)->i_lock);
1976                        cifs_update_eof(cifsi, *offset, bytes_written);
1977                        spin_unlock(&d_inode(dentry)->i_lock);
1978                        *offset += bytes_written;
1979                }
1980        }
1981
1982        cifs_stats_bytes_written(tcon, total_written);
1983
1984        if (total_written > 0) {
1985                spin_lock(&d_inode(dentry)->i_lock);
1986                if (*offset > d_inode(dentry)->i_size) {
1987                        i_size_write(d_inode(dentry), *offset);
1988                        d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
1989                }
1990                spin_unlock(&d_inode(dentry)->i_lock);
1991        }
1992        mark_inode_dirty_sync(d_inode(dentry));
1993        free_xid(xid);
1994        return total_written;
1995}
1996
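    /*
     * Find an already-open handle on this inode that is usable for
     * reading, taking a reference so it cannot be closed underneath the
     * caller.  Returns NULL if no valid readable handle exists.
     */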
1997struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1998                                        bool fsuid_only)
1999{
2000        struct cifsFileInfo *open_file = NULL;
2001        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2002
2003        /* only filter by fsuid on multiuser mounts */
2004        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2005                fsuid_only = false;
2006
2007        spin_lock(&cifs_inode->open_file_lock);
2008        /* we could simply take the first list entry since write-only
2009           entries are always at the end of the list, but the first entry
2010           might have a close pending, so we go through the whole list */
2011        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2012                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2013                        continue;
2014                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2015                        if (!open_file->invalidHandle) {
2016                                /* found a good file */
2017                                /* lock it so it will not be closed on us */
2018                                cifsFileInfo_get(open_file);
2019                                spin_unlock(&cifs_inode->open_file_lock);
2020                                return open_file;
2021                        } /* else might as well continue, and look for
2022                             another, or simply have the caller reopen it
2023                             again rather than trying to fix this handle */
2024                } else /* write only file */
2025                        break; /* write only files are last so must be done */
2026        }
2027        spin_unlock(&cifs_inode->open_file_lock);
2028        return NULL;
2029}
2030
2031/* Return -EBADF if no handle is found and general rc otherwise */
2032int
2033cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2034                       struct cifsFileInfo **ret_file)
2035{
2036        struct cifsFileInfo *open_file, *inv_file = NULL;
2037        struct cifs_sb_info *cifs_sb;
2038        bool any_available = false;
2039        int rc = -EBADF;
2040        unsigned int refind = 0;
2041        bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2042        bool with_delete = flags & FIND_WR_WITH_DELETE;
2043        *ret_file = NULL;
2044
2045        /*
2046         * Having a null inode here (because mapping->host was set to zero by
2047         * the VFS or MM) should not happen, but we had reports of an oops
2048         * (due to it being zero) during stress test cases, so we check for it
2049         */
2050
2051        if (cifs_inode == NULL) {
2052                cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
2053                dump_stack();
2054                return rc;
2055        }
2056
2057        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
2058
2059        /* only filter by fsuid on multiuser mounts */
2060        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2061                fsuid_only = false;
2062
2063        spin_lock(&cifs_inode->open_file_lock);
2064refind_writable:
2065        if (refind > MAX_REOPEN_ATT) {
2066                spin_unlock(&cifs_inode->open_file_lock);
2067                return rc;
2068        }
2069        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2070                if (!any_available && open_file->pid != current->tgid)
2071                        continue;
2072                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2073                        continue;
2074                if (with_delete && !(open_file->fid.access & DELETE))
2075                        continue;
2076                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2077                        if (!open_file->invalidHandle) {
2078                                /* found a good writable file */
2079                                cifsFileInfo_get(open_file);
2080                                spin_unlock(&cifs_inode->open_file_lock);
2081                                *ret_file = open_file;
2082                                return 0;
2083                        } else {
2084                                if (!inv_file)
2085                                        inv_file = open_file;
2086                        }
2087                }
2088        }
2089        /* couldn't find a usable FH with the same pid, try any available */
2090        if (!any_available) {
2091                any_available = true;
2092                goto refind_writable;
2093        }
2094
2095        if (inv_file) {
2096                any_available = false;
2097                cifsFileInfo_get(inv_file);
2098        }
2099
2100        spin_unlock(&cifs_inode->open_file_lock);
2101
2102        if (inv_file) {
2103                rc = cifs_reopen_file(inv_file, false);
2104                if (!rc) {
2105                        *ret_file = inv_file;
2106                        return 0;
2107                }
2108
2109                spin_lock(&cifs_inode->open_file_lock);
2110                list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2111                spin_unlock(&cifs_inode->open_file_lock);
2112                cifsFileInfo_put(inv_file);
2113                ++refind;
2114                inv_file = NULL;
2115                spin_lock(&cifs_inode->open_file_lock);
2116                goto refind_writable;
2117        }
2118
2119        return rc;
2120}
2121
2122struct cifsFileInfo *
2123find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2124{
2125        struct cifsFileInfo *cfile;
2126        int rc;
2127
2128        rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2129        if (rc)
2130                cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2131
2132        return cfile;
2133}
2134
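    /*
     * Look up an open file on this tcon by full path and return a writable
     * handle for it (subject to the FIND_WR_* flags), or -ENOENT if the
     * path is not currently open.
     */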
2135int
2136cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2137                       int flags,
2138                       struct cifsFileInfo **ret_file)
2139{
2140        struct cifsFileInfo *cfile;
2141        void *page = alloc_dentry_path();
2142
2143        *ret_file = NULL;
2144
2145        spin_lock(&tcon->open_file_lock);
2146        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2147                struct cifsInodeInfo *cinode;
2148                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2149                if (IS_ERR(full_path)) {
2150                        spin_unlock(&tcon->open_file_lock);
2151                        free_dentry_path(page);
2152                        return PTR_ERR(full_path);
2153                }
2154                if (strcmp(full_path, name))
2155                        continue;
2156
2157                cinode = CIFS_I(d_inode(cfile->dentry));
2158                spin_unlock(&tcon->open_file_lock);
2159                free_dentry_path(page);
2160                return cifs_get_writable_file(cinode, flags, ret_file);
2161        }
2162
2163        spin_unlock(&tcon->open_file_lock);
2164        free_dentry_path(page);
2165        return -ENOENT;
2166}
2167
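    /* As above, but return a readable handle for the given path. */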
2168int
2169cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2170                       struct cifsFileInfo **ret_file)
2171{
2172        struct cifsFileInfo *cfile;
2173        void *page = alloc_dentry_path();
2174
2175        *ret_file = NULL;
2176
2177        spin_lock(&tcon->open_file_lock);
2178        list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2179                struct cifsInodeInfo *cinode;
2180                const char *full_path = build_path_from_dentry(cfile->dentry, page);
2181                if (IS_ERR(full_path)) {
2182                        spin_unlock(&tcon->open_file_lock);
2183                        free_dentry_path(page);
2184                        return PTR_ERR(full_path);
2185                }
2186                if (strcmp(full_path, name))
2187                        continue;
2188
2189                cinode = CIFS_I(d_inode(cfile->dentry));
2190                spin_unlock(&tcon->open_file_lock);
2191                free_dentry_path(page);
2192                *ret_file = find_readable_file(cinode, 0);
2193                return *ret_file ? 0 : -ENOENT;
2194        }
2195
2196        spin_unlock(&tcon->open_file_lock);
2197        free_dentry_path(page);
2198        return -ENOENT;
2199}
2200
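    /*
     * Write the byte range [from, to) of a page back to the server using
     * any writable handle on the inode.  The range is clamped so the write
     * never extends the file, and writes entirely past EOF (a race with
     * truncate) are silently dropped.
     */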
2201static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2202{
2203        struct address_space *mapping = page->mapping;
2204        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2205        char *write_data;
2206        int rc = -EFAULT;
2207        int bytes_written = 0;
2208        struct inode *inode;
2209        struct cifsFileInfo *open_file;
2210
2211        if (!mapping || !mapping->host)
2212                return -EFAULT;
2213
2214        inode = page->mapping->host;
2215
2216        offset += (loff_t)from;
2217        write_data = kmap(page);
2218        write_data += from;
2219
2220        if ((to > PAGE_SIZE) || (from > to)) {
2221                kunmap(page);
2222                return -EIO;
2223        }
2224
2225        /* racing with truncate? */
2226        if (offset > mapping->host->i_size) {
2227                kunmap(page);
2228                return 0; /* don't care */
2229        }
2230
2231        /* check to make sure that we are not extending the file */
2232        if (mapping->host->i_size - offset < (loff_t)to)
2233                to = (unsigned)(mapping->host->i_size - offset);
2234
2235        rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2236                                    &open_file);
2237        if (!rc) {
2238                bytes_written = cifs_write(open_file, open_file->pid,
2239                                           write_data, to - from, &offset);
2240                cifsFileInfo_put(open_file);
2241                /* Does mm or vfs already set times? */
2242                inode->i_atime = inode->i_mtime = current_time(inode);
2243                if ((bytes_written > 0) && (offset))
2244                        rc = 0;
2245                else if (bytes_written < 0)
2246                        rc = bytes_written;
2247                else
2248                        rc = -EFAULT;
2249        } else {
2250                cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2251                if (!is_retryable_error(rc))
2252                        rc = -EIO;
2253        }
2254
2255        kunmap(page);
2256        return rc;
2257}
2258
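    /*
     * Allocate a writedata descriptor and fill it with up to @tofind dirty
     * pages from the mapping, starting the search at *@index and advancing
     * it past the pages found.
     */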
2259static struct cifs_writedata *
2260wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2261                          pgoff_t end, pgoff_t *index,
2262                          unsigned int *found_pages)
2263{
2264        struct cifs_writedata *wdata;
2265
2266        wdata = cifs_writedata_alloc((unsigned int)tofind,
2267                                     cifs_writev_complete);
2268        if (!wdata)
2269                return NULL;
2270
2271        *found_pages = find_get_pages_range_tag(mapping, index, end,
2272                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2273        return wdata;
2274}
2275
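    /*
     * Lock and prepare the pages gathered above for writeback, keeping
     * only the leading run of consecutive dirty pages: stop at the first
     * page that was truncated, lies beyond the requested range, is
     * non-contiguous, or is already under writeback.  Pages that will not
     * be sent are released.  Returns the number of pages ready to write.
     */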
2276static unsigned int
2277wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2278                    struct address_space *mapping,
2279                    struct writeback_control *wbc,
2280                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2281{
2282        unsigned int nr_pages = 0, i;
2283        struct page *page;
2284
2285        for (i = 0; i < found_pages; i++) {
2286                page = wdata->pages[i];
2287                /*
2288                 * At this point we hold neither the i_pages lock nor the
2289                 * page lock: the page may be truncated or invalidated
2290                 * (changing page->mapping to NULL), or even swizzled
2291                 * back from swapper_space to tmpfs file mapping
2292                 */
2293
2294                if (nr_pages == 0)
2295                        lock_page(page);
2296                else if (!trylock_page(page))
2297                        break;
2298
2299                if (unlikely(page->mapping != mapping)) {
2300                        unlock_page(page);
2301                        break;
2302                }
2303
2304                if (!wbc->range_cyclic && page->index > end) {
2305                        *done = true;
2306                        unlock_page(page);
2307                        break;
2308                }
2309
2310                if (*next && (page->index != *next)) {
2311                        /* Not next consecutive page */
2312                        unlock_page(page);
2313                        break;
2314                }
2315
2316                if (wbc->sync_mode != WB_SYNC_NONE)
2317                        wait_on_page_writeback(page);
2318
2319                if (PageWriteback(page) ||
2320                                !clear_page_dirty_for_io(page)) {
2321                        unlock_page(page);
2322                        break;
2323                }
2324
2325                /*
2326                 * This actually clears the dirty bit in the radix tree.
2327                 * See cifs_writepage() for more commentary.
2328                 */
2329                set_page_writeback(page);
2330                if (page_offset(page) >= i_size_read(mapping->host)) {
2331                        *done = true;
2332                        unlock_page(page);
2333                        end_page_writeback(page);
2334                        break;
2335                }
2336
2337                wdata->pages[i] = page;
2338                *next = page->index + 1;
2339                ++nr_pages;
2340        }
2341
2342        /* reset index to refind any pages skipped */
2343        if (nr_pages == 0)
2344                *index = wdata->pages[0]->index + 1;
2345
2346        /* put any pages we aren't going to use */
2347        for (i = nr_pages; i < found_pages; i++) {
2348                put_page(wdata->pages[i]);
2349                wdata->pages[i] = NULL;
2350        }
2351
2352        return nr_pages;
2353}
2354
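    /*
     * Fill in the remaining writedata fields (offset, byte count, tail
     * size) and hand the request to the transport as an asynchronous
     * write, after trimming the previously reserved credits down to the
     * actual request size.
     */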
2355static int
2356wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2357                 struct address_space *mapping, struct writeback_control *wbc)
2358{
2359        int rc;
2360
2361        wdata->sync_mode = wbc->sync_mode;
2362        wdata->nr_pages = nr_pages;
2363        wdata->offset = page_offset(wdata->pages[0]);
2364        wdata->pagesz = PAGE_SIZE;
2365        wdata->tailsz = min(i_size_read(mapping->host) -
2366                        page_offset(wdata->pages[nr_pages - 1]),
2367                        (loff_t)PAGE_SIZE);
2368        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2369        wdata->pid = wdata->cfile->pid;
2370
2371        rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2372        if (rc)
2373                return rc;
2374
2375        if (wdata->cfile->invalidHandle)
2376                rc = -EAGAIN;
2377        else
2378                rc = wdata->server->ops->async_writev(wdata,
2379                                                      cifs_writedata_release);
2380
2381        return rc;
2382}
2383
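    /*
     * Write back dirty pages for the mapping in wsize-limited batches: for
     * each batch, reserve send credits, gather and lock a contiguous run
     * of dirty pages, and issue a single async write.  Retryable errors
     * redirty the pages, and WB_SYNC_ALL retries an -EAGAIN batch from the
     * saved index.
     */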
2384static int cifs_writepages(struct address_space *mapping,
2385                           struct writeback_control *wbc)
2386{
2387        struct inode *inode = mapping->host;
2388        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2389        struct TCP_Server_Info *server;
2390        bool done = false, scanned = false, range_whole = false;
2391        pgoff_t end, index;
2392        struct cifs_writedata *wdata;
2393        struct cifsFileInfo *cfile = NULL;
2394        int rc = 0;
2395        int saved_rc = 0;
2396        unsigned int xid;
2397
2398        /*
2399         * If wsize is smaller than the page cache size, default to writing
2400         * one page at a time via cifs_writepage
2401         */
2402        if (cifs_sb->ctx->wsize < PAGE_SIZE)
2403                return generic_writepages(mapping, wbc);
2404
2405        xid = get_xid();
2406        if (wbc->range_cyclic) {
2407                index = mapping->writeback_index; /* Start from prev offset */
2408                end = -1;
2409        } else {
2410                index = wbc->range_start >> PAGE_SHIFT;
2411                end = wbc->range_end >> PAGE_SHIFT;
2412                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2413                        range_whole = true;
2414                scanned = true;
2415        }
2416        server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2417
2418retry:
2419        while (!done && index <= end) {
2420                unsigned int i, nr_pages, found_pages, wsize;
2421                pgoff_t next = 0, tofind, saved_index = index;
2422                struct cifs_credits credits_on_stack;
2423                struct cifs_credits *credits = &credits_on_stack;
2424                int get_file_rc = 0;
2425
2426                if (cfile)
2427                        cifsFileInfo_put(cfile);
2428
2429                rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2430
2431                /* in case of an error store it to return later */
2432                if (rc)
2433                        get_file_rc = rc;
2434
2435                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2436                                                   &wsize, credits);
2437                if (rc != 0) {
2438                        done = true;
2439                        break;
2440                }
2441
2442                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2443
2444                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2445                                                  &found_pages);
2446                if (!wdata) {
2447                        rc = -ENOMEM;
2448                        done = true;
2449                        add_credits_and_wake_if(server, credits, 0);
2450                        break;
2451                }
2452
2453                if (found_pages == 0) {
2454                        kref_put(&wdata->refcount, cifs_writedata_release);
2455                        add_credits_and_wake_if(server, credits, 0);
2456                        break;
2457                }
2458
2459                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2460                                               end, &index, &next, &done);
2461
2462                /* nothing to write? */
2463                if (nr_pages == 0) {
2464                        kref_put(&wdata->refcount, cifs_writedata_release);
2465                        add_credits_and_wake_if(server, credits, 0);
2466                        continue;
2467                }
2468
2469                wdata->credits = credits_on_stack;
2470                wdata->cfile = cfile;
2471                wdata->server = server;
2472                cfile = NULL;
2473
2474                if (!wdata->cfile) {
2475                        cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2476                                 get_file_rc);
2477                        if (is_retryable_error(get_file_rc))
2478                                rc = get_file_rc;
2479                        else
2480                                rc = -EBADF;
2481                } else
2482                        rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2483
2484                for (i = 0; i < nr_pages; ++i)
2485                        unlock_page(wdata->pages[i]);
2486
2487                /* send failure -- clean up the mess */
2488                if (rc != 0) {
2489                        add_credits_and_wake_if(server, &wdata->credits, 0);
2490                        for (i = 0; i < nr_pages; ++i) {
2491                                if (is_retryable_error(rc))
2492                                        redirty_page_for_writepage(wbc,
2493                                                           wdata->pages[i]);
2494                                else
2495                                        SetPageError(wdata->pages[i]);
2496                                end_page_writeback(wdata->pages[i]);
2497                                put_page(wdata->pages[i]);
2498                        }
2499                        if (!is_retryable_error(rc))
2500                                mapping_set_error(mapping, rc);
2501                }
2502                kref_put(&wdata->refcount, cifs_writedata_release);
2503
2504                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2505                        index = saved_index;
2506                        continue;
2507                }
2508
2509                /* Return immediately if we received a signal during writing */
2510                if (is_interrupt_error(rc)) {
2511                        done = true;
2512                        break;
2513                }
2514
2515                if (rc != 0 && saved_rc == 0)
2516                        saved_rc = rc;
2517
2518                wbc->nr_to_write -= nr_pages;
2519                if (wbc->nr_to_write <= 0)
2520                        done = true;
2521
2522                index = next;
2523        }
2524
2525        if (!scanned && !done) {
2526                /*
2527                 * We hit the last page and there is more work to be done: wrap
2528                 * back to the start of the file
2529                 */
2530                scanned = true;
2531                index = 0;
2532                goto retry;
2533        }
2534
2535        if (saved_rc != 0)
2536                rc = saved_rc;
2537
2538        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2539                mapping->writeback_index = index;
2540
2541        if (cfile)
2542                cifsFileInfo_put(cfile);
2543        free_xid(xid);
2544        /* Indication to update ctime and mtime as close is deferred */
2545        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2546        return rc;
2547}
2548
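    /*
     * Write a single locked page back synchronously via
     * cifs_partialpagewrite(), managing the page writeback and dirty state
     * as a writepage() implementation must.
     */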
2549static int
2550cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2551{
2552        int rc;
2553        unsigned int xid;
2554
2555        xid = get_xid();
2556/* BB add check for wbc flags */
2557        get_page(page);
2558        if (!PageUptodate(page))
2559                cifs_dbg(FYI, "ppw - page not up to date\n");
2560
2561        /*
2562         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2563         *
2564         * A writepage() implementation always needs to do either this,
2565         * or re-dirty the page with "redirty_page_for_writepage()" in
2566         * the case of a failure.
2567         *
2568         * Just unlocking the page will cause the radix tree tag-bits
2569         * to fail to update with the state of the page correctly.
2570         */
2571        set_page_writeback(page);
2572retry_write:
2573        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2574        if (is_retryable_error(rc)) {
2575                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2576                        goto retry_write;
2577                redirty_page_for_writepage(wbc, page);
2578        } else if (rc != 0) {
2579                SetPageError(page);
2580                mapping_set_error(page->mapping, rc);
2581        } else {
2582                SetPageUptodate(page);
2583        }
2584        end_page_writeback(page);
2585        put_page(page);
2586        free_xid(xid);
2587        return rc;
2588}
2589
2590static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2591{
2592        int rc = cifs_writepage_locked(page, wbc);
2593        unlock_page(page);
2594        return rc;
2595}
2596
2597static int cifs_write_end(struct file *file, struct address_space *mapping,
2598                        loff_t pos, unsigned len, unsigned copied,
2599                        struct page *page, void *fsdata)
2600{
2601        int rc;
2602        struct inode *inode = mapping->host;
2603        struct cifsFileInfo *cfile = file->private_data;
2604        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2605        __u32 pid;
2606
2607        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2608                pid = cfile->pid;
2609        else
2610                pid = current->tgid;
2611
2612        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2613                 page, pos, copied);
2614
2615        if (PageChecked(page)) {
2616                if (copied == len)
2617                        SetPageUptodate(page);
2618                ClearPageChecked(page);
2619        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2620                SetPageUptodate(page);
2621
2622        if (!PageUptodate(page)) {
2623                char *page_data;
2624                unsigned offset = pos & (PAGE_SIZE - 1);
2625                unsigned int xid;
2626
2627                xid = get_xid();
2628                /* this is probably better than directly calling
2629                   cifs_partialpagewrite since here the file handle is
2630                   known, which we might as well leverage */
2631                /* BB check if anything else missing out of ppw
2632                   such as updating last write time */
2633                page_data = kmap(page);
2634                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2635                /* if (rc < 0) should we set writebehind rc? */
2636                kunmap(page);
2637
2638                free_xid(xid);
2639        } else {
2640                rc = copied;
2641                pos += copied;
2642                set_page_dirty(page);
2643        }
2644
2645        if (rc > 0) {
2646                spin_lock(&inode->i_lock);
2647                if (pos > inode->i_size) {
2648                        i_size_write(inode, pos);
2649                        inode->i_blocks = (512 - 1 + pos) >> 9;
2650                }
2651                spin_unlock(&inode->i_lock);
2652        }
2653
2654        unlock_page(page);
2655        put_page(page);
2656        /* Indication to update ctime and mtime as close is deferred */
2657        set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2658
2659        return rc;
2660}
2661
2662int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2663                      int datasync)
2664{
2665        unsigned int xid;
2666        int rc = 0;
2667        struct cifs_tcon *tcon;
2668        struct TCP_Server_Info *server;
2669        struct cifsFileInfo *smbfile = file->private_data;
2670        struct inode *inode = file_inode(file);
2671        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2672
2673        rc = file_write_and_wait_range(file, start, end);
2674        if (rc) {
2675                trace_cifs_fsync_err(inode->i_ino, rc);
2676                return rc;
2677        }
2678
2679        xid = get_xid();
2680
2681        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2682                 file, datasync);
2683
2684        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2685                rc = cifs_zap_mapping(inode);
2686                if (rc) {
2687                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2688                        rc = 0; /* don't care about it in fsync */
2689                }
2690        }
2691
2692        tcon = tlink_tcon(smbfile->tlink);
2693        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2694                server = tcon->ses->server;
2695                if (server->ops->flush)
2696                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2697                else
2698                        rc = -ENOSYS;
2699        }
2700
2701        free_xid(xid);
2702        return rc;
2703}
2704
2705int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2706{
2707        unsigned int xid;
2708        int rc = 0;
2709        struct cifs_tcon *tcon;
2710        struct TCP_Server_Info *server;
2711        struct cifsFileInfo *smbfile = file->private_data;
2712        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2713
2714        rc = file_write_and_wait_range(file, start, end);
2715        if (rc) {
2716                trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2717                return rc;
2718        }
2719
2720        xid = get_xid();
2721
2722        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2723                 file, datasync);
2724
2725        tcon = tlink_tcon(smbfile->tlink);
2726        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2727                server = tcon->ses->server;
2728                if (server->ops->flush)
2729                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2730                else
2731                        rc = -ENOSYS;
2732        }
2733
2734        free_xid(xid);
2735        return rc;
2736}
2737
2738/*
2739 * As the file closes, flush all cached write data for this inode,
2740 * checking for write-behind errors.
2741 */
2742int cifs_flush(struct file *file, fl_owner_t id)
2743{
2744        struct inode *inode = file_inode(file);
2745        int rc = 0;
2746
2747        if (file->f_mode & FMODE_WRITE)
2748                rc = filemap_write_and_wait(inode->i_mapping);
2749
2750        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2751        if (rc)
2752                trace_cifs_flush_err(inode->i_ino, rc);
2753        return rc;
2754}
2755
2756static int
2757cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2758{
2759        int rc = 0;
2760        unsigned long i;
2761
2762        for (i = 0; i < num_pages; i++) {
2763                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2764                if (!pages[i]) {
2765                        /*
2766                         * save number of pages we have already allocated and
2767                         * return with ENOMEM error
2768                         */
2769                        num_pages = i;
2770                        rc = -ENOMEM;
2771                        break;
2772                }
2773        }
2774
2775        if (rc) {
2776                for (i = 0; i < num_pages; i++)
2777                        put_page(pages[i]);
2778        }
2779        return rc;
2780}
2781
2782static inline
2783size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2784{
2785        size_t num_pages;
2786        size_t clen;
2787
2788        clen = min_t(const size_t, len, wsize);
2789        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2790
2791        if (cur_len)
2792                *cur_len = clen;
2793
2794        return num_pages;
2795}
2796
2797static void
2798cifs_uncached_writedata_release(struct kref *refcount)
2799{
2800        int i;
2801        struct cifs_writedata *wdata = container_of(refcount,
2802                                        struct cifs_writedata, refcount);
2803
2804        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2805        for (i = 0; i < wdata->nr_pages; i++)
2806                put_page(wdata->pages[i]);
2807        cifs_writedata_release(refcount);
2808}
2809
2810static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2811
2812static void
2813cifs_uncached_writev_complete(struct work_struct *work)
2814{
2815        struct cifs_writedata *wdata = container_of(work,
2816                                        struct cifs_writedata, work);
2817        struct inode *inode = d_inode(wdata->cfile->dentry);
2818        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2819
2820        spin_lock(&inode->i_lock);
2821        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2822        if (cifsi->server_eof > inode->i_size)
2823                i_size_write(inode, cifsi->server_eof);
2824        spin_unlock(&inode->i_lock);
2825
2826        complete(&wdata->done);
2827        collect_uncached_write_data(wdata->ctx);
2828        /* the below call can possibly free the last ref to aio ctx */
2829        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2830}
2831
2832static int
2833wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2834                      size_t *len, unsigned long *num_pages)
2835{
2836        size_t save_len, copied, bytes, cur_len = *len;
2837        unsigned long i, nr_pages = *num_pages;
2838
2839        save_len = cur_len;
2840        for (i = 0; i < nr_pages; i++) {
2841                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2842                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2843                cur_len -= copied;
2844                /*
2845                 * If we didn't copy as much as we expected, then that
2846                 * may mean we trod into an unmapped area. Stop copying
2847                 * at that point. On the next pass through the big
2848                 * loop, we'll likely end up getting a zero-length
2849                 * write and bailing out of it.
2850                 */
2851                if (copied < bytes)
2852                        break;
2853        }
2854        cur_len = save_len - cur_len;
2855        *len = cur_len;
2856
2857        /*
2858         * If we have no data to send, then that probably means that
2859         * the copy above failed altogether. That's most likely because
2860         * the address in the iovec was bogus. Return -EFAULT and let
2861         * the caller free anything we allocated and bail out.
2862         */
2863        if (!cur_len)
2864                return -EFAULT;
2865
2866        /*
2867         * i + 1 now represents the number of pages we actually used in
2868         * the copy phase above.
2869         */
2870        *num_pages = i + 1;
2871        return 0;
2872}
2873
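/*
 * Resend a wdata after a retryable failure: reopen the handle if it was
 * invalidated, wait until the server grants enough credits to cover the
 * whole wdata (it is not split into segments), then reissue the async
 * write, looping while the result is -EAGAIN.
 */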
2874static int
2875cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2876        struct cifs_aio_ctx *ctx)
2877{
2878        unsigned int wsize;
2879        struct cifs_credits credits;
2880        int rc;
2881        struct TCP_Server_Info *server = wdata->server;
2882
2883        do {
2884                if (wdata->cfile->invalidHandle) {
2885                        rc = cifs_reopen_file(wdata->cfile, false);
2886                        if (rc == -EAGAIN)
2887                                continue;
2888                        else if (rc)
2889                                break;
2890                }
2891
2892
2893                /*
2894                 * Wait for enough credits to resend this wdata.
2895                 * Note: we attempt to resend the whole wdata rather than
2896                 * in segments.
2897                 */
2898                do {
2899                        rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2900                                                &wsize, &credits);
2901                        if (rc)
2902                                goto fail;
2903
2904                        if (wsize < wdata->bytes) {
2905                                add_credits_and_wake_if(server, &credits, 0);
2906                                msleep(1000);
2907                        }
2908                } while (wsize < wdata->bytes);
2909                wdata->credits = credits;
2910
2911                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2912
2913                if (!rc) {
2914                        if (wdata->cfile->invalidHandle)
2915                                rc = -EAGAIN;
2916                        else {
2917#ifdef CONFIG_CIFS_SMB_DIRECT
2918                                if (wdata->mr) {
2919                                        wdata->mr->need_invalidate = true;
2920                                        smbd_deregister_mr(wdata->mr);
2921                                        wdata->mr = NULL;
2922                                }
2923#endif
2924                                rc = server->ops->async_writev(wdata,
2925                                        cifs_uncached_writedata_release);
2926                        }
2927                }
2928
2929                /* If the write was successfully sent, we are done */
2930                if (!rc) {
2931                        list_add_tail(&wdata->list, wdata_list);
2932                        return 0;
2933                }
2934
2935                /* Roll back credits and retry if needed */
2936                add_credits_and_wake_if(server, &wdata->credits, 0);
2937        } while (rc == -EAGAIN);
2938
2939fail:
2940        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2941        return rc;
2942}
2943
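/*
 * Split an uncached or direct write into wsize-sized pieces, wrap each
 * piece in a cifs_writedata and send it with async_writev. Each
 * successfully sent piece is queued on @wdata_list for
 * collect_uncached_write_data() to reap.
 */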
2944static int
2945cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2946                     struct cifsFileInfo *open_file,
2947                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2948                     struct cifs_aio_ctx *ctx)
2949{
2950        int rc = 0;
2951        size_t cur_len;
2952        unsigned long nr_pages, num_pages, i;
2953        struct cifs_writedata *wdata;
2954        struct iov_iter saved_from = *from;
2955        loff_t saved_offset = offset;
2956        pid_t pid;
2957        struct TCP_Server_Info *server;
2958        struct page **pagevec;
2959        size_t start;
2960        unsigned int xid;
2961
2962        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2963                pid = open_file->pid;
2964        else
2965                pid = current->tgid;
2966
2967        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
2968        xid = get_xid();
2969
2970        do {
2971                unsigned int wsize;
2972                struct cifs_credits credits_on_stack;
2973                struct cifs_credits *credits = &credits_on_stack;
2974
2975                if (open_file->invalidHandle) {
2976                        rc = cifs_reopen_file(open_file, false);
2977                        if (rc == -EAGAIN)
2978                                continue;
2979                        else if (rc)
2980                                break;
2981                }
2982
2983                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2984                                                   &wsize, credits);
2985                if (rc)
2986                        break;
2987
2988                cur_len = min_t(const size_t, len, wsize);
2989
2990                if (ctx->direct_io) {
2991                        ssize_t result;
2992
2993                        result = iov_iter_get_pages_alloc(
2994                                from, &pagevec, cur_len, &start);
2995                        if (result < 0) {
2996                                cifs_dbg(VFS,
2997                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2998                                         result, iov_iter_type(from),
2999                                         from->iov_offset, from->count);
3000                                dump_stack();
3001
3002                                rc = result;
3003                                add_credits_and_wake_if(server, credits, 0);
3004                                break;
3005                        }
3006                        cur_len = (size_t)result;
3007                        iov_iter_advance(from, cur_len);
3008
3009                        nr_pages =
3010                                (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3011
3012                        wdata = cifs_writedata_direct_alloc(pagevec,
3013                                             cifs_uncached_writev_complete);
3014                        if (!wdata) {
3015                                rc = -ENOMEM;
3016                                add_credits_and_wake_if(server, credits, 0);
3017                                break;
3018                        }
3019
3020
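                        /*
                         * The pinned user pages may hold data starting
                         * @start bytes into the first page; tailsz is the
                         * number of valid bytes in the final page. As a
                         * worked example, assuming 4 KiB pages, start == 512
                         * and cur_len == 10000 span nr_pages == 3, and
                         * tailsz == 10000 - (4096 - 512) - 4096 == 2320.
                         */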
3021                        wdata->page_offset = start;
3022                        wdata->tailsz =
3023                                nr_pages > 1 ?
3024                                        cur_len - (PAGE_SIZE - start) -
3025                                        (nr_pages - 2) * PAGE_SIZE :
3026                                        cur_len;
3027                } else {
3028                        nr_pages = get_numpages(wsize, len, &cur_len);
3029                        wdata = cifs_writedata_alloc(nr_pages,
3030                                             cifs_uncached_writev_complete);
3031                        if (!wdata) {
3032                                rc = -ENOMEM;
3033                                add_credits_and_wake_if(server, credits, 0);
3034                                break;
3035                        }
3036
3037                        rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3038                        if (rc) {
3039                                kvfree(wdata->pages);
3040                                kfree(wdata);
3041                                add_credits_and_wake_if(server, credits, 0);
3042                                break;
3043                        }
3044
3045                        num_pages = nr_pages;
3046                        rc = wdata_fill_from_iovec(
3047                                wdata, from, &cur_len, &num_pages);
3048                        if (rc) {
3049                                for (i = 0; i < nr_pages; i++)
3050                                        put_page(wdata->pages[i]);
3051                                kvfree(wdata->pages);
3052                                kfree(wdata);
3053                                add_credits_and_wake_if(server, credits, 0);
3054                                break;
3055                        }
3056
3057                        /*
3058                         * Bring nr_pages down to the number of pages we
3059                         * actually used, and free any pages that we didn't use.
3060                         */
3061                        for ( ; nr_pages > num_pages; nr_pages--)
3062                                put_page(wdata->pages[nr_pages - 1]);
3063
3064                        wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3065                }
3066
3067                wdata->sync_mode = WB_SYNC_ALL;
3068                wdata->nr_pages = nr_pages;
3069                wdata->offset = (__u64)offset;
3070                wdata->cfile = cifsFileInfo_get(open_file);
3071                wdata->server = server;
3072                wdata->pid = pid;
3073                wdata->bytes = cur_len;
3074                wdata->pagesz = PAGE_SIZE;
3075                wdata->credits = credits_on_stack;
3076                wdata->ctx = ctx;
3077                kref_get(&ctx->refcount);
3078
3079                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3080
3081                if (!rc) {
3082                        if (wdata->cfile->invalidHandle)
3083                                rc = -EAGAIN;
3084                        else
3085                                rc = server->ops->async_writev(wdata,
3086                                        cifs_uncached_writedata_release);
3087                }
3088
3089                if (rc) {
3090                        add_credits_and_wake_if(server, &wdata->credits, 0);
3091                        kref_put(&wdata->refcount,
3092                                 cifs_uncached_writedata_release);
3093                        if (rc == -EAGAIN) {
3094                                *from = saved_from;
3095                                iov_iter_advance(from, offset - saved_offset);
3096                                continue;
3097                        }
3098                        break;
3099                }
3100
3101                list_add_tail(&wdata->list, wdata_list);
3102                offset += cur_len;
3103                len -= cur_len;
3104        } while (len > 0);
3105
3106        free_xid(xid);
3107        return rc;
3108}
3109
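/*
 * Reap completed uncached writes in order of increasing offset: tally the
 * bytes written, resend any piece that failed with -EAGAIN, and once the
 * list is drained complete the aio ctx (or call ->ki_complete for async
 * callers).
 */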
3110static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3111{
3112        struct cifs_writedata *wdata, *tmp;
3113        struct cifs_tcon *tcon;
3114        struct cifs_sb_info *cifs_sb;
3115        struct dentry *dentry = ctx->cfile->dentry;
3116        ssize_t rc;
3117
3118        tcon = tlink_tcon(ctx->cfile->tlink);
3119        cifs_sb = CIFS_SB(dentry->d_sb);
3120
3121        mutex_lock(&ctx->aio_mutex);
3122
3123        if (list_empty(&ctx->list)) {
3124                mutex_unlock(&ctx->aio_mutex);
3125                return;
3126        }
3127
3128        rc = ctx->rc;
3129        /*
3130         * Wait for and collect replies for any successful sends in order of
3131         * increasing offset. Once an error is hit, return without waiting
3132         * for any more replies.
3133         */
3134restart_loop:
3135        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3136                if (!rc) {
3137                        if (!try_wait_for_completion(&wdata->done)) {
3138                                mutex_unlock(&ctx->aio_mutex);
3139                                return;
3140                        }
3141
3142                        if (wdata->result)
3143                                rc = wdata->result;
3144                        else
3145                                ctx->total_len += wdata->bytes;
3146
3147                        /* resend call if it's a retryable error */
3148                        if (rc == -EAGAIN) {
3149                                struct list_head tmp_list;
3150                                struct iov_iter tmp_from = ctx->iter;
3151
3152                                INIT_LIST_HEAD(&tmp_list);
3153                                list_del_init(&wdata->list);
3154
3155                                if (ctx->direct_io)
3156                                        rc = cifs_resend_wdata(
3157                                                wdata, &tmp_list, ctx);
3158                                else {
3159                                        iov_iter_advance(&tmp_from,
3160                                                 wdata->offset - ctx->pos);
3161
3162                                        rc = cifs_write_from_iter(wdata->offset,
3163                                                wdata->bytes, &tmp_from,
3164                                                ctx->cfile, cifs_sb, &tmp_list,
3165                                                ctx);
3166
3167                                        kref_put(&wdata->refcount,
3168                                                cifs_uncached_writedata_release);
3169                                }
3170
3171                                list_splice(&tmp_list, &ctx->list);
3172                                goto restart_loop;
3173                        }
3174                }
3175                list_del_init(&wdata->list);
3176                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3177        }
3178
3179        cifs_stats_bytes_written(tcon, ctx->total_len);
3180        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3181
3182        ctx->rc = (rc == 0) ? ctx->total_len : rc;
3183
3184        mutex_unlock(&ctx->aio_mutex);
3185
3186        if (ctx->iocb && ctx->iocb->ki_complete)
3187                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3188        else
3189                complete(&ctx->done);
3190}
3191
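/*
 * Common entry point for the direct and uncached writev paths: validate
 * the write, build a cifs_aio_ctx, kick off the component writes, and
 * either return -EIOCBQUEUED for async callers or wait for completion.
 */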
3192static ssize_t __cifs_writev(
3193        struct kiocb *iocb, struct iov_iter *from, bool direct)
3194{
3195        struct file *file = iocb->ki_filp;
3196        ssize_t total_written = 0;
3197        struct cifsFileInfo *cfile;
3198        struct cifs_tcon *tcon;
3199        struct cifs_sb_info *cifs_sb;
3200        struct cifs_aio_ctx *ctx;
3201        struct iov_iter saved_from = *from;
3202        size_t len = iov_iter_count(from);
3203        int rc;
3204
3205        /*
3206         * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so in
3207         * that case fall back to the non-direct write path. This could be
3208         * improved by getting the pages directly for ITER_KVEC.
3209         */
3210        if (direct && iov_iter_is_kvec(from)) {
3211                cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3212                direct = false;
3213        }
3214
3215        rc = generic_write_checks(iocb, from);
3216        if (rc <= 0)
3217                return rc;
3218
3219        cifs_sb = CIFS_FILE_SB(file);
3220        cfile = file->private_data;
3221        tcon = tlink_tcon(cfile->tlink);
3222
3223        if (!tcon->ses->server->ops->async_writev)
3224                return -ENOSYS;
3225
3226        ctx = cifs_aio_ctx_alloc();
3227        if (!ctx)
3228                return -ENOMEM;
3229
3230        ctx->cfile = cifsFileInfo_get(cfile);
3231
3232        if (!is_sync_kiocb(iocb))
3233                ctx->iocb = iocb;
3234
3235        ctx->pos = iocb->ki_pos;
3236
3237        if (direct) {
3238                ctx->direct_io = true;
3239                ctx->iter = *from;
3240                ctx->len = len;
3241        } else {
3242                rc = setup_aio_ctx_iter(ctx, from, WRITE);
3243                if (rc) {
3244                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3245                        return rc;
3246                }
3247        }
3248
3249        /* grab a lock here because the write response handlers can access ctx */
3250        mutex_lock(&ctx->aio_mutex);
3251
3252        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3253                                  cfile, cifs_sb, &ctx->list, ctx);
3254
3255        /*
3256         * If at least one write was successfully sent, then discard any rc
3257         * value from the later writes. If the other writes succeed, we'll
3258         * end up returning whatever was written. If they fail, we'll get a
3259         * new rc value from the collection path.
3260         */
3261        if (!list_empty(&ctx->list))
3262                rc = 0;
3263
3264        mutex_unlock(&ctx->aio_mutex);
3265
3266        if (rc) {
3267                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3268                return rc;
3269        }
3270
3271        if (!is_sync_kiocb(iocb)) {
3272                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3273                return -EIOCBQUEUED;
3274        }
3275
3276        rc = wait_for_completion_killable(&ctx->done);
3277        if (rc) {
3278                mutex_lock(&ctx->aio_mutex);
3279                ctx->rc = rc = -EINTR;
3280                total_written = ctx->total_len;
3281                mutex_unlock(&ctx->aio_mutex);
3282        } else {
3283                rc = ctx->rc;
3284                total_written = ctx->total_len;
3285        }
3286
3287        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3288
3289        if (unlikely(!total_written))
3290                return rc;
3291
3292        iocb->ki_pos += total_written;
3293        return total_written;
3294}
3295
3296ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3297{
3298        return __cifs_writev(iocb, from, true);
3299}
3300
3301ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3302{
3303        return __cifs_writev(iocb, from, false);
3304}
3305
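/*
 * Write through the page cache for an oplocked file, but only after
 * checking that no mandatory brlock conflicts with the byte range being
 * written.
 */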
3306static ssize_t
3307cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3308{
3309        struct file *file = iocb->ki_filp;
3310        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3311        struct inode *inode = file->f_mapping->host;
3312        struct cifsInodeInfo *cinode = CIFS_I(inode);
3313        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3314        ssize_t rc;
3315
3316        inode_lock(inode);
3317        /*
3318         * We need to hold the sem to be sure nobody modifies lock list
3319         * with a brlock that prevents writing.
3320         */
3321        down_read(&cinode->lock_sem);
3322
3323        rc = generic_write_checks(iocb, from);
3324        if (rc <= 0)
3325                goto out;
3326
3327        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3328                                     server->vals->exclusive_lock_type, 0,
3329                                     NULL, CIFS_WRITE_OP))
3330                rc = __generic_file_write_iter(iocb, from);
3331        else
3332                rc = -EACCES;
3333out:
3334        up_read(&cinode->lock_sem);
3335        inode_unlock(inode);
3336
3337        if (rc > 0)
3338                rc = generic_write_sync(iocb, rc);
3339        return rc;
3340}
3341
3342ssize_t
3343cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3344{
3345        struct inode *inode = file_inode(iocb->ki_filp);
3346        struct cifsInodeInfo *cinode = CIFS_I(inode);
3347        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3348        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3349                                                iocb->ki_filp->private_data;
3350        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3351        ssize_t written;
3352
3353        written = cifs_get_writer(cinode);
3354        if (written)
3355                return written;
3356
3357        if (CIFS_CACHE_WRITE(cinode)) {
3358                if (cap_unix(tcon->ses) &&
3359                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3360                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3361                        written = generic_file_write_iter(iocb, from);
3362                        goto out;
3363                }
3364                written = cifs_writev(iocb, from);
3365                goto out;
3366        }
3367        /*
3368         * For non-oplocked files in strict cache mode we need to write the data
3369         * to the server exactly from pos to pos+len-1 rather than flush all
3370         * affected pages, because flushing may cause an error with mandatory
3371         * locks on these pages but not on the region from pos to pos+len-1.
3372         */
3373        written = cifs_user_writev(iocb, from);
3374        if (CIFS_CACHE_READ(cinode)) {
3375                /*
3376                 * We have read-level caching and we have just sent a write
3377                 * request to the server, thus making data in the cache stale.
3378                 * Zap the cache and set the oplock/lease level to NONE to
3379                 * avoid reading stale data from the cache. All subsequent
3380                 * read operations will read new data from the server.
3381                 */
3382                cifs_zap_mapping(inode);
3383                cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3384                         inode);
3385                cinode->oplock = 0;
3386        }
3387out:
3388        cifs_put_writer(cinode);
3389        return written;
3390}
3391
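/*
 * Build a cifs_readdata around a caller-supplied page array;
 * cifs_readdata_alloc() uses it with a freshly allocated array, while the
 * direct I/O path passes pinned user pages.
 */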
3392static struct cifs_readdata *
3393cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3394{
3395        struct cifs_readdata *rdata;
3396
3397        rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3398        if (rdata != NULL) {
3399                rdata->pages = pages;
3400                kref_init(&rdata->refcount);
3401                INIT_LIST_HEAD(&rdata->list);
3402                init_completion(&rdata->done);
3403                INIT_WORK(&rdata->work, complete);
3404        }
3405
3406        return rdata;
3407}
3408
3409static struct cifs_readdata *
3410cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3411{
3412        struct page **pages =
3413                kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3414        struct cifs_readdata *ret = NULL;
3415
3416        if (pages) {
3417                ret = cifs_readdata_direct_alloc(pages, complete);
3418                if (!ret)
3419                        kfree(pages);
3420        }
3421
3422        return ret;
3423}
3424
3425void
3426cifs_readdata_release(struct kref *refcount)
3427{
3428        struct cifs_readdata *rdata = container_of(refcount,
3429                                        struct cifs_readdata, refcount);
3430#ifdef CONFIG_CIFS_SMB_DIRECT
3431        if (rdata->mr) {
3432                smbd_deregister_mr(rdata->mr);
3433                rdata->mr = NULL;
3434        }
3435#endif
3436        if (rdata->cfile)
3437                cifsFileInfo_put(rdata->cfile);
3438
3439        kvfree(rdata->pages);
3440        kfree(rdata);
3441}
3442
3443static int
3444cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3445{
3446        int rc = 0;
3447        struct page *page;
3448        unsigned int i;
3449
3450        for (i = 0; i < nr_pages; i++) {
3451                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3452                if (!page) {
3453                        rc = -ENOMEM;
3454                        break;
3455                }
3456                rdata->pages[i] = page;
3457        }
3458
3459        if (rc) {
3460                unsigned int nr_page_failed = i;
3461
3462                for (i = 0; i < nr_page_failed; i++) {
3463                        put_page(rdata->pages[i]);
3464                        rdata->pages[i] = NULL;
3465                }
3466        }
3467        return rc;
3468}
3469
3470static void
3471cifs_uncached_readdata_release(struct kref *refcount)
3472{
3473        struct cifs_readdata *rdata = container_of(refcount,
3474                                        struct cifs_readdata, refcount);
3475        unsigned int i;
3476
3477        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3478        for (i = 0; i < rdata->nr_pages; i++) {
3479                put_page(rdata->pages[i]);
3480        }
3481        cifs_readdata_release(refcount);
3482}
3483
3484/**
3485 * cifs_readdata_to_iov - copy data from pages in response to an iovec
3486 * @rdata:      the readdata response with list of pages holding data
3487 * @iter:       destination for our data
3488 *
3489 * This function copies data from a list of pages in a readdata response into
3490 * an array of iovecs. It will first calculate where the data should go
3491 * based on the info in the readdata and then copy the data into that spot.
3492 */
3493static int
3494cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3495{
3496        size_t remaining = rdata->got_bytes;
3497        unsigned int i;
3498
3499        for (i = 0; i < rdata->nr_pages; i++) {
3500                struct page *page = rdata->pages[i];
3501                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3502                size_t written;
3503
3504                if (unlikely(iov_iter_is_pipe(iter))) {
3505                        void *addr = kmap_atomic(page);
3506
3507                        written = copy_to_iter(addr, copy, iter);
3508                        kunmap_atomic(addr);
3509                } else
3510                        written = copy_page_to_iter(page, 0, copy, iter);
3511                remaining -= written;
3512                if (written < copy && iov_iter_count(iter) > 0)
3513                        break;
3514        }
3515        return remaining ? -EFAULT : 0;
3516}
3517
3518static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3519
3520static void
3521cifs_uncached_readv_complete(struct work_struct *work)
3522{
3523        struct cifs_readdata *rdata = container_of(work,
3524                                                struct cifs_readdata, work);
3525
3526        complete(&rdata->done);
3527        collect_uncached_read_data(rdata->ctx);
3528        /* the call below may free the last reference to the aio ctx */
3529        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3530}
3531
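/*
 * Fill the pages of an uncached read with up to @len bytes, either copied
 * from @iter (when resending already-received data) or read straight from
 * the socket; with SMB Direct the data has already landed via RDMA, so
 * the pages are simply accounted. Pages with no data left for them are
 * released rather than held hostage.
 */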
3532static int
3533uncached_fill_pages(struct TCP_Server_Info *server,
3534                    struct cifs_readdata *rdata, struct iov_iter *iter,
3535                    unsigned int len)
3536{
3537        int result = 0;
3538        unsigned int i;
3539        unsigned int nr_pages = rdata->nr_pages;
3540        unsigned int page_offset = rdata->page_offset;
3541
3542        rdata->got_bytes = 0;
3543        rdata->tailsz = PAGE_SIZE;
3544        for (i = 0; i < nr_pages; i++) {
3545                struct page *page = rdata->pages[i];
3546                size_t n;
3547                unsigned int segment_size = rdata->pagesz;
3548
3549                if (i == 0)
3550                        segment_size -= page_offset;
3551                else
3552                        page_offset = 0;
3553
3554
3555                if (len <= 0) {
3556                        /* no need to hold page hostage */
3557                        rdata->pages[i] = NULL;
3558                        rdata->nr_pages--;
3559                        put_page(page);
3560                        continue;
3561                }
3562
3563                n = len;
3564                if (len >= segment_size)
3565                        /* enough data to fill the page */
3566                        n = segment_size;
3567                else
3568                        rdata->tailsz = len;
3569                len -= n;
3570
3571                if (iter)
3572                        result = copy_page_from_iter(
3573                                        page, page_offset, n, iter);
3574#ifdef CONFIG_CIFS_SMB_DIRECT
3575                else if (rdata->mr)
3576                        result = n;
3577#endif
3578                else
3579                        result = cifs_read_page_from_socket(
3580                                        server, page, page_offset, n);
3581                if (result < 0)
3582                        break;
3583
3584                rdata->got_bytes += result;
3585        }
3586
3587        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3588                                                rdata->got_bytes : result;
3589}
3590
3591static int
3592cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3593                              struct cifs_readdata *rdata, unsigned int len)
3594{
3595        return uncached_fill_pages(server, rdata, NULL, len);
3596}
3597
3598static int
3599cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3600                              struct cifs_readdata *rdata,
3601                              struct iov_iter *iter)
3602{
3603        return uncached_fill_pages(server, rdata, iter, iter->count);
3604}
3605
3606static int cifs_resend_rdata(struct cifs_readdata *rdata,
3607                        struct list_head *rdata_list,
3608                        struct cifs_aio_ctx *ctx)
3609{
3610        unsigned int rsize;
3611        struct cifs_credits credits;
3612        int rc;
3613        struct TCP_Server_Info *server;
3614
3615        /* XXX: should we pick a new channel here? */
3616        server = rdata->server;
3617
3618        do {
3619                if (rdata->cfile->invalidHandle) {
3620                        rc = cifs_reopen_file(rdata->cfile, true);
3621                        if (rc == -EAGAIN)
3622                                continue;
3623                        else if (rc)
3624                                break;
3625                }
3626
3627                /*
3628                 * Wait for enough credits to resend this rdata.
3629                 * Note: we attempt to resend the whole rdata rather than
3630                 * in segments.
3631                 */
3632                do {
3633                        rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3634                                                &rsize, &credits);
3635
3636                        if (rc)
3637                                goto fail;
3638
3639                        if (rsize < rdata->bytes) {
3640                                add_credits_and_wake_if(server, &credits, 0);
3641                                msleep(1000);
3642                        }
3643                } while (rsize < rdata->bytes);
3644                rdata->credits = credits;
3645
3646                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3647                if (!rc) {
3648                        if (rdata->cfile->invalidHandle)
3649                                rc = -EAGAIN;
3650                        else {
3651#ifdef CONFIG_CIFS_SMB_DIRECT
3652                                if (rdata->mr) {
3653                                        rdata->mr->need_invalidate = true;
3654                                        smbd_deregister_mr(rdata->mr);
3655                                        rdata->mr = NULL;
3656                                }
3657#endif
3658                                rc = server->ops->async_readv(rdata);
3659                        }
3660                }
3661
3662                /* If the read was successfully sent, we are done */
3663                if (!rc) {
3664                        /* Add to aio pending list */
3665                        list_add_tail(&rdata->list, rdata_list);
3666                        return 0;
3667                }
3668
3669                /* Roll back credits and retry if needed */
3670                add_credits_and_wake_if(server, &rdata->credits, 0);
3671        } while (rc == -EAGAIN);
3672
3673fail:
3674        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3675        return rc;
3676}
3677
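/*
 * Issue a pipelined series of async reads covering [offset, offset+len),
 * each at most rsize bytes, queueing the rdata for every successful send
 * on @rdata_list so collect_uncached_read_data() can reap the replies.
 */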
3678static int
3679cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3680                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3681                     struct cifs_aio_ctx *ctx)
3682{
3683        struct cifs_readdata *rdata;
3684        unsigned int npages, rsize;
3685        struct cifs_credits credits_on_stack;
3686        struct cifs_credits *credits = &credits_on_stack;
3687        size_t cur_len;
3688        int rc;
3689        pid_t pid;
3690        struct TCP_Server_Info *server;
3691        struct page **pagevec;
3692        size_t start;
3693        struct iov_iter direct_iov = ctx->iter;
3694
3695        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3696
3697        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3698                pid = open_file->pid;
3699        else
3700                pid = current->tgid;
3701
3702        if (ctx->direct_io)
3703                iov_iter_advance(&direct_iov, offset - ctx->pos);
3704
3705        do {
3706                if (open_file->invalidHandle) {
3707                        rc = cifs_reopen_file(open_file, true);
3708                        if (rc == -EAGAIN)
3709                                continue;
3710                        else if (rc)
3711                                break;
3712                }
3713
3714                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
3715                                                   &rsize, credits);
3716                if (rc)
3717                        break;
3718
3719                cur_len = min_t(const size_t, len, rsize);
3720
3721                if (ctx->direct_io) {
3722                        ssize_t result;
3723
3724                        result = iov_iter_get_pages_alloc(
3725                                        &direct_iov, &pagevec,
3726                                        cur_len, &start);
3727                        if (result < 0) {
3728                                cifs_dbg(VFS,
3729                                         "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3730                                         result, iov_iter_type(&direct_iov),
3731                                         direct_iov.iov_offset,
3732                                         direct_iov.count);
3733                                dump_stack();
3734
3735                                rc = result;
3736                                add_credits_and_wake_if(server, credits, 0);
3737                                break;
3738                        }
3739                        cur_len = (size_t)result;
3740                        iov_iter_advance(&direct_iov, cur_len);
3741
3742                        rdata = cifs_readdata_direct_alloc(
3743                                        pagevec, cifs_uncached_readv_complete);
3744                        if (!rdata) {
3745                                add_credits_and_wake_if(server, credits, 0);
3746                                rc = -ENOMEM;
3747                                break;
3748                        }
3749
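                        /*
                         * The pinned pages may start @start bytes in, so
                         * npages and tailsz follow the same layout rules as
                         * the direct write path above (see the worked
                         * example there).
                         */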
3750                        npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3751                        rdata->page_offset = start;
3752                        rdata->tailsz = npages > 1 ?
3753                                cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3754                                cur_len;
3755
3756                } else {
3757
3758                        npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3759                        /* allocate a readdata struct */
3760                        rdata = cifs_readdata_alloc(npages,
3761                                            cifs_uncached_readv_complete);
3762                        if (!rdata) {
3763                                add_credits_and_wake_if(server, credits, 0);
3764                                rc = -ENOMEM;
3765                                break;
3766                        }
3767
3768                        rc = cifs_read_allocate_pages(rdata, npages);
3769                        if (rc) {
3770                                kvfree(rdata->pages);
3771                                kfree(rdata);
3772                                add_credits_and_wake_if(server, credits, 0);
3773                                break;
3774                        }
3775
3776                        rdata->tailsz = PAGE_SIZE;
3777                }
3778
3779                rdata->server = server;
3780                rdata->cfile = cifsFileInfo_get(open_file);
3781                rdata->nr_pages = npages;
3782                rdata->offset = offset;
3783                rdata->bytes = cur_len;
3784                rdata->pid = pid;
3785                rdata->pagesz = PAGE_SIZE;
3786                rdata->read_into_pages = cifs_uncached_read_into_pages;
3787                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3788                rdata->credits = credits_on_stack;
3789                rdata->ctx = ctx;
3790                kref_get(&ctx->refcount);
3791
3792                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3793
3794                if (!rc) {
3795                        if (rdata->cfile->invalidHandle)
3796                                rc = -EAGAIN;
3797                        else
3798                                rc = server->ops->async_readv(rdata);
3799                }
3800
3801                if (rc) {
3802                        add_credits_and_wake_if(server, &rdata->credits, 0);
3803                        kref_put(&rdata->refcount,
3804                                cifs_uncached_readdata_release);
3805                        if (rc == -EAGAIN) {
3806                                iov_iter_revert(&direct_iov, cur_len);
3807                                continue;
3808                        }
3809                        break;
3810                }
3811
3812                list_add_tail(&rdata->list, rdata_list);
3813                offset += cur_len;
3814                len -= cur_len;
3815        } while (len > 0);
3816
3817        return rc;
3818}
3819
3820static void
3821collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3822{
3823        struct cifs_readdata *rdata, *tmp;
3824        struct iov_iter *to = &ctx->iter;
3825        struct cifs_sb_info *cifs_sb;
3826        int rc;
3827
3828        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3829
3830        mutex_lock(&ctx->aio_mutex);
3831
3832        if (list_empty(&ctx->list)) {
3833                mutex_unlock(&ctx->aio_mutex);
3834                return;
3835        }
3836
3837        rc = ctx->rc;
3838        /* the loop below should proceed in the order of increasing offsets */
3839again:
3840        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3841                if (!rc) {
3842                        if (!try_wait_for_completion(&rdata->done)) {
3843                                mutex_unlock(&ctx->aio_mutex);
3844                                return;
3845                        }
3846
3847                        if (rdata->result == -EAGAIN) {
3848                                /* resend call if it's a retryable error */
3849                                struct list_head tmp_list;
3850                                unsigned int got_bytes = rdata->got_bytes;
3851
3852                                list_del_init(&rdata->list);
3853                                INIT_LIST_HEAD(&tmp_list);
3854
3855                                /*
3856                                 * Got part of the data and then a reconnect
3857                                 * happened -- fill the buffer and continue
3858                                 * reading.
3859                                 */
3860                                if (got_bytes && got_bytes < rdata->bytes) {
3861                                        rc = 0;
3862                                        if (!ctx->direct_io)
3863                                                rc = cifs_readdata_to_iov(rdata, to);
3864                                        if (rc) {
3865                                                kref_put(&rdata->refcount,
3866                                                        cifs_uncached_readdata_release);
3867                                                continue;
3868                                        }
3869                                }
3870
3871                                if (ctx->direct_io) {
3872                                        /*
3873                                         * Re-use rdata as this is a
3874                                         * direct I/O
3875                                         */
3876                                        rc = cifs_resend_rdata(
3877                                                rdata,
3878                                                &tmp_list, ctx);
3879                                } else {
3880                                        rc = cifs_send_async_read(
3881                                                rdata->offset + got_bytes,
3882                                                rdata->bytes - got_bytes,
3883                                                rdata->cfile, cifs_sb,
3884                                                &tmp_list, ctx);
3885
3886                                        kref_put(&rdata->refcount,
3887                                                cifs_uncached_readdata_release);
3888                                }
3889
3890                                list_splice(&tmp_list, &ctx->list);
3891
3892                                goto again;
3893                        } else if (rdata->result)
3894                                rc = rdata->result;
3895                        else if (!ctx->direct_io)
3896                                rc = cifs_readdata_to_iov(rdata, to);
3897
3898                        /* if there was a short read -- discard anything left */
3899                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3900                                rc = -ENODATA;
3901
3902                        ctx->total_len += rdata->got_bytes;
3903                }
3904                list_del_init(&rdata->list);
3905                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3906        }
3907
3908        if (!ctx->direct_io)
3909                ctx->total_len = ctx->len - iov_iter_count(to);
3910
3911        /* mask nodata case */
3912        if (rc == -ENODATA)
3913                rc = 0;
3914
3915        ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
3916
3917        mutex_unlock(&ctx->aio_mutex);
3918
3919        if (ctx->iocb && ctx->iocb->ki_complete)
3920                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3921        else
3922                complete(&ctx->done);
3923}
3924
3925static ssize_t __cifs_readv(
3926        struct kiocb *iocb, struct iov_iter *to, bool direct)
3927{
3928        size_t len;
3929        struct file *file = iocb->ki_filp;
3930        struct cifs_sb_info *cifs_sb;
3931        struct cifsFileInfo *cfile;
3932        struct cifs_tcon *tcon;
3933        ssize_t rc, total_read = 0;
3934        loff_t offset = iocb->ki_pos;
3935        struct cifs_aio_ctx *ctx;
3936
3937        /*
3938         * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so fall
3939         * back to the data copy read path. This could be improved by
3940         * getting the pages directly for ITER_KVEC.
3941         */
3942        if (direct && iov_iter_is_kvec(to)) {
3943                cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3944                direct = false;
3945        }
3946
3947        len = iov_iter_count(to);
3948        if (!len)
3949                return 0;
3950
3951        cifs_sb = CIFS_FILE_SB(file);
3952        cfile = file->private_data;
3953        tcon = tlink_tcon(cfile->tlink);
3954
3955        if (!tcon->ses->server->ops->async_readv)
3956                return -ENOSYS;
3957
3958        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3959                cifs_dbg(FYI, "attempting read on write only file instance\n");
3960
3961        ctx = cifs_aio_ctx_alloc();
3962        if (!ctx)
3963                return -ENOMEM;
3964
3965        ctx->cfile = cifsFileInfo_get(cfile);
3966
3967        if (!is_sync_kiocb(iocb))
3968                ctx->iocb = iocb;
3969
3970        if (iter_is_iovec(to))
3971                ctx->should_dirty = true;
3972
3973        if (direct) {
3974                ctx->pos = offset;
3975                ctx->direct_io = true;
3976                ctx->iter = *to;
3977                ctx->len = len;
3978        } else {
3979                rc = setup_aio_ctx_iter(ctx, to, READ);
3980                if (rc) {
3981                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3982                        return rc;
3983                }
3984                len = ctx->len;
3985        }
3986
3987        /* grab a lock here because the read response handlers can access ctx */
3988        mutex_lock(&ctx->aio_mutex);
3989
3990        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3991
3992        /* if at least one read request was successfully sent, reset rc */
3993        if (!list_empty(&ctx->list))
3994                rc = 0;
3995
3996        mutex_unlock(&ctx->aio_mutex);
3997
3998        if (rc) {
3999                kref_put(&ctx->refcount, cifs_aio_ctx_release);
4000                return rc;
4001        }
4002
4003        if (!is_sync_kiocb(iocb)) {
4004                kref_put(&ctx->refcount, cifs_aio_ctx_release);
4005                return -EIOCBQUEUED;
4006        }
4007
4008        rc = wait_for_completion_killable(&ctx->done);
4009        if (rc) {
4010                mutex_lock(&ctx->aio_mutex);
4011                ctx->rc = rc = -EINTR;
4012                total_read = ctx->total_len;
4013                mutex_unlock(&ctx->aio_mutex);
4014        } else {
4015                rc = ctx->rc;
4016                total_read = ctx->total_len;
4017        }
4018
4019        kref_put(&ctx->refcount, cifs_aio_ctx_release);
4020
4021        if (total_read) {
4022                iocb->ki_pos += total_read;
4023                return total_read;
4024        }
4025        return rc;
4026}
4027
4028ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4029{
4030        return __cifs_readv(iocb, to, true);
4031}
4032
4033ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4034{
4035        return __cifs_readv(iocb, to, false);
4036}
4037
4038ssize_t
4039cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4040{
4041        struct inode *inode = file_inode(iocb->ki_filp);
4042        struct cifsInodeInfo *cinode = CIFS_I(inode);
4043        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4044        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4045                                                iocb->ki_filp->private_data;
4046        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4047        int rc = -EACCES;
4048
4049        /*
4050         * In strict cache mode we need to read from the server every time
4051         * if we don't have a level II oplock, because the server can delay
4052         * the mtime change - so we can't decide whether to invalidate the
4053         * inode. Reading pages can also fail if there are mandatory locks
4054         * on pages affected by this read but not on the region from pos to
4055         * pos+len-1.
4056         */
4057        if (!CIFS_CACHE_READ(cinode))
4058                return cifs_user_readv(iocb, to);
4059
4060        if (cap_unix(tcon->ses) &&
4061            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4062            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4063                return generic_file_read_iter(iocb, to);
4064
4065        /*
4066         * We need to hold the sem to be sure nobody modifies lock list
4067         * with a brlock that prevents reading.
4068         */
4069        down_read(&cinode->lock_sem);
4070        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4071                                     tcon->ses->server->vals->shared_lock_type,
4072                                     0, NULL, CIFS_READ_OP))
4073                rc = generic_file_read_iter(iocb, to);
4074        up_read(&cinode->lock_sem);
4075        return rc;
4076}
4077
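/*
 * Legacy synchronous read path: issue sync_read calls of at most rsize
 * bytes in a loop, retrying on -EAGAIN (e.g. after reopening an
 * invalidated handle), until the request is satisfied or a read returns
 * zero bytes or an error.
 */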
4078static ssize_t
4079cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4080{
4081        int rc = -EACCES;
4082        unsigned int bytes_read = 0;
4083        unsigned int total_read;
4084        unsigned int current_read_size;
4085        unsigned int rsize;
4086        struct cifs_sb_info *cifs_sb;
4087        struct cifs_tcon *tcon;
4088        struct TCP_Server_Info *server;
4089        unsigned int xid;
4090        char *cur_offset;
4091        struct cifsFileInfo *open_file;
4092        struct cifs_io_parms io_parms = {0};
4093        int buf_type = CIFS_NO_BUFFER;
4094        __u32 pid;
4095
4096        xid = get_xid();
4097        cifs_sb = CIFS_FILE_SB(file);
4098
4099        /* FIXME: set up handlers for larger reads and/or convert to async */
4100        rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4101
4102        if (file->private_data == NULL) {
4103                rc = -EBADF;
4104                free_xid(xid);
4105                return rc;
4106        }
4107        open_file = file->private_data;
4108        tcon = tlink_tcon(open_file->tlink);
4109        server = cifs_pick_channel(tcon->ses);
4110
4111        if (!server->ops->sync_read) {
4112                free_xid(xid);
4113                return -ENOSYS;
4114        }
4115
4116        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4117                pid = open_file->pid;
4118        else
4119                pid = current->tgid;
4120
4121        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4122                cifs_dbg(FYI, "attempting read on write only file instance\n");
4123
4124        for (total_read = 0, cur_offset = read_data; read_size > total_read;
4125             total_read += bytes_read, cur_offset += bytes_read) {
4126                do {
4127                        current_read_size = min_t(uint, read_size - total_read,
4128                                                  rsize);
4129                        /*
4130                         * For Windows ME and 9x we do not want to request
4131                         * more than was negotiated, since the server will
4132                         * refuse the read otherwise.
4133                         */
4134                        if (!(tcon->ses->capabilities &
4135                                tcon->ses->server->vals->cap_large_files)) {
4136                                current_read_size = min_t(uint,
4137                                        current_read_size, CIFSMaxBufSize);
4138                        }
4139                        if (open_file->invalidHandle) {
4140                                rc = cifs_reopen_file(open_file, true);
4141                                if (rc != 0)
4142                                        break;
4143                        }
4144                        io_parms.pid = pid;
4145                        io_parms.tcon = tcon;
4146                        io_parms.offset = *offset;
4147                        io_parms.length = current_read_size;
4148                        io_parms.server = server;
4149                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4150                                                    &bytes_read, &cur_offset,
4151                                                    &buf_type);
4152                } while (rc == -EAGAIN);
4153
4154                if (rc || (bytes_read == 0)) {
4155                        if (total_read) {
4156                                break;
4157                        } else {
4158                                free_xid(xid);
4159                                return rc;
4160                        }
4161                } else {
4162                        cifs_stats_bytes_read(tcon, total_read);
4163                        *offset += bytes_read;
4164                }
4165        }
4166        free_xid(xid);
4167        return total_read;
4168}
4169
4170/*
4171 * If the page is mmap'ed into a process' page tables, then we need to make
4172 * sure that it doesn't change while being written back.
4173 */
4174static vm_fault_t
4175cifs_page_mkwrite(struct vm_fault *vmf)
4176{
4177        struct page *page = vmf->page;
4178        struct file *file = vmf->vma->vm_file;
4179        struct inode *inode = file_inode(file);
4180
4181        cifs_fscache_wait_on_page_write(inode, page);
4182
4183        lock_page(page);
4184        return VM_FAULT_LOCKED;
4185}
4186
4187static const struct vm_operations_struct cifs_file_vm_ops = {
4188        .fault = filemap_fault,
4189        .map_pages = filemap_map_pages,
4190        .page_mkwrite = cifs_page_mkwrite,
4191};
4192
4193int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4194{
4195        int xid, rc = 0;
4196        struct inode *inode = file_inode(file);
4197
4198        xid = get_xid();
4199
4200        if (!CIFS_CACHE_READ(CIFS_I(inode)))
4201                rc = cifs_zap_mapping(inode);
4202        if (!rc)
4203                rc = generic_file_mmap(file, vma);
4204        if (!rc)
4205                vma->vm_ops = &cifs_file_vm_ops;
4206
4207        free_xid(xid);
4208        return rc;
4209}
4210
4211int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4212{
4213        int rc, xid;
4214
4215        xid = get_xid();
4216
4217        rc = cifs_revalidate_file(file);
4218        if (rc)
4219                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4220                         rc);
4221        if (!rc)
4222                rc = generic_file_mmap(file, vma);
4223        if (!rc)
4224                vma->vm_ops = &cifs_file_vm_ops;
4225
4226        free_xid(xid);
4227        return rc;
4228}
4229
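/*
 * Completion work for a readahead request: mark each page uptodate (or in
 * error), add it to the LRU, hand successful reads to fscache and drop
 * the page references.
 */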
4230static void
4231cifs_readv_complete(struct work_struct *work)
4232{
4233        unsigned int i, got_bytes;
4234        struct cifs_readdata *rdata = container_of(work,
4235                                                struct cifs_readdata, work);
4236
4237        got_bytes = rdata->got_bytes;
4238        for (i = 0; i < rdata->nr_pages; i++) {
4239                struct page *page = rdata->pages[i];
4240
4241                lru_cache_add(page);
4242
4243                if (rdata->result == 0 ||
4244                    (rdata->result == -EAGAIN && got_bytes)) {
4245                        flush_dcache_page(page);
4246                        SetPageUptodate(page);
4247                } else
4248                        SetPageError(page);
4249
4250                unlock_page(page);
4251
4252                if (rdata->result == 0 ||
4253                    (rdata->result == -EAGAIN && got_bytes))
4254                        cifs_readpage_to_fscache(rdata->mapping->host, page);
4255                else
4256                        cifs_fscache_uncache_page(rdata->mapping->host, page);
4257
4258                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4259
4260                put_page(page);
4261                rdata->pages[i] = NULL;
4262        }
4263        kref_put(&rdata->refcount, cifs_readdata_release);
4264}
4265
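/*
 * Fill the locked pagecache pages of a readahead rdata: copy from @iter
 * or the socket (or account RDMA-delivered bytes), zero the tail of a
 * partially filled page, zero-fill pages past the server's idea of EOF so
 * the VFS stops re-requesting them, and drop pages for which no data
 * arrived.
 */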
4266static int
4267readpages_fill_pages(struct TCP_Server_Info *server,
4268                     struct cifs_readdata *rdata, struct iov_iter *iter,
4269                     unsigned int len)
4270{
4271        int result = 0;
4272        unsigned int i;
4273        u64 eof;
4274        pgoff_t eof_index;
4275        unsigned int nr_pages = rdata->nr_pages;
4276        unsigned int page_offset = rdata->page_offset;
4277
4278        /* determine the eof that the server (probably) has */
4279        eof = CIFS_I(rdata->mapping->host)->server_eof;
4280        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4281        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4282
4283        rdata->got_bytes = 0;
4284        rdata->tailsz = PAGE_SIZE;
4285        for (i = 0; i < nr_pages; i++) {
4286                struct page *page = rdata->pages[i];
4287                unsigned int to_read = rdata->pagesz;
4288                size_t n;
4289
4290                if (i == 0)
4291                        to_read -= page_offset;
4292                else
4293                        page_offset = 0;
4294
4295                n = to_read;
4296
4297                if (len >= to_read) {
4298                        len -= to_read;
4299                } else if (len > 0) {
4300                        /* enough for partial page, fill and zero the rest */
4301                        zero_user(page, len + page_offset, to_read - len);
4302                        n = rdata->tailsz = len;
4303                        len = 0;
4304                } else if (page->index > eof_index) {
4305                        /*
4306                         * The VFS will not try to do readahead past the
4307                         * i_size, but it's possible that we have outstanding
4308                         * writes with gaps in the middle and the i_size hasn't
4309                         * caught up yet. Populate those with zeroed out pages
4310                         * to prevent the VFS from repeatedly attempting to
4311                         * fill them until the writes are flushed.
4312                         */
4313                        zero_user(page, 0, PAGE_SIZE);
4314                        lru_cache_add(page);
4315                        flush_dcache_page(page);
4316                        SetPageUptodate(page);
4317                        unlock_page(page);
4318                        put_page(page);
4319                        rdata->pages[i] = NULL;
4320                        rdata->nr_pages--;
4321                        continue;
4322                } else {
4323                        /* no need to hold page hostage */
4324                        lru_cache_add(page);
4325                        unlock_page(page);
4326                        put_page(page);
4327                        rdata->pages[i] = NULL;
4328                        rdata->nr_pages--;
4329                        continue;
4330                }
4331
4332                if (iter)
4333                        result = copy_page_from_iter(
4334                                        page, page_offset, n, iter);
4335#ifdef CONFIG_CIFS_SMB_DIRECT
4336                else if (rdata->mr)
4337                        result = n;
4338#endif
4339                else
4340                        result = cifs_read_page_from_socket(
4341                                        server, page, page_offset, n);
4342                if (result < 0)
4343                        break;
4344
4345                rdata->got_bytes += result;
4346        }
4347
4348        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4349                                                rdata->got_bytes : result;
4350}
4351
4352static int
4353cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4354                               struct cifs_readdata *rdata, unsigned int len)
4355{
4356        return readpages_fill_pages(server, rdata, NULL, len);
4357}
4358
4359static int
4360cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4361                               struct cifs_readdata *rdata,
4362                               struct iov_iter *iter)
4363{
4364        return readpages_fill_pages(server, rdata, iter, iter->count);
4365}
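
/*
 * The two wrappers above pick how the payload reaches the pages:
 * read_into_pages pulls data straight off the socket (or leaves it to the
 * registered SMB Direct MR), while copy_into_pages is used when the payload
 * already sits in memory, e.g. after decryption, and is copied in through
 * an iov_iter.
 */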
4366
4367static int
4368readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4369                    unsigned int rsize, struct list_head *tmplist,
4370                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4371{
4372        struct page *page, *tpage;
4373        unsigned int expected_index;
4374        int rc;
4375        gfp_t gfp = readahead_gfp_mask(mapping);
4376
4377        INIT_LIST_HEAD(tmplist);
4378
4379        page = lru_to_page(page_list);
4380
4381        /*
4382         * Lock the page and put it in the cache. Since no one else
4383         * should have access to this page, we're safe to simply set
4384         * PG_locked without checking it first.
4385         */
4386        __SetPageLocked(page);
4387        rc = add_to_page_cache_locked(page, mapping,
4388                                      page->index, gfp);
4389
4390        /* give up if we can't stick it in the cache */
4391        if (rc) {
4392                __ClearPageLocked(page);
4393                return rc;
4394        }
4395
4396        /* move first page to the tmplist */
4397        *offset = (loff_t)page->index << PAGE_SHIFT;
4398        *bytes = PAGE_SIZE;
4399        *nr_pages = 1;
4400        list_move_tail(&page->lru, tmplist);
4401
4402        /* now try and add more pages onto the request */
4403        expected_index = page->index + 1;
4404        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4405                /* discontinuity? */
4406                if (page->index != expected_index)
4407                        break;
4408
4409                /* would this page push the read over the rsize? */
4410                if (*bytes + PAGE_SIZE > rsize)
4411                        break;
4412
4413                __SetPageLocked(page);
4414                rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
4415                if (rc) {
4416                        __ClearPageLocked(page);
4417                        break;
4418                }
4419                list_move_tail(&page->lru, tmplist);
4420                (*bytes) += PAGE_SIZE;
4421                expected_index++;
4422                (*nr_pages)++;
4423        }
4424        return rc;
4425}
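
/*
 * Worked example of the batching rule above, assuming PAGE_SIZE == 4096:
 * with rsize == 65536 a single batch covers at most 16 contiguous pages;
 * it ends early on an index discontinuity or when a page cannot be added
 * to the page cache.
 */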
4426
4427static int cifs_readpages(struct file *file, struct address_space *mapping,
4428        struct list_head *page_list, unsigned num_pages)
4429{
4430        int rc;
4431        int err = 0;
4432        struct list_head tmplist;
4433        struct cifsFileInfo *open_file = file->private_data;
4434        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4435        struct TCP_Server_Info *server;
4436        pid_t pid;
4437        unsigned int xid;
4438
4439        xid = get_xid();
4440        /*
4441         * Reads as many pages as possible from fscache. Returns -ENOBUFS
4442         * immediately if the cookie is negative.
4443         *
4444         * After this point, every page in the list might have PG_fscache
4445         * set, so we will need to clear that bit on every page we don't use.
4446         */
4447        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4448                                         &num_pages);
4449        if (rc == 0) {
4450                free_xid(xid);
4451                return rc;
4452        }
4453
4454        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4455                pid = open_file->pid;
4456        else
4457                pid = current->tgid;
4458
4459        rc = 0;
4460        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4461
4462        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4463                 __func__, file, mapping, num_pages);
4464
4465        /*
4466         * Start with the page at end of list and move it to private
4467         * list. Do the same with any following pages until we hit
4468         * the rsize limit, hit an index discontinuity, or run out of
4469         * pages. Issue the async read and then start the loop again
4470         * until the list is empty.
4471         *
4472         * Note that list order is important. The page_list is in
4473         * the order of declining indexes. When we put the pages in
4474         * the rdata->pages, then we want them in increasing order.
4475         */
4476        while (!list_empty(page_list) && !err) {
4477                unsigned int i, nr_pages, bytes, rsize;
4478                loff_t offset;
4479                struct page *page, *tpage;
4480                struct cifs_readdata *rdata;
4481                struct cifs_credits credits_on_stack;
4482                struct cifs_credits *credits = &credits_on_stack;
4483
4484                if (open_file->invalidHandle) {
4485                        rc = cifs_reopen_file(open_file, true);
4486                        if (rc == -EAGAIN)
4487                                continue;
4488                        else if (rc)
4489                                break;
4490                }
4491
4492                rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4493                                                   &rsize, credits);
4494                if (rc)
4495                        break;
4496
4497                /*
4498                 * Give up immediately if rsize is too small to read an
4499                 * entire page. The VFS will fall back to readpage. We
4500                 * should never reach this point, however, since we set
4501                 * ra_pages to 0 when rsize is smaller than a cache page.
4502                 */
4503                if (unlikely(rsize < PAGE_SIZE)) {
4504                        add_credits_and_wake_if(server, credits, 0);
4505                        free_xid(xid);
4506                        return 0;
4507                }
4508
4509                nr_pages = 0;
4510                err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4511                                         &nr_pages, &offset, &bytes);
4512                if (!nr_pages) {
4513                        add_credits_and_wake_if(server, credits, 0);
4514                        break;
4515                }
4516
4517                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4518                if (!rdata) {
4519                        /* best to give up if we're out of mem */
4520                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4521                                list_del(&page->lru);
4522                                lru_cache_add(page);
4523                                unlock_page(page);
4524                                put_page(page);
4525                        }
4526                        rc = -ENOMEM;
4527                        add_credits_and_wake_if(server, credits, 0);
4528                        break;
4529                }
4530
4531                rdata->cfile = cifsFileInfo_get(open_file);
4532                rdata->server = server;
4533                rdata->mapping = mapping;
4534                rdata->offset = offset;
4535                rdata->bytes = bytes;
4536                rdata->pid = pid;
4537                rdata->pagesz = PAGE_SIZE;
4538                rdata->tailsz = PAGE_SIZE;
4539                rdata->read_into_pages = cifs_readpages_read_into_pages;
4540                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4541                rdata->credits = credits_on_stack;
4542
4543                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4544                        list_del(&page->lru);
4545                        rdata->pages[rdata->nr_pages++] = page;
4546                }
4547
4548                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4549
4550                if (!rc) {
4551                        if (rdata->cfile->invalidHandle)
4552                                rc = -EAGAIN;
4553                        else
4554                                rc = server->ops->async_readv(rdata);
4555                }
4556
4557                if (rc) {
4558                        add_credits_and_wake_if(server, &rdata->credits, 0);
4559                        for (i = 0; i < rdata->nr_pages; i++) {
4560                                page = rdata->pages[i];
4561                                lru_cache_add(page);
4562                                unlock_page(page);
4563                                put_page(page);
4564                        }
4565                        /* Fallback to the readpage in error/reconnect cases */
4566                        kref_put(&rdata->refcount, cifs_readdata_release);
4567                        break;
4568                }
4569
4570                kref_put(&rdata->refcount, cifs_readdata_release);
4571        }
4572
4573        /* Any pages that have been shown to fscache but didn't get added to
4574         * the pagecache must be uncached before they get returned to the
4575         * allocator.
4576         */
4577        cifs_fscache_readpages_cancel(mapping->host, page_list);
4578        free_xid(xid);
4579        return rc;
4580}
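
/*
 * Credit flow in the loop above: wait_mtu_credits() reserves up to rsize
 * worth of credits before pages are gathered, adjust_credits() trims the
 * reservation to the actual rdata->bytes, and every failure path hands the
 * reservation back via add_credits_and_wake_if().
 */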
4581
4582/*
4583 * cifs_readpage_worker must be called with the page pinned
4584 */
4585static int cifs_readpage_worker(struct file *file, struct page *page,
4586        loff_t *poffset)
4587{
4588        char *read_data;
4589        int rc;
4590
4591        /* Is the page cached? */
4592        rc = cifs_readpage_from_fscache(file_inode(file), page);
4593        if (rc == 0)
4594                goto read_complete;
4595
4596        read_data = kmap(page);
4597        /* for reads over a certain size we could initiate async readahead */
4598
4599        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4600
4601        if (rc < 0)
4602                goto io_error;
4603        else
4604                cifs_dbg(FYI, "Bytes read %d\n", rc);
4605
4606        /* we do not want atime to be less than mtime, it broke some apps */
4607        file_inode(file)->i_atime = current_time(file_inode(file));
4608        if (timespec64_compare(&(file_inode(file)->i_atime),
4609                               &(file_inode(file)->i_mtime)) < 0)
4610                file_inode(file)->i_atime = file_inode(file)->i_mtime;
4612
4613        if (PAGE_SIZE > rc)
4614                memset(read_data + rc, 0, PAGE_SIZE - rc);
4615
4616        flush_dcache_page(page);
4617        SetPageUptodate(page);
4618
4619        /* send this page to the cache */
4620        cifs_readpage_to_fscache(file_inode(file), page);
4621
4622        rc = 0;
4623
4624io_error:
4625        kunmap(page);
4626        unlock_page(page);
4627
4628read_complete:
4629        return rc;
4630}
4631
4632static int cifs_readpage(struct file *file, struct page *page)
4633{
4634        loff_t offset = page_file_offset(page);
4635        int rc = -EACCES;
4636        unsigned int xid;
4637
4638        xid = get_xid();
4639
4640        if (file->private_data == NULL) {
4641                rc = -EBADF;
4642                free_xid(xid);
4643                return rc;
4644        }
4645
4646        cifs_dbg(FYI, "readpage %p at offset %lld\n",
4647                 page, (long long)offset);
4648
4649        rc = cifs_readpage_worker(file, page, &offset);
4650
4651        free_xid(xid);
4652        return rc;
4653}
4654
4655static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4656{
4657        struct cifsFileInfo *open_file;
4658
4659        spin_lock(&cifs_inode->open_file_lock);
4660        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4661                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4662                        spin_unlock(&cifs_inode->open_file_lock);
4663                        return 1;
4664                }
4665        }
4666        spin_unlock(&cifs_inode->open_file_lock);
4667        return 0;
4668}
4669
4670/* We do not want to update the file size from the server for inodes
4671   open for write, to avoid races with writepage extending the file.
4672   In the future we could consider refreshing the inode only on
4673   increases in the file size, but this is tricky to do without
4674   racing with writebehind page caching in the current Linux
4675   kernel design. */
4676bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4677{
4678        if (!cifsInode)
4679                return true;
4680
4681        if (is_inode_writable(cifsInode)) {
4682                /* This inode is open for write at least once */
4683                struct cifs_sb_info *cifs_sb;
4684
4685                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4686                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4687                        /* since there is no page cache to corrupt on
4688                           direct I/O we can change the size safely */
4689                        return true;
4690                }
4691
4692                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4693                        return true;
4694
4695                return false;
4696        } else
4697                return true;
4698}
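
/*
 * Example of the policy above (illustrative numbers): on a cached mount
 * with the inode open for write, a local i_size of 8192 and a server
 * end_of_file of 4096 yields false (keep the local size), while a server
 * end_of_file of 16384 yields true (safe to grow).
 */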
4699
4700static int cifs_write_begin(struct file *file, struct address_space *mapping,
4701                        loff_t pos, unsigned len, unsigned flags,
4702                        struct page **pagep, void **fsdata)
4703{
4704        int oncethru = 0;
4705        pgoff_t index = pos >> PAGE_SHIFT;
4706        loff_t offset = pos & (PAGE_SIZE - 1);
4707        loff_t page_start = pos & PAGE_MASK;
4708        loff_t i_size;
4709        struct page *page;
4710        int rc = 0;
4711
4712        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4713
4714start:
4715        page = grab_cache_page_write_begin(mapping, index, flags);
4716        if (!page) {
4717                rc = -ENOMEM;
4718                goto out;
4719        }
4720
4721        if (PageUptodate(page))
4722                goto out;
4723
4724        /*
4725         * If we write a full page it will be up to date, no need to read from
4726         * the server. If the write is short, we'll end up doing a sync write
4727         * instead.
4728         */
4729        if (len == PAGE_SIZE)
4730                goto out;
4731
4732        /*
4733         * optimize away the read when we have an oplock, and we're not
4734         * expecting to use any of the data we'd be reading in. That
4735         * is, when the page lies beyond the EOF, or straddles the EOF
4736         * and the write will cover all of the existing data.
4737         */
4738        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4739                i_size = i_size_read(mapping->host);
4740                if (page_start >= i_size ||
4741                    (offset == 0 && (pos + len) >= i_size)) {
4742                        zero_user_segments(page, 0, offset,
4743                                           offset + len,
4744                                           PAGE_SIZE);
4745                        /*
4746                         * PageChecked means that the parts of the page
4747                         * to which we're not writing are considered up
4748                         * to date. Once the data is copied to the
4749                         * page, it can be set uptodate.
4750                         */
4751                        SetPageChecked(page);
4752                        goto out;
4753                }
4754        }
4755
4756        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4757                /*
4758                 * might as well read a page, it is fast enough. If we get
4759                 * an error, we don't need to return it. cifs_write_end will
4760                 * do a sync write instead since PG_uptodate isn't set.
4761                 */
4762                cifs_readpage_worker(file, page, &page_start);
4763                put_page(page);
4764                oncethru = 1;
4765                goto start;
4766        } else {
4767                /* we could try using another file handle if there is one,
4768                   but how would we lock it to prevent a close of that
4769                   handle racing with this read? In any case this page
4770                   will be written out by write_end, so this is fine */
4771        }
4772out:
4773        *pagep = page;
4774        return rc;
4775}
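
/*
 * Example of the zeroing in the oplock fast path above: if offset == 100
 * and len == 200, zero_user_segments() clears bytes [0, 100) and
 * [300, PAGE_SIZE) so that only the caller's upcoming copy supplies data
 * on the page.
 */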
4776
4777static int cifs_release_page(struct page *page, gfp_t gfp)
4778{
4779        if (PagePrivate(page))
4780                return 0;
4781
4782        return cifs_fscache_release_page(page, gfp);
4783}
4784
4785static void cifs_invalidate_page(struct page *page, unsigned int offset,
4786                                 unsigned int length)
4787{
4788        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4789
4790        if (offset == 0 && length == PAGE_SIZE)
4791                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4792}
4793
4794static int cifs_launder_page(struct page *page)
4795{
4796        int rc = 0;
4797        loff_t range_start = page_offset(page);
4798        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4799        struct writeback_control wbc = {
4800                .sync_mode = WB_SYNC_ALL,
4801                .nr_to_write = 0,
4802                .range_start = range_start,
4803                .range_end = range_end,
4804        };
4805
4806        cifs_dbg(FYI, "Launder page: %p\n", page);
4807
4808        if (clear_page_dirty_for_io(page))
4809                rc = cifs_writepage_locked(page, &wbc);
4810
4811        cifs_fscache_invalidate_page(page, page->mapping->host);
4812        return rc;
4813}
4814
4815void cifs_oplock_break(struct work_struct *work)
4816{
4817        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4818                                                  oplock_break);
4819        struct inode *inode = d_inode(cfile->dentry);
4820        struct cifsInodeInfo *cinode = CIFS_I(inode);
4821        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4822        struct TCP_Server_Info *server = tcon->ses->server;
4823        int rc = 0;
4824        bool purge_cache = false;
4825        bool is_deferred = false;
4826        struct cifs_deferred_close *dclose;
4827
4828        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4829                        TASK_UNINTERRUPTIBLE);
4830
4831        server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4832                                      cfile->oplock_epoch, &purge_cache);
4833
4834        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4835                                                cifs_has_mand_locks(cinode)) {
4836                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4837                         inode);
4838                cinode->oplock = 0;
4839        }
4840
4841        if (inode && S_ISREG(inode->i_mode)) {
4842                if (CIFS_CACHE_READ(cinode))
4843                        break_lease(inode, O_RDONLY);
4844                else
4845                        break_lease(inode, O_WRONLY);
4846                rc = filemap_fdatawrite(inode->i_mapping);
4847                if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4848                        rc = filemap_fdatawait(inode->i_mapping);
4849                        mapping_set_error(inode->i_mapping, rc);
4850                        cifs_zap_mapping(inode);
4851                }
4852                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4853                if (CIFS_CACHE_WRITE(cinode))
4854                        goto oplock_break_ack;
4855        }
4856
4857        rc = cifs_push_locks(cfile);
4858        if (rc)
4859                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4860
4861oplock_break_ack:
4862        /*
4863         * When an oplock break is received and the only remaining file
4864         * handles are cached ones, schedule the deferred close immediately
4865         * so that a new open will not reuse the cached handle.
4866         */
4867        spin_lock(&CIFS_I(inode)->deferred_lock);
4868        is_deferred = cifs_is_deferred_close(cfile, &dclose);
4869        spin_unlock(&CIFS_I(inode)->deferred_lock);
4870        if (is_deferred &&
4871            cfile->deferred_close_scheduled &&
4872            delayed_work_pending(&cfile->deferred)) {
4873                if (cancel_delayed_work(&cfile->deferred)) {
4874                        _cifsFileInfo_put(cfile, false, false);
4875                        goto oplock_break_done;
4876                }
4877        }
4878        /*
4879         * Releasing a stale oplock after a recent reconnect of the SMB
4880         * session, using a now incorrect file handle, is not a data
4881         * integrity issue, but do not bother sending an oplock release if
4882         * the session is still disconnected: the server already released it.
4883         */
4884        if (!cfile->oplock_break_cancelled) {
4885                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4886                                                             cinode);
4887                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4888        }
4889oplock_break_done:
4890        _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4891        cifs_done_oplock_break(cinode);
4892}
4893
4894/*
4895 * The presence of cifs_direct_io() in the address space ops vector
4896 * allows open() with the O_DIRECT flag, which would have failed otherwise.
4897 *
4898 * In the non-cached mode (mount with cache=none), we shunt off direct
4899 * read and write requests, so this method should never be called.
4900 *
4901 * Direct I/O is not yet supported in the cached mode.
4902 */
4903static ssize_t
4904cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4905{
4906        /*
4907         * FIXME
4908         * Eventually need to support direct IO for non forcedirectio mounts
4909         */
4910        return -EINVAL;
4911}
4912
4913static int cifs_swap_activate(struct swap_info_struct *sis,
4914                              struct file *swap_file, sector_t *span)
4915{
4916        struct cifsFileInfo *cfile = swap_file->private_data;
4917        struct inode *inode = swap_file->f_mapping->host;
4918        unsigned long blocks;
4919        long long isize;
4920
4921        cifs_dbg(FYI, "swap activate\n");
4922
4923        spin_lock(&inode->i_lock);
4924        blocks = inode->i_blocks;
4925        isize = inode->i_size;
4926        spin_unlock(&inode->i_lock);
4927        if (blocks * 512 < isize) {
4928                pr_warn("swap activate: swapfile has holes\n");
4929                return -EINVAL;
4930        }
4931        *span = sis->pages;
4932
4933        pr_warn_once("Swap support over SMB3 is experimental\n");
4934
4935        /*
4936         * TODO: consider adding ACL (or documenting how) to prevent other
4937         * users (on this or other systems) from reading it
4938         */
4939
4941        /* TODO: add sk_set_memalloc(inet) or similar */
4942
4943        if (cfile)
4944                cfile->swapfile = true;
4945        /*
4946         * TODO: Since file already open, we can't open with DENY_ALL here
4947         * but we could add call to grab a byte range lock to prevent others
4948         * from reading or writing the file
4949         */
4950
4951        return 0;
4952}
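
/*
 * The holes check above is plain arithmetic: i_blocks counts 512-byte
 * units, so a fully-allocated 1 MiB swapfile must show at least 2048
 * blocks; anything less implies unallocated ranges and the file is
 * rejected.
 */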
4953
4954static void cifs_swap_deactivate(struct file *file)
4955{
4956        struct cifsFileInfo *cfile = file->private_data;
4957
4958        cifs_dbg(FYI, "swap deactivate\n");
4959
4960        /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4961
4962        if (cfile)
4963                cfile->swapfile = false;
4964
4965        /* do we need to unpin (or unlock) the file? */
4966}
4967
4968const struct address_space_operations cifs_addr_ops = {
4969        .readpage = cifs_readpage,
4970        .readpages = cifs_readpages,
4971        .writepage = cifs_writepage,
4972        .writepages = cifs_writepages,
4973        .write_begin = cifs_write_begin,
4974        .write_end = cifs_write_end,
4975        .set_page_dirty = __set_page_dirty_nobuffers,
4976        .releasepage = cifs_release_page,
4977        .direct_IO = cifs_direct_io,
4978        .invalidatepage = cifs_invalidate_page,
4979        .launder_page = cifs_launder_page,
4980        /*
4981         * TODO: investigate whether a cifs_migratePage helper (under a
4982         * CONFIG_MIGRATION guard) would be useful, and also whether an
4983         * is_dirty_writeback helper is needed
4984         */
4985        .swap_activate = cifs_swap_activate,
4986        .swap_deactivate = cifs_swap_deactivate,
4987};
4988
4989/*
4990 * cifs_readpages requires the server to support a buffer large enough to
4991 * contain the header plus one complete page of data.  Otherwise, we need
4992 * to leave cifs_readpages out of the address space operations.
4993 */
4994const struct address_space_operations cifs_addr_ops_smallbuf = {
4995        .readpage = cifs_readpage,
4996        .writepage = cifs_writepage,
4997        .writepages = cifs_writepages,
4998        .write_begin = cifs_write_begin,
4999        .write_end = cifs_write_end,
5000        .set_page_dirty = __set_page_dirty_nobuffers,
5001        .releasepage = cifs_release_page,
5002        .invalidatepage = cifs_invalidate_page,
5003        .launder_page = cifs_launder_page,
5004};
5005