linux/fs/cifs/file.c
   1/*
   2 *   fs/cifs/file.c
   3 *
   4 *   vfs operations that deal with files
   5 *
   6 *   Copyright (C) International Business Machines  Corp., 2002,2010
   7 *   Author(s): Steve French (sfrench@us.ibm.com)
   8 *              Jeremy Allison (jra@samba.org)
   9 *
  10 *   This library is free software; you can redistribute it and/or modify
  11 *   it under the terms of the GNU Lesser General Public License as published
  12 *   by the Free Software Foundation; either version 2.1 of the License, or
  13 *   (at your option) any later version.
  14 *
  15 *   This library is distributed in the hope that it will be useful,
  16 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18 *   the GNU Lesser General Public License for more details.
  19 *
  20 *   You should have received a copy of the GNU Lesser General Public License
  21 *   along with this library; if not, write to the Free Software
  22 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23 */
  24#include <linux/fs.h>
  25#include <linux/backing-dev.h>
  26#include <linux/stat.h>
  27#include <linux/fcntl.h>
  28#include <linux/pagemap.h>
  29#include <linux/pagevec.h>
  30#include <linux/writeback.h>
  31#include <linux/task_io_accounting_ops.h>
  32#include <linux/delay.h>
  33#include <linux/mount.h>
  34#include <linux/slab.h>
  35#include <linux/swap.h>
  36#include <linux/mm.h>
  37#include <asm/div64.h>
  38#include "cifsfs.h"
  39#include "cifspdu.h"
  40#include "cifsglob.h"
  41#include "cifsproto.h"
  42#include "cifs_unicode.h"
  43#include "cifs_debug.h"
  44#include "cifs_fs_sb.h"
  45#include "fscache.h"
  46#include "smbdirect.h"
  47
  48static inline int cifs_convert_flags(unsigned int flags)
  49{
  50        if ((flags & O_ACCMODE) == O_RDONLY)
  51                return GENERIC_READ;
  52        else if ((flags & O_ACCMODE) == O_WRONLY)
  53                return GENERIC_WRITE;
  54        else if ((flags & O_ACCMODE) == O_RDWR) {
  55                /* GENERIC_ALL is too much permission to request
  56                   can cause unnecessary access denied on create */
  57                /* return GENERIC_ALL; */
  58                return (GENERIC_READ | GENERIC_WRITE);
  59        }
  60
  61        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  62                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  63                FILE_READ_DATA);
  64}
  65
  66static u32 cifs_posix_convert_flags(unsigned int flags)
  67{
  68        u32 posix_flags = 0;
  69
  70        if ((flags & O_ACCMODE) == O_RDONLY)
  71                posix_flags = SMB_O_RDONLY;
  72        else if ((flags & O_ACCMODE) == O_WRONLY)
  73                posix_flags = SMB_O_WRONLY;
  74        else if ((flags & O_ACCMODE) == O_RDWR)
  75                posix_flags = SMB_O_RDWR;
  76
  77        if (flags & O_CREAT) {
  78                posix_flags |= SMB_O_CREAT;
  79                if (flags & O_EXCL)
  80                        posix_flags |= SMB_O_EXCL;
  81        } else if (flags & O_EXCL)
  82                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
  83                         current->comm, current->tgid);
  84
  85        if (flags & O_TRUNC)
  86                posix_flags |= SMB_O_TRUNC;
  87        /* be safe and imply O_SYNC for O_DSYNC */
  88        if (flags & O_DSYNC)
  89                posix_flags |= SMB_O_SYNC;
  90        if (flags & O_DIRECTORY)
  91                posix_flags |= SMB_O_DIRECTORY;
  92        if (flags & O_NOFOLLOW)
  93                posix_flags |= SMB_O_NOFOLLOW;
  94        if (flags & O_DIRECT)
  95                posix_flags |= SMB_O_DIRECT;
  96
  97        return posix_flags;
  98}
  99
 100static inline int cifs_get_disposition(unsigned int flags)
 101{
 102        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 103                return FILE_CREATE;
 104        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 105                return FILE_OVERWRITE_IF;
 106        else if ((flags & O_CREAT) == O_CREAT)
 107                return FILE_OPEN_IF;
 108        else if ((flags & O_TRUNC) == O_TRUNC)
 109                return FILE_OVERWRITE;
 110        else
 111                return FILE_OPEN;
 112}
 113
/*
 * cifs_posix_open - open a file using the SMB unix (POSIX) extensions
 * @full_path:	path of the file relative to the share root
 * @pinode:	in/out inode pointer, may be NULL if the caller does not
 *		need inode info; if *pinode is NULL a new inode is created
 *		from the attributes the server returned
 * @sb:		superblock of the mount
 * @mode:	create mode (the current umask is applied below)
 * @f_flags:	POSIX open flags from the VFS
 * @poplock:	out: oplock level granted by the server
 * @pnetfid:	out: network file id of the opened file
 * @xid:	transaction id used for tracing
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	/* response buffer for the attributes returned by the create call */
	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* a Type of -1 means the server returned no usable metadata */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* inode already exists - refresh it from the returned attrs */
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
 174
/*
 * cifs_nt_open - open a file the traditional (non-POSIX) way via the
 * per-dialect server->ops->open hook, then refresh the inode from the
 * attributes returned by the open.
 *
 * On failure after a successful open the handle is closed again and
 * -ESTALE is translated to -EOPENSTALE so the VFS retries the open.
 * Returns 0 on success or a negative errno.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* buffer for the file attributes returned by ops->open */
	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* inode refresh failed: give the handle back to the server */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
 264
 265static bool
 266cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 267{
 268        struct cifs_fid_locks *cur;
 269        bool has_locks = false;
 270
 271        down_read(&cinode->lock_sem);
 272        list_for_each_entry(cur, &cinode->llist, llist) {
 273                if (!list_empty(&cur->locks)) {
 274                        has_locks = true;
 275                        break;
 276                }
 277        }
 278        up_read(&cinode->lock_sem);
 279        return has_locks;
 280}
 281
 282void
 283cifs_down_write(struct rw_semaphore *sem)
 284{
 285        while (!down_write_trylock(sem))
 286                msleep(10);
 287}
 288
 289static void cifsFileInfo_put_work(struct work_struct *work);
 290
/*
 * cifs_new_fileinfo - allocate and initialize the per-open private data
 * attached to @file for the handle described by @fid.
 *
 * Links the new cifsFileInfo onto the tcon and inode open-file lists
 * (tcon->open_file_lock is taken before cinode->open_file_lock - keep
 * that ordering), consumes fid->pending_open, and hands the fid to the
 * server via server->ops->set_fid().
 *
 * Returns the new cifsFileInfo (also stored in file->private_data) or
 * NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* per-handle byte-range lock list, linked to the inode below */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;		/* initial reference, dropped by cifsFileInfo_put */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while this open exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* a lease break may have updated the pending open's oplock level */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* set_fid() may set purge_cache; checked below after the locks drop */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance put first in list*/
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
 370
/*
 * cifsFileInfo_get - take an additional reference on @cifs_file.
 * Acquires file_info_lock around the refcount bump; returns @cifs_file
 * for caller convenience. Paired with cifsFileInfo_put().
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
 379
/*
 * cifsFileInfo_put_final - free the last reference of @cifs_file.
 * Drops outstanding byte-range lock records, unlinks the per-fid lock
 * list from the inode, then releases the tlink, dentry and superblock
 * references taken in cifs_new_fileinfo(). Runs either directly from
 * _cifsFileInfo_put() or deferred via cifsFileInfo_put_work().
 */
static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file);
}
 406
 407static void cifsFileInfo_put_work(struct work_struct *work)
 408{
 409        struct cifsFileInfo *cifs_file = container_of(work,
 410                        struct cifsFileInfo, put);
 411
 412        cifsFileInfo_put_final(cifs_file);
 413}
 414
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: the file info whose reference is being dropped
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}
 424
 425/**
 426 * _cifsFileInfo_put - release a reference of file priv data
 427 *
 428 * This may involve closing the filehandle @cifs_file out on the
 429 * server. Must be called without holding tcon->open_file_lock,
 430 * cinode->open_file_lock and cifs_file->file_info_lock.
 431 *
 432 * If @wait_for_oplock_handler is true and we are releasing the last
 433 * reference, wait for any running oplock break handler of the file
 434 * and cancel any pending one. If calling this function from the
 435 * oplock break handler, you need to pass false.
 436 *
 437 */
 438void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
 439                       bool wait_oplock_handler, bool offload)
 440{
 441        struct inode *inode = d_inode(cifs_file->dentry);
 442        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 443        struct TCP_Server_Info *server = tcon->ses->server;
 444        struct cifsInodeInfo *cifsi = CIFS_I(inode);
 445        struct super_block *sb = inode->i_sb;
 446        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 447        struct cifs_fid fid;
 448        struct cifs_pending_open open;
 449        bool oplock_break_cancelled;
 450
 451        spin_lock(&tcon->open_file_lock);
 452        spin_lock(&cifsi->open_file_lock);
 453        spin_lock(&cifs_file->file_info_lock);
 454        if (--cifs_file->count > 0) {
 455                spin_unlock(&cifs_file->file_info_lock);
 456                spin_unlock(&cifsi->open_file_lock);
 457                spin_unlock(&tcon->open_file_lock);
 458                return;
 459        }
 460        spin_unlock(&cifs_file->file_info_lock);
 461
 462        if (server->ops->get_lease_key)
 463                server->ops->get_lease_key(inode, &fid);
 464
 465        /* store open in pending opens to make sure we don't miss lease break */
 466        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 467
 468        /* remove it from the lists */
 469        list_del(&cifs_file->flist);
 470        list_del(&cifs_file->tlist);
 471        atomic_dec(&tcon->num_local_opens);
 472
 473        if (list_empty(&cifsi->openFileList)) {
 474                cifs_dbg(FYI, "closing last open instance for inode %p\n",
 475                         d_inode(cifs_file->dentry));
 476                /*
 477                 * In strict cache mode we need invalidate mapping on the last
 478                 * close  because it may cause a error when we open this file
 479                 * again and get at least level II oplock.
 480                 */
 481                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 482                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 483                cifs_set_oplock_level(cifsi, 0);
 484        }
 485
 486        spin_unlock(&cifsi->open_file_lock);
 487        spin_unlock(&tcon->open_file_lock);
 488
 489        oplock_break_cancelled = wait_oplock_handler ?
 490                cancel_work_sync(&cifs_file->oplock_break) : false;
 491
 492        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 493                struct TCP_Server_Info *server = tcon->ses->server;
 494                unsigned int xid;
 495
 496                xid = get_xid();
 497                if (server->ops->close_getattr)
 498                        server->ops->close_getattr(xid, tcon, cifs_file);
 499                else if (server->ops->close)
 500                        server->ops->close(xid, tcon, &cifs_file->fid);
 501                _free_xid(xid);
 502        }
 503
 504        if (oplock_break_cancelled)
 505                cifs_done_oplock_break(cifsi);
 506
 507        cifs_del_pending_open(&open);
 508
 509        if (offload)
 510                queue_work(fileinfo_put_wq, &cifs_file->put);
 511        else
 512                cifsFileInfo_put_final(cifs_file);
 513}
 514
/*
 * cifs_open - VFS ->open entry point for regular files on a cifs mount.
 *
 * Tries the SMB unix-extensions (POSIX) open first when the tcon
 * advertises it; otherwise (or on fallback) opens via cifs_nt_open().
 * On success attaches a cifsFileInfo to @file. Returns 0 or a
 * negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* strict-cache O_DIRECT opens get the direct (uncached) file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server claims the cap but can't do it - stop trying */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* registered so a lease break between open and fileinfo isn't lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open; the handle has no local owner now */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
 641
 642static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 643
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Called from the reopen path; pushes cached locks back to the server,
 * either as POSIX locks (when the unix extensions support fcntl locks
 * and the mount does not forbid them) or as mandatory locks via the
 * per-dialect push_mand_locks op. Returns 0 or a negative errno.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* NOTE(review): SINGLE_DEPTH_NESTING lockdep annotation - presumably
	 * a caller can already hold another lock_sem; confirm against the
	 * reopen/oplock paths. */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
 673
/*
 * cifs_reopen_file - re-establish an invalidated file handle, typically
 * after a reconnect to the server.
 * @cfile:	the open file whose handle is invalid
 * @can_flush:	true if it is safe to flush dirty pages and refresh the
 *		inode from the server (false when called from writeback,
 *		where flushing could deadlock)
 *
 * Returns 0 on success (or if the handle was already valid) or a
 * negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	/* fh_mutex serializes reopen attempts against this handle */
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it - nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = cifs_create_options(cifs_sb, create_options);
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions)
			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
		else if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
 838
 839int cifs_close(struct inode *inode, struct file *file)
 840{
 841        if (file->private_data != NULL) {
 842                _cifsFileInfo_put(file->private_data, true, false);
 843                file->private_data = NULL;
 844        }
 845
 846        /* return code from the ->release op is always ignored */
 847        return 0;
 848}
 849
/*
 * Reopen every file handle on @tcon that has been marked invalid, relying
 * on the server's persistent-handle support.  Handles are collected under
 * the spinlock, then reopened outside it (the reopen goes to the network).
 * If any reopen fails, need_reopen_files is set again so a later pass
 * retries.
 */
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        /* nothing to do unless the share uses persistent handles */
        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        /* cleared optimistically; set again below if a reopen fails */
        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles  */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                /* take a reference so the file can't vanish once unlocked */
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        /* reopen collected handles outside the spinlock */
        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}
 885
/*
 * ->release() for directories: close the server-side search handle if it
 * is still open, free any buffered search response, and free the private
 * data attached to the struct file.  Errors from the network close are
 * logged and ignored; the VFS ignores the return value anyway.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                /* drop the spinlock before issuing the network close */
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        /* release the cached search-results buffer, if any */
        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}
 936
 937static struct cifsLockInfo *
 938cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
 939{
 940        struct cifsLockInfo *lock =
 941                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
 942        if (!lock)
 943                return lock;
 944        lock->offset = offset;
 945        lock->length = length;
 946        lock->type = type;
 947        lock->pid = current->tgid;
 948        lock->flags = flags;
 949        INIT_LIST_HEAD(&lock->blist);
 950        init_waitqueue_head(&lock->block_q);
 951        return lock;
 952}
 953
 954void
 955cifs_del_lock_waiters(struct cifsLockInfo *lock)
 956{
 957        struct cifsLockInfo *li, *tmp;
 958        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
 959                list_del_init(&li->blist);
 960                wake_up(&li->block_q);
 961        }
 962}
 963
 964#define CIFS_LOCK_OP    0
 965#define CIFS_READ_OP    1
 966#define CIFS_WRITE_OP   2
 967
/*
 * Check whether any lock in @fdlocks conflicts with an operation of type
 * @type on the range [offset, offset + length).
 *
 * @rw_check is the caller's intent: CIFS_LOCK_OP (0), CIFS_READ_OP (1) or
 * CIFS_WRITE_OP (2).  On conflict, true is returned and, if @conf_lock is
 * non-NULL, *conf_lock is set to the conflicting lock.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges do not overlap - this lock cannot conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared lock request does not conflict with a lock of the
                 * identical (shared) type, nor with a lock held by the same
                 * thread group through the same fid.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                /* OFD locks taken through the same fid never conflict */
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
1004
1005bool
1006cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1007                        __u8 type, __u16 flags,
1008                        struct cifsLockInfo **conf_lock, int rw_check)
1009{
1010        bool rc = false;
1011        struct cifs_fid_locks *cur;
1012        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1013
1014        list_for_each_entry(cur, &cinode->llist, llist) {
1015                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1016                                                 flags, cfile, conf_lock,
1017                                                 rw_check);
1018                if (rc)
1019                        break;
1020        }
1021
1022        return rc;
1023}
1024
1025/*
1026 * Check if there is another lock that prevents us to set the lock (mandatory
1027 * style). If such a lock exists, update the flock structure with its
1028 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1029 * or leave it the same if we can't. Returns 0 if we don't need to request to
1030 * the server or 1 otherwise.
1031 */
1032static int
1033cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1034               __u8 type, struct file_lock *flock)
1035{
1036        int rc = 0;
1037        struct cifsLockInfo *conf_lock;
1038        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1039        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1040        bool exist;
1041
1042        down_read(&cinode->lock_sem);
1043
1044        exist = cifs_find_lock_conflict(cfile, offset, length, type,
1045                                        flock->fl_flags, &conf_lock,
1046                                        CIFS_LOCK_OP);
1047        if (exist) {
1048                flock->fl_start = conf_lock->offset;
1049                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1050                flock->fl_pid = conf_lock->pid;
1051                if (conf_lock->type & server->vals->shared_lock_type)
1052                        flock->fl_type = F_RDLCK;
1053                else
1054                        flock->fl_type = F_WRLCK;
1055        } else if (!cinode->can_cache_brlcks)
1056                rc = 1;
1057        else
1058                flock->fl_type = F_UNLCK;
1059
1060        up_read(&cinode->lock_sem);
1061        return rc;
1062}
1063
1064static void
1065cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1066{
1067        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1068        cifs_down_write(&cinode->lock_sem);
1069        list_add_tail(&lock->llist, &cfile->llist->locks);
1070        up_write(&cinode->lock_sem);
1071}
1072
1073/*
1074 * Set the byte-range lock (mandatory style). Returns:
1075 * 1) 0, if we set the lock and don't need to request to the server;
1076 * 2) 1, if no locks prevent us but we need to request to the server;
1077 * 3) -EACCES, if there is a lock that prevents us and wait is false.
1078 */
1079static int
1080cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1081                 bool wait)
1082{
1083        struct cifsLockInfo *conf_lock;
1084        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1085        bool exist;
1086        int rc = 0;
1087
1088try_again:
1089        exist = false;
1090        cifs_down_write(&cinode->lock_sem);
1091
1092        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1093                                        lock->type, lock->flags, &conf_lock,
1094                                        CIFS_LOCK_OP);
1095        if (!exist && cinode->can_cache_brlcks) {
1096                list_add_tail(&lock->llist, &cfile->llist->locks);
1097                up_write(&cinode->lock_sem);
1098                return rc;
1099        }
1100
1101        if (!exist)
1102                rc = 1;
1103        else if (!wait)
1104                rc = -EACCES;
1105        else {
1106                list_add_tail(&lock->blist, &conf_lock->blist);
1107                up_write(&cinode->lock_sem);
1108                rc = wait_event_interruptible(lock->block_q,
1109                                        (lock->blist.prev == &lock->blist) &&
1110                                        (lock->blist.next == &lock->blist));
1111                if (!rc)
1112                        goto try_again;
1113                cifs_down_write(&cinode->lock_sem);
1114                list_del_init(&lock->blist);
1115        }
1116
1117        up_write(&cinode->lock_sem);
1118        return rc;
1119}
1120
1121/*
1122 * Check if there is another lock that prevents us to set the lock (posix
1123 * style). If such a lock exists, update the flock structure with its
1124 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1125 * or leave it the same if we can't. Returns 0 if we don't need to request to
1126 * the server or 1 otherwise.
1127 */
1128static int
1129cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1130{
1131        int rc = 0;
1132        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1133        unsigned char saved_type = flock->fl_type;
1134
1135        if ((flock->fl_flags & FL_POSIX) == 0)
1136                return 1;
1137
1138        down_read(&cinode->lock_sem);
1139        posix_test_lock(file, flock);
1140
1141        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1142                flock->fl_type = saved_type;
1143                rc = 1;
1144        }
1145
1146        up_read(&cinode->lock_sem);
1147        return rc;
1148}
1149
1150/*
1151 * Set the byte-range lock (posix style). Returns:
1152 * 1) <0, if the error occurs while setting the lock;
1153 * 2) 0, if we set the lock and don't need to request to the server;
1154 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1155 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1156 */
1157static int
1158cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1159{
1160        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1161        int rc = FILE_LOCK_DEFERRED + 1;
1162
1163        if ((flock->fl_flags & FL_POSIX) == 0)
1164                return rc;
1165
1166        cifs_down_write(&cinode->lock_sem);
1167        if (!cinode->can_cache_brlcks) {
1168                up_write(&cinode->lock_sem);
1169                return rc;
1170        }
1171
1172        rc = posix_lock_file(file, flock, NULL);
1173        up_write(&cinode->lock_sem);
1174        return rc;
1175}
1176
/*
 * Send all cached mandatory byte-range locks on @cfile to the server,
 * packing as many LOCKING_ANDX ranges per request as the negotiated
 * buffer size allows.  Locks are sent in two passes (one per lock type)
 * since a single request carries one type.  Returns 0 on success or the
 * last error from cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        /* pass 0: exclusive locks, pass 1: shared locks */
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        /* clamp to one page so the allocation below stays small */
        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                /* batch full - flush it to the server */
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                /* send any leftover partial batch for this lock type */
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
1253
1254static __u32
1255hash_lockowner(fl_owner_t owner)
1256{
1257        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1258}
1259
/*
 * Snapshot of one VFS posix lock, preallocated by cifs_push_posix_locks()
 * so that locks can be sent to the server without holding flc_lock.
 */
struct lock_to_push {
        struct list_head llist; /* entry in the locks_to_send list */
        __u64 offset;           /* start of the byte range */
        __u64 length;           /* length of the byte range */
        __u32 pid;              /* hashed lock owner (hash_lockowner()) */
        __u16 netfid;           /* SMB1 file handle to lock through */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1268
1269static int
1270cifs_push_posix_locks(struct cifsFileInfo *cfile)
1271{
1272        struct inode *inode = d_inode(cfile->dentry);
1273        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1274        struct file_lock *flock;
1275        struct file_lock_context *flctx = inode->i_flctx;
1276        unsigned int count = 0, i;
1277        int rc = 0, xid, type;
1278        struct list_head locks_to_send, *el;
1279        struct lock_to_push *lck, *tmp;
1280        __u64 length;
1281
1282        xid = get_xid();
1283
1284        if (!flctx)
1285                goto out;
1286
1287        spin_lock(&flctx->flc_lock);
1288        list_for_each(el, &flctx->flc_posix) {
1289                count++;
1290        }
1291        spin_unlock(&flctx->flc_lock);
1292
1293        INIT_LIST_HEAD(&locks_to_send);
1294
1295        /*
1296         * Allocating count locks is enough because no FL_POSIX locks can be
1297         * added to the list while we are holding cinode->lock_sem that
1298         * protects locking operations of this inode.
1299         */
1300        for (i = 0; i < count; i++) {
1301                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1302                if (!lck) {
1303                        rc = -ENOMEM;
1304                        goto err_out;
1305                }
1306                list_add_tail(&lck->llist, &locks_to_send);
1307        }
1308
1309        el = locks_to_send.next;
1310        spin_lock(&flctx->flc_lock);
1311        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1312                if (el == &locks_to_send) {
1313                        /*
1314                         * The list ended. We don't have enough allocated
1315                         * structures - something is really wrong.
1316                         */
1317                        cifs_dbg(VFS, "Can't push all brlocks!\n");
1318                        break;
1319                }
1320                length = 1 + flock->fl_end - flock->fl_start;
1321                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1322                        type = CIFS_RDLCK;
1323                else
1324                        type = CIFS_WRLCK;
1325                lck = list_entry(el, struct lock_to_push, llist);
1326                lck->pid = hash_lockowner(flock->fl_owner);
1327                lck->netfid = cfile->fid.netfid;
1328                lck->length = length;
1329                lck->type = type;
1330                lck->offset = flock->fl_start;
1331        }
1332        spin_unlock(&flctx->flc_lock);
1333
1334        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1335                int stored_rc;
1336
1337                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1338                                             lck->offset, lck->length, NULL,
1339                                             lck->type, 0);
1340                if (stored_rc)
1341                        rc = stored_rc;
1342                list_del(&lck->llist);
1343                kfree(lck);
1344        }
1345
1346out:
1347        free_xid(xid);
1348        return rc;
1349err_out:
1350        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1351                list_del(&lck->llist);
1352                kfree(lck);
1353        }
1354        goto out;
1355}
1356
1357static int
1358cifs_push_locks(struct cifsFileInfo *cfile)
1359{
1360        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1361        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1362        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1363        int rc = 0;
1364
1365        /* we are going to update can_cache_brlcks here - need a write access */
1366        cifs_down_write(&cinode->lock_sem);
1367        if (!cinode->can_cache_brlcks) {
1368                up_write(&cinode->lock_sem);
1369                return rc;
1370        }
1371
1372        if (cap_unix(tcon->ses) &&
1373            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1374            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1375                rc = cifs_push_posix_locks(cfile);
1376        else
1377                rc = tcon->ses->server->ops->push_mand_locks(cfile);
1378
1379        cinode->can_cache_brlcks = false;
1380        up_write(&cinode->lock_sem);
1381        return rc;
1382}
1383
1384static void
1385cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1386                bool *wait_flag, struct TCP_Server_Info *server)
1387{
1388        if (flock->fl_flags & FL_POSIX)
1389                cifs_dbg(FYI, "Posix\n");
1390        if (flock->fl_flags & FL_FLOCK)
1391                cifs_dbg(FYI, "Flock\n");
1392        if (flock->fl_flags & FL_SLEEP) {
1393                cifs_dbg(FYI, "Blocking lock\n");
1394                *wait_flag = true;
1395        }
1396        if (flock->fl_flags & FL_ACCESS)
1397                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1398        if (flock->fl_flags & FL_LEASE)
1399                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1400        if (flock->fl_flags &
1401            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1402               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1403                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1404
1405        *type = server->vals->large_lock_type;
1406        if (flock->fl_type == F_WRLCK) {
1407                cifs_dbg(FYI, "F_WRLCK\n");
1408                *type |= server->vals->exclusive_lock_type;
1409                *lock = 1;
1410        } else if (flock->fl_type == F_UNLCK) {
1411                cifs_dbg(FYI, "F_UNLCK\n");
1412                *type |= server->vals->unlock_lock_type;
1413                *unlock = 1;
1414                /* Check if unlock includes more than one lock range */
1415        } else if (flock->fl_type == F_RDLCK) {
1416                cifs_dbg(FYI, "F_RDLCK\n");
1417                *type |= server->vals->shared_lock_type;
1418                *lock = 1;
1419        } else if (flock->fl_type == F_EXLCK) {
1420                cifs_dbg(FYI, "F_EXLCK\n");
1421                *type |= server->vals->exclusive_lock_type;
1422                *lock = 1;
1423        } else if (flock->fl_type == F_SHLCK) {
1424                cifs_dbg(FYI, "F_SHLCK\n");
1425                *type |= server->vals->shared_lock_type;
1426                *lock = 1;
1427        } else
1428                cifs_dbg(FYI, "Unknown type of lock\n");
1429}
1430
/*
 * Handle an F_GETLK-style query: decide whether @flock could be granted
 * and update it with the conflicting lock's properties, or F_UNLCK if it
 * would succeed.  Uses posix semantics when @posix_lck; otherwise probes
 * the server with a temporary mandatory lock/unlock pair.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* 0 means the local posix check was conclusive */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* 0 means the cached-brlock check answered without the server */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        /* probe: take the requested lock on the server, then undo it */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                /* even the shared probe failed - report an exclusive holder */
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive probe failed - check whether a shared lock would fit */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1499
1500void
1501cifs_move_llist(struct list_head *source, struct list_head *dest)
1502{
1503        struct list_head *li, *tmp;
1504        list_for_each_safe(li, tmp, source)
1505                list_move(li, dest);
1506}
1507
1508void
1509cifs_free_llist(struct list_head *llist)
1510{
1511        struct cifsLockInfo *li, *tmp;
1512        list_for_each_entry_safe(li, tmp, llist, llist) {
1513                cifs_del_lock_waiters(li);
1514                list_del(&li->llist);
1515                kfree(li);
1516        }
1517}
1518
/*
 * Remove all locks owned by the current thread group that fall entirely
 * within the range described by @flock.  Cached-only locks are simply
 * freed; locks known to the server are batched into LOCKING_ANDX unlock
 * requests.  Locks being unlocked are parked on a temporary list so they
 * can be restored to the file's list if the server request fails.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        /* pass 0: exclusive locks, pass 1: shared locks */
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
                return -EINVAL;

        /* clamp to one page so the allocation below stays small */
        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        cifs_down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        /* skip locks not fully contained in the range */
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                /* batch full - send the unlock request */
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeed -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                /* flush the remaining partial batch for this lock type */
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist,
                                                &cfile->llist->locks);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        up_write(&cinode->lock_sem);
        kfree(buf);
        return rc;
}
1631
/*
 * Apply (@lock) or remove (@unlock) the byte-range lock described by @flock
 * on the file behind @file.  When @posix_lck is set the request is sent via
 * the POSIX lock path (Unix extensions); otherwise a mandatory (Windows
 * style) lock is sent through the server's ops table.  Returns 0 or a
 * negative errno.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, hence the +1 to get a byte count */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/* set the lock in the local VFS first; bail out early on
		   failure or deferral before talking to the server */
		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		/* an unlock request overrides the read/write lock type */
		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->fl_flags);
		if (!lock)
			return -ENOMEM;

		/* rc == 0: no server round trip is needed - skip straight to
		   out (see cifs_lock_add_if() for the exact semantics) */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* server accepted the lock - record it in the file's list */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		/* mirror the result in the local VFS lock state */
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1725
1726int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1727{
1728        int rc, xid;
1729        int lock = 0, unlock = 0;
1730        bool wait_flag = false;
1731        bool posix_lck = false;
1732        struct cifs_sb_info *cifs_sb;
1733        struct cifs_tcon *tcon;
1734        struct cifsFileInfo *cfile;
1735        __u32 type;
1736
1737        rc = -EACCES;
1738        xid = get_xid();
1739
1740        if (!(fl->fl_flags & FL_FLOCK))
1741                return -ENOLCK;
1742
1743        cfile = (struct cifsFileInfo *)file->private_data;
1744        tcon = tlink_tcon(cfile->tlink);
1745
1746        cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1747                        tcon->ses->server);
1748        cifs_sb = CIFS_FILE_SB(file);
1749
1750        if (cap_unix(tcon->ses) &&
1751            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1752            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1753                posix_lck = true;
1754
1755        if (!lock && !unlock) {
1756                /*
1757                 * if no lock or unlock then nothing to do since we do not
1758                 * know what it is
1759                 */
1760                free_xid(xid);
1761                return -EOPNOTSUPP;
1762        }
1763
1764        rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1765                        xid);
1766        free_xid(xid);
1767        return rc;
1768
1769
1770}
1771
1772int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1773{
1774        int rc, xid;
1775        int lock = 0, unlock = 0;
1776        bool wait_flag = false;
1777        bool posix_lck = false;
1778        struct cifs_sb_info *cifs_sb;
1779        struct cifs_tcon *tcon;
1780        struct cifsFileInfo *cfile;
1781        __u32 type;
1782
1783        rc = -EACCES;
1784        xid = get_xid();
1785
1786        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1787                 cmd, flock->fl_flags, flock->fl_type,
1788                 flock->fl_start, flock->fl_end);
1789
1790        cfile = (struct cifsFileInfo *)file->private_data;
1791        tcon = tlink_tcon(cfile->tlink);
1792
1793        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1794                        tcon->ses->server);
1795        cifs_sb = CIFS_FILE_SB(file);
1796
1797        if (cap_unix(tcon->ses) &&
1798            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1799            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1800                posix_lck = true;
1801        /*
1802         * BB add code here to normalize offset and length to account for
1803         * negative length which we can not accept over the wire.
1804         */
1805        if (IS_GETLK(cmd)) {
1806                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1807                free_xid(xid);
1808                return rc;
1809        }
1810
1811        if (!lock && !unlock) {
1812                /*
1813                 * if no lock or unlock then nothing to do since we do not
1814                 * know what it is
1815                 */
1816                free_xid(xid);
1817                return -EOPNOTSUPP;
1818        }
1819
1820        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1821                        xid);
1822        free_xid(xid);
1823        return rc;
1824}
1825
1826/*
1827 * update the file size (if needed) after a write. Should be called with
1828 * the inode->i_lock held
1829 */
1830void
1831cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1832                      unsigned int bytes_written)
1833{
1834        loff_t end_of_write = offset + bytes_written;
1835
1836        if (end_of_write > cifsi->server_eof)
1837                cifsi->server_eof = end_of_write;
1838}
1839
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * file position *@offset through the open handle @open_file.  -EAGAIN
 * responses are retried (reopening an invalidated handle first), and partial
 * writes are continued until everything is sent or an error occurs.  On
 * return *@offset is advanced past the bytes written and the cached server
 * EOF / inode size are updated.  Returns the total number of bytes written,
 * or a negative errno if nothing at all was written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms = {0};

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	/* outer loop advances over the buffer; inner loop retries -EAGAIN */
	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* each chunk is bounded by the server's
			   wp_retry_size and the bytes still remaining */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* report a short write if anything already went out,
			   otherwise propagate the error */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			/* i_lock serializes the cached EOF update */
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		/* extend the cached inode size if we wrote past it */
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1921
1922struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1923                                        bool fsuid_only)
1924{
1925        struct cifsFileInfo *open_file = NULL;
1926        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1927
1928        /* only filter by fsuid on multiuser mounts */
1929        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1930                fsuid_only = false;
1931
1932        spin_lock(&cifs_inode->open_file_lock);
1933        /* we could simply get the first_list_entry since write-only entries
1934           are always at the end of the list but since the first entry might
1935           have a close pending, we go through the whole list */
1936        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1937                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1938                        continue;
1939                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1940                        if (!open_file->invalidHandle) {
1941                                /* found a good file */
1942                                /* lock it so it will not be closed on us */
1943                                cifsFileInfo_get(open_file);
1944                                spin_unlock(&cifs_inode->open_file_lock);
1945                                return open_file;
1946                        } /* else might as well continue, and look for
1947                             another, or simply have the caller reopen it
1948                             again rather than trying to fix this handle */
1949                } else /* write only file */
1950                        break; /* write only files are last so must be done */
1951        }
1952        spin_unlock(&cifs_inode->open_file_lock);
1953        return NULL;
1954}
1955
/*
 * Find an open handle on @cifs_inode through which we may write, preferring
 * one opened by the current process (tgid).  @flags may restrict the search
 * to handles owned by the current fsuid (FIND_WR_FSUID_ONLY) or to ones
 * opened with DELETE access (FIND_WR_WITH_DELETE).  If only an invalidated
 * handle exists, it is reopened (up to MAX_REOPEN_ATT attempts).  On success
 * a referenced handle is stored in *@ret_file and 0 is returned.
 * Return -EBADF if no handle is found and general rc otherwise.
 */
int
cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
		       struct cifsFileInfo **ret_file)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc = -EBADF;
	unsigned int refind = 0;
	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
	bool with_delete = flags & FIND_WR_WITH_DELETE;
	*ret_file = NULL;

	/*
	 * Having a null inode here (because mapping->host was set to zero by
	 * the VFS or MM) should not happen but we had reports of on oops (due
	 * to it being zero) during stress testcases so we need to check for it
	 */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return rc;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_inode->open_file_lock);
refind_writable:
	/* give up after too many reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_inode->open_file_lock);
		return rc;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles of the current tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (with_delete && !(open_file->fid.access & DELETE))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&cifs_inode->open_file_lock);
				*ret_file = open_file;
				return 0;
			} else {
				/* remember the first invalidated candidate
				   in case no valid handle turns up */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	/* pin the invalidated candidate before dropping the lock */
	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&cifs_inode->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc) {
			*ret_file = inv_file;
			return 0;
		}

		/* reopen failed: move the handle to the tail so the next
		   pass tries a different one, drop our reference and retry */
		spin_lock(&cifs_inode->open_file_lock);
		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
		spin_unlock(&cifs_inode->open_file_lock);
		cifsFileInfo_put(inv_file);
		++refind;
		inv_file = NULL;
		spin_lock(&cifs_inode->open_file_lock);
		goto refind_writable;
	}

	return rc;
}
2046
2047struct cifsFileInfo *
2048find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2049{
2050        struct cifsFileInfo *cfile;
2051        int rc;
2052
2053        rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2054        if (rc)
2055                cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2056
2057        return cfile;
2058}
2059
2060int
2061cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2062                       int flags,
2063                       struct cifsFileInfo **ret_file)
2064{
2065        struct list_head *tmp;
2066        struct cifsFileInfo *cfile;
2067        struct cifsInodeInfo *cinode;
2068        char *full_path;
2069
2070        *ret_file = NULL;
2071
2072        spin_lock(&tcon->open_file_lock);
2073        list_for_each(tmp, &tcon->openFileList) {
2074                cfile = list_entry(tmp, struct cifsFileInfo,
2075                             tlist);
2076                full_path = build_path_from_dentry(cfile->dentry);
2077                if (full_path == NULL) {
2078                        spin_unlock(&tcon->open_file_lock);
2079                        return -ENOMEM;
2080                }
2081                if (strcmp(full_path, name)) {
2082                        kfree(full_path);
2083                        continue;
2084                }
2085
2086                kfree(full_path);
2087                cinode = CIFS_I(d_inode(cfile->dentry));
2088                spin_unlock(&tcon->open_file_lock);
2089                return cifs_get_writable_file(cinode, flags, ret_file);
2090        }
2091
2092        spin_unlock(&tcon->open_file_lock);
2093        return -ENOENT;
2094}
2095
2096int
2097cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2098                       struct cifsFileInfo **ret_file)
2099{
2100        struct list_head *tmp;
2101        struct cifsFileInfo *cfile;
2102        struct cifsInodeInfo *cinode;
2103        char *full_path;
2104
2105        *ret_file = NULL;
2106
2107        spin_lock(&tcon->open_file_lock);
2108        list_for_each(tmp, &tcon->openFileList) {
2109                cfile = list_entry(tmp, struct cifsFileInfo,
2110                             tlist);
2111                full_path = build_path_from_dentry(cfile->dentry);
2112                if (full_path == NULL) {
2113                        spin_unlock(&tcon->open_file_lock);
2114                        return -ENOMEM;
2115                }
2116                if (strcmp(full_path, name)) {
2117                        kfree(full_path);
2118                        continue;
2119                }
2120
2121                kfree(full_path);
2122                cinode = CIFS_I(d_inode(cfile->dentry));
2123                spin_unlock(&tcon->open_file_lock);
2124                *ret_file = find_readable_file(cinode, 0);
2125                return *ret_file ? 0 : -ENOENT;
2126        }
2127
2128        spin_unlock(&tcon->open_file_lock);
2129        return -ENOENT;
2130}
2131
/*
 * Write the byte range [@from, @to) of @page back to the server through any
 * available writable handle.  The range is clipped to the current inode
 * size so a partial-page write never extends the file.  Returns 0 on
 * success or a negative errno.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	/* page stays mapped until one of the kunmap() calls below */
	write_data = kmap(page);
	write_data += from;

	/* sanity check the requested range */
	if ((to > PAGE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
				    &open_file);
	if (!rc) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_time(inode);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
		else
			rc = -EFAULT;
	} else {
		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
		/* non-retryable errors are reported uniformly as -EIO */
		if (!is_retryable_error(rc))
			rc = -EIO;
	}

	kunmap(page);
	return rc;
}
2189
2190static struct cifs_writedata *
2191wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2192                          pgoff_t end, pgoff_t *index,
2193                          unsigned int *found_pages)
2194{
2195        struct cifs_writedata *wdata;
2196
2197        wdata = cifs_writedata_alloc((unsigned int)tofind,
2198                                     cifs_writev_complete);
2199        if (!wdata)
2200                return NULL;
2201
2202        *found_pages = find_get_pages_range_tag(mapping, index, end,
2203                                PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2204        return wdata;
2205}
2206
/*
 * Lock and claim a run of consecutive pages (already looked up into
 * @wdata->pages by the caller) for a single write request.  The run stops at
 * the first page that is non-consecutive, past @end, at/after EOF, under
 * writeback, or no longer dirty.  Claimed pages are left locked and marked
 * for writeback; the remainder are released.  *@index, *@next and *@done are
 * updated for the caller's writepages loop.  Returns the number of pages
 * claimed.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither the i_pages lock nor the
		 * page lock: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled
		 * back from swapper_space to tmpfs file mapping
		 */

		/* block for the first page; never block for later ones */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2285
/*
 * Fill in the remaining request fields on @wdata (offset, page/tail sizes,
 * total byte count, pid) and submit the pages to the server with the async
 * write op.  The credits held in @wdata->credits are adjusted to the final
 * byte count first.  Returns 0 on success, -EAGAIN if the handle has been
 * invalidated (caller retries), or another negative errno.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_SIZE;
	/* the last page may extend past EOF and so be short */
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
	wdata->pid = wdata->cfile->pid;

	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
	if (rc)
		return rc;

	if (wdata->cfile->invalidHandle)
		rc = -EAGAIN;
	else
		rc = wdata->server->ops->async_writev(wdata,
						      cifs_writedata_release);

	return rc;
}
2314
2315static int cifs_writepages(struct address_space *mapping,
2316                           struct writeback_control *wbc)
2317{
2318        struct inode *inode = mapping->host;
2319        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2320        struct TCP_Server_Info *server;
2321        bool done = false, scanned = false, range_whole = false;
2322        pgoff_t end, index;
2323        struct cifs_writedata *wdata;
2324        struct cifsFileInfo *cfile = NULL;
2325        int rc = 0;
2326        int saved_rc = 0;
2327        unsigned int xid;
2328
2329        /*
2330         * If wsize is smaller than the page cache size, default to writing
2331         * one page at a time via cifs_writepage
2332         */
2333        if (cifs_sb->wsize < PAGE_SIZE)
2334                return generic_writepages(mapping, wbc);
2335
2336        xid = get_xid();
2337        if (wbc->range_cyclic) {
2338                index = mapping->writeback_index; /* Start from prev offset */
2339                end = -1;
2340        } else {
2341                index = wbc->range_start >> PAGE_SHIFT;
2342                end = wbc->range_end >> PAGE_SHIFT;
2343                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2344                        range_whole = true;
2345                scanned = true;
2346        }
2347        server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2348
2349retry:
2350        while (!done && index <= end) {
2351                unsigned int i, nr_pages, found_pages, wsize;
2352                pgoff_t next = 0, tofind, saved_index = index;
2353                struct cifs_credits credits_on_stack;
2354                struct cifs_credits *credits = &credits_on_stack;
2355                int get_file_rc = 0;
2356
2357                if (cfile)
2358                        cifsFileInfo_put(cfile);
2359
2360                rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2361
2362                /* in case of an error store it to return later */
2363                if (rc)
2364                        get_file_rc = rc;
2365
2366                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2367                                                   &wsize, credits);
2368                if (rc != 0) {
2369                        done = true;
2370                        break;
2371                }
2372
2373                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2374
2375                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2376                                                  &found_pages);
2377                if (!wdata) {
2378                        rc = -ENOMEM;
2379                        done = true;
2380                        add_credits_and_wake_if(server, credits, 0);
2381                        break;
2382                }
2383
2384                if (found_pages == 0) {
2385                        kref_put(&wdata->refcount, cifs_writedata_release);
2386                        add_credits_and_wake_if(server, credits, 0);
2387                        break;
2388                }
2389
2390                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2391                                               end, &index, &next, &done);
2392
2393                /* nothing to write? */
2394                if (nr_pages == 0) {
2395                        kref_put(&wdata->refcount, cifs_writedata_release);
2396                        add_credits_and_wake_if(server, credits, 0);
2397                        continue;
2398                }
2399
2400                wdata->credits = credits_on_stack;
2401                wdata->cfile = cfile;
2402                wdata->server = server;
2403                cfile = NULL;
2404
2405                if (!wdata->cfile) {
2406                        cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2407                                 get_file_rc);
2408                        if (is_retryable_error(get_file_rc))
2409                                rc = get_file_rc;
2410                        else
2411                                rc = -EBADF;
2412                } else
2413                        rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2414
2415                for (i = 0; i < nr_pages; ++i)
2416                        unlock_page(wdata->pages[i]);
2417
2418                /* send failure -- clean up the mess */
2419                if (rc != 0) {
2420                        add_credits_and_wake_if(server, &wdata->credits, 0);
2421                        for (i = 0; i < nr_pages; ++i) {
2422                                if (is_retryable_error(rc))
2423                                        redirty_page_for_writepage(wbc,
2424                                                           wdata->pages[i]);
2425                                else
2426                                        SetPageError(wdata->pages[i]);
2427                                end_page_writeback(wdata->pages[i]);
2428                                put_page(wdata->pages[i]);
2429                        }
2430                        if (!is_retryable_error(rc))
2431                                mapping_set_error(mapping, rc);
2432                }
2433                kref_put(&wdata->refcount, cifs_writedata_release);
2434
2435                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2436                        index = saved_index;
2437                        continue;
2438                }
2439
2440                /* Return immediately if we received a signal during writing */
2441                if (is_interrupt_error(rc)) {
2442                        done = true;
2443                        break;
2444                }
2445
2446                if (rc != 0 && saved_rc == 0)
2447                        saved_rc = rc;
2448
2449                wbc->nr_to_write -= nr_pages;
2450                if (wbc->nr_to_write <= 0)
2451                        done = true;
2452
2453                index = next;
2454        }
2455
2456        if (!scanned && !done) {
2457                /*
2458                 * We hit the last page and there is more work to be done: wrap
2459                 * back to the start of the file
2460                 */
2461                scanned = true;
2462                index = 0;
2463                goto retry;
2464        }
2465
2466        if (saved_rc != 0)
2467                rc = saved_rc;
2468
2469        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2470                mapping->writeback_index = index;
2471
2472        if (cfile)
2473                cifsFileInfo_put(cfile);
2474        free_xid(xid);
2475        return rc;
2476}
2477
/*
 * Write one locked, dirty page back to the server.
 *
 * The caller holds the page lock (cifs_writepage() drops it afterwards).
 * An extra page reference is taken for the duration of the write.  Under
 * WB_SYNC_ALL a retryable -EAGAIN is retried indefinitely, since
 * data-integrity writeback must not silently give up.  Returns 0 on
 * success or a negative errno.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
        int rc;
        unsigned int xid;

        xid = get_xid();
/* BB add check for wbc flags */
        get_page(page); /* pin the page across the write; dropped below */
        if (!PageUptodate(page))
                cifs_dbg(FYI, "ppw - page not up to date\n");

        /*
         * Set the "writeback" flag, and clear "dirty" in the radix tree.
         *
         * A writepage() implementation always needs to do either this,
         * or re-dirty the page with "redirty_page_for_writepage()" in
         * the case of a failure.
         *
         * Just unlocking the page will cause the radix tree tag-bits
         * to fail to update with the state of the page correctly.
         */
        set_page_writeback(page);
retry_write:
        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
        if (is_retryable_error(rc)) {
                /* loop only for integrity sync; otherwise just redirty */
                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
                        goto retry_write;
                redirty_page_for_writepage(wbc, page);
        } else if (rc != 0) {
                /* hard failure: record it on both the page and the mapping */
                SetPageError(page);
                mapping_set_error(page->mapping, rc);
        } else {
                /* the whole page made it to the server */
                SetPageUptodate(page);
        }
        end_page_writeback(page);
        put_page(page);
        free_xid(xid);
        return rc;
}
2518
/* ->writepage(): do the locked write, then release the VM's page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int ret;

        ret = cifs_writepage_locked(page, wbc);
        unlock_page(page);
        return ret;
}
2525
/*
 * ->write_end(): commit @copied bytes that write_begin prepared at @pos
 * in @page.
 *
 * If the page never became uptodate (a short copy into a page that was
 * not read from the server first), the data cannot safely be left dirty
 * in the page cache, so it is pushed to the server synchronously via
 * cifs_write().  Otherwise the page is simply marked dirty.  On success
 * the inode size is advanced when the write extended the file.  Returns
 * the number of bytes accepted or a negative errno.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
{
        int rc;
        struct inode *inode = mapping->host;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        __u32 pid;

        /* forward the originating pid to the server when mounted with rwpidforward */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = cfile->pid;
        else
                pid = current->tgid;

        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
                 page, pos, copied);

        /*
         * PageChecked flags a page that write_begin handed over without
         * reading it first; it is only fully valid if the whole requested
         * range was copied — presumably set by cifs's write_begin, TODO
         * confirm against that (not visible here).
         */
        if (PageChecked(page)) {
                if (copied == len)
                        SetPageUptodate(page);
                ClearPageChecked(page);
        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
                SetPageUptodate(page);

        if (!PageUptodate(page)) {
                /* partial data in a non-uptodate page: write it through now */
                char *page_data;
                unsigned offset = pos & (PAGE_SIZE - 1);
                unsigned int xid;

                xid = get_xid();
                /* this is probably better than directly calling
                   partialpage_write since in this function the file handle is
                   known which we might as well leverage */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);

                free_xid(xid);
        } else {
                /* cached path: mark dirty; writeback sends it later */
                rc = copied;
                pos += copied;
                set_page_dirty(page);
        }

        if (rc > 0) {
                /* extend i_size if the write went past the current EOF */
                spin_lock(&inode->i_lock);
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&inode->i_lock);
        }

        unlock_page(page);
        put_page(page);

        return rc;
}
2586
2587int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2588                      int datasync)
2589{
2590        unsigned int xid;
2591        int rc = 0;
2592        struct cifs_tcon *tcon;
2593        struct TCP_Server_Info *server;
2594        struct cifsFileInfo *smbfile = file->private_data;
2595        struct inode *inode = file_inode(file);
2596        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2597
2598        rc = file_write_and_wait_range(file, start, end);
2599        if (rc) {
2600                trace_cifs_fsync_err(inode->i_ino, rc);
2601                return rc;
2602        }
2603
2604        xid = get_xid();
2605
2606        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2607                 file, datasync);
2608
2609        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2610                rc = cifs_zap_mapping(inode);
2611                if (rc) {
2612                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2613                        rc = 0; /* don't care about it in fsync */
2614                }
2615        }
2616
2617        tcon = tlink_tcon(smbfile->tlink);
2618        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2619                server = tcon->ses->server;
2620                if (server->ops->flush)
2621                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2622                else
2623                        rc = -ENOSYS;
2624        }
2625
2626        free_xid(xid);
2627        return rc;
2628}
2629
2630int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2631{
2632        unsigned int xid;
2633        int rc = 0;
2634        struct cifs_tcon *tcon;
2635        struct TCP_Server_Info *server;
2636        struct cifsFileInfo *smbfile = file->private_data;
2637        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2638
2639        rc = file_write_and_wait_range(file, start, end);
2640        if (rc) {
2641                trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2642                return rc;
2643        }
2644
2645        xid = get_xid();
2646
2647        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2648                 file, datasync);
2649
2650        tcon = tlink_tcon(smbfile->tlink);
2651        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2652                server = tcon->ses->server;
2653                if (server->ops->flush)
2654                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2655                else
2656                        rc = -ENOSYS;
2657        }
2658
2659        free_xid(xid);
2660        return rc;
2661}
2662
2663/*
2664 * As file closes, flush all cached write data for this inode checking
2665 * for write behind errors.
2666 */
2667int cifs_flush(struct file *file, fl_owner_t id)
2668{
2669        struct inode *inode = file_inode(file);
2670        int rc = 0;
2671
2672        if (file->f_mode & FMODE_WRITE)
2673                rc = filemap_write_and_wait(inode->i_mapping);
2674
2675        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2676        if (rc)
2677                trace_cifs_flush_err(inode->i_ino, rc);
2678        return rc;
2679}
2680
2681static int
2682cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2683{
2684        int rc = 0;
2685        unsigned long i;
2686
2687        for (i = 0; i < num_pages; i++) {
2688                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2689                if (!pages[i]) {
2690                        /*
2691                         * save number of pages we have already allocated and
2692                         * return with ENOMEM error
2693                         */
2694                        num_pages = i;
2695                        rc = -ENOMEM;
2696                        break;
2697                }
2698        }
2699
2700        if (rc) {
2701                for (i = 0; i < num_pages; i++)
2702                        put_page(pages[i]);
2703        }
2704        return rc;
2705}
2706
2707static inline
2708size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2709{
2710        size_t num_pages;
2711        size_t clen;
2712
2713        clen = min_t(const size_t, len, wsize);
2714        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2715
2716        if (cur_len)
2717                *cur_len = clen;
2718
2719        return num_pages;
2720}
2721
2722static void
2723cifs_uncached_writedata_release(struct kref *refcount)
2724{
2725        int i;
2726        struct cifs_writedata *wdata = container_of(refcount,
2727                                        struct cifs_writedata, refcount);
2728
2729        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2730        for (i = 0; i < wdata->nr_pages; i++)
2731                put_page(wdata->pages[i]);
2732        cifs_writedata_release(refcount);
2733}
2734
2735static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2736
/*
 * Work-queue completion handler for an uncached async write.
 *
 * Pushes the server EOF (and, if larger, i_size) forward to cover the
 * bytes just written, signals the wdata as done, and kicks the
 * collector so the issuing context can reap it.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        struct inode *inode = d_inode(wdata->cfile->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
        if (cifsi->server_eof > inode->i_size)
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);

        /* mark done before collecting so the collector can reap us */
        complete(&wdata->done);
        collect_uncached_write_data(wdata->ctx);
        /* the below call can possibly free the last ref to aio ctx */
        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2756
/*
 * Copy up to *len bytes from the user iterator @from into the
 * pre-allocated pages of @wdata.
 *
 * On return *len holds the number of bytes actually copied and
 * *num_pages the count of pages the caller should keep (it releases
 * any excess).  Returns -EFAULT when nothing at all could be copied,
 * which most likely means the iovec pointed at an unmapped address.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
                      size_t *len, unsigned long *num_pages)
{
        size_t save_len, copied, bytes, cur_len = *len;
        unsigned long i, nr_pages = *num_pages;

        save_len = cur_len;
        for (i = 0; i < nr_pages; i++) {
                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
                cur_len -= copied;
                /*
                 * If we didn't copy as much as we expected, then that
                 * may mean we trod into an unmapped area. Stop copying
                 * at that point. On the next pass through the big
                 * loop, we'll likely end up getting a zero-length
                 * write and bailing out of it.
                 */
                if (copied < bytes)
                        break;
        }
        cur_len = save_len - cur_len;
        *len = cur_len;

        /*
         * If we have no data to send, then that probably means that
         * the copy above failed altogether. That's most likely because
         * the address in the iovec was bogus. Return -EFAULT and let
         * the caller free anything we allocated and bail out.
         */
        if (!cur_len)
                return -EFAULT;

        /*
         * i + 1 now represents the number of pages we actually used in
         * the copy phase above.
         *
         * NOTE(review): when the loop runs to completion (no break),
         * i == nr_pages here, so i + 1 overstates the used count by
         * one.  This looks harmless because the caller only uses
         * *num_pages to free pages beyond it — but worth confirming.
         */
        *num_pages = i + 1;
        return 0;
}
2798
/*
 * Retry a previously-failed uncached write, resending @wdata whole.
 *
 * Reopens the file handle if it was invalidated, then polls (sleeping
 * one second between attempts) until the server grants enough credits
 * to cover all of wdata->bytes in a single request, and re-issues the
 * async write.  On success the wdata is queued on @wdata_list for the
 * collector and 0 is returned; on failure the wdata reference is
 * dropped and a negative errno is returned.
 */
static int
cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
        struct cifs_aio_ctx *ctx)
{
        unsigned int wsize;
        struct cifs_credits credits;
        int rc;
        struct TCP_Server_Info *server = wdata->server;

        do {
                if (wdata->cfile->invalidHandle) {
                        rc = cifs_reopen_file(wdata->cfile, false);
                        if (rc == -EAGAIN)
                                continue;
                        else if (rc)
                                break;
                }


                /*
                 * Wait for credits to resend this wdata.
                 * Note: we are attempting to resend the whole wdata not in
                 * segments
                 */
                do {
                        rc = server->ops->wait_mtu_credits(server, wdata->bytes,
                                                &wsize, &credits);
                        if (rc)
                                goto fail;

                        if (wsize < wdata->bytes) {
                                /* not enough yet: return credits and back off */
                                add_credits_and_wake_if(server, &credits, 0);
                                msleep(1000);
                        }
                } while (wsize < wdata->bytes);
                wdata->credits = credits;

                rc = adjust_credits(server, &wdata->credits, wdata->bytes);

                if (!rc) {
                        /* handle may have been invalidated while we waited */
                        if (wdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else {
#ifdef CONFIG_CIFS_SMB_DIRECT
                                /*
                                 * Tear down the memory registration from the
                                 * failed send before re-issuing the write.
                                 */
                                if (wdata->mr) {
                                        wdata->mr->need_invalidate = true;
                                        smbd_deregister_mr(wdata->mr);
                                        wdata->mr = NULL;
                                }
#endif
                                rc = server->ops->async_writev(wdata,
                                        cifs_uncached_writedata_release);
                        }
                }

                /* If the write was successfully sent, we are done */
                if (!rc) {
                        list_add_tail(&wdata->list, wdata_list);
                        return 0;
                }

                /* Roll back credits and retry if needed */
                add_credits_and_wake_if(server, &wdata->credits, 0);
        } while (rc == -EAGAIN);

fail:
        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        return rc;
}
2868
/*
 * Split an uncached write of @len bytes at @offset into wsize-bounded
 * chunks and issue each one as an async write.
 *
 * For direct I/O (ctx->direct_io) the user's pages are pinned in place
 * via iov_iter_get_pages_alloc(); otherwise the data is copied into
 * freshly allocated pages.  Every successfully issued wdata is appended
 * to @wdata_list for collect_uncached_write_data() to reap.  Returns 0
 * when every chunk was sent, otherwise the first fatal error.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                     struct cifsFileInfo *open_file,
                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
                     struct cifs_aio_ctx *ctx)
{
        int rc = 0;
        size_t cur_len;
        unsigned long nr_pages, num_pages, i;
        struct cifs_writedata *wdata;
        struct iov_iter saved_from = *from;    /* for rewinding on -EAGAIN */
        loff_t saved_offset = offset;
        pid_t pid;
        struct TCP_Server_Info *server;
        struct page **pagevec;
        size_t start;
        unsigned int xid;

        /* forward the opener's pid when mounted with rwpidforward */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
        xid = get_xid();

        do {
                unsigned int wsize;
                struct cifs_credits credits_on_stack;
                struct cifs_credits *credits = &credits_on_stack;

                if (open_file->invalidHandle) {
                        rc = cifs_reopen_file(open_file, false);
                        if (rc == -EAGAIN)
                                continue;
                        else if (rc)
                                break;
                }

                /* block until the server grants credits for one chunk */
                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
                                                   &wsize, credits);
                if (rc)
                        break;

                cur_len = min_t(const size_t, len, wsize);

                if (ctx->direct_io) {
                        ssize_t result;

                        /* pin the user pages; no copy on this path */
                        result = iov_iter_get_pages_alloc(
                                from, &pagevec, cur_len, &start);
                        if (result < 0) {
                                cifs_dbg(VFS,
                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
                                         result, iov_iter_type(from),
                                         from->iov_offset, from->count);
                                dump_stack();

                                rc = result;
                                add_credits_and_wake_if(server, credits, 0);
                                break;
                        }
                        cur_len = (size_t)result;
                        iov_iter_advance(from, cur_len);

                        nr_pages =
                                (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;

                        wdata = cifs_writedata_direct_alloc(pagevec,
                                             cifs_uncached_writev_complete);
                        if (!wdata) {
                                /*
                                 * NOTE(review): the page references taken by
                                 * iov_iter_get_pages_alloc() above do not
                                 * appear to be released on this failure path
                                 * — check for a page/pagevec leak.
                                 */
                                rc = -ENOMEM;
                                add_credits_and_wake_if(server, credits, 0);
                                break;
                        }


                        /* data may start mid-page; tailsz is the last page's share */
                        wdata->page_offset = start;
                        wdata->tailsz =
                                nr_pages > 1 ?
                                        cur_len - (PAGE_SIZE - start) -
                                        (nr_pages - 2) * PAGE_SIZE :
                                        cur_len;
                } else {
                        nr_pages = get_numpages(wsize, len, &cur_len);
                        wdata = cifs_writedata_alloc(nr_pages,
                                             cifs_uncached_writev_complete);
                        if (!wdata) {
                                rc = -ENOMEM;
                                add_credits_and_wake_if(server, credits, 0);
                                break;
                        }

                        rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
                        if (rc) {
                                kvfree(wdata->pages);
                                kfree(wdata);
                                add_credits_and_wake_if(server, credits, 0);
                                break;
                        }

                        num_pages = nr_pages;
                        rc = wdata_fill_from_iovec(
                                wdata, from, &cur_len, &num_pages);
                        if (rc) {
                                for (i = 0; i < nr_pages; i++)
                                        put_page(wdata->pages[i]);
                                kvfree(wdata->pages);
                                kfree(wdata);
                                add_credits_and_wake_if(server, credits, 0);
                                break;
                        }

                        /*
                         * Bring nr_pages down to the number of pages we
                         * actually used, and free any pages that we didn't use.
                         */
                        for ( ; nr_pages > num_pages; nr_pages--)
                                put_page(wdata->pages[nr_pages - 1]);

                        wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
                }

                wdata->sync_mode = WB_SYNC_ALL;
                wdata->nr_pages = nr_pages;
                wdata->offset = (__u64)offset;
                wdata->cfile = cifsFileInfo_get(open_file);
                wdata->server = server;
                wdata->pid = pid;
                wdata->bytes = cur_len;
                wdata->pagesz = PAGE_SIZE;
                wdata->credits = credits_on_stack;
                wdata->ctx = ctx;
                kref_get(&ctx->refcount);  /* wdata holds a ref on the aio ctx */

                rc = adjust_credits(server, &wdata->credits, wdata->bytes);

                if (!rc) {
                        if (wdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else
                                rc = server->ops->async_writev(wdata,
                                        cifs_uncached_writedata_release);
                }

                if (rc) {
                        add_credits_and_wake_if(server, &wdata->credits, 0);
                        kref_put(&wdata->refcount,
                                 cifs_uncached_writedata_release);
                        if (rc == -EAGAIN) {
                                /* rewind the iter to this chunk's start and retry */
                                *from = saved_from;
                                iov_iter_advance(from, offset - saved_offset);
                                continue;
                        }
                        break;
                }

                list_add_tail(&wdata->list, wdata_list);
                offset += cur_len;
                len -= cur_len;
        } while (len > 0);

        free_xid(xid);
        return rc;
}
3034
/*
 * Reap the completed uncached writes queued on @ctx->list.
 *
 * Called from the write completion work; ctx->aio_mutex serializes
 * concurrent completions.  If it finds a wdata that has not completed
 * yet it returns without finishing — the last completion to arrive
 * will call back in and do so.  -EAGAIN results are re-driven via
 * cifs_resend_wdata() (direct I/O) or cifs_write_from_iter().  When
 * the list drains, ctx->rc is set and the waiter (or the async iocb)
 * is completed.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
        struct cifs_writedata *wdata, *tmp;
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
        struct dentry *dentry = ctx->cfile->dentry;
        int rc;

        tcon = tlink_tcon(ctx->cfile->tlink);
        cifs_sb = CIFS_SB(dentry->d_sb);

        mutex_lock(&ctx->aio_mutex);

        /* empty list means another caller already finished the collection */
        if (list_empty(&ctx->list)) {
                mutex_unlock(&ctx->aio_mutex);
                return;
        }

        rc = ctx->rc;
        /*
         * Wait for and collect replies for any successful sends in order of
         * increasing offset. Once an error is hit, then return without waiting
         * for any more replies.
         */
restart_loop:
        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
                if (!rc) {
                        /* not done yet: a later completion will resume here */
                        if (!try_wait_for_completion(&wdata->done)) {
                                mutex_unlock(&ctx->aio_mutex);
                                return;
                        }

                        if (wdata->result)
                                rc = wdata->result;
                        else
                                ctx->total_len += wdata->bytes;

                        /* resend call if it's a retryable error */
                        if (rc == -EAGAIN) {
                                struct list_head tmp_list;
                                struct iov_iter tmp_from = ctx->iter;

                                INIT_LIST_HEAD(&tmp_list);
                                list_del_init(&wdata->list);

                                if (ctx->direct_io)
                                        rc = cifs_resend_wdata(
                                                wdata, &tmp_list, ctx);
                                else {
                                        /* re-copy this chunk from the saved iter */
                                        iov_iter_advance(&tmp_from,
                                                 wdata->offset - ctx->pos);

                                        rc = cifs_write_from_iter(wdata->offset,
                                                wdata->bytes, &tmp_from,
                                                ctx->cfile, cifs_sb, &tmp_list,
                                                ctx);

                                        kref_put(&wdata->refcount,
                                                cifs_uncached_writedata_release);
                                }

                                /* restart: the resends were spliced into the list */
                                list_splice(&tmp_list, &ctx->list);
                                goto restart_loop;
                        }
                }
                list_del_init(&wdata->list);
                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
        }

        cifs_stats_bytes_written(tcon, ctx->total_len);
        /* cached pages are now stale relative to what we just wrote */
        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

        ctx->rc = (rc == 0) ? ctx->total_len : rc;

        mutex_unlock(&ctx->aio_mutex);

        /* async callers get ki_complete; sync callers wait on ctx->done */
        if (ctx->iocb && ctx->iocb->ki_complete)
                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
        else
                complete(&ctx->done);
}
3116
/*
 * Common implementation behind cifs_user_writev()/cifs_direct_writev().
 *
 * Builds a cifs_aio_ctx, issues the write in chunks through
 * cifs_write_from_iter(), then for synchronous iocbs waits for the
 * collector to finish; asynchronous iocbs return -EIOCBQUEUED and
 * complete later via ki_complete.  @direct requests zero-copy pinning
 * of the user pages (falls back to the copying path for kvec iterators,
 * which iov_iter_get_pages_alloc cannot handle).  Returns bytes
 * written, -EIOCBQUEUED, or a negative errno.
 */
static ssize_t __cifs_writev(
        struct kiocb *iocb, struct iov_iter *from, bool direct)
{
        struct file *file = iocb->ki_filp;
        ssize_t total_written = 0;
        struct cifsFileInfo *cfile;
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
        struct cifs_aio_ctx *ctx;
        struct iov_iter saved_from = *from;
        size_t len = iov_iter_count(from);
        int rc;

        /*
         * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
         * In this case, fall back to non-direct write function.
         * this could be improved by getting pages directly in ITER_KVEC
         */
        if (direct && iov_iter_is_kvec(from)) {
                cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
                direct = false;
        }

        rc = generic_write_checks(iocb, from);
        if (rc <= 0)
                return rc;

        cifs_sb = CIFS_FILE_SB(file);
        cfile = file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        if (!tcon->ses->server->ops->async_writev)
                return -ENOSYS;

        ctx = cifs_aio_ctx_alloc();
        if (!ctx)
                return -ENOMEM;

        ctx->cfile = cifsFileInfo_get(cfile);

        /* remember the iocb only when the caller will not wait for us */
        if (!is_sync_kiocb(iocb))
                ctx->iocb = iocb;

        ctx->pos = iocb->ki_pos;

        if (direct) {
                ctx->direct_io = true;
                ctx->iter = *from;
                ctx->len = len;
        } else {
                /* copy path: duplicate the iter so resends can re-read it */
                rc = setup_aio_ctx_iter(ctx, from, WRITE);
                if (rc) {
                        kref_put(&ctx->refcount, cifs_aio_ctx_release);
                        return rc;
                }
        }

        /* grab a lock here due to read response handlers can access ctx */
        mutex_lock(&ctx->aio_mutex);

        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
                                  cfile, cifs_sb, &ctx->list, ctx);

        /*
         * If at least one write was successfully sent, then discard any rc
         * value from the later writes. If the other write succeeds, then
         * we'll end up returning whatever was written. If it fails, then
         * we'll get a new rc value from that.
         */
        if (!list_empty(&ctx->list))
                rc = 0;

        mutex_unlock(&ctx->aio_mutex);

        if (rc) {
                kref_put(&ctx->refcount, cifs_aio_ctx_release);
                return rc;
        }

        if (!is_sync_kiocb(iocb)) {
                /* async: collector calls ki_complete; drop our ctx ref now */
                kref_put(&ctx->refcount, cifs_aio_ctx_release);
                return -EIOCBQUEUED;
        }

        rc = wait_for_completion_killable(&ctx->done);
        if (rc) {
                /* killed while waiting: report -EINTR plus what finished */
                mutex_lock(&ctx->aio_mutex);
                ctx->rc = rc = -EINTR;
                total_written = ctx->total_len;
                mutex_unlock(&ctx->aio_mutex);
        } else {
                rc = ctx->rc;
                total_written = ctx->total_len;
        }

        kref_put(&ctx->refcount, cifs_aio_ctx_release);

        if (unlikely(!total_written))
                return rc;

        iocb->ki_pos += total_written;
        return total_written;
}
3220
3221ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3222{
3223        return __cifs_writev(iocb, from, true);
3224}
3225
3226ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3227{
3228        return __cifs_writev(iocb, from, false);
3229}
3230
/*
 * cifs_writev - write through the page cache while honouring byte-range locks.
 *
 * Takes the inode lock, then a shared hold on lock_sem, so that no brlock
 * conflicting with this write can be inserted between the conflict check and
 * the write itself.  Returns the number of bytes written (after an optional
 * generic_write_sync() for O_SYNC/O_DSYNC), or a negative errno; -EACCES if
 * a conflicting mandatory lock covers the target range.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	/* Proceed only if no other lock owner holds a conflicting brlock */
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, 0,
				     NULL, CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* flush/sync the data we just wrote if the open requires it */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
3266
/*
 * cifs_strict_writev - write path for strict cache mode.
 *
 * Chooses between cached and uncached writing based on the current
 * oplock/lease state, and invalidates the page cache after an uncached
 * write if we still hold read caching, so later reads do not see stale
 * data.  Returns bytes written or a negative errno.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks if an oplock break is in progress; nonzero means bail out */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * With POSIX (unix extensions) locking semantics the generic
		 * path is safe; otherwise take the brlock-aware cifs_writev().
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause an error with mandatory locks on
	 * these pages but not on the region from pos to pos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
3316
3317static struct cifs_readdata *
3318cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3319{
3320        struct cifs_readdata *rdata;
3321
3322        rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3323        if (rdata != NULL) {
3324                rdata->pages = pages;
3325                kref_init(&rdata->refcount);
3326                INIT_LIST_HEAD(&rdata->list);
3327                init_completion(&rdata->done);
3328                INIT_WORK(&rdata->work, complete);
3329        }
3330
3331        return rdata;
3332}
3333
3334static struct cifs_readdata *
3335cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3336{
3337        struct page **pages =
3338                kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3339        struct cifs_readdata *ret = NULL;
3340
3341        if (pages) {
3342                ret = cifs_readdata_direct_alloc(pages, complete);
3343                if (!ret)
3344                        kfree(pages);
3345        }
3346
3347        return ret;
3348}
3349
/*
 * cifs_readdata_release - kref release: tear down a cifs_readdata.
 *
 * Deregisters any smbdirect memory registration first, then drops the
 * file reference, and finally frees the page array and the readdata
 * itself.  Called when the last reference is put.
 */
void
cifs_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
	if (rdata->mr) {
		smbd_deregister_mr(rdata->mr);
		rdata->mr = NULL;
	}
#endif
	if (rdata->cfile)
		cifsFileInfo_put(rdata->cfile);

	kvfree(rdata->pages);
	kfree(rdata);
}
3367
3368static int
3369cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3370{
3371        int rc = 0;
3372        struct page *page;
3373        unsigned int i;
3374
3375        for (i = 0; i < nr_pages; i++) {
3376                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3377                if (!page) {
3378                        rc = -ENOMEM;
3379                        break;
3380                }
3381                rdata->pages[i] = page;
3382        }
3383
3384        if (rc) {
3385                unsigned int nr_page_failed = i;
3386
3387                for (i = 0; i < nr_page_failed; i++) {
3388                        put_page(rdata->pages[i]);
3389                        rdata->pages[i] = NULL;
3390                }
3391        }
3392        return rc;
3393}
3394
/*
 * cifs_uncached_readdata_release - kref release for uncached reads.
 *
 * Drops the aio context reference taken when the rdata was queued, puts
 * every remaining page, then delegates the common teardown to
 * cifs_readdata_release().
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
	}
	cifs_readdata_release(refcount);
}
3408
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 *
 * Returns 0 if all of @rdata->got_bytes was consumed, -EFAULT otherwise.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		if (unlikely(iov_iter_is_pipe(iter))) {
			/*
			 * Pipe destination: copy the bytes rather than letting
			 * copy_page_to_iter() capture a reference to a page we
			 * are about to recycle.
			 * NOTE(review): copy_to_iter() runs here under
			 * kmap_atomic() (atomic context) — presumably safe for
			 * the pipe case; confirm against iov_iter semantics.
			 */
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/* short copy with room left in the iter means a fault */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}
3442
3443static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3444
/*
 * cifs_uncached_readv_complete - work handler run when one uncached read
 * request finishes.
 *
 * Signals the rdata's completion first so collect_uncached_read_data()
 * can consume it, then kicks collection for the whole aio context.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3456
3457static int
3458uncached_fill_pages(struct TCP_Server_Info *server,
3459                    struct cifs_readdata *rdata, struct iov_iter *iter,
3460                    unsigned int len)
3461{
3462        int result = 0;
3463        unsigned int i;
3464        unsigned int nr_pages = rdata->nr_pages;
3465        unsigned int page_offset = rdata->page_offset;
3466
3467        rdata->got_bytes = 0;
3468        rdata->tailsz = PAGE_SIZE;
3469        for (i = 0; i < nr_pages; i++) {
3470                struct page *page = rdata->pages[i];
3471                size_t n;
3472                unsigned int segment_size = rdata->pagesz;
3473
3474                if (i == 0)
3475                        segment_size -= page_offset;
3476                else
3477                        page_offset = 0;
3478
3479
3480                if (len <= 0) {
3481                        /* no need to hold page hostage */
3482                        rdata->pages[i] = NULL;
3483                        rdata->nr_pages--;
3484                        put_page(page);
3485                        continue;
3486                }
3487
3488                n = len;
3489                if (len >= segment_size)
3490                        /* enough data to fill the page */
3491                        n = segment_size;
3492                else
3493                        rdata->tailsz = len;
3494                len -= n;
3495
3496                if (iter)
3497                        result = copy_page_from_iter(
3498                                        page, page_offset, n, iter);
3499#ifdef CONFIG_CIFS_SMB_DIRECT
3500                else if (rdata->mr)
3501                        result = n;
3502#endif
3503                else
3504                        result = cifs_read_page_from_socket(
3505                                        server, page, page_offset, n);
3506                if (result < 0)
3507                        break;
3508
3509                rdata->got_bytes += result;
3510        }
3511
3512        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3513                                                rdata->got_bytes : result;
3514}
3515
3516static int
3517cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3518                              struct cifs_readdata *rdata, unsigned int len)
3519{
3520        return uncached_fill_pages(server, rdata, NULL, len);
3521}
3522
3523static int
3524cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3525                              struct cifs_readdata *rdata,
3526                              struct iov_iter *iter)
3527{
3528        return uncached_fill_pages(server, rdata, iter, iter->count);
3529}
3530
/*
 * cifs_resend_rdata - resend a whole rdata after a retryable failure.
 *
 * Reopens the file handle if it went stale, waits until the server grants
 * enough credits to cover the full rdata->bytes in one request, then
 * re-issues the async read.  On success the rdata is placed back on
 * @rdata_list; on any final failure the rdata reference is dropped.
 * Returns 0 on success or a negative errno.
 */
static int cifs_resend_rdata(struct cifs_readdata *rdata,
			struct list_head *rdata_list,
			struct cifs_aio_ctx *ctx)
{
	unsigned int rsize;
	struct cifs_credits credits;
	int rc;
	struct TCP_Server_Info *server;

	/* XXX: should we pick a new channel here? */
	server = rdata->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/*
		 * Wait for credits to resend this rdata.
		 * Note: we are attempting to resend the whole rdata not in
		 * segments
		 */
		do {
			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
						&rsize, &credits);

			if (rc)
				goto fail;

			/* not enough for the full request: give back, retry */
			if (rsize < rdata->bytes) {
				add_credits_and_wake_if(server, &credits, 0);
				msleep(1000);
			}
		} while (rsize < rdata->bytes);
		rdata->credits = credits;

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else {
#ifdef CONFIG_CIFS_SMB_DIRECT
				/* re-register memory for the new attempt */
				if (rdata->mr) {
					rdata->mr->need_invalidate = true;
					smbd_deregister_mr(rdata->mr);
					rdata->mr = NULL;
				}
#endif
				rc = server->ops->async_readv(rdata);
			}
		}

		/* If the read was successfully sent, we are done */
		if (!rc) {
			/* Add to aio pending list */
			list_add_tail(&rdata->list, rdata_list);
			return 0;
		}

		/* Roll back credits and retry if needed */
		add_credits_and_wake_if(server, &rdata->credits, 0);
	} while (rc == -EAGAIN);

fail:
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	return rc;
}
3602
/*
 * cifs_send_async_read - split [offset, offset+len) into credit-sized
 * async read requests and queue them on @rdata_list.
 *
 * For direct I/O the user pages are pinned via iov_iter_get_pages_alloc();
 * otherwise kernel pages are allocated and data is copied in later.  Each
 * queued rdata holds a reference on @ctx.  An -EAGAIN from submission
 * rewinds the direct iterator and retries; any other error stops the loop.
 * Returns 0 when all requests were sent, or the last error.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize;
	struct cifs_credits credits_on_stack;
	struct cifs_credits *credits = &credits_on_stack;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;
	struct page **pagevec;
	size_t start;
	struct iov_iter direct_iov = ctx->iter;

	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	/* forward the original opener's pid if the mount asks for it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* our local iter copy starts at ctx->pos; skip to this chunk */
	if (ctx->direct_io)
		iov_iter_advance(&direct_iov, offset - ctx->pos);

	do {
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* block until the server grants credits for up to rsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);

		if (ctx->direct_io) {
			ssize_t result;

			result = iov_iter_get_pages_alloc(
					&direct_iov, &pagevec,
					cur_len, &start);
			if (result < 0) {
				cifs_dbg(VFS,
					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
					 result, iov_iter_type(&direct_iov),
					 direct_iov.iov_offset,
					 direct_iov.count);
				dump_stack();

				rc = result;
				add_credits_and_wake_if(server, credits, 0);
				break;
			}
			/* may have pinned fewer bytes than requested */
			cur_len = (size_t)result;
			iov_iter_advance(&direct_iov, cur_len);

			rdata = cifs_readdata_direct_alloc(
					pagevec, cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
			rdata->page_offset = start;
			/* bytes landing in the final (possibly partial) page */
			rdata->tailsz = npages > 1 ?
				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
				cur_len;

		} else {

			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
			/* allocate a readdata struct */
			rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
			if (!rdata) {
				add_credits_and_wake_if(server, credits, 0);
				rc = -ENOMEM;
				break;
			}

			rc = cifs_read_allocate_pages(rdata, npages);
			if (rc) {
				kvfree(rdata->pages);
				kfree(rdata);
				add_credits_and_wake_if(server, credits, 0);
				break;
			}

			rdata->tailsz = PAGE_SIZE;
		}

		rdata->server = server;
		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits_on_stack;
		rdata->ctx = ctx;
		/* each queued rdata keeps the aio ctx alive */
		kref_get(&ctx->refcount);

		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			kref_put(&rdata->refcount,
				cifs_uncached_readdata_release);
			if (rc == -EAGAIN) {
				/* undo the advance and retry this chunk */
				iov_iter_revert(&direct_iov, cur_len);
				continue;
			}
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3744
/*
 * collect_uncached_read_data - gather results of the queued read requests.
 *
 * Called both from __cifs_readv() completion handling and from each
 * request's completion work.  Under ctx->aio_mutex it walks ctx->list in
 * offset order, copying completed data into the destination iterator
 * (buffered path only), resending -EAGAIN requests, and accumulating
 * ctx->total_len.  If any request is still outstanding it returns early;
 * only once the list is fully drained does it set ctx->rc and either
 * invoke the aio completion or signal ctx->done for the sync waiter.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	int rc;

	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* list already drained by a concurrent collector: nothing to do */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* still in flight? come back on its completion */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = 0;
					if (!ctx->direct_io)
						rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
							cifs_uncached_readdata_release);
						continue;
					}
				}

				if (ctx->direct_io) {
					/*
					 * Re-use rdata as this is a
					 * direct I/O
					 */
					rc = cifs_resend_rdata(
						rdata,
						&tmp_list, ctx);
				} else {
					rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

					kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
				}

				list_splice(&tmp_list, &ctx->list);

				/* restart: the list changed under us */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else if (!ctx->direct_io)
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;

			ctx->total_len += rdata->got_bytes;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* buffered path: what the iterator consumed is what the caller got */
	if (!ctx->direct_io)
		ctx->total_len = ctx->len - iov_iter_count(to);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* async caller gets its completion; sync caller is woken instead */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3849
/*
 * __cifs_readv - common implementation behind cifs_user_readv() and
 * cifs_direct_readv().
 *
 * Builds a cifs_aio_ctx, queues async read requests for the whole range,
 * then either returns -EIOCBQUEUED for async kiocbs (completion happens in
 * collect_uncached_read_data()) or waits for ctx->done and returns the
 * number of bytes read (advancing iocb->ki_pos) or a negative errno.
 */
static ssize_t __cifs_readv(
	struct kiocb *iocb, struct iov_iter *to, bool direct)
{
	size_t len;
	struct file *file = iocb->ki_filp;
	struct cifs_sb_info *cifs_sb;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	ssize_t rc, total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_aio_ctx *ctx;

	/*
	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
	 * fall back to data copy read path
	 * this could be improved by getting pages directly in ITER_KVEC
	 */
	if (direct && iov_iter_is_kvec(to)) {
		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
		direct = false;
	}

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	/* ctx owns a file reference for the lifetime of the I/O */
	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be dirtied after the read lands in them */
	if (iter_is_iovec(to))
		ctx->should_dirty = true;

	if (direct) {
		ctx->pos = offset;
		ctx->direct_io = true;
		ctx->iter = *to;
		ctx->len = len;
	} else {
		/* buffered path copies the iovec into ctx-owned pages */
		rc = setup_aio_ctx_iter(ctx, to, READ);
		if (rc) {
			kref_put(&ctx->refcount, cifs_aio_ctx_release);
			return rc;
		}
		len = ctx->len;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async: completion callback will finish the iocb later */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR with partial progress */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3952
3953ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3954{
3955        return __cifs_readv(iocb, to, true);
3956}
3957
3958ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3959{
3960        return __cifs_readv(iocb, to, false);
3961}
3962
/*
 * cifs_strict_readv - read path for strict cache mode.
 *
 * Uses the page cache only when we hold read (level II) caching; with
 * POSIX lock semantics the generic path is used directly, otherwise the
 * read is performed under lock_sem after checking for conflicting
 * mandatory brlocks.  Returns bytes read or a negative errno (-EACCES
 * if a conflicting lock covers the range).
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with page reading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     0, NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
4002
/*
 * Synchronously read up to @read_size bytes at *@offset into @read_data
 * using the SMB handle attached to @file, advancing *@offset as data
 * arrives.  Each wire request is capped at the negotiated rsize and is
 * retried on -EAGAIN (e.g. after the handle is reopened following a
 * reconnect).
 *
 * Returns the total number of bytes read (0 at EOF), or a negative
 * error code if nothing at all could be read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms = {0};
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = cifs_pick_channel(tcon->ses);

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the opener's pid to the server if the mount requests it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	/* issue rsize-sized requests until the caller's buffer is full */
	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			/* reopen a handle invalidated by reconnect */
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			io_parms.server = server;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* error or EOF: report progress so far, if any */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
4094
4095/*
4096 * If the page is mmap'ed into a process' page tables, then we need to make
4097 * sure that it doesn't change while being written back.
4098 */
4099static vm_fault_t
4100cifs_page_mkwrite(struct vm_fault *vmf)
4101{
4102        struct page *page = vmf->page;
4103
4104        lock_page(page);
4105        return VM_FAULT_LOCKED;
4106}
4107
/*
 * mmap operations shared by cifs_file_mmap and cifs_file_strict_mmap:
 * faults are served through the generic page cache paths, while write
 * faults go through cifs_page_mkwrite so the page is locked for the
 * duration of writeback.
 */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
4113
4114int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4115{
4116        int xid, rc = 0;
4117        struct inode *inode = file_inode(file);
4118
4119        xid = get_xid();
4120
4121        if (!CIFS_CACHE_READ(CIFS_I(inode)))
4122                rc = cifs_zap_mapping(inode);
4123        if (!rc)
4124                rc = generic_file_mmap(file, vma);
4125        if (!rc)
4126                vma->vm_ops = &cifs_file_vm_ops;
4127
4128        free_xid(xid);
4129        return rc;
4130}
4131
4132int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4133{
4134        int rc, xid;
4135
4136        xid = get_xid();
4137
4138        rc = cifs_revalidate_file(file);
4139        if (rc)
4140                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4141                         rc);
4142        if (!rc)
4143                rc = generic_file_mmap(file, vma);
4144        if (!rc)
4145                vma->vm_ops = &cifs_file_vm_ops;
4146
4147        free_xid(xid);
4148        return rc;
4149}
4150
4151static void
4152cifs_readv_complete(struct work_struct *work)
4153{
4154        unsigned int i, got_bytes;
4155        struct cifs_readdata *rdata = container_of(work,
4156                                                struct cifs_readdata, work);
4157
4158        got_bytes = rdata->got_bytes;
4159        for (i = 0; i < rdata->nr_pages; i++) {
4160                struct page *page = rdata->pages[i];
4161
4162                lru_cache_add(page);
4163
4164                if (rdata->result == 0 ||
4165                    (rdata->result == -EAGAIN && got_bytes)) {
4166                        flush_dcache_page(page);
4167                        SetPageUptodate(page);
4168                }
4169
4170                unlock_page(page);
4171
4172                if (rdata->result == 0 ||
4173                    (rdata->result == -EAGAIN && got_bytes))
4174                        cifs_readpage_to_fscache(rdata->mapping->host, page);
4175
4176                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4177
4178                put_page(page);
4179                rdata->pages[i] = NULL;
4180        }
4181        kref_put(&rdata->refcount, cifs_readdata_release);
4182}
4183
/*
 * Fill the pages attached to @rdata with up to @len bytes of read
 * payload.  Data comes from one of three sources:
 *   - @iter, when the payload was already marshalled into an iov_iter;
 *   - the SMB Direct memory region (rdata->mr), in which case the data
 *     is presumably already placed by RDMA and we only account it;
 *   - otherwise directly from the server socket.
 *
 * A partially-covered page has its tail zeroed; pages beyond the data
 * are either zero-filled and marked uptodate (past the server's EOF) or
 * simply released back to the VFS.
 *
 * Returns the number of bytes placed into pages, or a negative error
 * (a connection abort always overrides partial progress).
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	unsigned int page_offset = rdata->page_offset;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		unsigned int to_read = rdata->pagesz;
		size_t n;

		/* only the first page may start at a non-zero offset */
		if (i == 0)
			to_read -= page_offset;
		else
			page_offset = 0;

		n = to_read;

		if (len >= to_read) {
			/* full page of data available */
			len -= to_read;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len + page_offset, to_read - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(
					page, page_offset, n, iter);
#ifdef CONFIG_CIFS_SMB_DIRECT
		else if (rdata->mr)
			/* data already placed by RDMA; just account it */
			result = n;
#endif
		else
			result = cifs_read_page_from_socket(
					server, page, page_offset, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection was aborted */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
4269
/*
 * rdata->read_into_pages hook: receive @len bytes of response payload
 * straight from the server socket into the pages of @rdata.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
4276
/*
 * rdata->copy_into_pages hook: the payload has already been received
 * into @iter; copy it from there into the pages of @rdata.
 */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
4284
/*
 * Carve the next batch off @page_list: take the lowest-index page and
 * as many index-contiguous followers as fit in @rsize, adding each one
 * to the page cache (locked) and moving it onto @tmplist.  On return
 * *offset, *bytes and *nr_pages describe the file range covered by the
 * batch.
 *
 * Returns 0 on success.  A failure from add_to_page_cache_locked() on
 * the first page aborts with its rc (no pages moved, *nr_pages is not
 * set); a failure on a later page just ends the batch early, and the
 * pages already moved remain usable by the caller.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = lru_to_page(page_list);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
4344
/*
 * ->readpages: populate a batch of pages for readahead.
 *
 * The pages are first offered to fscache; anything left over is carved
 * into contiguous, rsize/credit-bounded runs and issued to the server
 * as async reads, completed by cifs_readv_complete.  Pages that cannot
 * be issued are returned to the LRU so the VFS can fall back to
 * ->readpage for them.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;
	unsigned int xid;

	xid = get_xid();
	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0) {
		/* everything came out of the cache */
		free_xid(xid);
		return rc;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		struct cifs_credits credits_on_stack;
		struct cifs_credits *credits = &credits_on_stack;

		/* reopen a handle invalidated by reconnect */
		if (open_file->invalidHandle) {
			rc = cifs_reopen_file(open_file, true);
			if (rc == -EAGAIN)
				continue;
			else if (rc)
				break;
		}

		/* reserve credits; rsize is the request cap they allow */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			free_xid(xid);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->server = server;
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->tailsz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits_on_stack;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		/* trim the credit reservation down to what we actually read */
		rc = adjust_credits(server, &rdata->credits, rdata->bytes);

		if (!rc) {
			if (rdata->cfile->invalidHandle)
				rc = -EAGAIN;
			else
				rc = server->ops->async_readv(rdata);
		}

		if (rc) {
			add_credits_and_wake_if(server, &rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our submission reference; completion holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	free_xid(xid);
	return rc;
}
4499
4500/*
4501 * cifs_readpage_worker must be called with the page pinned
4502 */
4503static int cifs_readpage_worker(struct file *file, struct page *page,
4504        loff_t *poffset)
4505{
4506        char *read_data;
4507        int rc;
4508
4509        /* Is the page cached? */
4510        rc = cifs_readpage_from_fscache(file_inode(file), page);
4511        if (rc == 0)
4512                goto read_complete;
4513
4514        read_data = kmap(page);
4515        /* for reads over a certain size could initiate async read ahead */
4516
4517        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4518
4519        if (rc < 0)
4520                goto io_error;
4521        else
4522                cifs_dbg(FYI, "Bytes read %d\n", rc);
4523
4524        /* we do not want atime to be less than mtime, it broke some apps */
4525        file_inode(file)->i_atime = current_time(file_inode(file));
4526        if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4527                file_inode(file)->i_atime = file_inode(file)->i_mtime;
4528        else
4529                file_inode(file)->i_atime = current_time(file_inode(file));
4530
4531        if (PAGE_SIZE > rc)
4532                memset(read_data + rc, 0, PAGE_SIZE - rc);
4533
4534        flush_dcache_page(page);
4535        SetPageUptodate(page);
4536
4537        /* send this page to the cache */
4538        cifs_readpage_to_fscache(file_inode(file), page);
4539
4540        rc = 0;
4541
4542io_error:
4543        kunmap(page);
4544        unlock_page(page);
4545
4546read_complete:
4547        return rc;
4548}
4549
4550static int cifs_readpage(struct file *file, struct page *page)
4551{
4552        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4553        int rc = -EACCES;
4554        unsigned int xid;
4555
4556        xid = get_xid();
4557
4558        if (file->private_data == NULL) {
4559                rc = -EBADF;
4560                free_xid(xid);
4561                return rc;
4562        }
4563
4564        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4565                 page, (int)offset, (int)offset);
4566
4567        rc = cifs_readpage_worker(file, page, &offset);
4568
4569        free_xid(xid);
4570        return rc;
4571}
4572
4573static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4574{
4575        struct cifsFileInfo *open_file;
4576
4577        spin_lock(&cifs_inode->open_file_lock);
4578        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4579                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4580                        spin_unlock(&cifs_inode->open_file_lock);
4581                        return 1;
4582                }
4583        }
4584        spin_unlock(&cifs_inode->open_file_lock);
4585        return 0;
4586}
4587
4588/* We do not want to update the file size from server for inodes
4589   open for write - to avoid races with writepage extending
4590   the file - in the future we could consider allowing
4591   refreshing the inode only on increases in the file size
4592   but this is tricky to do without racing with writebehind
4593   page caching in the current Linux kernel design */
4594bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4595{
4596        if (!cifsInode)
4597                return true;
4598
4599        if (is_inode_writable(cifsInode)) {
4600                /* This inode is open for write at least once */
4601                struct cifs_sb_info *cifs_sb;
4602
4603                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4604                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4605                        /* since no page cache to corrupt on directio
4606                        we can change size safely */
4607                        return true;
4608                }
4609
4610                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4611                        return true;
4612
4613                return false;
4614        } else
4615                return true;
4616}
4617
/*
 * ->write_begin: return (in *pagep) a locked, referenced page covering
 * @pos for an upcoming @len-byte write.  A page that is not already
 * uptodate is made safe either by zeroing the regions outside the write
 * (when we hold a read oplock and the write covers or extends EOF) or
 * by reading it in from the server; if neither happens, cifs_write_end
 * falls back to a sync write because PG_uptodate stays clear.
 * Returns 0 on success, -ENOMEM if no page could be grabbed.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	/* read at most once; the worker drops the page lock, so re-grab */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4694
4695static int cifs_release_page(struct page *page, gfp_t gfp)
4696{
4697        if (PagePrivate(page))
4698                return 0;
4699
4700        return cifs_fscache_release_page(page, gfp);
4701}
4702
4703static void cifs_invalidate_page(struct page *page, unsigned int offset,
4704                                 unsigned int length)
4705{
4706        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4707
4708        if (offset == 0 && length == PAGE_SIZE)
4709                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4710}
4711
4712static int cifs_launder_page(struct page *page)
4713{
4714        int rc = 0;
4715        loff_t range_start = page_offset(page);
4716        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4717        struct writeback_control wbc = {
4718                .sync_mode = WB_SYNC_ALL,
4719                .nr_to_write = 0,
4720                .range_start = range_start,
4721                .range_end = range_end,
4722        };
4723
4724        cifs_dbg(FYI, "Launder page: %p\n", page);
4725
4726        if (clear_page_dirty_for_io(page))
4727                rc = cifs_writepage_locked(page, &wbc);
4728
4729        cifs_fscache_invalidate_page(page, page->mapping->host);
4730        return rc;
4731}
4732
/*
 * Work handler run when the server breaks our oplock/lease: downgrade
 * the cached oplock state, flush (and if read caching was lost,
 * invalidate) the page cache, push cached byte-range locks back to the
 * server, and finally acknowledge the break unless it was cancelled.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* let pending writers drain before changing the oplock state */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/*
	 * A read (level II) oplock cannot be kept alongside cached
	 * mandatory brlocks; drop the oplock entirely in that case.
	 */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* notify local lease holders, then flush dirty pages */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* read caching lost: wait and drop cached pages */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		/* still write-cached: locks stay cached, skip the push */
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	cifs_done_oplock_break(cinode);
}
4792
4793/*
4794 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
4796 *
4797 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
4798 * so this method should never be called.
4799 *
4800 * Direct IO is not yet supported in the cached mode. 
4801 */
/*
 * Placeholder ->direct_IO: its mere presence in cifs_addr_ops allows
 * open(O_DIRECT) to succeed.  Per the comment preceding this function,
 * cache=none mounts shunt direct reads/writes elsewhere and cached
 * mounts do not support direct I/O yet, so always reject.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
4811
/*
 * ->swap_activate: called when swapon(2) targets a CIFS file.  Rejects
 * files with holes (i_blocks accounted in 512-byte units covering less
 * than i_size) and marks the open handle as a swapfile.  Support over
 * SMB3 is experimental.
 */
static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	/* snapshot block count and size consistently under i_lock */
	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	/*
	 * NOTE(review): blocks * 512 may overflow unsigned long on 32-bit
	 * hosts for very large files -- confirm before relying on this
	 * check there.
	 */
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */


	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	return 0;
}
4852
4853static void cifs_swap_deactivate(struct file *file)
4854{
4855        struct cifsFileInfo *cfile = file->private_data;
4856
4857        cifs_dbg(FYI, "swap deactivate\n");
4858
4859        /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
4860
4861        if (cfile)
4862                cfile->swapfile = false;
4863
4864        /* do we need to unpin (or unlock) the file */
4865}
4866
/*
 * Address space operations used for the normal (cached) I/O path when the
 * server supports buffers large enough for full-page reads; servers that
 * do not get cifs_addr_ops_smallbuf below instead.
 */
const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .direct_IO = cifs_direct_io,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
        /*
         * TODO: investigate and if useful we could add an cifs_migratePage
         * helper (under an CONFIG_MIGRATION) in the future, and also
         * investigate and add an is_dirty_writeback helper if needed
         */
        .swap_activate = cifs_swap_activate,
        .swap_deactivate = cifs_swap_deactivate,
};
4887
4888/*
4889 * cifs_readpages requires the server to support a buffer large enough to
4890 * contain the header plus one complete page of data.  Otherwise, we need
4891 * to leave cifs_readpages out of the address space operations.
4892 */
4893const struct address_space_operations cifs_addr_ops_smallbuf = {
4894        .readpage = cifs_readpage,
4895        .writepage = cifs_writepage,
4896        .writepages = cifs_writepages,
4897        .write_begin = cifs_write_begin,
4898        .write_end = cifs_write_end,
4899        .set_page_dirty = __set_page_dirty_nobuffers,
4900        .releasepage = cifs_release_page,
4901        .invalidatepage = cifs_invalidate_page,
4902        .launder_page = cifs_launder_page,
4903};
4904