linux/fs/cifs/file.c
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
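
/*
 * For example, an open(2) with O_RDWR yields GENERIC_READ | GENERIC_WRITE,
 * while an unexpected O_ACCMODE value falls through to the explicit
 * FILE_READ_DATA / FILE_WRITE_DATA / ... mask above.
 */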

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
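
/*
 * Example: creating a file with open(path, O_WRONLY | O_CREAT | O_EXCL)
 * converts to SMB_O_WRONLY | SMB_O_CREAT | SMB_O_EXCL, whereas O_EXCL
 * without O_CREAT is ignored (with the debug message above), matching
 * POSIX, where O_EXCL is undefined unless O_CREAT is also set.
 */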

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
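
/*
 * Note that the checks above are ordered: O_CREAT | O_EXCL wins over
 * O_TRUNC, so open(path, O_CREAT | O_EXCL | O_TRUNC) maps to FILE_CREATE
 * rather than FILE_OVERWRITE_IF.
 */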

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
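
/*
 * Callers that do not need the inode refreshed (e.g. the reopen path)
 * may pass a NULL pinode; when *pinode is NULL a new inode is set up
 * from the returned FILE_UNIX_BASIC_INFO instead.
 */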

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also has the bit for O_DSYNC, so this check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
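
/*
 * The cifsFileInfo returned above carries an initial reference
 * (count == 1) owned by file->private_data; it is dropped via
 * cifsFileInfo_put(), normally from cifs_close().
 */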

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry opening the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already hold it, can end up causing writepage to get
         * called, and if the server was down that means we end up here. We
         * can never tell whether the caller already holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry opening the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf returned
         * by ops->open and then calling get_inode_info with the returned
         * buf, since the file might have write-behind data that needs to be
         * flushed and the server version of the file size can be stale. If
         * we knew for sure that the inode was not dirty locally we could do
         * this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout has expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data. And since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
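
/*
 * Note that cifs_close() itself does no network I/O: the SMB close is
 * issued from cifsFileInfo_put() once the last reference to the file
 * private data is dropped.
 */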

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
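
/*
 * For instance, a CIFS_WRITE_OP check through a fid that holds only a
 * shared (read) lock on the range reports a conflict, while a read or
 * write through a fid that owns an exclusive lock on the range does not
 * conflict with that lock.
 */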

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

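        /*
         * A single LOCKING_ANDX request carries ranges of one lock type
         * only, so make one pass per type and flush a full buffer of
         * max_num ranges whenever it fills up.
         */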
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
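
/*
 * Hashing the owner pointer with the per-boot cifs_lock_secret gives a
 * stable lock-owner id without disclosing raw kernel pointer values to
 * the server (presumably why the secret is mixed in).
 */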

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem, which
         * protects locking operations for this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                el = el->next;
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

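        /*
         * There is no lock-test operation for mandatory locks here, so
         * probe by trying to take the lock and, if that succeeds,
         * immediately unlocking the range again.
         */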
        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1503                        /*
1504                         * We need to save the lock here so we can add it back
1505                         * to the file's list if the unlock range request fails
1506                         * on the server.
1507                         */
1508                        list_move(&li->llist, &tmp_llist);
1509                        if (++num == max_num) {
1510                                stored_rc = cifs_lockv(xid, tcon,
1511                                                       cfile->fid.netfid,
1512                                                       li->type, num, 0, buf);
1513                                if (stored_rc) {
1514                                        /*
1515                                         * We failed on the unlock range
1516                                         * request - add all locks from the tmp
1517                                         * list to the head of the file's list.
1518                                         */
1519                                        cifs_move_llist(&tmp_llist,
1520                                                        &cfile->llist->locks);
1521                                        rc = stored_rc;
1522                                } else
1523                        /*
1524                         * The unlock range request succeeded -
1525                         * free the tmp list.
1526                         */
1527                                        cifs_free_llist(&tmp_llist);
1528                                cur = buf;
1529                                num = 0;
1530                        } else
1531                                cur++;
1532                }
1533                if (num) {
1534                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1535                                               types[i], num, 0, buf);
1536                        if (stored_rc) {
1537                                cifs_move_llist(&tmp_llist,
1538                                                &cfile->llist->locks);
1539                                rc = stored_rc;
1540                        } else
1541                                cifs_free_llist(&tmp_llist);
1542                }
1543        }
1544
1545        up_write(&cinode->lock_sem);
1546        kfree(buf);
1547        return rc;
1548}
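
/*
 * Illustrative sketch (not part of the original file): the batching math
 * used above, pulled out into a hypothetical helper. Each LOCKING_ANDX
 * request can carry only as many ranges as fit after the SMB header, so
 * a large unlock turns into ceil(nranges / max_num) cifs_lockv() calls.
 */
#if 0
static unsigned int example_max_ranges_per_request(struct TCP_Server_Info *server)
{
	/* snapshot maxBuf once - reading it is racy with cifs_reconnect */
	unsigned int max_buf = server->maxBuf;

	if (!max_buf)
		return 0;
	return (max_buf - sizeof(struct smb_hdr)) /
					sizeof(LOCKING_ANDX_RANGE);
}
#endif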
1549
1550static int
1551cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1552           bool wait_flag, bool posix_lck, int lock, int unlock,
1553           unsigned int xid)
1554{
1555        int rc = 0;
1556        __u64 length = 1 + flock->fl_end - flock->fl_start;
1557        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1558        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1559        struct TCP_Server_Info *server = tcon->ses->server;
1560        struct inode *inode = d_inode(cfile->dentry);
1561
1562        if (posix_lck) {
1563                int posix_lock_type;
1564
1565                rc = cifs_posix_lock_set(file, flock);
1566                if (!rc || rc < 0)
1567                        return rc;
1568
1569                if (type & server->vals->shared_lock_type)
1570                        posix_lock_type = CIFS_RDLCK;
1571                else
1572                        posix_lock_type = CIFS_WRLCK;
1573
1574                if (unlock == 1)
1575                        posix_lock_type = CIFS_UNLCK;
1576
1577                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1578                                      hash_lockowner(flock->fl_owner),
1579                                      flock->fl_start, length,
1580                                      NULL, posix_lock_type, wait_flag);
1581                goto out;
1582        }
1583
1584        if (lock) {
1585                struct cifsLockInfo *lock;
1586
1587                lock = cifs_lock_init(flock->fl_start, length, type);
1588                if (!lock)
1589                        return -ENOMEM;
1590
1591                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1592                if (rc < 0) {
1593                        kfree(lock);
1594                        return rc;
1595                }
1596                if (!rc)
1597                        goto out;
1598
1599                /*
1600                 * A Windows 7 server can delay breaking a lease from read to
1601                 * None if we set a byte-range lock on a file - break it
1602                 * explicitly before sending the lock to the server, to be sure
1603                 * the next read won't conflict with non-overlapping locks due
1604                 * to page reading.
1605                 */
1606                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1607                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1608                        cifs_zap_mapping(inode);
1609                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1610                                 inode);
1611                        CIFS_I(inode)->oplock = 0;
1612                }
1613
1614                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1615                                            type, 1, 0, wait_flag);
1616                if (rc) {
1617                        kfree(lock);
1618                        return rc;
1619                }
1620
1621                cifs_lock_add(cfile, lock);
1622        } else if (unlock)
1623                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1624
1625out:
1626        if (flock->fl_flags & FL_POSIX && !rc)
1627                rc = locks_lock_file_wait(file, flock);
1628        return rc;
1629}
1630
1631int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1632{
1633        int rc, xid;
1634        int lock = 0, unlock = 0;
1635        bool wait_flag = false;
1636        bool posix_lck = false;
1637        struct cifs_sb_info *cifs_sb;
1638        struct cifs_tcon *tcon;
1639        struct cifsInodeInfo *cinode;
1640        struct cifsFileInfo *cfile;
1641        __u16 netfid;
1642        __u32 type;
1643
1644        rc = -EACCES;
1645        xid = get_xid();
1646
1647        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1648                 cmd, flock->fl_flags, flock->fl_type,
1649                 flock->fl_start, flock->fl_end);
1650
1651        cfile = (struct cifsFileInfo *)file->private_data;
1652        tcon = tlink_tcon(cfile->tlink);
1653
1654        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1655                        tcon->ses->server);
1656
1657        cifs_sb = CIFS_FILE_SB(file);
1658        netfid = cfile->fid.netfid;
1659        cinode = CIFS_I(file_inode(file));
1660
1661        if (cap_unix(tcon->ses) &&
1662            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1663            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1664                posix_lck = true;
1665        /*
1666         * BB add code here to normalize offset and length to account for
1667         * negative length, which we cannot accept over the wire.
1668         */
1669        if (IS_GETLK(cmd)) {
1670                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1671                free_xid(xid);
1672                return rc;
1673        }
1674
1675        if (!lock && !unlock) {
1676                /*
1677                 * if neither lock nor unlock was requested, there is nothing
1678                 * to do since we do not know what the operation is
1679                 */
1680                free_xid(xid);
1681                return -EOPNOTSUPP;
1682        }
1683
1684        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1685                        xid);
1686        free_xid(xid);
1687        return rc;
1688}
1689
1690/*
1691 * update the file size (if needed) after a write. Should be called with
1692 * the inode->i_lock held
1693 */
1694void
1695cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1696                      unsigned int bytes_written)
1697{
1698        loff_t end_of_write = offset + bytes_written;
1699
1700        if (end_of_write > cifsi->server_eof)
1701                cifsi->server_eof = end_of_write;
1702}
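
/*
 * Illustrative sketch (not part of the original file): cifs_update_eof()
 * must be called under inode->i_lock; callers in the write paths below
 * typically pair it with an i_size update, roughly:
 */
#if 0
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, offset, bytes_written);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);
#endif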
1703
1704static ssize_t
1705cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1706           size_t write_size, loff_t *offset)
1707{
1708        int rc = 0;
1709        unsigned int bytes_written = 0;
1710        unsigned int total_written;
1711        struct cifs_sb_info *cifs_sb;
1712        struct cifs_tcon *tcon;
1713        struct TCP_Server_Info *server;
1714        unsigned int xid;
1715        struct dentry *dentry = open_file->dentry;
1716        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1717        struct cifs_io_parms io_parms;
1718
1719        cifs_sb = CIFS_SB(dentry->d_sb);
1720
1721        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1722                 write_size, *offset, dentry);
1723
1724        tcon = tlink_tcon(open_file->tlink);
1725        server = tcon->ses->server;
1726
1727        if (!server->ops->sync_write)
1728                return -ENOSYS;
1729
1730        xid = get_xid();
1731
1732        for (total_written = 0; write_size > total_written;
1733             total_written += bytes_written) {
1734                rc = -EAGAIN;
1735                while (rc == -EAGAIN) {
1736                        struct kvec iov[2];
1737                        unsigned int len;
1738
1739                        if (open_file->invalidHandle) {
1740                                /* we could deadlock if we called
1741                                   filemap_fdatawait from here, so tell
1742                                   reopen_file not to flush data to the
1743                                   server now */
1744                                rc = cifs_reopen_file(open_file, false);
1745                                if (rc != 0)
1746                                        break;
1747                        }
1748
1749                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1750                                  (unsigned int)write_size - total_written);
1751                        /* iov[0] is reserved for smb header */
1752                        iov[1].iov_base = (char *)write_data + total_written;
1753                        iov[1].iov_len = len;
1754                        io_parms.pid = pid;
1755                        io_parms.tcon = tcon;
1756                        io_parms.offset = *offset;
1757                        io_parms.length = len;
1758                        rc = server->ops->sync_write(xid, &open_file->fid,
1759                                        &io_parms, &bytes_written, iov, 1);
1760                }
1761                if (rc || (bytes_written == 0)) {
1762                        if (total_written)
1763                                break;
1764                        else {
1765                                free_xid(xid);
1766                                return rc;
1767                        }
1768                } else {
1769                        spin_lock(&d_inode(dentry)->i_lock);
1770                        cifs_update_eof(cifsi, *offset, bytes_written);
1771                        spin_unlock(&d_inode(dentry)->i_lock);
1772                        *offset += bytes_written;
1773                }
1774        }
1775
1776        cifs_stats_bytes_written(tcon, total_written);
1777
1778        if (total_written > 0) {
1779                spin_lock(&d_inode(dentry)->i_lock);
1780                if (*offset > d_inode(dentry)->i_size)
1781                        i_size_write(d_inode(dentry), *offset);
1782                spin_unlock(&d_inode(dentry)->i_lock);
1783        }
1784        mark_inode_dirty_sync(d_inode(dentry));
1785        free_xid(xid);
1786        return total_written;
1787}
1788
1789struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1790                                        bool fsuid_only)
1791{
1792        struct cifsFileInfo *open_file = NULL;
1793        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1794        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1795
1796        /* only filter by fsuid on multiuser mounts */
1797        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1798                fsuid_only = false;
1799
1800        spin_lock(&tcon->open_file_lock);
1801        /* we could simply take the first list entry since write-only entries
1802           are always at the end of the list, but since the first entry might
1803           have a close pending, we go through the whole list */
1804        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1805                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1806                        continue;
1807                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1808                        if (!open_file->invalidHandle) {
1809                                /* found a good file */
1810                                /* lock it so it will not be closed on us */
1811                                cifsFileInfo_get(open_file);
1812                                spin_unlock(&tcon->open_file_lock);
1813                                return open_file;
1814                        } /* else might as well continue, and look for
1815                             another, or simply have the caller reopen it
1816                             again rather than trying to fix this handle */
1817                } else /* write only file */
1818                        break; /* write only files are last so must be done */
1819        }
1820        spin_unlock(&tcon->open_file_lock);
1821        return NULL;
1822}
1823
1824struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1825                                        bool fsuid_only)
1826{
1827        struct cifsFileInfo *open_file, *inv_file = NULL;
1828        struct cifs_sb_info *cifs_sb;
1829        struct cifs_tcon *tcon;
1830        bool any_available = false;
1831        int rc;
1832        unsigned int refind = 0;
1833
1834        /* Having a null inode here (because mapping->host was set to zero by
1835        the VFS or MM) should not happen, but we had reports of an oops (due
1836        to it being zero) during stress testcases, so we need to check for it */
1837
1838        if (cifs_inode == NULL) {
1839                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1840                dump_stack();
1841                return NULL;
1842        }
1843
1844        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1845        tcon = cifs_sb_master_tcon(cifs_sb);
1846
1847        /* only filter by fsuid on multiuser mounts */
1848        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1849                fsuid_only = false;
1850
1851        spin_lock(&tcon->open_file_lock);
1852refind_writable:
1853        if (refind > MAX_REOPEN_ATT) {
1854                spin_unlock(&tcon->open_file_lock);
1855                return NULL;
1856        }
1857        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1858                if (!any_available && open_file->pid != current->tgid)
1859                        continue;
1860                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1861                        continue;
1862                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1863                        if (!open_file->invalidHandle) {
1864                                /* found a good writable file */
1865                                cifsFileInfo_get(open_file);
1866                                spin_unlock(&tcon->open_file_lock);
1867                                return open_file;
1868                        } else {
1869                                if (!inv_file)
1870                                        inv_file = open_file;
1871                        }
1872                }
1873        }
1874        /* couldn't find a usable FH with the same pid, try any available */
1875        if (!any_available) {
1876                any_available = true;
1877                goto refind_writable;
1878        }
1879
1880        if (inv_file) {
1881                any_available = false;
1882                cifsFileInfo_get(inv_file);
1883        }
1884
1885        spin_unlock(&tcon->open_file_lock);
1886
1887        if (inv_file) {
1888                rc = cifs_reopen_file(inv_file, false);
1889                if (!rc)
1890                        return inv_file;
1891                else {
1892                        spin_lock(&tcon->open_file_lock);
1893                        list_move_tail(&inv_file->flist,
1894                                        &cifs_inode->openFileList);
1895                        spin_unlock(&tcon->open_file_lock);
1896                        cifsFileInfo_put(inv_file);
1897                        ++refind;
1898                        inv_file = NULL;
1899                        spin_lock(&tcon->open_file_lock);
1900                        goto refind_writable;
1901                }
1902        }
1903
1904        return NULL;
1905}
1906
1907static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1908{
1909        struct address_space *mapping = page->mapping;
1910        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1911        char *write_data;
1912        int rc = -EFAULT;
1913        int bytes_written = 0;
1914        struct inode *inode;
1915        struct cifsFileInfo *open_file;
1916
1917        if (!mapping || !mapping->host)
1918                return -EFAULT;
1919
1920        inode = page->mapping->host;
1921
1922        offset += (loff_t)from;
1923        write_data = kmap(page);
1924        write_data += from;
1925
1926        if ((to > PAGE_SIZE) || (from > to)) {
1927                kunmap(page);
1928                return -EIO;
1929        }
1930
1931        /* racing with truncate? */
1932        if (offset > mapping->host->i_size) {
1933                kunmap(page);
1934                return 0; /* don't care */
1935        }
1936
1937        /* check to make sure that we are not extending the file */
1938        if (mapping->host->i_size - offset < (loff_t)to)
1939                to = (unsigned)(mapping->host->i_size - offset);
1940
1941        open_file = find_writable_file(CIFS_I(mapping->host), false);
1942        if (open_file) {
1943                bytes_written = cifs_write(open_file, open_file->pid,
1944                                           write_data, to - from, &offset);
1945                cifsFileInfo_put(open_file);
1946                /* Does mm or vfs already set times? */
1947                inode->i_atime = inode->i_mtime = current_time(inode);
1948                if ((bytes_written > 0) && (offset))
1949                        rc = 0;
1950                else if (bytes_written < 0)
1951                        rc = bytes_written;
1952        } else {
1953                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1954                rc = -EIO;
1955        }
1956
1957        kunmap(page);
1958        return rc;
1959}
1960
1961static struct cifs_writedata *
1962wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1963                          pgoff_t end, pgoff_t *index,
1964                          unsigned int *found_pages)
1965{
1966        unsigned int nr_pages;
1967        struct page **pages;
1968        struct cifs_writedata *wdata;
1969
1970        wdata = cifs_writedata_alloc((unsigned int)tofind,
1971                                     cifs_writev_complete);
1972        if (!wdata)
1973                return NULL;
1974
1975        /*
1976         * find_get_pages_tag seems to return a max of 256 on each
1977         * iteration, so we must call it several times in order to
1978         * fill the array; otherwise the wsize is effectively limited
1979         * to 256 * PAGE_SIZE.
1980         */
1981        *found_pages = 0;
1982        pages = wdata->pages;
1983        do {
1984                nr_pages = find_get_pages_tag(mapping, index,
1985                                              PAGECACHE_TAG_DIRTY, tofind,
1986                                              pages);
1987                *found_pages += nr_pages;
1988                tofind -= nr_pages;
1989                pages += nr_pages;
1990        } while (nr_pages && tofind && *index <= end);
1991
1992        return wdata;
1993}
1994
1995static unsigned int
1996wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1997                    struct address_space *mapping,
1998                    struct writeback_control *wbc,
1999                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2000{
2001        unsigned int nr_pages = 0, i;
2002        struct page *page;
2003
2004        for (i = 0; i < found_pages; i++) {
2005                page = wdata->pages[i];
2006                /*
2007                 * At this point we hold neither mapping->tree_lock nor
2008                 * lock on the page itself: the page may be truncated or
2009                 * invalidated (changing page->mapping to NULL), or even
2010                 * swizzled back from swapper_space to tmpfs file
2011                 * mapping
2012                 */
2013
2014                if (nr_pages == 0)
2015                        lock_page(page);
2016                else if (!trylock_page(page))
2017                        break;
2018
2019                if (unlikely(page->mapping != mapping)) {
2020                        unlock_page(page);
2021                        break;
2022                }
2023
2024                if (!wbc->range_cyclic && page->index > end) {
2025                        *done = true;
2026                        unlock_page(page);
2027                        break;
2028                }
2029
2030                if (*next && (page->index != *next)) {
2031                        /* Not the next consecutive page */
2032                        unlock_page(page);
2033                        break;
2034                }
2035
2036                if (wbc->sync_mode != WB_SYNC_NONE)
2037                        wait_on_page_writeback(page);
2038
2039                if (PageWriteback(page) ||
2040                                !clear_page_dirty_for_io(page)) {
2041                        unlock_page(page);
2042                        break;
2043                }
2044
2045                /*
2046                 * This actually clears the dirty bit in the radix tree.
2047                 * See cifs_writepage() for more commentary.
2048                 */
2049                set_page_writeback(page);
2050                if (page_offset(page) >= i_size_read(mapping->host)) {
2051                        *done = true;
2052                        unlock_page(page);
2053                        end_page_writeback(page);
2054                        break;
2055                }
2056
2057                wdata->pages[i] = page;
2058                *next = page->index + 1;
2059                ++nr_pages;
2060        }
2061
2062        /* reset index to refind any pages skipped */
2063        if (nr_pages == 0)
2064                *index = wdata->pages[0]->index + 1;
2065
2066        /* put any pages we aren't going to use */
2067        for (i = nr_pages; i < found_pages; i++) {
2068                put_page(wdata->pages[i]);
2069                wdata->pages[i] = NULL;
2070        }
2071
2072        return nr_pages;
2073}
2074
2075static int
2076wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2077                 struct address_space *mapping, struct writeback_control *wbc)
2078{
2079        int rc = 0;
2080        struct TCP_Server_Info *server;
2081        unsigned int i;
2082
2083        wdata->sync_mode = wbc->sync_mode;
2084        wdata->nr_pages = nr_pages;
2085        wdata->offset = page_offset(wdata->pages[0]);
2086        wdata->pagesz = PAGE_SIZE;
2087        wdata->tailsz = min(i_size_read(mapping->host) -
2088                        page_offset(wdata->pages[nr_pages - 1]),
2089                        (loff_t)PAGE_SIZE);
2090        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2091
2092        if (wdata->cfile != NULL)
2093                cifsFileInfo_put(wdata->cfile);
2094        wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2095        if (!wdata->cfile) {
2096                cifs_dbg(VFS, "No writable handles for inode\n");
2097                rc = -EBADF;
2098        } else {
2099                wdata->pid = wdata->cfile->pid;
2100                server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2101                rc = server->ops->async_writev(wdata, cifs_writedata_release);
2102        }
2103
2104        for (i = 0; i < nr_pages; ++i)
2105                unlock_page(wdata->pages[i]);
2106
2107        return rc;
2108}
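
/*
 * Illustrative worked example (not part of the original file): the tailsz
 * math above with PAGE_SIZE = 4096. For two dirty pages starting at file
 * offset 4096 on an inode with i_size = 10000, the tail page covers bytes
 * 8192..9999, so:
 *
 *	tailsz = min(10000 - 8192, 4096) = 1808
 *	bytes  = (2 - 1) * 4096 + 1808   = 5904
 */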
2109
2110static int cifs_writepages(struct address_space *mapping,
2111                           struct writeback_control *wbc)
2112{
2113        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2114        struct TCP_Server_Info *server;
2115        bool done = false, scanned = false, range_whole = false;
2116        pgoff_t end, index;
2117        struct cifs_writedata *wdata;
2118        int rc = 0;
2119
2120        /*
2121         * If wsize is smaller than the page cache size, default to writing
2122         * one page at a time via cifs_writepage
2123         */
2124        if (cifs_sb->wsize < PAGE_SIZE)
2125                return generic_writepages(mapping, wbc);
2126
2127        if (wbc->range_cyclic) {
2128                index = mapping->writeback_index; /* Start from prev offset */
2129                end = -1;
2130        } else {
2131                index = wbc->range_start >> PAGE_SHIFT;
2132                end = wbc->range_end >> PAGE_SHIFT;
2133                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2134                        range_whole = true;
2135                scanned = true;
2136        }
2137        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2138retry:
2139        while (!done && index <= end) {
2140                unsigned int i, nr_pages, found_pages, wsize, credits;
2141                pgoff_t next = 0, tofind, saved_index = index;
2142
2143                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2144                                                   &wsize, &credits);
2145                if (rc)
2146                        break;
2147
2148                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2149
2150                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2151                                                  &found_pages);
2152                if (!wdata) {
2153                        rc = -ENOMEM;
2154                        add_credits_and_wake_if(server, credits, 0);
2155                        break;
2156                }
2157
2158                if (found_pages == 0) {
2159                        kref_put(&wdata->refcount, cifs_writedata_release);
2160                        add_credits_and_wake_if(server, credits, 0);
2161                        break;
2162                }
2163
2164                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2165                                               end, &index, &next, &done);
2166
2167                /* nothing to write? */
2168                if (nr_pages == 0) {
2169                        kref_put(&wdata->refcount, cifs_writedata_release);
2170                        add_credits_and_wake_if(server, credits, 0);
2171                        continue;
2172                }
2173
2174                wdata->credits = credits;
2175
2176                rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2177
2178                /* send failure -- clean up the mess */
2179                if (rc != 0) {
2180                        add_credits_and_wake_if(server, wdata->credits, 0);
2181                        for (i = 0; i < nr_pages; ++i) {
2182                                if (rc == -EAGAIN)
2183                                        redirty_page_for_writepage(wbc,
2184                                                           wdata->pages[i]);
2185                                else
2186                                        SetPageError(wdata->pages[i]);
2187                                end_page_writeback(wdata->pages[i]);
2188                                put_page(wdata->pages[i]);
2189                        }
2190                        if (rc != -EAGAIN)
2191                                mapping_set_error(mapping, rc);
2192                }
2193                kref_put(&wdata->refcount, cifs_writedata_release);
2194
2195                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2196                        index = saved_index;
2197                        continue;
2198                }
2199
2200                wbc->nr_to_write -= nr_pages;
2201                if (wbc->nr_to_write <= 0)
2202                        done = true;
2203
2204                index = next;
2205        }
2206
2207        if (!scanned && !done) {
2208                /*
2209                 * We hit the last page and there is more work to be done: wrap
2210                 * back to the start of the file
2211                 */
2212                scanned = true;
2213                index = 0;
2214                goto retry;
2215        }
2216
2217        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2218                mapping->writeback_index = index;
2219
2220        return rc;
2221}
2222
2223static int
2224cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2225{
2226        int rc;
2227        unsigned int xid;
2228
2229        xid = get_xid();
2230/* BB add check for wbc flags */
2231        get_page(page);
2232        if (!PageUptodate(page))
2233                cifs_dbg(FYI, "ppw - page not up to date\n");
2234
2235        /*
2236         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2237         *
2238         * A writepage() implementation always needs to do either this,
2239         * or re-dirty the page with "redirty_page_for_writepage()" in
2240         * the case of a failure.
2241         *
2242         * Just unlocking the page would leave the radix tree tag bits
2243         * out of sync with the actual state of the page.
2244         */
2245        set_page_writeback(page);
2246retry_write:
2247        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2248        if (rc == -EAGAIN) {
2249                if (wbc->sync_mode == WB_SYNC_ALL)
2250                        goto retry_write;
2251                redirty_page_for_writepage(wbc, page);
2252        } else if (rc != 0) {
2253                SetPageError(page);
2254                mapping_set_error(page->mapping, rc);
2255        } else {
2256                SetPageUptodate(page);
2257        }
2258        end_page_writeback(page);
2259        put_page(page);
2260        free_xid(xid);
2261        return rc;
2262}
2263
2264static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2265{
2266        int rc = cifs_writepage_locked(page, wbc);
2267        unlock_page(page);
2268        return rc;
2269}
2270
2271static int cifs_write_end(struct file *file, struct address_space *mapping,
2272                        loff_t pos, unsigned len, unsigned copied,
2273                        struct page *page, void *fsdata)
2274{
2275        int rc;
2276        struct inode *inode = mapping->host;
2277        struct cifsFileInfo *cfile = file->private_data;
2278        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2279        __u32 pid;
2280
2281        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2282                pid = cfile->pid;
2283        else
2284                pid = current->tgid;
2285
2286        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2287                 page, pos, copied);
2288
2289        if (PageChecked(page)) {
2290                if (copied == len)
2291                        SetPageUptodate(page);
2292                ClearPageChecked(page);
2293        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2294                SetPageUptodate(page);
2295
2296        if (!PageUptodate(page)) {
2297                char *page_data;
2298                unsigned offset = pos & (PAGE_SIZE - 1);
2299                unsigned int xid;
2300
2301                xid = get_xid();
2302                /* this is probably better than directly calling
2303                   partialpage_write since in this function the file handle
2304                   is known, which we might as well leverage */
2305                /* BB check if anything else is missing out of ppw,
2306                   such as updating the last write time */
2307                page_data = kmap(page);
2308                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2309                /* if (rc < 0) should we set writebehind rc? */
2310                kunmap(page);
2311
2312                free_xid(xid);
2313        } else {
2314                rc = copied;
2315                pos += copied;
2316                set_page_dirty(page);
2317        }
2318
2319        if (rc > 0) {
2320                spin_lock(&inode->i_lock);
2321                if (pos > inode->i_size)
2322                        i_size_write(inode, pos);
2323                spin_unlock(&inode->i_lock);
2324        }
2325
2326        unlock_page(page);
2327        put_page(page);
2328
2329        return rc;
2330}
2331
2332int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2333                      int datasync)
2334{
2335        unsigned int xid;
2336        int rc = 0;
2337        struct cifs_tcon *tcon;
2338        struct TCP_Server_Info *server;
2339        struct cifsFileInfo *smbfile = file->private_data;
2340        struct inode *inode = file_inode(file);
2341        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2342
2343        rc = file_write_and_wait_range(file, start, end);
2344        if (rc)
2345                return rc;
2346        inode_lock(inode);
2347
2348        xid = get_xid();
2349
2350        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2351                 file, datasync);
2352
2353        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2354                rc = cifs_zap_mapping(inode);
2355                if (rc) {
2356                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2357                        rc = 0; /* don't care about it in fsync */
2358                }
2359        }
2360
2361        tcon = tlink_tcon(smbfile->tlink);
2362        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2363                server = tcon->ses->server;
2364                if (server->ops->flush)
2365                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2366                else
2367                        rc = -ENOSYS;
2368        }
2369
2370        free_xid(xid);
2371        inode_unlock(inode);
2372        return rc;
2373}
2374
2375int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2376{
2377        unsigned int xid;
2378        int rc = 0;
2379        struct cifs_tcon *tcon;
2380        struct TCP_Server_Info *server;
2381        struct cifsFileInfo *smbfile = file->private_data;
2382        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2383        struct inode *inode = file->f_mapping->host;
2384
2385        rc = file_write_and_wait_range(file, start, end);
2386        if (rc)
2387                return rc;
2388        inode_lock(inode);
2389
2390        xid = get_xid();
2391
2392        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2393                 file, datasync);
2394
2395        tcon = tlink_tcon(smbfile->tlink);
2396        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2397                server = tcon->ses->server;
2398                if (server->ops->flush)
2399                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2400                else
2401                        rc = -ENOSYS;
2402        }
2403
2404        free_xid(xid);
2405        inode_unlock(inode);
2406        return rc;
2407}
2408
2409/*
2410 * As the file closes, flush all cached write data for this inode, checking
2411 * for write-behind errors.
2412 */
2413int cifs_flush(struct file *file, fl_owner_t id)
2414{
2415        struct inode *inode = file_inode(file);
2416        int rc = 0;
2417
2418        if (file->f_mode & FMODE_WRITE)
2419                rc = filemap_write_and_wait(inode->i_mapping);
2420
2421        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2422
2423        return rc;
2424}
2425
2426static int
2427cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2428{
2429        int rc = 0;
2430        unsigned long i;
2431
2432        for (i = 0; i < num_pages; i++) {
2433                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2434                if (!pages[i]) {
2435                        /*
2436                         * save the number of pages we have already allocated
2437                         * and return with an ENOMEM error
2438                         */
2439                        num_pages = i;
2440                        rc = -ENOMEM;
2441                        break;
2442                }
2443        }
2444
2445        if (rc) {
2446                for (i = 0; i < num_pages; i++)
2447                        put_page(pages[i]);
2448        }
2449        return rc;
2450}
2451
2452static inline
2453size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2454{
2455        size_t num_pages;
2456        size_t clen;
2457
2458        clen = min_t(const size_t, len, wsize);
2459        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2460
2461        if (cur_len)
2462                *cur_len = clen;
2463
2464        return num_pages;
2465}
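
/*
 * Illustrative worked example (not part of the original file): with
 * wsize = 65536, PAGE_SIZE = 4096 and len = 200000, the first three calls
 * clamp cur_len to 65536 and return DIV_ROUND_UP(65536, 4096) = 16 pages
 * each; the final call covers the remaining 3392 bytes and returns 1 page.
 */
#if 0
	size_t cur_len;
	size_t num_pages = get_numpages(65536, 200000, &cur_len);
	/* cur_len == 65536, num_pages == 16 (assuming PAGE_SIZE == 4096) */
#endif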
2466
2467static void
2468cifs_uncached_writedata_release(struct kref *refcount)
2469{
2470        int i;
2471        struct cifs_writedata *wdata = container_of(refcount,
2472                                        struct cifs_writedata, refcount);
2473
2474        kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2475        for (i = 0; i < wdata->nr_pages; i++)
2476                put_page(wdata->pages[i]);
2477        cifs_writedata_release(refcount);
2478}
2479
2480static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2481
2482static void
2483cifs_uncached_writev_complete(struct work_struct *work)
2484{
2485        struct cifs_writedata *wdata = container_of(work,
2486                                        struct cifs_writedata, work);
2487        struct inode *inode = d_inode(wdata->cfile->dentry);
2488        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2489
2490        spin_lock(&inode->i_lock);
2491        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2492        if (cifsi->server_eof > inode->i_size)
2493                i_size_write(inode, cifsi->server_eof);
2494        spin_unlock(&inode->i_lock);
2495
2496        complete(&wdata->done);
2497        collect_uncached_write_data(wdata->ctx);
2498        /* the call below can possibly free the last ref to the aio ctx */
2499        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2500}
2501
2502static int
2503wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2504                      size_t *len, unsigned long *num_pages)
2505{
2506        size_t save_len, copied, bytes, cur_len = *len;
2507        unsigned long i, nr_pages = *num_pages;
2508
2509        save_len = cur_len;
2510        for (i = 0; i < nr_pages; i++) {
2511                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2512                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2513                cur_len -= copied;
2514                /*
2515                 * If we didn't copy as much as we expected, then that
2516                 * may mean we trod into an unmapped area. Stop copying
2517                 * at that point. On the next pass through the big
2518                 * loop, we'll likely end up getting a zero-length
2519                 * write and bailing out of it.
2520                 */
2521                if (copied < bytes)
2522                        break;
2523        }
2524        cur_len = save_len - cur_len;
2525        *len = cur_len;
2526
2527        /*
2528         * If we have no data to send, then that probably means that
2529         * the copy above failed altogether. That's most likely because
2530         * the address in the iovec was bogus. Return -EFAULT and let
2531         * the caller free anything we allocated and bail out.
2532         */
2533        if (!cur_len)
2534                return -EFAULT;
2535
2536        /*
2537         * i + 1 now represents the number of pages we actually used in
2538         * the copy phase above.
2539         */
2540        *num_pages = i + 1;
2541        return 0;
2542}
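
/*
 * Illustrative worked example (not part of the original file): if the
 * iovec faults partway through - say pages 0..2 copy fully and page 3
 * copies only 100 bytes - the loop above breaks at i = 3, *len becomes
 * 3 * PAGE_SIZE + 100, and *num_pages = i + 1 = 4, so the short tail
 * page is still sent.
 */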
2543
2544static int
2545cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2546                     struct cifsFileInfo *open_file,
2547                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2548                     struct cifs_aio_ctx *ctx)
2549{
2550        int rc = 0;
2551        size_t cur_len;
2552        unsigned long nr_pages, num_pages, i;
2553        struct cifs_writedata *wdata;
2554        struct iov_iter saved_from = *from;
2555        loff_t saved_offset = offset;
2556        pid_t pid;
2557        struct TCP_Server_Info *server;
2558
2559        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2560                pid = open_file->pid;
2561        else
2562                pid = current->tgid;
2563
2564        server = tlink_tcon(open_file->tlink)->ses->server;
2565
2566        do {
2567                unsigned int wsize, credits;
2568
2569                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2570                                                   &wsize, &credits);
2571                if (rc)
2572                        break;
2573
2574                nr_pages = get_numpages(wsize, len, &cur_len);
2575                wdata = cifs_writedata_alloc(nr_pages,
2576                                             cifs_uncached_writev_complete);
2577                if (!wdata) {
2578                        rc = -ENOMEM;
2579                        add_credits_and_wake_if(server, credits, 0);
2580                        break;
2581                }
2582
2583                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2584                if (rc) {
2585                        kfree(wdata);
2586                        add_credits_and_wake_if(server, credits, 0);
2587                        break;
2588                }
2589
2590                num_pages = nr_pages;
2591                rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2592                if (rc) {
2593                        for (i = 0; i < nr_pages; i++)
2594                                put_page(wdata->pages[i]);
2595                        kfree(wdata);
2596                        add_credits_and_wake_if(server, credits, 0);
2597                        break;
2598                }
2599
2600                /*
2601                 * Bring nr_pages down to the number of pages we actually used,
2602                 * and free any pages that we didn't use.
2603                 */
2604                for ( ; nr_pages > num_pages; nr_pages--)
2605                        put_page(wdata->pages[nr_pages - 1]);
2606
2607                wdata->sync_mode = WB_SYNC_ALL;
2608                wdata->nr_pages = nr_pages;
2609                wdata->offset = (__u64)offset;
2610                wdata->cfile = cifsFileInfo_get(open_file);
2611                wdata->pid = pid;
2612                wdata->bytes = cur_len;
2613                wdata->pagesz = PAGE_SIZE;
2614                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2615                wdata->credits = credits;
2616                wdata->ctx = ctx;
2617                kref_get(&ctx->refcount);
2618
2619                if (!wdata->cfile->invalidHandle ||
2620                    !(rc = cifs_reopen_file(wdata->cfile, false)))
2621                        rc = server->ops->async_writev(wdata,
2622                                        cifs_uncached_writedata_release);
2623                if (rc) {
2624                        add_credits_and_wake_if(server, wdata->credits, 0);
2625                        kref_put(&wdata->refcount,
2626                                 cifs_uncached_writedata_release);
2627                        if (rc == -EAGAIN) {
2628                                *from = saved_from;
2629                                iov_iter_advance(from, offset - saved_offset);
2630                                continue;
2631                        }
2632                        break;
2633                }
2634
2635                list_add_tail(&wdata->list, wdata_list);
2636                offset += cur_len;
2637                len -= cur_len;
2638        } while (len > 0);
2639
2640        return rc;
2641}
2642
2643static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2644{
2645        struct cifs_writedata *wdata, *tmp;
2646        struct cifs_tcon *tcon;
2647        struct cifs_sb_info *cifs_sb;
2648        struct dentry *dentry = ctx->cfile->dentry;
2649        unsigned int i;
2650        int rc;
2651
2652        tcon = tlink_tcon(ctx->cfile->tlink);
2653        cifs_sb = CIFS_SB(dentry->d_sb);
2654
2655        mutex_lock(&ctx->aio_mutex);
2656
2657        if (list_empty(&ctx->list)) {
2658                mutex_unlock(&ctx->aio_mutex);
2659                return;
2660        }
2661
2662        rc = ctx->rc;
2663        /*
2664         * Wait for and collect replies for any successful sends in order of
2665         * increasing offset. Once an error is hit, return without waiting
2666         * for any more replies.
2667         */
2668restart_loop:
2669        list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2670                if (!rc) {
2671                        if (!try_wait_for_completion(&wdata->done)) {
2672                                mutex_unlock(&ctx->aio_mutex);
2673                                return;
2674                        }
2675
2676                        if (wdata->result)
2677                                rc = wdata->result;
2678                        else
2679                                ctx->total_len += wdata->bytes;
2680
2681                        /* resend call if it's a retryable error */
2682                        if (rc == -EAGAIN) {
2683                                struct list_head tmp_list;
2684                                struct iov_iter tmp_from = ctx->iter;
2685
2686                                INIT_LIST_HEAD(&tmp_list);
2687                                list_del_init(&wdata->list);
2688
2689                                iov_iter_advance(&tmp_from,
2690                                                 wdata->offset - ctx->pos);
2691
2692                                rc = cifs_write_from_iter(wdata->offset,
2693                                                wdata->bytes, &tmp_from,
2694                                                ctx->cfile, cifs_sb, &tmp_list,
2695                                                ctx);
2696
2697                                list_splice(&tmp_list, &ctx->list);
2698
2699                                kref_put(&wdata->refcount,
2700                                         cifs_uncached_writedata_release);
2701                                goto restart_loop;
2702                        }
2703                }
2704                list_del_init(&wdata->list);
2705                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2706        }
2707
2708        for (i = 0; i < ctx->npages; i++)
2709                put_page(ctx->bv[i].bv_page);
2710
2711        cifs_stats_bytes_written(tcon, ctx->total_len);
2712        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2713
2714        ctx->rc = (rc == 0) ? ctx->total_len : rc;
2715
2716        mutex_unlock(&ctx->aio_mutex);
2717
2718        if (ctx->iocb && ctx->iocb->ki_complete)
2719                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2720        else
2721                complete(&ctx->done);
2722}
2723
2724ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2725{
2726        struct file *file = iocb->ki_filp;
2727        ssize_t total_written = 0;
2728        struct cifsFileInfo *cfile;
2729        struct cifs_tcon *tcon;
2730        struct cifs_sb_info *cifs_sb;
2731        struct cifs_aio_ctx *ctx;
2732        struct iov_iter saved_from = *from;
2733        int rc;
2734
2735        /*
2736         * BB - optimize for the case when signing is disabled. We can drop
2737         * this extra memory-to-memory copying and use iovec buffers for
2738         * constructing the write request.
2739         */
2740
2741        rc = generic_write_checks(iocb, from);
2742        if (rc <= 0)
2743                return rc;
2744
2745        cifs_sb = CIFS_FILE_SB(file);
2746        cfile = file->private_data;
2747        tcon = tlink_tcon(cfile->tlink);
2748
2749        if (!tcon->ses->server->ops->async_writev)
2750                return -ENOSYS;
2751
2752        ctx = cifs_aio_ctx_alloc();
2753        if (!ctx)
2754                return -ENOMEM;
2755
2756        ctx->cfile = cifsFileInfo_get(cfile);
2757
2758        if (!is_sync_kiocb(iocb))
2759                ctx->iocb = iocb;
2760
2761        ctx->pos = iocb->ki_pos;
2762
2763        rc = setup_aio_ctx_iter(ctx, from, WRITE);
2764        if (rc) {
2765                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2766                return rc;
2767        }
2768
2769        /* grab a lock here because the response handlers can access ctx */
2770        mutex_lock(&ctx->aio_mutex);
2771
2772        rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2773                                  cfile, cifs_sb, &ctx->list, ctx);
2774
2775        /*
2776         * If at least one write was successfully sent, then discard any rc
2777         * value from the later writes. If the other writes succeed, then
2778         * we'll end up returning whatever was written. If they fail, then
2779         * we'll get a new rc value from that.
2780         */
2781        if (!list_empty(&ctx->list))
2782                rc = 0;
2783
2784        mutex_unlock(&ctx->aio_mutex);
2785
2786        if (rc) {
2787                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2788                return rc;
2789        }
2790
2791        if (!is_sync_kiocb(iocb)) {
2792                kref_put(&ctx->refcount, cifs_aio_ctx_release);
2793                return -EIOCBQUEUED;
2794        }
2795
2796        rc = wait_for_completion_killable(&ctx->done);
2797        if (rc) {
2798                mutex_lock(&ctx->aio_mutex);
2799                ctx->rc = rc = -EINTR;
2800                total_written = ctx->total_len;
2801                mutex_unlock(&ctx->aio_mutex);
2802        } else {
2803                rc = ctx->rc;
2804                total_written = ctx->total_len;
2805        }
2806
2807        kref_put(&ctx->refcount, cifs_aio_ctx_release);
2808
2809        if (unlikely(!total_written))
2810                return rc;
2811
2812        iocb->ki_pos += total_written;
2813        return total_written;
2814}
2815
2816static ssize_t
2817cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2818{
2819        struct file *file = iocb->ki_filp;
2820        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2821        struct inode *inode = file->f_mapping->host;
2822        struct cifsInodeInfo *cinode = CIFS_I(inode);
2823        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2824        ssize_t rc;
2825
2826        inode_lock(inode);
2827        /*
2828         * We need to hold the sem to be sure nobody modifies the lock list
2829         * with a brlock that prevents writing.
2830         */
2831        down_read(&cinode->lock_sem);
2832
2833        rc = generic_write_checks(iocb, from);
2834        if (rc <= 0)
2835                goto out;
2836
2837        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2838                                     server->vals->exclusive_lock_type, NULL,
2839                                     CIFS_WRITE_OP))
2840                rc = __generic_file_write_iter(iocb, from);
2841        else
2842                rc = -EACCES;
2843out:
2844        up_read(&cinode->lock_sem);
2845        inode_unlock(inode);
2846
2847        if (rc > 0)
2848                rc = generic_write_sync(iocb, rc);
2849        return rc;
2850}
2851
2852ssize_t
2853cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2854{
2855        struct inode *inode = file_inode(iocb->ki_filp);
2856        struct cifsInodeInfo *cinode = CIFS_I(inode);
2857        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2858        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2859                                                iocb->ki_filp->private_data;
2860        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2861        ssize_t written;
2862
2863        written = cifs_get_writer(cinode);
2864        if (written)
2865                return written;
2866
2867        if (CIFS_CACHE_WRITE(cinode)) {
2868                if (cap_unix(tcon->ses) &&
2869                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2870                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2871                        written = generic_file_write_iter(iocb, from);
2872                        goto out;
2873                }
2874                written = cifs_writev(iocb, from);
2875                goto out;
2876        }
2877        /*
2878         * For non-oplocked files in strict cache mode we need to write the data
2879         * to the server exactly from pos to pos+len-1 rather than flush all
2880         * affected pages because it may cause an error with mandatory locks on
2881         * these pages but not on the region from pos to pos+len-1.
2882         */
2883        written = cifs_user_writev(iocb, from);
2884        if (written > 0 && CIFS_CACHE_READ(cinode)) {
2885                /*
2886                 * A Windows 7 server can delay breaking a level2 oplock if a
2887                 * write request comes in - break it on the client to prevent
2888                 * reading stale data.
2889                 */
2890                cifs_zap_mapping(inode);
2891                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2892                         inode);
2893                cinode->oplock = 0;
2894        }
2895out:
2896        cifs_put_writer(cinode);
2897        return written;
2898}
2899
2900static struct cifs_readdata *
2901cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2902{
2903        struct cifs_readdata *rdata;
2904
2905        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2906                        GFP_KERNEL);
2907        if (rdata != NULL) {
2908                kref_init(&rdata->refcount);
2909                INIT_LIST_HEAD(&rdata->list);
2910                init_completion(&rdata->done);
2911                INIT_WORK(&rdata->work, complete);
2912        }
2913
2914        return rdata;
2915}
2916
2917void
2918cifs_readdata_release(struct kref *refcount)
2919{
2920        struct cifs_readdata *rdata = container_of(refcount,
2921                                        struct cifs_readdata, refcount);
2922
2923        if (rdata->cfile)
2924                cifsFileInfo_put(rdata->cfile);
2925
2926        kfree(rdata);
2927}
2928
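    /*
     * Allocate the pages backing a readdata response. On allocation failure
     * only the pages that were actually allocated are released, and -ENOMEM
     * is returned.
     */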
2929static int
2930cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2931{
2932        int rc = 0;
2933        struct page *page;
2934        unsigned int i;
2935
2936        for (i = 0; i < nr_pages; i++) {
2937                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2938                if (!page) {
2939                        rc = -ENOMEM;
2940                        break;
2941                }
2942                rdata->pages[i] = page;
2943        }
2944
2945        if (rc) {
2946                while (i--) {
2947                        put_page(rdata->pages[i]);
2948                        rdata->pages[i] = NULL;
2949                }
2950        }
2951        return rc;
2952}
2953
2954static void
2955cifs_uncached_readdata_release(struct kref *refcount)
2956{
2957        struct cifs_readdata *rdata = container_of(refcount,
2958                                        struct cifs_readdata, refcount);
2959        unsigned int i;
2960
2961        kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2962        for (i = 0; i < rdata->nr_pages; i++) {
2963                put_page(rdata->pages[i]);
2964                rdata->pages[i] = NULL;
2965        }
2966        cifs_readdata_release(refcount);
2967}
2968
2969/**
2970 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2971 * @rdata:      the readdata response with list of pages holding data
2972 * @iter:       destination for our data
2973 *
2974 * This function copies data from a list of pages in a readdata response into
2975 * an array of iovecs. It will first calculate where the data should go
2976 * based on the info in the readdata and then copy the data into that spot.
2977 */
2978static int
2979cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2980{
2981        size_t remaining = rdata->got_bytes;
2982        unsigned int i;
2983
2984        for (i = 0; i < rdata->nr_pages; i++) {
2985                struct page *page = rdata->pages[i];
2986                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2987                size_t written;
2988
2989                if (unlikely(iter->type & ITER_PIPE)) {
2990                        void *addr = kmap_atomic(page);
2991
2992                        written = copy_to_iter(addr, copy, iter);
2993                        kunmap_atomic(addr);
2994                } else
2995                        written = copy_page_to_iter(page, 0, copy, iter);
2996                remaining -= written;
2997                if (written < copy && iov_iter_count(iter) > 0)
2998                        break;
2999        }
3000        return remaining ? -EFAULT : 0;
3001}
3002
3003static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3004
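    /*
     * Work handler run when an uncached async read completes: signal the
     * waiter and let collect_uncached_read_data() gather whatever requests
     * have finished so far.
     */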
3005static void
3006cifs_uncached_readv_complete(struct work_struct *work)
3007{
3008        struct cifs_readdata *rdata = container_of(work,
3009                                                struct cifs_readdata, work);
3010
3011        complete(&rdata->done);
3012        collect_uncached_read_data(rdata->ctx);
3013        /* the below call can possibly free the last ref to aio ctx */
3014        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3015}
3016
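    /*
     * Fill the pages of a readdata response either from the socket (when
     * iter is NULL) or from a supplied iov_iter. A partial tail page is
     * zero-filled and its length recorded in tailsz; pages beyond the
     * received data are released immediately.
     */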
3017static int
3018uncached_fill_pages(struct TCP_Server_Info *server,
3019                    struct cifs_readdata *rdata, struct iov_iter *iter,
3020                    unsigned int len)
3021{
3022        int result = 0;
3023        unsigned int i;
3024        unsigned int nr_pages = rdata->nr_pages;
3025
3026        rdata->got_bytes = 0;
3027        rdata->tailsz = PAGE_SIZE;
3028        for (i = 0; i < nr_pages; i++) {
3029                struct page *page = rdata->pages[i];
3030                size_t n;
3031
3032                if (len <= 0) {
3033                        /* no need to hold page hostage */
3034                        rdata->pages[i] = NULL;
3035                        rdata->nr_pages--;
3036                        put_page(page);
3037                        continue;
3038                }
3039                n = len;
3040                if (len >= PAGE_SIZE) {
3041                        /* enough data to fill the page */
3042                        n = PAGE_SIZE;
3043                        len -= n;
3044                } else {
3045                        zero_user(page, len, PAGE_SIZE - len);
3046                        rdata->tailsz = len;
3047                        len = 0;
3048                }
3049                if (iter)
3050                        result = copy_page_from_iter(page, 0, n, iter);
3051                else
3052                        result = cifs_read_page_from_socket(server, page, n);
3053                if (result < 0)
3054                        break;
3055
3056                rdata->got_bytes += result;
3057        }
3058
3059        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3060                                                rdata->got_bytes : result;
3061}
3062
3063static int
3064cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3065                              struct cifs_readdata *rdata, unsigned int len)
3066{
3067        return uncached_fill_pages(server, rdata, NULL, len);
3068}
3069
3070static int
3071cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3072                              struct cifs_readdata *rdata,
3073                              struct iov_iter *iter)
3074{
3075        return uncached_fill_pages(server, rdata, iter, iter->count);
3076}
3077
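    /*
     * Split an uncached read into rsize-sized chunks and issue an async read
     * for each, charging MTU credits per request. Every readdata issued takes
     * a reference on the aio context and is queued on rdata_list; a send that
     * fails with -EAGAIN is retried.
     */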
3078static int
3079cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3080                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3081                     struct cifs_aio_ctx *ctx)
3082{
3083        struct cifs_readdata *rdata;
3084        unsigned int npages, rsize, credits;
3085        size_t cur_len;
3086        int rc;
3087        pid_t pid;
3088        struct TCP_Server_Info *server;
3089
3090        server = tlink_tcon(open_file->tlink)->ses->server;
3091
3092        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3093                pid = open_file->pid;
3094        else
3095                pid = current->tgid;
3096
3097        do {
3098                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3099                                                   &rsize, &credits);
3100                if (rc)
3101                        break;
3102
3103                cur_len = min_t(const size_t, len, rsize);
3104                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3105
3106                /* allocate a readdata struct */
3107                rdata = cifs_readdata_alloc(npages,
3108                                            cifs_uncached_readv_complete);
3109                if (!rdata) {
3110                        add_credits_and_wake_if(server, credits, 0);
3111                        rc = -ENOMEM;
3112                        break;
3113                }
3114
3115                rc = cifs_read_allocate_pages(rdata, npages);
3116                if (rc)
3117                        goto error;
3118
3119                rdata->cfile = cifsFileInfo_get(open_file);
3120                rdata->nr_pages = npages;
3121                rdata->offset = offset;
3122                rdata->bytes = cur_len;
3123                rdata->pid = pid;
3124                rdata->pagesz = PAGE_SIZE;
3125                rdata->read_into_pages = cifs_uncached_read_into_pages;
3126                rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3127                rdata->credits = credits;
3128                rdata->ctx = ctx;
3129                kref_get(&ctx->refcount);
3130
3131                if (!rdata->cfile->invalidHandle ||
3132                    !(rc = cifs_reopen_file(rdata->cfile, true)))
3133                        rc = server->ops->async_readv(rdata);
3134error:
3135                if (rc) {
3136                        add_credits_and_wake_if(server, rdata->credits, 0);
3137                        kref_put(&rdata->refcount,
3138                                 cifs_uncached_readdata_release);
3139                        if (rc == -EAGAIN)
3140                                continue;
3141                        break;
3142                }
3143
3144                list_add_tail(&rdata->list, rdata_list);
3145                offset += cur_len;
3146                len -= cur_len;
3147        } while (len > 0);
3148
3149        return rc;
3150}
3151
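    /*
     * Gather the results of the uncached read requests queued on ctx->list.
     * Requests that failed with -EAGAIN are reissued, short reads discard the
     * remainder, and the caller's iocb is completed (or ctx->done signalled)
     * once everything has been collected. Runs both in the submitting task
     * and from the readv completion works, hence the aio_mutex.
     */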
3152static void
3153collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3154{
3155        struct cifs_readdata *rdata, *tmp;
3156        struct iov_iter *to = &ctx->iter;
3157        struct cifs_sb_info *cifs_sb;
3158        struct cifs_tcon *tcon;
3159        unsigned int i;
3160        int rc;
3161
3162        tcon = tlink_tcon(ctx->cfile->tlink);
3163        cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3164
3165        mutex_lock(&ctx->aio_mutex);
3166
3167        if (list_empty(&ctx->list)) {
3168                mutex_unlock(&ctx->aio_mutex);
3169                return;
3170        }
3171
3172        rc = ctx->rc;
3173        /* the loop below should proceed in the order of increasing offsets */
3174again:
3175        list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3176                if (!rc) {
3177                        if (!try_wait_for_completion(&rdata->done)) {
3178                                mutex_unlock(&ctx->aio_mutex);
3179                                return;
3180                        }
3181
3182                        if (rdata->result == -EAGAIN) {
3183                                /* resend call if it's a retryable error */
3184                                struct list_head tmp_list;
3185                                unsigned int got_bytes = rdata->got_bytes;
3186
3187                                list_del_init(&rdata->list);
3188                                INIT_LIST_HEAD(&tmp_list);
3189
3190                                /*
3191                                 * Got part of the data and then a reconnect
3192                                 * happened -- fill the buffer and continue
3193                                 * reading.
3194                                 */
3195                                if (got_bytes && got_bytes < rdata->bytes) {
3196                                        rc = cifs_readdata_to_iov(rdata, to);
3197                                        if (rc) {
3198                                                kref_put(&rdata->refcount,
3199                                                cifs_uncached_readdata_release);
3200                                                continue;
3201                                        }
3202                                }
3203
3204                                rc = cifs_send_async_read(
3205                                                rdata->offset + got_bytes,
3206                                                rdata->bytes - got_bytes,
3207                                                rdata->cfile, cifs_sb,
3208                                                &tmp_list, ctx);
3209
3210                                list_splice(&tmp_list, &ctx->list);
3211
3212                                kref_put(&rdata->refcount,
3213                                         cifs_uncached_readdata_release);
3214                                goto again;
3215                        } else if (rdata->result)
3216                                rc = rdata->result;
3217                        else
3218                                rc = cifs_readdata_to_iov(rdata, to);
3219
3220                        /* if there was a short read -- discard anything left */
3221                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3222                                rc = -ENODATA;
3223                }
3224                list_del_init(&rdata->list);
3225                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3226        }
3227
3228        for (i = 0; i < ctx->npages; i++) {
3229                if (ctx->should_dirty)
3230                        set_page_dirty(ctx->bv[i].bv_page);
3231                put_page(ctx->bv[i].bv_page);
3232        }
3233
3234        ctx->total_len = ctx->len - iov_iter_count(to);
3235
3236        cifs_stats_bytes_read(tcon, ctx->total_len);
3237
3238        /* mask nodata case */
3239        if (rc == -ENODATA)
3240                rc = 0;
3241
3242        ctx->rc = (rc == 0) ? ctx->total_len : rc;
3243
3244        mutex_unlock(&ctx->aio_mutex);
3245
3246        if (ctx->iocb && ctx->iocb->ki_complete)
3247                ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3248        else
3249                complete(&ctx->done);
3250}
3251
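    /*
     * Entry point for uncached reads: wrap the caller's iov_iter in an aio
     * context, issue the async read requests, then either return
     * -EIOCBQUEUED for an async iocb or wait for the reads to complete.
     */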
3252ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3253{
3254        struct file *file = iocb->ki_filp;
3255        ssize_t rc;
3256        size_t len;
3257        ssize_t total_read = 0;
3258        loff_t offset = iocb->ki_pos;
3259        struct cifs_sb_info *cifs_sb;
3260        struct cifs_tcon *tcon;
3261        struct cifsFileInfo *cfile;
3262        struct cifs_aio_ctx *ctx;
3263
3264        len = iov_iter_count(to);
3265        if (!len)
3266                return 0;
3267
3268        cifs_sb = CIFS_FILE_SB(file);
3269        cfile = file->private_data;
3270        tcon = tlink_tcon(cfile->tlink);
3271
3272        if (!tcon->ses->server->ops->async_readv)
3273                return -ENOSYS;
3274
3275        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3276                cifs_dbg(FYI, "attempting read on write only file instance\n");
3277
3278        ctx = cifs_aio_ctx_alloc();
3279        if (!ctx)
3280                return -ENOMEM;
3281
3282        ctx->cfile = cifsFileInfo_get(cfile);
3283
3284        if (!is_sync_kiocb(iocb))
3285                ctx->iocb = iocb;
3286
3287        if (to->type == ITER_IOVEC)
3288                ctx->should_dirty = true;
3289
3290        rc = setup_aio_ctx_iter(ctx, to, READ);
3291        if (rc) {
3292                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3293                return rc;
3294        }
3295
3296        len = ctx->len;
3297
3298        /* grab a lock here because read response handlers can access ctx */
3299        mutex_lock(&ctx->aio_mutex);
3300
3301        rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3302
3303        /* if at least one read request was sent successfully, reset rc */
3304        if (!list_empty(&ctx->list))
3305                rc = 0;
3306
3307        mutex_unlock(&ctx->aio_mutex);
3308
3309        if (rc) {
3310                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3311                return rc;
3312        }
3313
3314        if (!is_sync_kiocb(iocb)) {
3315                kref_put(&ctx->refcount, cifs_aio_ctx_release);
3316                return -EIOCBQUEUED;
3317        }
3318
3319        rc = wait_for_completion_killable(&ctx->done);
3320        if (rc) {
3321                mutex_lock(&ctx->aio_mutex);
3322                ctx->rc = rc = -EINTR;
3323                total_read = ctx->total_len;
3324                mutex_unlock(&ctx->aio_mutex);
3325        } else {
3326                rc = ctx->rc;
3327                total_read = ctx->total_len;
3328        }
3329
3330        kref_put(&ctx->refcount, cifs_aio_ctx_release);
3331
3332        if (total_read) {
3333                iocb->ki_pos += total_read;
3334                return total_read;
3335        }
3336        return rc;
3337}
3338
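    /*
     * Strict cache mode read entry point: read through the page cache only
     * when a read oplock is held and no mandatory brlock conflicts with the
     * requested range; otherwise read from the server via cifs_user_readv().
     */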
3339ssize_t
3340cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3341{
3342        struct inode *inode = file_inode(iocb->ki_filp);
3343        struct cifsInodeInfo *cinode = CIFS_I(inode);
3344        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3345        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3346                                                iocb->ki_filp->private_data;
3347        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3348        int rc = -EACCES;
3349
3350        /*
3351         * In strict cache mode we need to read from the server all the time
3352         * if we don't have level II oplock because the server can delay mtime
3353         * change - so we can't make a decision about invalidating the inode.
3354         * We can also fail when reading pages if there are mandatory locks
3355         * on pages affected by this read but not on the region from pos to
3356         * pos+len-1.
3357         */
3358        if (!CIFS_CACHE_READ(cinode))
3359                return cifs_user_readv(iocb, to);
3360
3361        if (cap_unix(tcon->ses) &&
3362            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3363            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3364                return generic_file_read_iter(iocb, to);
3365
3366        /*
3367         * We need to hold the sem to be sure nobody modifies the lock list
3368         * with a brlock that prevents reading.
3369         */
3370        down_read(&cinode->lock_sem);
3371        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3372                                     tcon->ses->server->vals->shared_lock_type,
3373                                     NULL, CIFS_READ_OP))
3374                rc = generic_file_read_iter(iocb, to);
3375        up_read(&cinode->lock_sem);
3376        return rc;
3377}
3378
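    /*
     * Synchronous read helper used by cifs_readpage_worker(): read up to
     * read_size bytes at *offset in rsize-sized chunks using the server's
     * sync_read operation, reopening an invalidated handle and retrying on
     * -EAGAIN as needed.
     */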
3379static ssize_t
3380cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3381{
3382        int rc = -EACCES;
3383        unsigned int bytes_read = 0;
3384        unsigned int total_read;
3385        unsigned int current_read_size;
3386        unsigned int rsize;
3387        struct cifs_sb_info *cifs_sb;
3388        struct cifs_tcon *tcon;
3389        struct TCP_Server_Info *server;
3390        unsigned int xid;
3391        char *cur_offset;
3392        struct cifsFileInfo *open_file;
3393        struct cifs_io_parms io_parms;
3394        int buf_type = CIFS_NO_BUFFER;
3395        __u32 pid;
3396
3397        xid = get_xid();
3398        cifs_sb = CIFS_FILE_SB(file);
3399
3400        /* FIXME: set up handlers for larger reads and/or convert to async */
3401        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3402
3403        if (file->private_data == NULL) {
3404                rc = -EBADF;
3405                free_xid(xid);
3406                return rc;
3407        }
3408        open_file = file->private_data;
3409        tcon = tlink_tcon(open_file->tlink);
3410        server = tcon->ses->server;
3411
3412        if (!server->ops->sync_read) {
3413                free_xid(xid);
3414                return -ENOSYS;
3415        }
3416
3417        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3418                pid = open_file->pid;
3419        else
3420                pid = current->tgid;
3421
3422        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3423                cifs_dbg(FYI, "attempting read on write only file instance\n");
3424
3425        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3426             total_read += bytes_read, cur_offset += bytes_read) {
3427                do {
3428                        current_read_size = min_t(uint, read_size - total_read,
3429                                                  rsize);
3430                        /*
3431                         * For Windows ME and 9x we do not want to request
3432                         * more than was negotiated since the server will
3433                         * refuse the read otherwise.
3434                         */
3435                        if ((tcon->ses) && !(tcon->ses->capabilities &
3436                                tcon->ses->server->vals->cap_large_files)) {
3437                                current_read_size = min_t(uint,
3438                                        current_read_size, CIFSMaxBufSize);
3439                        }
3440                        if (open_file->invalidHandle) {
3441                                rc = cifs_reopen_file(open_file, true);
3442                                if (rc != 0)
3443                                        break;
3444                        }
3445                        io_parms.pid = pid;
3446                        io_parms.tcon = tcon;
3447                        io_parms.offset = *offset;
3448                        io_parms.length = current_read_size;
3449                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3450                                                    &bytes_read, &cur_offset,
3451                                                    &buf_type);
3452                } while (rc == -EAGAIN);
3453
3454                if (rc || (bytes_read == 0)) {
3455                        if (total_read) {
3456                                break;
3457                        } else {
3458                                free_xid(xid);
3459                                return rc;
3460                        }
3461                } else {
3462                        cifs_stats_bytes_read(tcon, total_read);
3463                        *offset += bytes_read;
3464                }
3465        }
3466        free_xid(xid);
3467        return total_read;
3468}
3469
3470/*
3471 * If the page is mmap'ed into a process' page tables, then we need to make
3472 * sure that it doesn't change while being written back.
3473 */
3474static int
3475cifs_page_mkwrite(struct vm_fault *vmf)
3476{
3477        struct page *page = vmf->page;
3478
3479        lock_page(page);
3480        return VM_FAULT_LOCKED;
3481}
3482
3483static const struct vm_operations_struct cifs_file_vm_ops = {
3484        .fault = filemap_fault,
3485        .map_pages = filemap_map_pages,
3486        .page_mkwrite = cifs_page_mkwrite,
3487};
3488
3489int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3490{
3491        int rc, xid;
3492        struct inode *inode = file_inode(file);
3493
3494        xid = get_xid();
3495
3496        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3497                rc = cifs_zap_mapping(inode);
3498                if (rc) {
3499                        free_xid(xid);
                            return rc;
                    }
3500        }
3501
3502        rc = generic_file_mmap(file, vma);
3503        if (rc == 0)
3504                vma->vm_ops = &cifs_file_vm_ops;
3505        free_xid(xid);
3506        return rc;
3507}
3508
3509int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3510{
3511        int rc, xid;
3512
3513        xid = get_xid();
3514        rc = cifs_revalidate_file(file);
3515        if (rc) {
3516                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3517                         rc);
3518                free_xid(xid);
3519                return rc;
3520        }
3521        rc = generic_file_mmap(file, vma);
3522        if (rc == 0)
3523                vma->vm_ops = &cifs_file_vm_ops;
3524        free_xid(xid);
3525        return rc;
3526}
3527
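    /*
     * Completion work for a readpages request: mark the pages that received
     * data uptodate, hand them to fscache, and release them.
     */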
3528static void
3529cifs_readv_complete(struct work_struct *work)
3530{
3531        unsigned int i, got_bytes;
3532        struct cifs_readdata *rdata = container_of(work,
3533                                                struct cifs_readdata, work);
3534
3535        got_bytes = rdata->got_bytes;
3536        for (i = 0; i < rdata->nr_pages; i++) {
3537                struct page *page = rdata->pages[i];
3538
3539                lru_cache_add_file(page);
3540
3541                if (rdata->result == 0 ||
3542                    (rdata->result == -EAGAIN && got_bytes)) {
3543                        flush_dcache_page(page);
3544                        SetPageUptodate(page);
3545                }
3546
3547                unlock_page(page);
3548
3549                if (rdata->result == 0 ||
3550                    (rdata->result == -EAGAIN && got_bytes))
3551                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3552
3553                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3554
3555                put_page(page);
3556                rdata->pages[i] = NULL;
3557        }
3558        kref_put(&rdata->refcount, cifs_readdata_release);
3559}
3560
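    /*
     * Fill the locked pagecache pages of a readpages request from the socket
     * or from a supplied iov_iter. A partial tail page is zero-filled, and
     * pages beyond the returned data are either zeroed (past the server's
     * probable EOF) or released without being read.
     */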
3561static int
3562readpages_fill_pages(struct TCP_Server_Info *server,
3563                     struct cifs_readdata *rdata, struct iov_iter *iter,
3564                     unsigned int len)
3565{
3566        int result = 0;
3567        unsigned int i;
3568        u64 eof;
3569        pgoff_t eof_index;
3570        unsigned int nr_pages = rdata->nr_pages;
3571
3572        /* determine the eof that the server (probably) has */
3573        eof = CIFS_I(rdata->mapping->host)->server_eof;
3574        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3575        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3576
3577        rdata->got_bytes = 0;
3578        rdata->tailsz = PAGE_SIZE;
3579        for (i = 0; i < nr_pages; i++) {
3580                struct page *page = rdata->pages[i];
3581                size_t n = PAGE_SIZE;
3582
3583                if (len >= PAGE_SIZE) {
3584                        len -= PAGE_SIZE;
3585                } else if (len > 0) {
3586                        /* enough for partial page, fill and zero the rest */
3587                        zero_user(page, len, PAGE_SIZE - len);
3588                        n = rdata->tailsz = len;
3589                        len = 0;
3590                } else if (page->index > eof_index) {
3591                        /*
3592                         * The VFS will not try to do readahead past the
3593                         * i_size, but it's possible that we have outstanding
3594                         * writes with gaps in the middle and the i_size hasn't
3595                         * caught up yet. Populate those with zeroed out pages
3596                         * to prevent the VFS from repeatedly attempting to
3597                         * fill them until the writes are flushed.
3598                         */
3599                        zero_user(page, 0, PAGE_SIZE);
3600                        lru_cache_add_file(page);
3601                        flush_dcache_page(page);
3602                        SetPageUptodate(page);
3603                        unlock_page(page);
3604                        put_page(page);
3605                        rdata->pages[i] = NULL;
3606                        rdata->nr_pages--;
3607                        continue;
3608                } else {
3609                        /* no need to hold page hostage */
3610                        lru_cache_add_file(page);
3611                        unlock_page(page);
3612                        put_page(page);
3613                        rdata->pages[i] = NULL;
3614                        rdata->nr_pages--;
3615                        continue;
3616                }
3617
3618                if (iter)
3619                        result = copy_page_from_iter(page, 0, n, iter);
3620                else
3621                        result = cifs_read_page_from_socket(server, page, n);
3622                if (result < 0)
3623                        break;
3624
3625                rdata->got_bytes += result;
3626        }
3627
3628        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3629                                                rdata->got_bytes : result;
3630}
3631
3632static int
3633cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3634                               struct cifs_readdata *rdata, unsigned int len)
3635{
3636        return readpages_fill_pages(server, rdata, NULL, len);
3637}
3638
3639static int
3640cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3641                               struct cifs_readdata *rdata,
3642                               struct iov_iter *iter)
3643{
3644        return readpages_fill_pages(server, rdata, iter, iter->count);
3645}
3646
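    /*
     * Take pages off the tail of page_list (the lowest indexes) and add them
     * to the page cache until rsize is reached, an index discontinuity is
     * hit, or the list is exhausted. The batch is collected on tmplist along
     * with its starting file offset and total byte count.
     */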
3647static int
3648readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3649                    unsigned int rsize, struct list_head *tmplist,
3650                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3651{
3652        struct page *page, *tpage;
3653        unsigned int expected_index;
3654        int rc;
3655        gfp_t gfp = readahead_gfp_mask(mapping);
3656
3657        INIT_LIST_HEAD(tmplist);
3658
3659        page = list_entry(page_list->prev, struct page, lru);
3660
3661        /*
3662         * Lock the page and put it in the cache. Since no one else
3663         * should have access to this page, we're safe to simply set
3664         * PG_locked without checking it first.
3665         */
3666        __SetPageLocked(page);
3667        rc = add_to_page_cache_locked(page, mapping,
3668                                      page->index, gfp);
3669
3670        /* give up if we can't stick it in the cache */
3671        if (rc) {
3672                __ClearPageLocked(page);
3673                return rc;
3674        }
3675
3676        /* move first page to the tmplist */
3677        *offset = (loff_t)page->index << PAGE_SHIFT;
3678        *bytes = PAGE_SIZE;
3679        *nr_pages = 1;
3680        list_move_tail(&page->lru, tmplist);
3681
3682        /* now try and add more pages onto the request */
3683        expected_index = page->index + 1;
3684        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3685                /* discontinuity ? */
3686                if (page->index != expected_index)
3687                        break;
3688
3689                /* would this page push the read over the rsize? */
3690                if (*bytes + PAGE_SIZE > rsize)
3691                        break;
3692
3693                __SetPageLocked(page);
3694                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3695                        __ClearPageLocked(page);
3696                        break;
3697                }
3698                list_move_tail(&page->lru, tmplist);
3699                (*bytes) += PAGE_SIZE;
3700                expected_index++;
3701                (*nr_pages)++;
3702        }
3703        return rc;
3704}
3705
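    /*
     * ->readpages() implementation: after letting fscache satisfy what it
     * can, repeatedly carve a contiguous, credit-limited batch of pages off
     * page_list with readpages_get_pages() and issue an async read for each
     * batch.
     */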
3706static int cifs_readpages(struct file *file, struct address_space *mapping,
3707        struct list_head *page_list, unsigned num_pages)
3708{
3709        int rc;
3710        struct list_head tmplist;
3711        struct cifsFileInfo *open_file = file->private_data;
3712        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3713        struct TCP_Server_Info *server;
3714        pid_t pid;
3715
3716        /*
3717         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3718         * immediately if the cookie is negative.
3719         *
3720         * After this point, every page in the list might have PG_fscache set,
3721         * so we will need to clean that up off of every page we don't use.
3722         */
3723        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3724                                         &num_pages);
3725        if (rc == 0)
3726                return rc;
3727
3728        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3729                pid = open_file->pid;
3730        else
3731                pid = current->tgid;
3732
3733        rc = 0;
3734        server = tlink_tcon(open_file->tlink)->ses->server;
3735
3736        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3737                 __func__, file, mapping, num_pages);
3738
3739        /*
3740         * Start with the page at end of list and move it to private
3741         * list. Do the same with any following pages until we hit
3742         * the rsize limit, hit an index discontinuity, or run out of
3743         * pages. Issue the async read and then start the loop again
3744         * until the list is empty.
3745         *
3746         * Note that list order is important. The page_list is in
3747         * the order of declining indexes. When we put the pages in
3748         * the rdata->pages, then we want them in increasing order.
3749         */
3750        while (!list_empty(page_list)) {
3751                unsigned int i, nr_pages, bytes, rsize;
3752                loff_t offset;
3753                struct page *page, *tpage;
3754                struct cifs_readdata *rdata;
3755                unsigned credits;
3756
3757                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3758                                                   &rsize, &credits);
3759                if (rc)
3760                        break;
3761
3762                /*
3763                 * Give up immediately if rsize is too small to read an entire
3764                 * page. The VFS will fall back to readpage. We should never
3765                 * reach this point however since we set ra_pages to 0 when the
3766                 * rsize is smaller than a cache page.
3767                 */
3768                if (unlikely(rsize < PAGE_SIZE)) {
3769                        add_credits_and_wake_if(server, credits, 0);
3770                        return 0;
3771                }
3772
3773                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3774                                         &nr_pages, &offset, &bytes);
3775                if (rc) {
3776                        add_credits_and_wake_if(server, credits, 0);
3777                        break;
3778                }
3779
3780                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3781                if (!rdata) {
3782                        /* best to give up if we're out of mem */
3783                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3784                                list_del(&page->lru);
3785                                lru_cache_add_file(page);
3786                                unlock_page(page);
3787                                put_page(page);
3788                        }
3789                        rc = -ENOMEM;
3790                        add_credits_and_wake_if(server, credits, 0);
3791                        break;
3792                }
3793
3794                rdata->cfile = cifsFileInfo_get(open_file);
3795                rdata->mapping = mapping;
3796                rdata->offset = offset;
3797                rdata->bytes = bytes;
3798                rdata->pid = pid;
3799                rdata->pagesz = PAGE_SIZE;
3800                rdata->read_into_pages = cifs_readpages_read_into_pages;
3801                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3802                rdata->credits = credits;
3803
3804                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3805                        list_del(&page->lru);
3806                        rdata->pages[rdata->nr_pages++] = page;
3807                }
3808
3809                if (!rdata->cfile->invalidHandle ||
3810                    !(rc = cifs_reopen_file(rdata->cfile, true)))
3811                        rc = server->ops->async_readv(rdata);
3812                if (rc) {
3813                        add_credits_and_wake_if(server, rdata->credits, 0);
3814                        for (i = 0; i < rdata->nr_pages; i++) {
3815                                page = rdata->pages[i];
3816                                lru_cache_add_file(page);
3817                                unlock_page(page);
3818                                put_page(page);
3819                        }
3820                        /* Fallback to the readpage in error/reconnect cases */
3821                        kref_put(&rdata->refcount, cifs_readdata_release);
3822                        break;
3823                }
3824
3825                kref_put(&rdata->refcount, cifs_readdata_release);
3826        }
3827
3828        /* Any pages that have been shown to fscache but didn't get added to
3829         * the pagecache must be uncached before they get returned to the
3830         * allocator.
3831         */
3832        cifs_fscache_readpages_cancel(mapping->host, page_list);
3833        return rc;
3834}
3835
3836/*
3837 * cifs_readpage_worker must be called with the page pinned
3838 */
3839static int cifs_readpage_worker(struct file *file, struct page *page,
3840        loff_t *poffset)
3841{
3842        char *read_data;
3843        int rc;
3844
3845        /* Is the page cached? */
3846        rc = cifs_readpage_from_fscache(file_inode(file), page);
3847        if (rc == 0)
3848                goto read_complete;
3849
3850        read_data = kmap(page);
3851        /* for reads over a certain size we could initiate async read-ahead */
3852
3853        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3854
3855        if (rc < 0)
3856                goto io_error;
3857        else
3858                cifs_dbg(FYI, "Bytes read %d\n", rc);
3859
3860        file_inode(file)->i_atime =
3861                current_time(file_inode(file));
3862
3863        if (PAGE_SIZE > rc)
3864                memset(read_data + rc, 0, PAGE_SIZE - rc);
3865
3866        flush_dcache_page(page);
3867        SetPageUptodate(page);
3868
3869        /* send this page to the cache */
3870        cifs_readpage_to_fscache(file_inode(file), page);
3871
3872        rc = 0;
3873
3874io_error:
3875        kunmap(page);
3876        unlock_page(page);
3877
3878read_complete:
3879        return rc;
3880}
3881
3882static int cifs_readpage(struct file *file, struct page *page)
3883{
3884        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3885        int rc = -EACCES;
3886        unsigned int xid;
3887
3888        xid = get_xid();
3889
3890        if (file->private_data == NULL) {
3891                rc = -EBADF;
3892                free_xid(xid);
3893                return rc;
3894        }
3895
3896        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3897                 page, (int)offset, (int)offset);
3898
3899        rc = cifs_readpage_worker(file, page, &offset);
3900
3901        free_xid(xid);
3902        return rc;
3903}
3904
3905static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3906{
3907        struct cifsFileInfo *open_file;
3908        struct cifs_tcon *tcon =
3909                cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3910
3911        spin_lock(&tcon->open_file_lock);
3912        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3913                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3914                        spin_unlock(&tcon->open_file_lock);
3915                        return 1;
3916                }
3917        }
3918        spin_unlock(&tcon->open_file_lock);
3919        return 0;
3920}
3921
3922/* We do not want to update the file size from the server for inodes
3923   open for write - to avoid races with writepage extending the file.
3924   In the future we could consider allowing refreshing the inode only
3925   on increases in the file size, but this is tricky to do without
3926   racing with writebehind page caching in the current Linux kernel
3927   design. */
3928bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3929{
3930        if (!cifsInode)
3931                return true;
3932
3933        if (is_inode_writable(cifsInode)) {
3934                /* This inode is open for write at least once */
3935                struct cifs_sb_info *cifs_sb;
3936
3937                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3938                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3939                        /* since there is no page cache to corrupt on
3940                           direct I/O we can change the size safely */
3941                        return true;
3942                }
3943
3944                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3945                        return true;
3946
3947                return false;
3948        } else
3949                return true;
3950}
3951
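    /*
     * ->write_begin() implementation: grab the pagecache page covering pos
     * and decide whether its existing contents need to be read in first.
     * The read is skipped for full-page writes and for oplocked pages lying
     * at or beyond EOF; otherwise the page is read once via
     * cifs_readpage_worker().
     */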
3952static int cifs_write_begin(struct file *file, struct address_space *mapping,
3953                        loff_t pos, unsigned len, unsigned flags,
3954                        struct page **pagep, void **fsdata)
3955{
3956        int oncethru = 0;
3957        pgoff_t index = pos >> PAGE_SHIFT;
3958        loff_t offset = pos & (PAGE_SIZE - 1);
3959        loff_t page_start = pos & PAGE_MASK;
3960        loff_t i_size;
3961        struct page *page;
3962        int rc = 0;
3963
3964        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3965
3966start:
3967        page = grab_cache_page_write_begin(mapping, index, flags);
3968        if (!page) {
3969                rc = -ENOMEM;
3970                goto out;
3971        }
3972
3973        if (PageUptodate(page))
3974                goto out;
3975
3976        /*
3977         * If we write a full page it will be up to date, no need to read from
3978         * the server. If the write is short, we'll end up doing a sync write
3979         * instead.
3980         */
3981        if (len == PAGE_SIZE)
3982                goto out;
3983
3984        /*
3985         * optimize away the read when we have an oplock, and we're not
3986         * expecting to use any of the data we'd be reading in. That
3987         * is, when the page lies beyond the EOF, or straddles the EOF
3988         * and the write will cover all of the existing data.
3989         */
3990        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3991                i_size = i_size_read(mapping->host);
3992                if (page_start >= i_size ||
3993                    (offset == 0 && (pos + len) >= i_size)) {
3994                        zero_user_segments(page, 0, offset,
3995                                           offset + len,
3996                                           PAGE_SIZE);
3997                        /*
3998                         * PageChecked means that the parts of the page
3999                         * to which we're not writing are considered up
4000                         * to date. Once the data is copied to the
4001                         * page, it can be set uptodate.
4002                         */
4003                        SetPageChecked(page);
4004                        goto out;
4005                }
4006        }
4007
4008        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4009                /*
4010                 * might as well read a page, it is fast enough. If we get
4011                 * an error, we don't need to return it. cifs_write_end will
4012                 * do a sync write instead since PG_uptodate isn't set.
4013                 */
4014                cifs_readpage_worker(file, page, &page_start);
4015                put_page(page);
4016                oncethru = 1;
4017                goto start;
4018        } else {
4019                /* we could try using another file handle if there is one -
4020                   but how would we lock it to prevent a close of that handle
4021                   racing with this read? In any case this will be written
4022                   out by write_end so it is fine */
4023        }
4024out:
4025        *pagep = page;
4026        return rc;
4027}
4028
4029static int cifs_release_page(struct page *page, gfp_t gfp)
4030{
4031        if (PagePrivate(page))
4032                return 0;
4033
4034        return cifs_fscache_release_page(page, gfp);
4035}
4036
4037static void cifs_invalidate_page(struct page *page, unsigned int offset,
4038                                 unsigned int length)
4039{
4040        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4041
4042        if (offset == 0 && length == PAGE_SIZE)
4043                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4044}
4045
4046static int cifs_launder_page(struct page *page)
4047{
4048        int rc = 0;
4049        loff_t range_start = page_offset(page);
4050        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4051        struct writeback_control wbc = {
4052                .sync_mode = WB_SYNC_ALL,
4053                .nr_to_write = 0,
4054                .range_start = range_start,
4055                .range_end = range_end,
4056        };
4057
4058        cifs_dbg(FYI, "Launder page: %p\n", page);
4059
4060        if (clear_page_dirty_for_io(page))
4061                rc = cifs_writepage_locked(page, &wbc);
4062
4063        cifs_fscache_invalidate_page(page, page->mapping->host);
4064        return rc;
4065}
4066
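    /*
     * Work handler for a server-initiated oplock break: wait for pending
     * writers, downgrade the cached oplock state, flush (and, when read
     * caching is lost, invalidate) the mapping, push cached byte-range locks
     * to the server, and acknowledge the break unless it was cancelled.
     */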
4067void cifs_oplock_break(struct work_struct *work)
4068{
4069        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4070                                                  oplock_break);
4071        struct inode *inode = d_inode(cfile->dentry);
4072        struct cifsInodeInfo *cinode = CIFS_I(inode);
4073        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4074        struct TCP_Server_Info *server = tcon->ses->server;
4075        int rc = 0;
4076
4077        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4078                        TASK_UNINTERRUPTIBLE);
4079
4080        server->ops->downgrade_oplock(server, cinode,
4081                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4082
4083        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4084                                                cifs_has_mand_locks(cinode)) {
4085                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4086                         inode);
4087                cinode->oplock = 0;
4088        }
4089
4090        if (inode && S_ISREG(inode->i_mode)) {
4091                if (CIFS_CACHE_READ(cinode))
4092                        break_lease(inode, O_RDONLY);
4093                else
4094                        break_lease(inode, O_WRONLY);
4095                rc = filemap_fdatawrite(inode->i_mapping);
4096                if (!CIFS_CACHE_READ(cinode)) {
4097                        rc = filemap_fdatawait(inode->i_mapping);
4098                        mapping_set_error(inode->i_mapping, rc);
4099                        cifs_zap_mapping(inode);
4100                }
4101                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4102        }
4103
4104        rc = cifs_push_locks(cfile);
4105        if (rc)
4106                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4107
4108        /*
4109         * Releasing a stale oplock after a recent reconnect of the SMB
4110         * session (using a now incorrect file handle) is not a data
4111         * integrity issue; do not bother sending an oplock release if the
4112         * session is still disconnected - the server already released it.
4113         */
4114        if (!cfile->oplock_break_cancelled) {
4115                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4116                                                             cinode);
4117                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4118        }
4119        cifs_done_oplock_break(cinode);
4120}
4121
4122/*
4123 * The presence of cifs_direct_io() in the address space ops vector
4124 * allows open() with O_DIRECT flags, which would have failed otherwise.
4125 *
4126 * In the non-cached mode (mount with cache=none), we shunt off direct
4127 * read and write requests so this method should never be called.
4128 *
4129 * Direct IO is not yet supported in the cached mode.
4130 */
4131static ssize_t
4132cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4133{
4134        /*
4135         * FIXME
4136         * Eventually need to support direct IO for non forcedirectio mounts
4137         */
4138        return -EINVAL;
4139}
4140
4141
4142const struct address_space_operations cifs_addr_ops = {
4143        .readpage = cifs_readpage,
4144        .readpages = cifs_readpages,
4145        .writepage = cifs_writepage,
4146        .writepages = cifs_writepages,
4147        .write_begin = cifs_write_begin,
4148        .write_end = cifs_write_end,
4149        .set_page_dirty = __set_page_dirty_nobuffers,
4150        .releasepage = cifs_release_page,
4151        .direct_IO = cifs_direct_io,
4152        .invalidatepage = cifs_invalidate_page,
4153        .launder_page = cifs_launder_page,
4154};
4155
4156/*
4157 * cifs_readpages requires the server to support a buffer large enough to
4158 * contain the header plus one complete page of data.  Otherwise, we need
4159 * to leave cifs_readpages out of the address space operations.
4160 */
4161const struct address_space_operations cifs_addr_ops_smallbuf = {
4162        .readpage = cifs_readpage,
4163        .writepage = cifs_writepage,
4164        .writepages = cifs_writepages,
4165        .write_begin = cifs_write_begin,
4166        .write_end = cifs_write_end,
4167        .set_page_dirty = __set_page_dirty_nobuffers,
4168        .releasepage = cifs_release_page,
4169        .invalidatepage = cifs_invalidate_page,
4170        .launder_page = cifs_launder_page,
4171};
4172