/* linux/fs/cifs/file.c — CIFS client VFS file operations */
   1/*
   2 *   fs/cifs/file.c
   3 *
   4 *   vfs operations that deal with files
   5 *
   6 *   Copyright (C) International Business Machines  Corp., 2002,2010
   7 *   Author(s): Steve French (sfrench@us.ibm.com)
   8 *              Jeremy Allison (jra@samba.org)
   9 *
  10 *   This library is free software; you can redistribute it and/or modify
  11 *   it under the terms of the GNU Lesser General Public License as published
  12 *   by the Free Software Foundation; either version 2.1 of the License, or
  13 *   (at your option) any later version.
  14 *
  15 *   This library is distributed in the hope that it will be useful,
  16 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  18 *   the GNU Lesser General Public License for more details.
  19 *
  20 *   You should have received a copy of the GNU Lesser General Public License
  21 *   along with this library; if not, write to the Free Software
  22 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23 */
  24#include <linux/fs.h>
  25#include <linux/backing-dev.h>
  26#include <linux/stat.h>
  27#include <linux/fcntl.h>
  28#include <linux/pagemap.h>
  29#include <linux/pagevec.h>
  30#include <linux/writeback.h>
  31#include <linux/task_io_accounting_ops.h>
  32#include <linux/delay.h>
  33#include <linux/mount.h>
  34#include <linux/slab.h>
  35#include <linux/swap.h>
  36#include <asm/div64.h>
  37#include "cifsfs.h"
  38#include "cifspdu.h"
  39#include "cifsglob.h"
  40#include "cifsproto.h"
  41#include "cifs_unicode.h"
  42#include "cifs_debug.h"
  43#include "cifs_fs_sb.h"
  44#include "fscache.h"
  45
  46
  47static inline int cifs_convert_flags(unsigned int flags)
  48{
  49        if ((flags & O_ACCMODE) == O_RDONLY)
  50                return GENERIC_READ;
  51        else if ((flags & O_ACCMODE) == O_WRONLY)
  52                return GENERIC_WRITE;
  53        else if ((flags & O_ACCMODE) == O_RDWR) {
  54                /* GENERIC_ALL is too much permission to request
  55                   can cause unnecessary access denied on create */
  56                /* return GENERIC_ALL; */
  57                return (GENERIC_READ | GENERIC_WRITE);
  58        }
  59
  60        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
  61                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
  62                FILE_READ_DATA);
  63}
  64
  65static u32 cifs_posix_convert_flags(unsigned int flags)
  66{
  67        u32 posix_flags = 0;
  68
  69        if ((flags & O_ACCMODE) == O_RDONLY)
  70                posix_flags = SMB_O_RDONLY;
  71        else if ((flags & O_ACCMODE) == O_WRONLY)
  72                posix_flags = SMB_O_WRONLY;
  73        else if ((flags & O_ACCMODE) == O_RDWR)
  74                posix_flags = SMB_O_RDWR;
  75
  76        if (flags & O_CREAT) {
  77                posix_flags |= SMB_O_CREAT;
  78                if (flags & O_EXCL)
  79                        posix_flags |= SMB_O_EXCL;
  80        } else if (flags & O_EXCL)
  81                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
  82                         current->comm, current->tgid);
  83
  84        if (flags & O_TRUNC)
  85                posix_flags |= SMB_O_TRUNC;
  86        /* be safe and imply O_SYNC for O_DSYNC */
  87        if (flags & O_DSYNC)
  88                posix_flags |= SMB_O_SYNC;
  89        if (flags & O_DIRECTORY)
  90                posix_flags |= SMB_O_DIRECTORY;
  91        if (flags & O_NOFOLLOW)
  92                posix_flags |= SMB_O_NOFOLLOW;
  93        if (flags & O_DIRECT)
  94                posix_flags |= SMB_O_DIRECT;
  95
  96        return posix_flags;
  97}
  98
  99static inline int cifs_get_disposition(unsigned int flags)
 100{
 101        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 102                return FILE_CREATE;
 103        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
 104                return FILE_OVERWRITE_IF;
 105        else if ((flags & O_CREAT) == O_CREAT)
 106                return FILE_OPEN_IF;
 107        else if ((flags & O_TRUNC) == O_TRUNC)
 108                return FILE_OVERWRITE;
 109        else
 110                return FILE_OPEN;
 111}
 112
/*
 * Open a file via the SMB unix extensions (CIFSPOSIXCreate) and, when
 * the caller asks for it, instantiate or refresh the VFS inode from the
 * FILE_UNIX_BASIC_INFO returned with the open response.
 *
 * @full_path: server-relative path to open
 * @pinode:    NULL if the caller does not want inode info; otherwise
 *             in/out — *pinode == NULL means allocate a new inode,
 *             non-NULL means update the existing one
 * @sb:        superblock of the mount
 * @mode:      create mode (masked with the current umask below)
 * @f_flags:   VFS open flags, translated to SMB_O_* for the wire
 * @poplock:   out: oplock level granted by the server
 * @pnetfid:   out: server file handle of the opened file
 * @xid:       transaction id for request tracking
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server sent no usable file info back */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
 173
/*
 * Open @full_path with a regular (non-posix) NT create request via the
 * server's ->open op, then refresh the inode metadata — from a separate
 * unix query on unix-extension mounts, otherwise from the FILE_ALL_INFO
 * returned with the open response.
 *
 * @oplock: in/out — oplock state requested from / granted by the server
 * @fid:    out — server file handle information
 *
 * Returns 0 on success or a negative errno; -ENOSYS when the server ops
 * table provides no open handler.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match between disposition
 *      FILE_SUPERSEDE (ie create whether or not file exists although
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh inode metadata now that the open response is in hand */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

out:
	kfree(buf);
	return rc;
}
 252
 253static bool
 254cifs_has_mand_locks(struct cifsInodeInfo *cinode)
 255{
 256        struct cifs_fid_locks *cur;
 257        bool has_locks = false;
 258
 259        down_read(&cinode->lock_sem);
 260        list_for_each_entry(cur, &cinode->llist, llist) {
 261                if (!list_empty(&cur->locks)) {
 262                        has_locks = true;
 263                        break;
 264                }
 265        }
 266        up_read(&cinode->lock_sem);
 267        return has_locks;
 268}
 269
/*
 * Allocate and initialize a cifsFileInfo for a freshly opened server
 * handle, link it into the per-inode and per-tcon open-file lists, and
 * attach it as @file->private_data.
 *
 * Returns the new cifsFileInfo, or NULL on allocation failure (the
 * caller is then responsible for closing the server handle).
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* per-fid byte-range lock list, hung off the inode's llist */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1; /* initial reference, dropped in cifsFileInfo_put */
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	/* pin the superblock while this open handle exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	/*
	 * A lease break may have updated the pending open's oplock while the
	 * open was in flight; prefer that value if one was recorded.
	 */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	/* purge_cache was cleared above, so only set_fid can have raised it */
	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
 342
/*
 * Take an extra reference on an open file instance. Wraps
 * cifsFileInfo_get_locked() with cifs_file_list_lock, so the caller
 * must not already hold that lock.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file_list_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file_list_lock);
	return cifs_file;
}
 351
 352/*
 353 * Release a reference on the file private data. This may involve closing
 354 * the filehandle out on the server. Must be called without holding
 355 * cifs_file_list_lock.
 356 */
 357void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 358{
 359        struct inode *inode = d_inode(cifs_file->dentry);
 360        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
 361        struct TCP_Server_Info *server = tcon->ses->server;
 362        struct cifsInodeInfo *cifsi = CIFS_I(inode);
 363        struct super_block *sb = inode->i_sb;
 364        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 365        struct cifsLockInfo *li, *tmp;
 366        struct cifs_fid fid;
 367        struct cifs_pending_open open;
 368        bool oplock_break_cancelled;
 369
 370        spin_lock(&cifs_file_list_lock);
 371        if (--cifs_file->count > 0) {
 372                spin_unlock(&cifs_file_list_lock);
 373                return;
 374        }
 375
 376        if (server->ops->get_lease_key)
 377                server->ops->get_lease_key(inode, &fid);
 378
 379        /* store open in pending opens to make sure we don't miss lease break */
 380        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
 381
 382        /* remove it from the lists */
 383        list_del(&cifs_file->flist);
 384        list_del(&cifs_file->tlist);
 385
 386        if (list_empty(&cifsi->openFileList)) {
 387                cifs_dbg(FYI, "closing last open instance for inode %p\n",
 388                         d_inode(cifs_file->dentry));
 389                /*
 390                 * In strict cache mode we need invalidate mapping on the last
 391                 * close  because it may cause a error when we open this file
 392                 * again and get at least level II oplock.
 393                 */
 394                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 395                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
 396                cifs_set_oplock_level(cifsi, 0);
 397        }
 398        spin_unlock(&cifs_file_list_lock);
 399
 400        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
 401
 402        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 403                struct TCP_Server_Info *server = tcon->ses->server;
 404                unsigned int xid;
 405
 406                xid = get_xid();
 407                if (server->ops->close)
 408                        server->ops->close(xid, tcon, &cifs_file->fid);
 409                _free_xid(xid);
 410        }
 411
 412        if (oplock_break_cancelled)
 413                cifs_done_oplock_break(cifsi);
 414
 415        cifs_del_pending_open(&open);
 416
 417        /*
 418         * Delete any outstanding lock records. We'll lose them when the file
 419         * is closed anyway.
 420         */
 421        down_write(&cifsi->lock_sem);
 422        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 423                list_del(&li->llist);
 424                cifs_del_lock_waiters(li);
 425                kfree(li);
 426        }
 427        list_del(&cifs_file->llist->llist);
 428        kfree(cifs_file->llist);
 429        up_write(&cifsi->lock_sem);
 430
 431        cifs_put_tlink(cifs_file->tlink);
 432        dput(cifs_file->dentry);
 433        cifs_sb_deactive(sb);
 434        kfree(cifs_file);
 435}
 436
/*
 * ->open() for regular files. Opens the file on the server — trying the
 * posix path first on unix-extension mounts, then falling back to a
 * regular NT open — and hooks the resulting handle up as
 * file->private_data via cifs_new_fileinfo().
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* O_DIRECT on a strict-cache mount switches to the uncached file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server advertised posix open but cannot do it */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* register as a pending open so a lease break cannot be missed */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and the pending-open registration */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
 563
 564static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
 565
 566/*
 567 * Try to reacquire byte range locks that were released when session
 568 * to server was lost.
 569 */
 570static int
 571cifs_relock_file(struct cifsFileInfo *cfile)
 572{
 573        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
 574        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 575        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 576        int rc = 0;
 577
 578        down_read(&cinode->lock_sem);
 579        if (cinode->can_cache_brlcks) {
 580                /* can cache locks - no need to relock */
 581                up_read(&cinode->lock_sem);
 582                return rc;
 583        }
 584
 585        if (cap_unix(tcon->ses) &&
 586            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
 587            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 588                rc = cifs_push_posix_locks(cfile);
 589        else
 590                rc = tcon->ses->server->ops->push_mand_locks(cfile);
 591
 592        up_read(&cinode->lock_sem);
 593        return rc;
 594}
 595
/*
 * Re-open a file whose server handle was invalidated (e.g. after a
 * session reconnect). Tries a posix reopen first on unix-extension
 * mounts, then a regular NT open, and finally restores the fid state
 * and re-pushes byte-range locks when a true reconnect happened.
 *
 * @can_flush: true when it is safe to flush dirty pages and refetch
 *             inode metadata from the server after the reopen.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	/* another task may have reopened the handle while we waited */
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
 744
 745int cifs_close(struct inode *inode, struct file *file)
 746{
 747        if (file->private_data != NULL) {
 748                cifsFileInfo_put(file->private_data);
 749                file->private_data = NULL;
 750        }
 751
 752        /* return code from the ->release op is always ignored */
 753        return 0;
 754}
 755
/*
 * ->release() for directories: close any uncompleted server search
 * handle, free the buffered search results, and free the private data.
 *
 * Always returns 0 — errors from closing the search handle are logged
 * and then discarded.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	/* only an unfinished search still holds a server-side handle */
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
 806
 807static struct cifsLockInfo *
 808cifs_lock_init(__u64 offset, __u64 length, __u8 type)
 809{
 810        struct cifsLockInfo *lock =
 811                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
 812        if (!lock)
 813                return lock;
 814        lock->offset = offset;
 815        lock->length = length;
 816        lock->type = type;
 817        lock->pid = current->tgid;
 818        INIT_LIST_HEAD(&lock->blist);
 819        init_waitqueue_head(&lock->block_q);
 820        return lock;
 821}
 822
 823void
 824cifs_del_lock_waiters(struct cifsLockInfo *lock)
 825{
 826        struct cifsLockInfo *li, *tmp;
 827        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
 828                list_del_init(&li->blist);
 829                wake_up(&li->block_q);
 830        }
 831}
 832
/* rw_check values: the kind of operation that triggered the conflict scan */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/*
 * Scan the per-fid lock list @fdlocks for a lock that overlaps
 * [offset, offset + length) and conflicts with a lock of @type
 * requested through @cfile.
 *
 * @rw_check : CIFS_LOCK_OP - setting/testing a lock, CIFS_READ_OP - a
 * read, CIFS_WRITE_OP - a write.
 *
 * On conflict, stores the conflicting lock in *@conf_lock (when
 * non-NULL) and returns true; returns false if no conflict is found.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges don't overlap - cannot conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared lock request doesn't conflict with another
                 * shared lock of the same type, nor with any lock held
                 * by the same owner (same tgid through the same fid).
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
 868
 869bool
 870cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 871                        __u8 type, struct cifsLockInfo **conf_lock,
 872                        int rw_check)
 873{
 874        bool rc = false;
 875        struct cifs_fid_locks *cur;
 876        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 877
 878        list_for_each_entry(cur, &cinode->llist, llist) {
 879                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
 880                                                 cfile, conf_lock, rw_check);
 881                if (rc)
 882                        break;
 883        }
 884
 885        return rc;
 886}
 887
 888/*
 889 * Check if there is another lock that prevents us to set the lock (mandatory
 890 * style). If such a lock exists, update the flock structure with its
 891 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 892 * or leave it the same if we can't. Returns 0 if we don't need to request to
 893 * the server or 1 otherwise.
 894 */
 895static int
 896cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 897               __u8 type, struct file_lock *flock)
 898{
 899        int rc = 0;
 900        struct cifsLockInfo *conf_lock;
 901        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 902        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 903        bool exist;
 904
 905        down_read(&cinode->lock_sem);
 906
 907        exist = cifs_find_lock_conflict(cfile, offset, length, type,
 908                                        &conf_lock, CIFS_LOCK_OP);
 909        if (exist) {
 910                flock->fl_start = conf_lock->offset;
 911                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
 912                flock->fl_pid = conf_lock->pid;
 913                if (conf_lock->type & server->vals->shared_lock_type)
 914                        flock->fl_type = F_RDLCK;
 915                else
 916                        flock->fl_type = F_WRLCK;
 917        } else if (!cinode->can_cache_brlcks)
 918                rc = 1;
 919        else
 920                flock->fl_type = F_UNLCK;
 921
 922        up_read(&cinode->lock_sem);
 923        return rc;
 924}
 925
 926static void
 927cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 928{
 929        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 930        down_write(&cinode->lock_sem);
 931        list_add_tail(&lock->llist, &cfile->llist->locks);
 932        up_write(&cinode->lock_sem);
 933}
 934
 935/*
 936 * Set the byte-range lock (mandatory style). Returns:
 937 * 1) 0, if we set the lock and don't need to request to the server;
 938 * 2) 1, if no locks prevent us but we need to request to the server;
 939 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
 940 */
 941static int
 942cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 943                 bool wait)
 944{
 945        struct cifsLockInfo *conf_lock;
 946        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 947        bool exist;
 948        int rc = 0;
 949
 950try_again:
 951        exist = false;
 952        down_write(&cinode->lock_sem);
 953
 954        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
 955                                        lock->type, &conf_lock, CIFS_LOCK_OP);
 956        if (!exist && cinode->can_cache_brlcks) {
 957                list_add_tail(&lock->llist, &cfile->llist->locks);
 958                up_write(&cinode->lock_sem);
 959                return rc;
 960        }
 961
 962        if (!exist)
 963                rc = 1;
 964        else if (!wait)
 965                rc = -EACCES;
 966        else {
 967                list_add_tail(&lock->blist, &conf_lock->blist);
 968                up_write(&cinode->lock_sem);
 969                rc = wait_event_interruptible(lock->block_q,
 970                                        (lock->blist.prev == &lock->blist) &&
 971                                        (lock->blist.next == &lock->blist));
 972                if (!rc)
 973                        goto try_again;
 974                down_write(&cinode->lock_sem);
 975                list_del_init(&lock->blist);
 976        }
 977
 978        up_write(&cinode->lock_sem);
 979        return rc;
 980}
 981
 982/*
 983 * Check if there is another lock that prevents us to set the lock (posix
 984 * style). If such a lock exists, update the flock structure with its
 985 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
 986 * or leave it the same if we can't. Returns 0 if we don't need to request to
 987 * the server or 1 otherwise.
 988 */
 989static int
 990cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 991{
 992        int rc = 0;
 993        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
 994        unsigned char saved_type = flock->fl_type;
 995
 996        if ((flock->fl_flags & FL_POSIX) == 0)
 997                return 1;
 998
 999        down_read(&cinode->lock_sem);
1000        posix_test_lock(file, flock);
1001
1002        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1003                flock->fl_type = saved_type;
1004                rc = 1;
1005        }
1006
1007        up_read(&cinode->lock_sem);
1008        return rc;
1009}
1010
1011/*
1012 * Set the byte-range lock (posix style). Returns:
1013 * 1) 0, if we set the lock and don't need to request to the server;
1014 * 2) 1, if we need to request to the server;
1015 * 3) <0, if the error occurs while setting the lock.
1016 */
1017static int
1018cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1019{
1020        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1021        int rc = 1;
1022
1023        if ((flock->fl_flags & FL_POSIX) == 0)
1024                return rc;
1025
1026try_again:
1027        down_write(&cinode->lock_sem);
1028        if (!cinode->can_cache_brlcks) {
1029                up_write(&cinode->lock_sem);
1030                return rc;
1031        }
1032
1033        rc = posix_lock_file(file, flock, NULL);
1034        up_write(&cinode->lock_sem);
1035        if (rc == FILE_LOCK_DEFERRED) {
1036                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1037                if (!rc)
1038                        goto try_again;
1039                posix_unblock_lock(flock);
1040        }
1041        return rc;
1042}
1043
1044int
1045cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1046{
1047        unsigned int xid;
1048        int rc = 0, stored_rc;
1049        struct cifsLockInfo *li, *tmp;
1050        struct cifs_tcon *tcon;
1051        unsigned int num, max_num, max_buf;
1052        LOCKING_ANDX_RANGE *buf, *cur;
1053        int types[] = {LOCKING_ANDX_LARGE_FILES,
1054                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1055        int i;
1056
1057        xid = get_xid();
1058        tcon = tlink_tcon(cfile->tlink);
1059
1060        /*
1061         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1062         * and check it for zero before using.
1063         */
1064        max_buf = tcon->ses->server->maxBuf;
1065        if (!max_buf) {
1066                free_xid(xid);
1067                return -EINVAL;
1068        }
1069
1070        max_num = (max_buf - sizeof(struct smb_hdr)) /
1071                                                sizeof(LOCKING_ANDX_RANGE);
1072        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1073        if (!buf) {
1074                free_xid(xid);
1075                return -ENOMEM;
1076        }
1077
1078        for (i = 0; i < 2; i++) {
1079                cur = buf;
1080                num = 0;
1081                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1082                        if (li->type != types[i])
1083                                continue;
1084                        cur->Pid = cpu_to_le16(li->pid);
1085                        cur->LengthLow = cpu_to_le32((u32)li->length);
1086                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1087                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1088                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1089                        if (++num == max_num) {
1090                                stored_rc = cifs_lockv(xid, tcon,
1091                                                       cfile->fid.netfid,
1092                                                       (__u8)li->type, 0, num,
1093                                                       buf);
1094                                if (stored_rc)
1095                                        rc = stored_rc;
1096                                cur = buf;
1097                                num = 0;
1098                        } else
1099                                cur++;
1100                }
1101
1102                if (num) {
1103                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1104                                               (__u8)types[i], 0, num, buf);
1105                        if (stored_rc)
1106                                rc = stored_rc;
1107                }
1108        }
1109
1110        kfree(buf);
1111        free_xid(xid);
1112        return rc;
1113}
1114
/*
 * Snapshot of a posix lock copied out of the VFS lock list so it can be
 * sent to the server after flc_lock is dropped (the spinlock cannot be
 * held across network calls).
 */
struct lock_to_push {
        struct list_head llist; /* entry in the locks_to_send list */
        __u64 offset;           /* start of the locked range */
        __u64 length;           /* length of the locked range */
        __u32 pid;              /* lock owner's pid */
        __u16 netfid;           /* file handle to lock through */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1123
/*
 * Replay all posix locks cached by the VFS for @cfile's inode to the
 * server. Because flc_lock cannot be held across network calls, the
 * locks are first counted, then copied into preallocated lock_to_push
 * entries under the spinlock, and only then sent.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        /* no lock context means no posix locks to push */
        if (!flctx)
                goto out;

        /* count the locks so we know how many entries to preallocate */
        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        /* walk the preallocated entries in step with the VFS lock list */
        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        /* spinlock dropped - now safe to do network calls for each lock */
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        /* allocation failed part-way - free what we already allocated */
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
1211
1212static int
1213cifs_push_locks(struct cifsFileInfo *cfile)
1214{
1215        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1216        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1217        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1218        int rc = 0;
1219
1220        /* we are going to update can_cache_brlcks here - need a write access */
1221        down_write(&cinode->lock_sem);
1222        if (!cinode->can_cache_brlcks) {
1223                up_write(&cinode->lock_sem);
1224                return rc;
1225        }
1226
1227        if (cap_unix(tcon->ses) &&
1228            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1229            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1230                rc = cifs_push_posix_locks(cfile);
1231        else
1232                rc = tcon->ses->server->ops->push_mand_locks(cfile);
1233
1234        cinode->can_cache_brlcks = false;
1235        up_write(&cinode->lock_sem);
1236        return rc;
1237}
1238
1239static void
1240cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1241                bool *wait_flag, struct TCP_Server_Info *server)
1242{
1243        if (flock->fl_flags & FL_POSIX)
1244                cifs_dbg(FYI, "Posix\n");
1245        if (flock->fl_flags & FL_FLOCK)
1246                cifs_dbg(FYI, "Flock\n");
1247        if (flock->fl_flags & FL_SLEEP) {
1248                cifs_dbg(FYI, "Blocking lock\n");
1249                *wait_flag = true;
1250        }
1251        if (flock->fl_flags & FL_ACCESS)
1252                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1253        if (flock->fl_flags & FL_LEASE)
1254                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1255        if (flock->fl_flags &
1256            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1257               FL_ACCESS | FL_LEASE | FL_CLOSE)))
1258                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1259
1260        *type = server->vals->large_lock_type;
1261        if (flock->fl_type == F_WRLCK) {
1262                cifs_dbg(FYI, "F_WRLCK\n");
1263                *type |= server->vals->exclusive_lock_type;
1264                *lock = 1;
1265        } else if (flock->fl_type == F_UNLCK) {
1266                cifs_dbg(FYI, "F_UNLCK\n");
1267                *type |= server->vals->unlock_lock_type;
1268                *unlock = 1;
1269                /* Check if unlock includes more than one lock range */
1270        } else if (flock->fl_type == F_RDLCK) {
1271                cifs_dbg(FYI, "F_RDLCK\n");
1272                *type |= server->vals->shared_lock_type;
1273                *lock = 1;
1274        } else if (flock->fl_type == F_EXLCK) {
1275                cifs_dbg(FYI, "F_EXLCK\n");
1276                *type |= server->vals->exclusive_lock_type;
1277                *lock = 1;
1278        } else if (flock->fl_type == F_SHLCK) {
1279                cifs_dbg(FYI, "F_SHLCK\n");
1280                *type |= server->vals->shared_lock_type;
1281                *lock = 1;
1282        } else
1283                cifs_dbg(FYI, "Unknown type of lock\n");
1284}
1285
/*
 * Handle F_GETLK: report whether the requested byte range could be
 * locked. Posix-capable mounts are answered from the local cache or via
 * CIFSSMBPosixLock. For mandatory locks we probe the server: try to set
 * the requested lock and, if it succeeds, immediately unlock again and
 * report F_UNLCK; otherwise @flock is updated with the blocking type.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0: answered from the local cache */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* rc == 0: answered from the local mandatory-lock cache */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                /* probe succeeded - the range is free; undo the probe lock */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        /* a shared probe failed - an exclusive lock must be blocking us */
        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive probe failed - see whether a shared lock would fit */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1353
1354void
1355cifs_move_llist(struct list_head *source, struct list_head *dest)
1356{
1357        struct list_head *li, *tmp;
1358        list_for_each_safe(li, tmp, source)
1359                list_move(li, dest);
1360}
1361
1362void
1363cifs_free_llist(struct list_head *llist)
1364{
1365        struct cifsLockInfo *li, *tmp;
1366        list_for_each_entry_safe(li, tmp, llist, llist) {
1367                cifs_del_lock_waiters(li);
1368                list_del(&li->llist);
1369                kfree(li);
1370        }
1371}
1372
1373int
1374cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1375                  unsigned int xid)
1376{
1377        int rc = 0, stored_rc;
1378        int types[] = {LOCKING_ANDX_LARGE_FILES,
1379                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1380        unsigned int i;
1381        unsigned int max_num, num, max_buf;
1382        LOCKING_ANDX_RANGE *buf, *cur;
1383        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1384        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1385        struct cifsLockInfo *li, *tmp;
1386        __u64 length = 1 + flock->fl_end - flock->fl_start;
1387        struct list_head tmp_llist;
1388
1389        INIT_LIST_HEAD(&tmp_llist);
1390
1391        /*
1392         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1393         * and check it for zero before using.
1394         */
1395        max_buf = tcon->ses->server->maxBuf;
1396        if (!max_buf)
1397                return -EINVAL;
1398
1399        max_num = (max_buf - sizeof(struct smb_hdr)) /
1400                                                sizeof(LOCKING_ANDX_RANGE);
1401        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1402        if (!buf)
1403                return -ENOMEM;
1404
1405        down_write(&cinode->lock_sem);
1406        for (i = 0; i < 2; i++) {
1407                cur = buf;
1408                num = 0;
1409                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1410                        if (flock->fl_start > li->offset ||
1411                            (flock->fl_start + length) <
1412                            (li->offset + li->length))
1413                                continue;
1414                        if (current->tgid != li->pid)
1415                                continue;
1416                        if (types[i] != li->type)
1417                                continue;
1418                        if (cinode->can_cache_brlcks) {
1419                                /*
1420                                 * We can cache brlock requests - simply remove
1421                                 * a lock from the file's list.
1422                                 */
1423                                list_del(&li->llist);
1424                                cifs_del_lock_waiters(li);
1425                                kfree(li);
1426                                continue;
1427                        }
1428                        cur->Pid = cpu_to_le16(li->pid);
1429                        cur->LengthLow = cpu_to_le32((u32)li->length);
1430                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1431                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1432                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1433                        /*
1434                         * We need to save a lock here to let us add it again to
1435                         * the file's list if the unlock range request fails on
1436                         * the server.
1437                         */
1438                        list_move(&li->llist, &tmp_llist);
1439                        if (++num == max_num) {
1440                                stored_rc = cifs_lockv(xid, tcon,
1441                                                       cfile->fid.netfid,
1442                                                       li->type, num, 0, buf);
1443                                if (stored_rc) {
1444                                        /*
1445                                         * We failed on the unlock range
1446                                         * request - add all locks from the tmp
1447                                         * list to the head of the file's list.
1448                                         */
1449                                        cifs_move_llist(&tmp_llist,
1450                                                        &cfile->llist->locks);
1451                                        rc = stored_rc;
1452                                } else
1453                                        /*
1454                                         * The unlock range request succeed -
1455                                         * free the tmp list.
1456                                         */
1457                                        cifs_free_llist(&tmp_llist);
1458                                cur = buf;
1459                                num = 0;
1460                        } else
1461                                cur++;
1462                }
1463                if (num) {
1464                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1465                                               types[i], num, 0, buf);
1466                        if (stored_rc) {
1467                                cifs_move_llist(&tmp_llist,
1468                                                &cfile->llist->locks);
1469                                rc = stored_rc;
1470                        } else
1471                                cifs_free_llist(&tmp_llist);
1472                }
1473        }
1474
1475        up_write(&cinode->lock_sem);
1476        kfree(buf);
1477        return rc;
1478}
1479
/*
 * Handle F_SETLK/F_SETLKW: set or release a byte-range lock. Posix
 * requests go through the local cache or CIFSSMBPosixLock; mandatory
 * requests are cached locally when possible (cifs_lock_add_if) and
 * otherwise sent to the server. On success, FL_POSIX requests are also
 * recorded with the local VFS via locks_lock_file_wait.
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = d_inode(cfile->dentry);

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0: cached locally; rc < 0: error - either way done */
                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      current->tgid, flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                /* rc == 0: cached locally; rc == 1: must ask the server */
                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapted locks due to
                 * pagereading.
                 */
                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
                                        CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_zap_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->oplock = 0;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        /* server refused - drop the unrecorded lock */
                        kfree(lock);
                        return rc;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        /* mirror a successful posix request into the local VFS lock table */
        if (flock->fl_flags & FL_POSIX && !rc)
                rc = locks_lock_file_wait(file, flock);
        return rc;
}
1559
/*
 * ->lock file operation entry point: decode the fcntl request and
 * dispatch it to the posix or mandatory byte-range locking paths,
 * depending on the server's Unix extensions and the mount options.
 */
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode;
        struct cifsFileInfo *cfile;
        __u16 netfid;
        __u32 type;

        rc = -EACCES;
        xid = get_xid();

        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
                 cmd, flock->fl_flags, flock->fl_type,
                 flock->fl_start, flock->fl_end);

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        /* translate the request into server lock type + lock/unlock action */
        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);

        cifs_sb = CIFS_FILE_SB(file);
        netfid = cfile->fid.netfid;
        cinode = CIFS_I(file_inode(file));

        /* posix path requires Unix extensions and no "noposixbrl" mount opt */
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;
        /*
         * BB add code here to normalize offset and length to account for
         * negative length which we can not accept over the wire.
         */
        if (IS_GETLK(cmd)) {
                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
                free_xid(xid);
                return rc;
        }

        if (!lock && !unlock) {
                /*
                 * if no lock or unlock then nothing to do since we do not
                 * know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}
1618
1619/*
1620 * update the file size (if needed) after a write. Should be called with
1621 * the inode->i_lock held
1622 */
1623void
1624cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1625                      unsigned int bytes_written)
1626{
1627        loff_t end_of_write = offset + bytes_written;
1628
1629        if (end_of_write > cifsi->server_eof)
1630                cifsi->server_eof = end_of_write;
1631}
1632
/*
 * Synchronously write @write_size bytes from @write_data to the server at
 * file position *@offset through @open_file.  Each chunk is retried on
 * -EAGAIN (e.g. after reconnect), reopening an invalidated handle first.
 * On progress, *@offset is advanced and the cached server EOF / in-core
 * i_size are updated under inode->i_lock.  Returns the number of bytes
 * written, or a negative errno if nothing at all could be written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms;

        cifs_sb = CIFS_SB(dentry->d_sb);

        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
                 write_size, *offset, dentry);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        /* cap each chunk at the server's safe retry size */
                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, &open_file->fid,
                                        &io_parms, &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        if (total_written)
                                break;
                        else {
                                /* no progress at all: report the error */
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        /* record progress: server EOF and our file position */
                        spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                spin_lock(&d_inode(dentry)->i_lock);
                if (*offset > d_inode(dentry)->i_size)
                        i_size_write(d_inode(dentry), *offset);
                spin_unlock(&d_inode(dentry)->i_lock);
        }
        mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
}
1717
/*
 * Find an open handle on this inode that is usable for reading, skipping
 * invalidated handles.  If @fsuid_only is set (only honored on multiuser
 * mounts) only handles opened by the current fsuid are considered.  Takes
 * a reference on the returned handle (release with cifsFileInfo_put());
 * returns NULL if no suitable handle exists.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_file_list_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get_locked(open_file);
                                spin_unlock(&cifs_file_list_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
                             again rather than trying to fix this handle */
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
        spin_unlock(&cifs_file_list_lock);
        return NULL;
}
1751
/*
 * Find an open handle on this inode that is usable for writing.  Valid
 * handles opened by the current task are preferred, then any valid
 * handle; as a last resort an invalidated handle is reopened (retried up
 * to MAX_REOPEN_ATT times).  A reference is taken on the returned handle
 * (release with cifsFileInfo_put()); returns NULL if no writable handle
 * can be found or reopened.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc;
        unsigned int refind = 0;

        /* Having a null inode here (because mapping->host was set to zero by
        the VFS or MM) should not happen but we had reports of on oops (due to
        it being zero) during stress testcases so we need to check for it */

        if (cifs_inode == NULL) {
                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
                dump_stack();
                return NULL;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_file_list_lock);
refind_writable:
        /* give up after too many failed reopen attempts */
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_file_list_lock);
                return NULL;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get_locked(open_file);
                                spin_unlock(&cifs_file_list_lock);
                                return open_file;
                        } else {
                                /* remember first invalid handle as fallback */
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find useable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get_locked(inv_file);
        }

        spin_unlock(&cifs_file_list_lock);

        if (inv_file) {
                /* must reopen outside the list lock; may sleep */
                rc = cifs_reopen_file(inv_file, false);
                if (!rc)
                        return inv_file;
                else {
                        spin_lock(&cifs_file_list_lock);
                        list_move_tail(&inv_file->flist,
                                        &cifs_inode->openFileList);
                        spin_unlock(&cifs_file_list_lock);
                        cifsFileInfo_put(inv_file);
                        spin_lock(&cifs_file_list_lock);
                        ++refind;
                        inv_file = NULL;
                        goto refind_writable;
                }
        }

        return NULL;
}
1832
1833static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1834{
1835        struct address_space *mapping = page->mapping;
1836        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1837        char *write_data;
1838        int rc = -EFAULT;
1839        int bytes_written = 0;
1840        struct inode *inode;
1841        struct cifsFileInfo *open_file;
1842
1843        if (!mapping || !mapping->host)
1844                return -EFAULT;
1845
1846        inode = page->mapping->host;
1847
1848        offset += (loff_t)from;
1849        write_data = kmap(page);
1850        write_data += from;
1851
1852        if ((to > PAGE_SIZE) || (from > to)) {
1853                kunmap(page);
1854                return -EIO;
1855        }
1856
1857        /* racing with truncate? */
1858        if (offset > mapping->host->i_size) {
1859                kunmap(page);
1860                return 0; /* don't care */
1861        }
1862
1863        /* check to make sure that we are not extending the file */
1864        if (mapping->host->i_size - offset < (loff_t)to)
1865                to = (unsigned)(mapping->host->i_size - offset);
1866
1867        open_file = find_writable_file(CIFS_I(mapping->host), false);
1868        if (open_file) {
1869                bytes_written = cifs_write(open_file, open_file->pid,
1870                                           write_data, to - from, &offset);
1871                cifsFileInfo_put(open_file);
1872                /* Does mm or vfs already set times? */
1873                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1874                if ((bytes_written > 0) && (offset))
1875                        rc = 0;
1876                else if (bytes_written < 0)
1877                        rc = bytes_written;
1878        } else {
1879                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1880                rc = -EIO;
1881        }
1882
1883        kunmap(page);
1884        return rc;
1885}
1886
1887static struct cifs_writedata *
1888wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1889                          pgoff_t end, pgoff_t *index,
1890                          unsigned int *found_pages)
1891{
1892        unsigned int nr_pages;
1893        struct page **pages;
1894        struct cifs_writedata *wdata;
1895
1896        wdata = cifs_writedata_alloc((unsigned int)tofind,
1897                                     cifs_writev_complete);
1898        if (!wdata)
1899                return NULL;
1900
1901        /*
1902         * find_get_pages_tag seems to return a max of 256 on each
1903         * iteration, so we must call it several times in order to
1904         * fill the array or the wsize is effectively limited to
1905         * 256 * PAGE_SIZE.
1906         */
1907        *found_pages = 0;
1908        pages = wdata->pages;
1909        do {
1910                nr_pages = find_get_pages_tag(mapping, index,
1911                                              PAGECACHE_TAG_DIRTY, tofind,
1912                                              pages);
1913                *found_pages += nr_pages;
1914                tofind -= nr_pages;
1915                pages += nr_pages;
1916        } while (nr_pages && tofind && *index <= end);
1917
1918        return wdata;
1919}
1920
/*
 * From the @found_pages candidates in wdata->pages, lock and claim a run
 * of consecutive dirty pages for writeback (setting PG_writeback and
 * clearing the dirty state).  The scan stops at the first page that can't
 * be locked, has been truncated/invalidated, is non-consecutive, lies
 * beyond the writeback range or EOF, or is already under writeback.
 * References on unused trailing pages are dropped.  Returns the number of
 * pages kept at the front of wdata->pages.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
                    struct address_space *mapping,
                    struct writeback_control *wbc,
                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
        unsigned int nr_pages = 0, i;
        struct page *page;

        for (i = 0; i < found_pages; i++) {
                page = wdata->pages[i];
                /*
                 * At this point we hold neither mapping->tree_lock nor
                 * lock on the page itself: the page may be truncated or
                 * invalidated (changing page->mapping to NULL), or even
                 * swizzled back from swapper_space to tmpfs file
                 * mapping
                 */

                /* block on the first page; only trylock the rest */
                if (nr_pages == 0)
                        lock_page(page);
                else if (!trylock_page(page))
                        break;

                if (unlikely(page->mapping != mapping)) {
                        unlock_page(page);
                        break;
                }

                if (!wbc->range_cyclic && page->index > end) {
                        *done = true;
                        unlock_page(page);
                        break;
                }

                if (*next && (page->index != *next)) {
                        /* Not next consecutive page */
                        unlock_page(page);
                        break;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) ||
                                !clear_page_dirty_for_io(page)) {
                        unlock_page(page);
                        break;
                }

                /*
                 * This actually clears the dirty bit in the radix tree.
                 * See cifs_writepage() for more commentary.
                 */
                set_page_writeback(page);
                if (page_offset(page) >= i_size_read(mapping->host)) {
                        *done = true;
                        unlock_page(page);
                        end_page_writeback(page);
                        break;
                }

                wdata->pages[i] = page;
                *next = page->index + 1;
                ++nr_pages;
        }

        /* reset index to refind any pages skipped */
        if (nr_pages == 0)
                *index = wdata->pages[0]->index + 1;

        /* put any pages we aren't going to use */
        for (i = nr_pages; i < found_pages; i++) {
                put_page(wdata->pages[i]);
                wdata->pages[i] = NULL;
        }

        return nr_pages;
}
2000
/*
 * Finish initializing @wdata (offset, page/tail sizes, byte count),
 * attach a writable file handle, and submit the asynchronous write.  All
 * pages are unlocked before returning.  Returns 0 if the write was
 * submitted, -EBADF if no writable handle could be found, or the errno
 * from the async submission.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
                 struct address_space *mapping, struct writeback_control *wbc)
{
        int rc = 0;
        struct TCP_Server_Info *server;
        unsigned int i;

        wdata->sync_mode = wbc->sync_mode;
        wdata->nr_pages = nr_pages;
        wdata->offset = page_offset(wdata->pages[0]);
        wdata->pagesz = PAGE_SIZE;
        /* last page may extend past EOF; trim the tail accordingly */
        wdata->tailsz = min(i_size_read(mapping->host) -
                        page_offset(wdata->pages[nr_pages - 1]),
                        (loff_t)PAGE_SIZE);
        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;

        if (wdata->cfile != NULL)
                cifsFileInfo_put(wdata->cfile);
        wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
        if (!wdata->cfile) {
                cifs_dbg(VFS, "No writable handles for inode\n");
                rc = -EBADF;
        } else {
                wdata->pid = wdata->cfile->pid;
                server = tlink_tcon(wdata->cfile->tlink)->ses->server;
                rc = server->ops->async_writev(wdata, cifs_writedata_release);
        }

        for (i = 0; i < nr_pages; ++i)
                unlock_page(wdata->pages[i]);

        return rc;
}
2035
/*
 * ->writepages for CIFS: gather runs of contiguous dirty pages and submit
 * them as asynchronous writes of up to wsize bytes each, honoring the
 * writeback_control range and sync mode (including cyclic wrap-around).
 * Falls back to per-page generic_writepages() when wsize is smaller than
 * a page.
 */
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
        struct TCP_Server_Info *server;
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        int rc = 0;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_SIZE)
                return generic_writepages(mapping, wbc);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                scanned = true;
        }
        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages, wsize, credits;
                pgoff_t next = 0, tofind, saved_index = index;

                /* reserve server credits sized for up to one wsize write */
                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
                                                   &wsize, &credits);
                if (rc)
                        break;

                /* min(wsize in pages, pages left in range), written to
                   avoid wrapping when end == (pgoff_t)-1 */
                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
                                                  &found_pages);
                if (!wdata) {
                        rc = -ENOMEM;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
                                               end, &index, &next, &done);

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        add_credits_and_wake_if(server, credits, 0);
                        continue;
                }

                wdata->credits = credits;

                rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        add_credits_and_wake_if(server, wdata->credits, 0);
                        for (i = 0; i < nr_pages; ++i) {
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                put_page(wdata->pages[i]);
                        }
                        if (rc != -EAGAIN)
                                mapping_set_error(mapping, rc);
                }
                kref_put(&wdata->refcount, cifs_writedata_release);

                /* for data-integrity writeback, retry the same range */
                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
                        index = saved_index;
                        continue;
                }

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        return rc;
}
2148
/*
 * Write a single locked page synchronously via cifs_partialpagewrite(),
 * setting the writeback state first and retrying on -EAGAIN when doing
 * WB_SYNC_ALL (data-integrity) writeback.  The page remains locked on
 * return; the caller is responsible for unlocking it.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
        int rc;
        unsigned int xid;

        xid = get_xid();
/* BB add check for wbc flags */
        get_page(page);
        if (!PageUptodate(page))
                cifs_dbg(FYI, "ppw - page not up to date\n");

        /*
         * Set the "writeback" flag, and clear "dirty" in the radix tree.
         *
         * A writepage() implementation always needs to do either this,
         * or re-dirty the page with "redirty_page_for_writepage()" in
         * the case of a failure.
         *
         * Just unlocking the page will cause the radix tree tag-bits
         * to fail to update with the state of the page correctly.
         */
        set_page_writeback(page);
retry_write:
        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
                goto retry_write;
        else if (rc == -EAGAIN)
                redirty_page_for_writepage(wbc, page);
        else if (rc != 0)
                SetPageError(page);
        else
                SetPageUptodate(page);
        end_page_writeback(page);
        put_page(page);
        free_xid(xid);
        return rc;
}
2187
/* ->writepage: write the locked page, then drop the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc;

        rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);
        return rc;
}
2194
/*
 * ->write_end for CIFS: finalize a buffered write of @copied bytes into
 * @page at @pos.  If the page is not fully up to date, the data is
 * written through to the server immediately via cifs_write(); otherwise
 * the page is just marked dirty for later writeback.  Extends i_size if
 * the write grew the file.  Returns the number of bytes accepted (or a
 * negative errno from the write-through path).
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
{
        int rc;
        struct inode *inode = mapping->host;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        __u32 pid;

        /* with rwpidforward, reuse the pid that opened the file */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = cfile->pid;
        else
                pid = current->tgid;

        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
                 page, pos, copied);

        if (PageChecked(page)) {
                if (copied == len)
                        SetPageUptodate(page);
                ClearPageChecked(page);
        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
                SetPageUptodate(page);

        if (!PageUptodate(page)) {
                char *page_data;
                unsigned offset = pos & (PAGE_SIZE - 1);
                unsigned int xid;

                xid = get_xid();
                /* this is probably better than directly calling
                   partialpage_write since in this function the file handle is
                   known which we might as well leverage */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);

                free_xid(xid);
        } else {
                rc = copied;
                pos += copied;
                set_page_dirty(page);
        }

        if (rc > 0) {
                spin_lock(&inode->i_lock);
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&inode->i_lock);
        }

        unlock_page(page);
        put_page(page);

        return rc;
}
2255
/*
 * Strict fsync: flush dirty pages for the range and, if we don't hold a
 * read-caching lease/oplock, also invalidate the page cache so later
 * reads go back to the server.  Finally ask the server to flush the file
 * unless the mount set CIFS_MOUNT_NOSSYNC.  Returns 0 or a negative errno.
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct inode *inode = file_inode(file);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        inode_lock(inode);

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
                 file, datasync);

        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_zap_mapping(inode);
                if (rc) {
                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
                        rc = 0; /* don't care about it in fsync */
                }
        }

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        inode_unlock(inode);
        return rc;
}
2298
/*
 * Regular fsync: flush dirty pages for the range, then ask the server to
 * flush the file unless the mount set CIFS_MOUNT_NOSSYNC.  Unlike
 * cifs_strict_fsync() this never invalidates the page cache.  Returns 0
 * or a negative errno.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
        struct inode *inode = file->f_mapping->host;

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        inode_lock(inode);

        xid = get_xid();

        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
                 file, datasync);

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        inode_unlock(inode);
        return rc;
}
2332
2333/*
2334 * As file closes, flush all cached write data for this inode checking
2335 * for write behind errors.
2336 */
2337int cifs_flush(struct file *file, fl_owner_t id)
2338{
2339        struct inode *inode = file_inode(file);
2340        int rc = 0;
2341
2342        if (file->f_mode & FMODE_WRITE)
2343                rc = filemap_write_and_wait(inode->i_mapping);
2344
2345        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2346
2347        return rc;
2348}
2349
2350static int
2351cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2352{
2353        int rc = 0;
2354        unsigned long i;
2355
2356        for (i = 0; i < num_pages; i++) {
2357                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2358                if (!pages[i]) {
2359                        /*
2360                         * save number of pages we have already allocated and
2361                         * return with ENOMEM error
2362                         */
2363                        num_pages = i;
2364                        rc = -ENOMEM;
2365                        break;
2366                }
2367        }
2368
2369        if (rc) {
2370                for (i = 0; i < num_pages; i++)
2371                        put_page(pages[i]);
2372        }
2373        return rc;
2374}
2375
2376static inline
2377size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2378{
2379        size_t num_pages;
2380        size_t clen;
2381
2382        clen = min_t(const size_t, len, wsize);
2383        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2384
2385        if (cur_len)
2386                *cur_len = clen;
2387
2388        return num_pages;
2389}
2390
2391static void
2392cifs_uncached_writedata_release(struct kref *refcount)
2393{
2394        int i;
2395        struct cifs_writedata *wdata = container_of(refcount,
2396                                        struct cifs_writedata, refcount);
2397
2398        for (i = 0; i < wdata->nr_pages; i++)
2399                put_page(wdata->pages[i]);
2400        cifs_writedata_release(refcount);
2401}
2402
/*
 * Work-queue completion handler for an uncached async write: fold the
 * written range into the cached server EOF and in-core i_size (under
 * inode->i_lock), signal any waiter, and drop the submission reference on
 * the writedata.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        struct inode *inode = d_inode(wdata->cfile->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
        if (cifsi->server_eof > inode->i_size)
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);

        complete(&wdata->done);

        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2421
/*
 * Copy up to *len bytes from @from into the pre-allocated pages of
 * @wdata.  On return, *len holds the number of bytes actually copied and
 * *num_pages the number of pages used.  Returns -EFAULT when nothing at
 * all could be copied.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		/* each page takes at most PAGE_SIZE of the request */
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	/* cur_len becomes the total number of bytes copied */
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 *
	 * NOTE(review): when every page is filled the loop exits with
	 * i == nr_pages, so *num_pages overshoots by one.  The caller in
	 * cifs_write_from_iter() only trims pages while nr_pages >
	 * num_pages, so the overshoot is harmless there -- but confirm
	 * before adding new callers.
	 */
	*num_pages = i + 1;
	return 0;
}
2463
2464static int
2465cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2466                     struct cifsFileInfo *open_file,
2467                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2468{
2469        int rc = 0;
2470        size_t cur_len;
2471        unsigned long nr_pages, num_pages, i;
2472        struct cifs_writedata *wdata;
2473        struct iov_iter saved_from;
2474        loff_t saved_offset = offset;
2475        pid_t pid;
2476        struct TCP_Server_Info *server;
2477
2478        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2479                pid = open_file->pid;
2480        else
2481                pid = current->tgid;
2482
2483        server = tlink_tcon(open_file->tlink)->ses->server;
2484        memcpy(&saved_from, from, sizeof(struct iov_iter));
2485
2486        do {
2487                unsigned int wsize, credits;
2488
2489                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2490                                                   &wsize, &credits);
2491                if (rc)
2492                        break;
2493
2494                nr_pages = get_numpages(wsize, len, &cur_len);
2495                wdata = cifs_writedata_alloc(nr_pages,
2496                                             cifs_uncached_writev_complete);
2497                if (!wdata) {
2498                        rc = -ENOMEM;
2499                        add_credits_and_wake_if(server, credits, 0);
2500                        break;
2501                }
2502
2503                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2504                if (rc) {
2505                        kfree(wdata);
2506                        add_credits_and_wake_if(server, credits, 0);
2507                        break;
2508                }
2509
2510                num_pages = nr_pages;
2511                rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2512                if (rc) {
2513                        for (i = 0; i < nr_pages; i++)
2514                                put_page(wdata->pages[i]);
2515                        kfree(wdata);
2516                        add_credits_and_wake_if(server, credits, 0);
2517                        break;
2518                }
2519
2520                /*
2521                 * Bring nr_pages down to the number of pages we actually used,
2522                 * and free any pages that we didn't use.
2523                 */
2524                for ( ; nr_pages > num_pages; nr_pages--)
2525                        put_page(wdata->pages[nr_pages - 1]);
2526
2527                wdata->sync_mode = WB_SYNC_ALL;
2528                wdata->nr_pages = nr_pages;
2529                wdata->offset = (__u64)offset;
2530                wdata->cfile = cifsFileInfo_get(open_file);
2531                wdata->pid = pid;
2532                wdata->bytes = cur_len;
2533                wdata->pagesz = PAGE_SIZE;
2534                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2535                wdata->credits = credits;
2536
2537                if (!wdata->cfile->invalidHandle ||
2538                    !cifs_reopen_file(wdata->cfile, false))
2539                        rc = server->ops->async_writev(wdata,
2540                                        cifs_uncached_writedata_release);
2541                if (rc) {
2542                        add_credits_and_wake_if(server, wdata->credits, 0);
2543                        kref_put(&wdata->refcount,
2544                                 cifs_uncached_writedata_release);
2545                        if (rc == -EAGAIN) {
2546                                memcpy(from, &saved_from,
2547                                       sizeof(struct iov_iter));
2548                                iov_iter_advance(from, offset - saved_offset);
2549                                continue;
2550                        }
2551                        break;
2552                }
2553
2554                list_add_tail(&wdata->list, wdata_list);
2555                offset += cur_len;
2556                len -= cur_len;
2557        } while (len > 0);
2558
2559        return rc;
2560}
2561
/*
 * Uncached write path: copy the user data into private pages, send it as
 * async writes via cifs_write_from_iter(), then wait for all replies in
 * offset order.  The page cache flag for the range is invalidated
 * afterwards so cached readers do not see stale data.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	struct iov_iter saved_from;
	int rc;

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	/* pristine copy of the iterator for -EAGAIN resends below */
	memcpy(&saved_from, from, sizeof(struct iov_iter));

	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
				  open_file, cifs_sb, &wdata_list);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				/* rewind the saved iterator to this piece */
				memcpy(&tmp_from, &saved_from,
				       sizeof(struct iov_iter));
				iov_iter_advance(&tmp_from,
						 wdata->offset - iocb->ki_pos);

				rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						open_file, cifs_sb, &tmp_list);

				list_splice(&tmp_list, &wdata_list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				/* restart: list membership changed under us */
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	/* nothing made it out -- report the error instead of a length */
	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	/* any cached pages for this range are now stale */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
	cifs_stats_bytes_written(tcon, total_written);
	return total_written;
}
2659
/*
 * Cached write with mandatory byte-range lock checking: fail with
 * -EACCES if another handle holds a conflicting exclusive lock over the
 * target range.
 *
 * Lock ordering as coded below: lock_sem (read) is taken before the
 * inode lock and is held across generic_write_sync(), so the lock list
 * cannot change until the sync completes.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	inode_lock(inode);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	inode_unlock(inode);

	if (rc > 0) {
		/* honor O_SYNC/O_DSYNC; a sync failure overrides the count */
		ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
		if (err < 0)
			rc = err;
	}
	up_read(&cinode->lock_sem);
	return rc;
}
2698
/*
 * Strict-cache write entry point.  Chooses the write path from the
 * oplock state:
 *  - write-caching oplock held: write through the page cache (with
 *    brlock conflict checks unless POSIX locking is in effect);
 *  - otherwise: write uncached, straight to the server.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* pairs with cifs_put_writer() below; nonzero means bail out now */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/* POSIX (unix extensions) locking => no mandatory brlocks */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (written > 0 && CIFS_CACHE_READ(cinode)) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
2746
2747static struct cifs_readdata *
2748cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2749{
2750        struct cifs_readdata *rdata;
2751
2752        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2753                        GFP_KERNEL);
2754        if (rdata != NULL) {
2755                kref_init(&rdata->refcount);
2756                INIT_LIST_HEAD(&rdata->list);
2757                init_completion(&rdata->done);
2758                INIT_WORK(&rdata->work, complete);
2759        }
2760
2761        return rdata;
2762}
2763
2764void
2765cifs_readdata_release(struct kref *refcount)
2766{
2767        struct cifs_readdata *rdata = container_of(refcount,
2768                                        struct cifs_readdata, refcount);
2769
2770        if (rdata->cfile)
2771                cifsFileInfo_put(rdata->cfile);
2772
2773        kfree(rdata);
2774}
2775
2776static int
2777cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2778{
2779        int rc = 0;
2780        struct page *page;
2781        unsigned int i;
2782
2783        for (i = 0; i < nr_pages; i++) {
2784                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2785                if (!page) {
2786                        rc = -ENOMEM;
2787                        break;
2788                }
2789                rdata->pages[i] = page;
2790        }
2791
2792        if (rc) {
2793                for (i = 0; i < nr_pages; i++) {
2794                        put_page(rdata->pages[i]);
2795                        rdata->pages[i] = NULL;
2796                }
2797        }
2798        return rc;
2799}
2800
2801static void
2802cifs_uncached_readdata_release(struct kref *refcount)
2803{
2804        struct cifs_readdata *rdata = container_of(refcount,
2805                                        struct cifs_readdata, refcount);
2806        unsigned int i;
2807
2808        for (i = 0; i < rdata->nr_pages; i++) {
2809                put_page(rdata->pages[i]);
2810                rdata->pages[i] = NULL;
2811        }
2812        cifs_readdata_release(refcount);
2813}
2814
2815/**
2816 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2817 * @rdata:      the readdata response with list of pages holding data
2818 * @iter:       destination for our data
2819 *
2820 * This function copies data from a list of pages in a readdata response into
2821 * an array of iovecs. It will first calculate where the data should go
2822 * based on the info in the readdata and then copy the data into that spot.
2823 */
2824static int
2825cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2826{
2827        size_t remaining = rdata->got_bytes;
2828        unsigned int i;
2829
2830        for (i = 0; i < rdata->nr_pages; i++) {
2831                struct page *page = rdata->pages[i];
2832                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2833                size_t written = copy_page_to_iter(page, 0, copy, iter);
2834                remaining -= written;
2835                if (written < copy && iov_iter_count(iter) > 0)
2836                        break;
2837        }
2838        return remaining ? -EFAULT : 0;
2839}
2840
/*
 * Work-queue completion for an uncached read: wake the waiter in
 * cifs_user_readv() and drop this work item's reference on rdata.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
2850
/*
 * Receive @len bytes of read-response payload from the server socket
 * into rdata's pages.  The last page with data may be partially filled;
 * its used length is recorded in rdata->tailsz and the remainder is
 * zeroed.  Pages beyond the received length are released and
 * rdata->nr_pages shrunk accordingly.  Returns the number of bytes
 * received, or a negative error when nothing was received or the
 * connection was aborted.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress except when the connection was aborted */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
2900
2901static int
2902cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2903                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2904{
2905        struct cifs_readdata *rdata;
2906        unsigned int npages, rsize, credits;
2907        size_t cur_len;
2908        int rc;
2909        pid_t pid;
2910        struct TCP_Server_Info *server;
2911
2912        server = tlink_tcon(open_file->tlink)->ses->server;
2913
2914        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2915                pid = open_file->pid;
2916        else
2917                pid = current->tgid;
2918
2919        do {
2920                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2921                                                   &rsize, &credits);
2922                if (rc)
2923                        break;
2924
2925                cur_len = min_t(const size_t, len, rsize);
2926                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2927
2928                /* allocate a readdata struct */
2929                rdata = cifs_readdata_alloc(npages,
2930                                            cifs_uncached_readv_complete);
2931                if (!rdata) {
2932                        add_credits_and_wake_if(server, credits, 0);
2933                        rc = -ENOMEM;
2934                        break;
2935                }
2936
2937                rc = cifs_read_allocate_pages(rdata, npages);
2938                if (rc)
2939                        goto error;
2940
2941                rdata->cfile = cifsFileInfo_get(open_file);
2942                rdata->nr_pages = npages;
2943                rdata->offset = offset;
2944                rdata->bytes = cur_len;
2945                rdata->pid = pid;
2946                rdata->pagesz = PAGE_SIZE;
2947                rdata->read_into_pages = cifs_uncached_read_into_pages;
2948                rdata->credits = credits;
2949
2950                if (!rdata->cfile->invalidHandle ||
2951                    !cifs_reopen_file(rdata->cfile, true))
2952                        rc = server->ops->async_readv(rdata);
2953error:
2954                if (rc) {
2955                        add_credits_and_wake_if(server, rdata->credits, 0);
2956                        kref_put(&rdata->refcount,
2957                                 cifs_uncached_readdata_release);
2958                        if (rc == -EAGAIN)
2959                                continue;
2960                        break;
2961                }
2962
2963                list_add_tail(&rdata->list, rdata_list);
2964                offset += cur_len;
2965                len -= cur_len;
2966        } while (len > 0);
2967
2968        return rc;
2969}
2970
/*
 * Uncached read path: issue async reads straight from the server into
 * private pages, wait for the replies in offset order, and copy the
 * received data into the caller's iterator.
 */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	ssize_t rc;
	size_t len;
	ssize_t total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	len = iov_iter_count(to);
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
						continue;
					}
				}

				/* reissue only the part not yet received */
				rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list);

				list_splice(&tmp_list, &rdata_list);

				kref_put(&rdata->refcount,
					 cifs_uncached_readdata_release);
				/* restart: list membership changed under us */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* bytes consumed from the iterator == bytes handed to the caller */
	total_read = len - iov_iter_count(to);

	cifs_stats_bytes_read(tcon, total_read);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3074
/*
 * Strict-cache read entry point: read uncached unless a read-caching
 * oplock is held, and check mandatory byte-range locks before going
 * through the page cache (unless POSIX locking is in effect).
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	/* default: a conflicting mandatory lock forbids the read */
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX (unix extensions) locking => no mandatory brlocks to check */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3114
/*
 * Legacy synchronous read: fetch up to @read_size bytes at *@offset into
 * @read_data in rsize-bounded pieces, retrying each piece on -EAGAIN
 * (e.g. after a reconnect/reopen).  Advances *@offset by the bytes read
 * and returns the total, or a negative error when nothing was read.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	/* forward the originating pid to the server if mount requested it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries the same piece while rc == -EAGAIN */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			/*
			 * NOTE(review): this passes the running total each
			 * iteration rather than the bytes_read increment, so
			 * multi-piece reads appear over-counted in the stats.
			 * Confirm against cifs_stats_bytes_read() semantics.
			 */
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3205
3206/*
3207 * If the page is mmap'ed into a process' page tables, then we need to make
3208 * sure that it doesn't change while being written back.
3209 */
3210static int
3211cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3212{
3213        struct page *page = vmf->page;
3214
3215        lock_page(page);
3216        return VM_FAULT_LOCKED;
3217}
3218
/*
 * VM operations for CIFS mmap'ed files: generic filemap fault paths plus
 * a page_mkwrite hook that holds the page lock while it is written back.
 */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
3224
3225int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3226{
3227        int rc, xid;
3228        struct inode *inode = file_inode(file);
3229
3230        xid = get_xid();
3231
3232        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3233                rc = cifs_zap_mapping(inode);
3234                if (rc)
3235                        return rc;
3236        }
3237
3238        rc = generic_file_mmap(file, vma);
3239        if (rc == 0)
3240                vma->vm_ops = &cifs_file_vm_ops;
3241        free_xid(xid);
3242        return rc;
3243}
3244
3245int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3246{
3247        int rc, xid;
3248
3249        xid = get_xid();
3250        rc = cifs_revalidate_file(file);
3251        if (rc) {
3252                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3253                         rc);
3254                free_xid(xid);
3255                return rc;
3256        }
3257        rc = generic_file_mmap(file, vma);
3258        if (rc == 0)
3259                vma->vm_ops = &cifs_file_vm_ops;
3260        free_xid(xid);
3261        return rc;
3262}
3263
3264static void
3265cifs_readv_complete(struct work_struct *work)
3266{
3267        unsigned int i, got_bytes;
3268        struct cifs_readdata *rdata = container_of(work,
3269                                                struct cifs_readdata, work);
3270
3271        got_bytes = rdata->got_bytes;
3272        for (i = 0; i < rdata->nr_pages; i++) {
3273                struct page *page = rdata->pages[i];
3274
3275                lru_cache_add_file(page);
3276
3277                if (rdata->result == 0 ||
3278                    (rdata->result == -EAGAIN && got_bytes)) {
3279                        flush_dcache_page(page);
3280                        SetPageUptodate(page);
3281                }
3282
3283                unlock_page(page);
3284
3285                if (rdata->result == 0 ||
3286                    (rdata->result == -EAGAIN && got_bytes))
3287                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3288
3289                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3290
3291                put_page(page);
3292                rdata->pages[i] = NULL;
3293        }
3294        kref_put(&rdata->refcount, cifs_readdata_release);
3295}
3296
/*
 * Receive up to @len bytes from the server socket into the pages attached
 * to @rdata, one page at a time. A final partial page is zero-filled past
 * the received data and its valid length recorded in rdata->tailsz.
 * Pages for which no data will arrive are released from the request here
 * (either zero-filled and marked uptodate, or simply dropped).
 *
 * Returns rdata->got_bytes when any data was received — unless the
 * connection was aborted — otherwise the socket-read error code.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
				 i, page->index, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len,
				'\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		/* page stays kmap'ed for the duration of the socket read */
		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection died under us */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3374
/*
 * Peel a run of index-contiguous pages off the tail of @page_list (which
 * is ordered by descending index) onto @tmplist, inserting each into the
 * page cache locked. The run ends at an index discontinuity, when adding
 * another page would push the request past @rsize, or when @page_list is
 * exhausted. On return, *offset, *bytes and *nr_pages describe the single
 * contiguous read to issue for the pages on @tmplist.
 *
 * Returns 0 on success, or the add_to_page_cache_locked() error when even
 * the first page cannot be inserted. Pages that fail insertion later in
 * the run are simply left on @page_list for a subsequent call.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);

	INIT_LIST_HEAD(tmplist);

	/* list is in declining index order, so the tail is the lowest index */
	page = list_entry(page_list->prev, struct page, lru);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
3433
/*
 * ->readpages() for CIFS: serve what we can from fscache, then batch the
 * remaining VFS-supplied pages (descending index order) into contiguous,
 * rsize-bounded chunks and issue one async read per chunk. Completion is
 * handled by cifs_readv_complete().
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	/* fscache returned nonzero; fall through to reading from the server */
	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		unsigned credits;

		/* blocks until the server grants credits for up to rsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			return 0;
		}

		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (rc) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->credits = credits;

		/* tmplist is ascending by index, so pages[] ends up sorted */
		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		if (!rdata->cfile->invalidHandle ||
		    !cifs_reopen_file(rdata->cfile, true))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* release our reference now that the read has been issued */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	return rc;
}
3562
/*
 * cifs_readpage_worker must be called with the page pinned.
 *
 * Fill @page either from fscache or with a synchronous read from the
 * server at *poffset, zeroing any tail past what was read, then mark it
 * uptodate, send it to fscache and unlock it. On an fscache hit the page
 * is returned without being unlocked here (NOTE(review): assumed to be
 * handled by the fscache completion path — confirm against
 * cifs_readpage_from_fscache()).
 *
 * Returns 0 on success or a negative errno from cifs_read().
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	/* reading counts as an access for atime purposes */
	file_inode(file)->i_atime =
		current_fs_time(file_inode(file)->i_sb);

	/* zero the tail of a short read so stale data never leaks */
	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
3608
3609static int cifs_readpage(struct file *file, struct page *page)
3610{
3611        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3612        int rc = -EACCES;
3613        unsigned int xid;
3614
3615        xid = get_xid();
3616
3617        if (file->private_data == NULL) {
3618                rc = -EBADF;
3619                free_xid(xid);
3620                return rc;
3621        }
3622
3623        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3624                 page, (int)offset, (int)offset);
3625
3626        rc = cifs_readpage_worker(file, page, &offset);
3627
3628        free_xid(xid);
3629        return rc;
3630}
3631
3632static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3633{
3634        struct cifsFileInfo *open_file;
3635
3636        spin_lock(&cifs_file_list_lock);
3637        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3638                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3639                        spin_unlock(&cifs_file_list_lock);
3640                        return 1;
3641                }
3642        }
3643        spin_unlock(&cifs_file_list_lock);
3644        return 0;
3645}
3646
3647/* We do not want to update the file size from server for inodes
3648   open for write - to avoid races with writepage extending
3649   the file - in the future we could consider allowing
3650   refreshing the inode only on increases in the file size
3651   but this is tricky to do without racing with writebehind
3652   page caching in the current Linux kernel design */
3653bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3654{
3655        if (!cifsInode)
3656                return true;
3657
3658        if (is_inode_writable(cifsInode)) {
3659                /* This inode is open for write at least once */
3660                struct cifs_sb_info *cifs_sb;
3661
3662                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3663                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3664                        /* since no page cache to corrupt on directio
3665                        we can change size safely */
3666                        return true;
3667                }
3668
3669                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3670                        return true;
3671
3672                return false;
3673        } else
3674                return true;
3675}
3676
/*
 * ->write_begin() for CIFS: grab (and lock) the target page for a write
 * of @len bytes at @pos. For short writes the page is brought uptodate
 * first — by zeroing the untouched parts when we hold a read oplock and
 * the write covers/extends EOF, or by reading it in (attempted at most
 * once, tracked by @oncethru) when the file is readable. If neither
 * applies, cifs_write_end will fall back to a sync write.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int oncethru = 0; /* set once the read-in fallback has been tried */
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
3753
3754static int cifs_release_page(struct page *page, gfp_t gfp)
3755{
3756        if (PagePrivate(page))
3757                return 0;
3758
3759        return cifs_fscache_release_page(page, gfp);
3760}
3761
3762static void cifs_invalidate_page(struct page *page, unsigned int offset,
3763                                 unsigned int length)
3764{
3765        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3766
3767        if (offset == 0 && length == PAGE_SIZE)
3768                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3769}
3770
3771static int cifs_launder_page(struct page *page)
3772{
3773        int rc = 0;
3774        loff_t range_start = page_offset(page);
3775        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3776        struct writeback_control wbc = {
3777                .sync_mode = WB_SYNC_ALL,
3778                .nr_to_write = 0,
3779                .range_start = range_start,
3780                .range_end = range_end,
3781        };
3782
3783        cifs_dbg(FYI, "Launder page: %p\n", page);
3784
3785        if (clear_page_dirty_for_io(page))
3786                rc = cifs_writepage_locked(page, &wbc);
3787
3788        cifs_fscache_invalidate_page(page, page->mapping->host);
3789        return rc;
3790}
3791
/*
 * Worker that services an oplock break from the server: wait out any
 * in-flight writers, downgrade the cached oplock state, flush (and, when
 * read caching is lost, invalidate) the page cache, re-push byte-range
 * locks, and finally acknowledge the break to the server.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;

	/* let pending writers drain before changing oplock state */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode,
		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

	/* mandatory locks cannot be cached, so drop the oplock entirely */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode)) {
			/* read caching lost: wait for writeback, then zap */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	cifs_done_oplock_break(cinode);
}
3846
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests before they ever reach this method, so it
 * should never be called there.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts
	 */
	return -EINVAL;
}
3865
3866
/* Address space operations used for normal (large-buffer) mounts. */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3880
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 * (This table is identical to cifs_addr_ops minus .readpages and
 * .direct_IO.)
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
3897